Merge pull request #296 from Smithsonian/predict_sexagesimal

awilson110 · web-flow · commit 1a675f04207a · 2025-10-29T14:18:36.000Z
Sexagesimal Output
diff --git a/.gitignore b/.gitignore
@@ -58,6 +58,7 @@ coverage.xml
 
 # Django stuff:
 *.log
+*.err
 local_settings.py
 db.sqlite3
 db.sqlite3-journal
diff --git a/src/layup/predict.py b/src/layup/predict.py
@@ -55,6 +55,40 @@ def _get_result_dtypes(primary_id_column_name: str):
     )
 
 
+def _convert_to_sg(data):
+    """This function appends two columns of the RA and Dec in sexagesimal to the input array.
+
+    Parameters
+    ----------
+    data : numpy structured array
+        The data to be processed.
+
+    Returns
+    -------
+    The input array with RA and Dec in sexagesimal appended as the columns ra_str_hms and dec_str_dms respectively.
+    """
+    ra_deg = (data["ra_deg"] / 15) % 24  # Convert RA from degrees to hours and wrap into [0, 24)
+    ra_h = ra_deg.astype(int)
+    dec_deg = data["dec_deg"]
+    dec_d = dec_deg.astype(int)
+    ra_decimal = (ra_deg % 1) * 60
+    ra_m = ra_decimal.astype(int)
+    dec_decimal = (np.abs(dec_deg) % 1) * 60
+    dec_m = dec_decimal.astype(int)
+    ra_s = (ra_decimal % 1) * 60  # Take fractional part again: seconds of time (RA), arcseconds (Dec)
+    dec_s = (dec_decimal % 1) * 60
+
+    ra = np.empty(len(ra_h), dtype="<U16")
+    dec = np.empty(len(ra_h), dtype="<U16")
+
+    for i in range(len(ra_h)):
+
+        ra[i] = f"{ra_h[i]:02} {ra_m[i]:02} {ra_s[i]:05.2f}"  # Same format as
+        dec[i] = f"{dec_d[i]:+03} {dec_m[i]:02} {dec_s[i]:04.1f}"  # JPL Horizons
+
+    return np.lib.recfunctions.append_fields(data, ["ra_str_hms", "dec_str_dms"], [ra, dec], usemask=False)
+
+
 def _predict(data, obs_pos_vel, times, cache_dir, primary_id_column_name):
     """This function is called by the parallelization function to call the C++ code.
 
@@ -244,4 +278,8 @@ def predict_cli(
         )
 
         if len(predictions) > 0:
-            write_csv(predictions, output_file)
+            if cli_args.sexagesimal:
+                predictions = _convert_to_sg(predictions)
+                write_csv(predictions, output_file, move_columns={"ra_str_hms": 3, "dec_str_dms": 4})
+            else:
+                write_csv(predictions, output_file)
diff --git a/src/layup/utilities/file_io/file_output.py b/src/layup/utilities/file_io/file_output.py
@@ -5,7 +5,7 @@
 import pandas as pd
 
 
-def write_csv(data, filepath):
+def write_csv(data, filepath, move_columns=None):
     """Write a numpy structured array to a CSV file.
 
     Parameters
@@ -14,8 +14,26 @@ def write_csv(data, filepath):
         The data to write to the file.
     filepath : str
         The path to the file to write.
+    move_columns : dict, optional
+        Dict mapping each column name that needs to be moved to its new position.
     """
     df = pd.DataFrame(data)
+
+    if move_columns != None:
+        column_names = list(df.columns.values)
+        for col in move_columns.keys():
+            if abs(move_columns[col]) > len(column_names):
+                raise IndexError(
+                    f"Column position is outside of range. Must be between +-{len(column_names)}"
+                )
+            try:
+                column_names.pop(column_names.index((col)))
+                column_names.insert(move_columns[col], col)
+            except:
+                raise ValueError(f"column {col} not found in df.columns.values.")
+
+        df = df.reindex(columns=column_names)
+
     if os.path.exists(filepath):
         df.to_csv(filepath, mode="a", header=False, index=False)
     else:
diff --git a/src/layup_cmdline/predict.py b/src/layup_cmdline/predict.py
@@ -175,6 +175,13 @@ def main():
         required=False,
     )
 
+    optional.add_argument(
+        "-sg",
+        "--sexagesimal",
+        action="store_true",
+        help="Flag to add RA and Dec in sexagesimal format to the output.",
+    )
+
     args = parser.parse_args()
 
     return execute(args)
diff --git a/tests/data/holman_expected_predict_sg.csv b/tests/data/holman_expected_predict_sg.csv
diff --git a/tests/data/known_sexagesimal.csv b/tests/data/known_sexagesimal.csv
@@ -0,0 +1,7 @@
+provID,ra_deg, dec_deg, ra_str_hms_CHECK, dec_str_dms_CHECK
+1,30.35697564, 30.937654376,02 01 25.67,+30 56 15.6
+2,352.52968317256614,-4.613618011505283,23 30 07.12,-04 36 49.0
+3,-300., 3.5487537,04 00 00.00,+03 32 55.5
+4,0.5, 0.5,00 02 00.00,+00 30 00.0
+5,0., 0.,00 00 00.00,+00 00 00.0
+6,0.005, 0.005,00 00 01.20,+00 00 18.0
diff --git a/tests/layup/test_file_output.py b/tests/layup/test_file_output.py
@@ -1,5 +1,6 @@
 import numpy as np
 from numpy.testing import assert_equal
+from unittest import TestCase
 import tempfile
 
 from layup.utilities.file_io.CSVReader import CSVDataReader
@@ -49,6 +50,29 @@ def test_write_csv(tmpdir):
     assert_equal(appended_data[2:4], data[3:5])
 
 
+def test_write_csv_move_columns(tmpdir):
+    # Read a test CSV file into a numpy structured array.
+    csv_reader = CSVDataReader(get_test_filepath("CART.csv"))
+    data = csv_reader.read_rows()
+
+    # Get a temp filepath to use in the function
+    temp_filepath = os.path.join(tmpdir, "test_output.csv")
+
+    # Pass a nonexistent column, then an out-of-range position, to check write_csv raises ValueError and IndexError respectively
+    TestCase().assertRaises(ValueError, write_csv, data, temp_filepath, move_columns={"fake_col": 0})
+    TestCase().assertRaises(IndexError, write_csv, data, temp_filepath, move_columns={"x": 20})
+
+    # Write to temp filepath with swapped columns
+    write_csv(data, temp_filepath, move_columns={"x": 0, "y": 1, "z": 2})
+
+    # Read the data back in and check the columns have been swapped
+    csv_reader2 = CSVDataReader(temp_filepath)
+    data2 = csv_reader2.read_rows()
+    assert_equal(
+        data2.dtype.names, ["x", "y", "z", "ObjID", "FORMAT", "xdot", "ydot", "zdot", "epochMJD_TDB"]
+    )
+
+
 def test_write_empty_hdf5(tmpdir):
     # Write an empty numpy structured array to a temporary HDF5 file.
     data = np.array([], dtype=[("ObjID", "<U7"), ("FORMAT", "<U4")])
diff --git a/tests/layup/test_predict.py b/tests/layup/test_predict.py
@@ -4,7 +4,7 @@
 import pytest
 from numpy.testing import assert_equal
 
-from layup.predict import predict, predict_cli
+from layup.predict import predict, predict_cli, _convert_to_sg
 from layup.utilities.data_utilities_for_tests import get_test_filepath
 from layup.utilities.file_io.CSVReader import CSVDataReader
 
@@ -39,6 +39,7 @@ def __init__(self, g=None):
             self.n = 1
             self.chunk = chunk_size
             self.station = "X05"
+            self.sexagesimal = False
 
     # The naming scheme for the test files indicates its orbit format
     test_filename = f"predict_chunk_{input_format}.csv"
@@ -151,3 +152,52 @@ def test_predict_output(tmpdir):
     # assert np.allclose(output_data["obs_cov1"], known_data["obs_cov1"])
     # assert np.allclose(output_data["obs_cov2"], known_data["obs_cov2"])
     # assert np.allclose(output_data["obs_cov3"], known_data["obs_cov3"])
+
+    # Testing the output of the sexagesimal conversion separately
+
+    result = subprocess.run(
+        [
+            "layup",
+            "predict",
+            str(input_file),
+            "-f",
+            "-o",
+            str(temp_out_file),
+            "-s",
+            start,
+            "-sg",
+        ]
+    )
+
+    assert result.returncode == 0
+
+    result_file = Path(f"{tmpdir}/{temp_out_file}.csv")
+    assert result_file.exists
+
+    # Create a new CSV reader to read in our output file
+    output_csv_reader = CSVDataReader(str(result_file), "csv", primary_id_column_name="provID")
+    output_data = output_csv_reader.read_rows()
+
+    # Read in the known output
+    known_output_file = get_test_filepath("holman_expected_predict_sg.csv")
+    known_output_csv_reader = CSVDataReader(known_output_file, "csv", primary_id_column_name="provID")
+    known_data = known_output_csv_reader.read_rows()
+
+    assert (output_data["ra_str_hms"] == known_data["ra_str_hms"]).all() == True
+    assert (output_data["dec_str_dms"] == known_data["dec_str_dms"]).all() == True
+
+    # Check the columns have been swapped too
+    assert (known_data.dtype.names == output_data.dtype.names) == True
+
+
+def test_convert_to_sg(tmpdir):
+    """Compare the output given by _convert_to_sg() with an expected output, seeing how it handles edge cases."""
+
+    data = CSVDataReader(
+        get_test_filepath("known_sexagesimal.csv"), "csv", primary_id_column_name="provID"
+    ).read_rows()
+
+    data = _convert_to_sg(data)
+
+    assert (data["ra_str_hms"] == data["ra_str_hms_CHECK"]).all() == True
+    assert (data["dec_str_dms"] == data["dec_str_dms_CHECK"]).all() == True