Commit 12c5a9d

Merge pull request #26 from GeoscienceAustralia/NPI-3331-updates-for-new-py-packages

NPI-3331 - Update to gnssanalysis for new packages
ronaldmaj authored May 30, 2024
2 parents 01730d1 + 502b322 commit 12c5a9d
Showing 3 changed files with 54 additions and 52 deletions.
7 changes: 5 additions & 2 deletions gnssanalysis/gn_io/igslog.py
@@ -1,4 +1,5 @@
 """IGS log files parser"""
+
 import glob as _glob
 import re as _re
 from multiprocessing import Pool as _Pool
@@ -297,7 +298,9 @@ def gather_metadata(logs_glob_path="/data/station_logs/station_logs_IGS/*/*.log"
     xyz_norm = (xyz_array[valid_mask] ** 2).sum(axis=1) ** 0.5
     valid_mask[valid_mask] = (xyz_norm > 6000000) & (xyz_norm < 6500000)

-    llh = _gn_transform.xyz2llh(xyz_array[valid_mask], method="heikkinen", ellipsoid=_gn_const.WGS84, latlon_as_deg=True)
+    llh = _gn_transform.xyz2llh(
+        xyz_array[valid_mask], method="heikkinen", ellipsoid=_gn_const.WGS84, latlon_as_deg=True
+    )
     llh_snx = _gn_io.sinex.llh2snxdms(llh)

     llh2 = id_loc_df[~valid_mask][["LAT", "LON", "HEI"]]
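The hunk above only passes coordinates with a plausible Earth-surface radius (6,000–6,500 km) on to the lat/lon/height conversion. A standalone sketch of that sanity check, using plain numpy rather than gnssanalysis' own `xyz2llh` helper (station coordinates here are illustrative):

```python
# Sketch of the radius sanity check above, in plain numpy.
import numpy as np

xyz = np.array([
    [-4052052.7, 4212836.0, -2545104.6],  # plausible station: norm ~6.37e6 m
    [0.0, 0.0, 0.0],                      # bogus entry: norm far too small
])
norm = np.sqrt((xyz ** 2).sum(axis=1))
valid = (norm > 6_000_000) & (norm < 6_500_000)
print(valid)  # [ True False] -- only plausible coordinates go on to xyz2llh
```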
@@ -435,7 +438,7 @@ def meta2sting(id_loc_df, rec_df, ant_df):
         + " "
         + rec_df.END_SNX
         + " "
-        + +rec_df.RECEIVER.str.ljust(20, " ")
+        + rec_df.RECEIVER.str.ljust(20, " ")
         + " "
         + rec_df["S/N"].str.ljust(5, " ")
         + " "
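This fix removes a stray unary `+` that was being applied to the receiver Series before string concatenation. A hypothetical reproduction of the failure (on recent pandas, unary plus on an object-dtype Series raises a TypeError):

```python
# Why the stray unary "+" mattered: a minimal, hypothetical reproduction.
import pandas as pd

rec = pd.Series(["LEICA GR50", "SEPT POLARX5"])
print(" " + rec.str.ljust(20, " "))  # fine: string concatenation

# " " + +rec.str.ljust(20, " ") first applies unary + to an object-dtype
# Series, which recent pandas rejects outright.
try:
    _ = " " + +rec.str.ljust(20, " ")
except TypeError as exc:
    print(exc)  # e.g. "Unary plus expects numeric dtype, not object"
```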
4 changes: 2 additions & 2 deletions gnssanalysis/gn_io/nanu.py
@@ -99,7 +99,7 @@ def get_bad_sv_from_nanu_df(nanu_df: _pd.DataFrame, datetime: _Union[_np.datetim
         "UNUSABLE START TIME ZULU",
         "UNUSABLE START CALENDAR DATE",
         "LAUNCH JDAY",
-        "LAUNCH TIME ZULU"
+        "LAUNCH TIME ZULU",
         # 'DECOMMISSIONING TIME ZULU', 'DECOMMISSIONING CALENDAR DATE',
         # 'DECOMMISSIONING START TIME ZULU',
         # 'DECOMMISSIONING START CALENDAR DATE'
@@ -123,7 +123,7 @@ def get_bad_sv_from_nanu_df(nanu_df: _pd.DataFrame, datetime: _Union[_np.datetim
     hhmm = np_time[na_time_mask].astype("U4").view("<U2").reshape(-1, 2)

     nd = _np.ndarray(shape=np_time.shape, dtype="timedelta64[s]")
-    nd.fill(_np.nan)
+    nd.fill(_np.timedelta64("nat"))
     nd[na_time_mask] = hhmm[:, 0].astype("timedelta64[h]") + hhmm[:, 1].astype("timedelta64[m]")

     dt_df = _pd.concat([df.drop(labels=columns_date + columns_time, axis=1), dates + nd], axis=1)
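The change here swaps a float NaN fill for an explicit NaT. On older NumPy the float NaN happened to cast to the timedelta64 NaT sentinel, but newer NumPy warns or errors on that implicit cast, so `np.timedelta64("nat")` is the portable spelling:

```python
# Filling a timedelta64 array with "no data": the explicit NaT spelling is
# portable across NumPy versions, whereas filling with a float NaN relies on
# an implicit (and now discouraged) float -> timedelta64 cast.
import numpy as np

nd = np.ndarray(shape=(3,), dtype="timedelta64[s]")
nd.fill(np.timedelta64("nat"))
print(nd)            # [NaT NaT NaT]
print(np.isnat(nd))  # [ True  True  True]
```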
95 changes: 47 additions & 48 deletions gnssanalysis/gn_io/sp3.py
@@ -36,11 +36,11 @@
 _RE_SP3_HEAD_FDESCR = _re.compile(rb"\%c[ ]+(\w{1})[ ]+cc[ ](\w{3})")

 # Nodata ie NaN constants for SP3 format
-SP3_CLOCK_NODATA_STRING = " 999999.999999" # Not used for reading, as fractional components are optional
+SP3_CLOCK_NODATA_STRING = " 999999.999999"  # Not used for reading, as fractional components are optional
 SP3_CLOCK_NODATA_NUMERIC = 999999
-SP3_POS_NODATA_STRING = " 0.000000"
-SP3_CLOCK_STD_NODATA = -1000
-SP3_POS_STD_NODATA = -100
+SP3_POS_NODATA_STRING = " 0.000000"
+SP3_CLOCK_STD_NODATA = -1000
+SP3_POS_STD_NODATA = -100


 def sp3_clock_nodata_to_nan(sp3_df):
@@ -159,15 +159,15 @@ def read_sp3(sp3_path, pOnly=True):
     sp3_df.attrs["path"] = sp3_path

     # Check for duplicate epochs, dedupe and log warning
-    if sp3_df.index.has_duplicates: # a literaly free check
-        duplicated_indexes = sp3_df.index.duplicated() # Typically sub ms time. Marks all but first instance as duped.
+    if sp3_df.index.has_duplicates:  # a literaly free check
+        duplicated_indexes = sp3_df.index.duplicated()  # Typically sub ms time. Marks all but first instance as duped.
         first_dupe = sp3_df.index.get_level_values(0)[duplicated_indexes][0]
         logging.warning(
             f"Duplicate epoch(s) found in SP3 ({duplicated_indexes.sum()} additional entries, potentially non-unique). "
             f"First duplicate (as J2000): {first_dupe} (as date): {first_dupe + gn_const.J2000_ORIGIN} "
             f"SP3 path is: '{str(sp3_path)}'. Duplicates will be removed, keeping first."
         )
-        sp3_df = sp3_df[~sp3_df.index.duplicated(keep="first")] # Now dedupe them, keeping the first of any clashes
+        sp3_df = sp3_df[~sp3_df.index.duplicated(keep="first")]  # Now dedupe them, keeping the first of any clashes

     return sp3_df
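The dedupe logic above in isolation, on a toy two-level index (J2000 seconds, PRN) mirroring the shape of the SP3 DataFrame: `index.has_duplicates` is a cheap (cached) check, and `duplicated()` marks every repeat after the first.

```python
# The dedupe pattern on a toy DataFrame with a (J2000, PRN) MultiIndex.
import pandas as pd

df = pd.DataFrame(
    {"CLK": [1.0, 2.0, 3.0]},
    index=pd.MultiIndex.from_tuples(
        [(0, "G01"), (0, "G01"), (900, "G01")], names=["J2000", "PRN"]
    ),
)
if df.index.has_duplicates:
    df = df[~df.index.duplicated(keep="first")]  # keep first of each clash
print(len(df))  # 2
```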

@@ -295,9 +295,7 @@ def gen_sp3_header(sp3_df):
     return "".join(line1 + line2 + sats_header.tolist() + sv_orb_head.tolist() + head_c + head_fi + comment)


-def gen_sp3_content(
-    sp3_df: _pd.DataFrame, sort_outputs: bool = False, buf: Union[None, _io.TextIOBase] = None
-):
+def gen_sp3_content(sp3_df: _pd.DataFrame, sort_outputs: bool = False, buf: Union[None, _io.TextIOBase] = None):
     """
     Organises, formats (including nodata values), then writes out SP3 content to a buffer if provided, or returns
     it otherwise.
@@ -316,25 +314,27 @@ def gen_sp3_content(
     clk_base = 1.025

     def pos_log(x):
-        return _np.minimum( # Cap value at 99
-            _np.nan_to_num( # If there is data, use the following formula. Else return NODATA value.
-                _np.rint(_np.log(x) / _np.log(pos_base)), # Rounded to nearest int
-                nan=SP3_POS_STD_NODATA
-            ),
-            99
-        ).astype(int)
+        return _np.minimum(  # Cap value at 99
+            _np.nan_to_num(  # If there is data, use the following formula. Else return NODATA value.
+                _np.rint(_np.log(x) / _np.log(pos_base)), nan=SP3_POS_STD_NODATA  # Rounded to nearest int
+            ),
+            99,
+        ).astype(int)

     def clk_log(x):
         return _np.minimum(
-            _np.nan_to_num(_np.rint(_np.log(x) / _np.log(clk_base)), nan=SP3_CLOCK_STD_NODATA),
-            999 # Cap at 999
-        ).astype(int)
-
-    std_df = (
-        sp3_df["STD"]
-        .transform({"X": pos_log, "Y": pos_log, "Z": pos_log, "CLK": clk_log})
-        .rename(columns=lambda x: "STD" + x)
-    )
+            _np.nan_to_num(_np.rint(_np.log(x) / _np.log(clk_base)), nan=SP3_CLOCK_STD_NODATA), 999  # Cap at 999
+        ).astype(int)
+
+    std_df = sp3_df["STD"]
+    # Remove attribute data from this shallow copy of the Dataframe.
+    # This works around an apparent bug in Pandas, due to the fact calling == on two Series produces a list
+    # of element-wise comparisons, rather than a single boolean value. This list seems to break Pandas
+    # concat() when it is invoked within transform() and tries to check if attributes match between columns
+    # being concatenated.
+    std_df.attrs = {}
+    std_df = std_df.transform({"X": pos_log, "Y": pos_log, "Z": pos_log, "CLK": clk_log})
+    std_df = std_df.rename(columns=lambda x: "STD_" + x)
     out_df = _pd.concat([out_df, std_df], axis="columns")
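What `pos_log` computes, in isolation: SP3 stores each standard deviation as an integer exponent n with sigma ≈ base ** n, so n = round(log(sigma) / log(base)), with NaN mapped to the nodata constant and the result capped. A sketch assuming pos_base = 1.25 (the conventional SP3 position base; its definition sits just above this hunk and is not shown in the diff):

```python
# pos_log in isolation: encode std-devs as SP3 integer exponents.
import numpy as np

pos_base = 1.25          # assumed value, per SP3 convention
SP3_POS_STD_NODATA = -100

def pos_log(x):
    return np.minimum(
        np.nan_to_num(np.rint(np.log(x) / np.log(pos_base)), nan=SP3_POS_STD_NODATA),
        99,  # cap at the two-character field maximum
    ).astype(int)

sigmas_mm = np.array([1.25, 10.0, np.nan])
print(pos_log(sigmas_mm))  # [  1  10 -100]: 1.25**10 ~ 9.3 mm; NaN -> nodata
```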

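The attrs-clearing workaround above is easiest to see in isolation. A minimal, version-dependent sketch: if `DataFrame.attrs` holds a non-scalar value (the array-valued attribute below is hypothetical), the attrs-equality check inside pandas' concat machinery can produce an elementwise comparison rather than a single bool; whether that actually raises depends on the pandas version, and clearing attrs sidesteps it entirely.

```python
# Sketch of the failure mode the attrs-clearing guards against.
import numpy as np
import pandas as pd

df = pd.DataFrame({"X": [1.0, 2.0], "Y": [3.0, 4.0]})
df.attrs["HEADER"] = np.array([1, 2, 3])  # hypothetical non-scalar attribute

safe = df.copy()
safe.attrs = {}  # the workaround: drop attrs on the copy before transform()
result = safe.transform({"X": np.log, "Y": np.log})
print(result)
```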
     def prn_formatter(x):
@@ -349,16 +349,16 @@ def prn_formatter(x):
     # Longer term we should maybe reimplement this again, maybe just processing groups line by line to format them?

     def pos_formatter(x):
-        if isinstance(x, str): # Presume an inf/NaN value, already formatted as nodata string. Pass through.
-            return x # Expected value " 0.000000"
-        return format(x, "13.6f") # Numeric value, format as usual
+        if isinstance(x, str):  # Presume an inf/NaN value, already formatted as nodata string. Pass through.
+            return x  # Expected value " 0.000000"
+        return format(x, "13.6f")  # Numeric value, format as usual

     def clk_formatter(x):
         # If this value (nominally a numpy float64) is actually a string, moreover containing the mandated part of the
         # clock nodata value (per the SP3 spec), we deduce nodata formatting has already been done, and return as is.
-        if isinstance(x, str) and x.strip(' ').startswith("999999."): # TODO performance: could do just type check
+        if isinstance(x, str) and x.strip(" ").startswith("999999."):  # TODO performance: could do just type check
             return x
-        return format(x, "13.6f") # Not infinite or NaN: proceed with normal formatting
+        return format(x, "13.6f")  # Not infinite or NaN: proceed with normal formatting

     # NOTE: the following formatters are fine, as the nodata value is actually a *numeric value*,
     # so DataFrame.to_string() will invoke them for those values.
@@ -377,14 +377,14 @@ def clk_std_formatter(x):

     formatters = {
         "PRN": prn_formatter,
-        "X": pos_formatter, # pos_formatter() can't handle nodata (Inf / NaN). Handled prior.
+        "X": pos_formatter,  # pos_formatter() can't handle nodata (Inf / NaN). Handled prior.
         "Y": pos_formatter,
         "Z": pos_formatter,
-        "CLK": clk_formatter, # Can't handle CLK nodata (Inf or NaN). Handled prior to invoking DataFrame.to_string()
-        "STDX": pos_std_formatter, # Nodata is represented as an integer, so can be handled here.
+        "CLK": clk_formatter,  # Can't handle CLK nodata (Inf or NaN). Handled prior to invoking DataFrame.to_string()
+        "STDX": pos_std_formatter,  # Nodata is represented as an integer, so can be handled here.
         "STDY": pos_std_formatter,
         "STDZ": pos_std_formatter,
-        "STDCLK": clk_std_formatter, # ditto above
+        "STDCLK": clk_std_formatter,  # ditto above
     }
     for epoch, epoch_vals in out_df.reset_index("PRN").groupby(axis=0, level="J2000"):
         # Format and write out the epoch in the SP3 format
@@ -402,26 +402,25 @@ def clk_std_formatter(x):
         # NOTE: DataFrame.to_string() as called below, takes formatter functions per column. It does not, however
         # invoke them on NaN values!! As such, trying to handle NaNs in the formatter is a fool's errand.
         # Instead, we do it here, and get the formatters to recognise and skip over the already processed nodata values
-
         # POS nodata formatting
         # Fill +/- infinity values with SP3 nodata value for POS columns
-        epoch_vals['X'].replace(to_replace=[_np.inf, _np.NINF], value=SP3_POS_NODATA_STRING, inplace=True)
-        epoch_vals['Y'].replace(to_replace=[_np.inf, _np.NINF], value=SP3_POS_NODATA_STRING, inplace=True)
-        epoch_vals['Z'].replace(to_replace=[_np.inf, _np.NINF], value=SP3_POS_NODATA_STRING, inplace=True)
+        epoch_vals["X"].replace(to_replace=[_np.inf, _np.NINF], value=SP3_POS_NODATA_STRING, inplace=True)
+        epoch_vals["Y"].replace(to_replace=[_np.inf, _np.NINF], value=SP3_POS_NODATA_STRING, inplace=True)
+        epoch_vals["Z"].replace(to_replace=[_np.inf, _np.NINF], value=SP3_POS_NODATA_STRING, inplace=True)
         # Now do the same for NaNs
-        epoch_vals['X'].fillna(value=SP3_POS_NODATA_STRING, inplace=True)
-        epoch_vals['Y'].fillna(value=SP3_POS_NODATA_STRING, inplace=True)
-        epoch_vals['Z'].fillna(value=SP3_POS_NODATA_STRING, inplace=True)
+        epoch_vals["X"].fillna(value=SP3_POS_NODATA_STRING, inplace=True)
+        epoch_vals["Y"].fillna(value=SP3_POS_NODATA_STRING, inplace=True)
+        epoch_vals["Z"].fillna(value=SP3_POS_NODATA_STRING, inplace=True)
        # NOTE: we could use replace() for all this, though fillna() might be faster in some
        # cases: https://stackoverflow.com/a/76225227
        # replace() will also handle other types of nodata constants: https://stackoverflow.com/a/54738894

         # CLK nodata formatting
         # Throw both +/- infinity, and NaN values to the SP3 clock nodata value.
         # See https://stackoverflow.com/a/17478495
-        epoch_vals['CLK'].replace(to_replace=[_np.inf, _np.NINF], value=SP3_CLOCK_NODATA_STRING, inplace=True)
-        epoch_vals['CLK'].fillna(value=SP3_CLOCK_NODATA_STRING, inplace=True)
+        epoch_vals["CLK"].replace(to_replace=[_np.inf, _np.NINF], value=SP3_CLOCK_NODATA_STRING, inplace=True)
+        epoch_vals["CLK"].fillna(value=SP3_CLOCK_NODATA_STRING, inplace=True)

         # Now invoke DataFrame to_string() to write out the values, leveraging our formatting functions for the
         # relevant columns.
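The nodata pre-pass in isolation: ±inf and NaN must be rewritten to the SP3 nodata strings *before* `DataFrame.to_string()`, because `to_string()` never invokes column formatters on NaN cells. The sketch below uses the portable `-np.inf` rather than `_np.NINF` (which NumPy 2.0 removed), and the short nodata string as shown in the constants above; the real constant pads to the full SP3 field width.

```python
# Map +/-inf and NaN to the SP3 nodata string ahead of to_string().
import numpy as np
import pandas as pd

SP3_POS_NODATA_STRING = " 0.000000"  # abbreviated; real value is field-width padded

vals = pd.Series([123.456789, np.inf, -np.inf, np.nan], name="X")
vals = vals.replace(to_replace=[np.inf, -np.inf], value=SP3_POS_NODATA_STRING)
vals = vals.fillna(value=SP3_POS_NODATA_STRING)
print(vals.tolist())
# [123.456789, ' 0.000000', ' 0.000000', ' 0.000000']
```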
@@ -519,9 +518,9 @@ def diff_sp3_rac(
     nd_rac = diff_eci.values[:, _np.newaxis] @ _gn_transform.eci2rac_rot(sp3_baseline_eci_vel)
     df_rac = _pd.DataFrame(
         nd_rac.reshape(-1, 3),
-        index=sp3_baseline.index, # Note that if the test and baseline have different SVs, this index will refer to
-        # data which is missing in the 'test' dataframe (and so is likely to be missing in
-        # the diff too).
+        index=sp3_baseline.index,  # Note that if the test and baseline have different SVs, this index will refer to
+        # data which is missing in the 'test' dataframe (and so is likely to be missing in
+        # the diff too).
         columns=[["EST_RAC"] * 3, ["Radial", "Along-track", "Cross-track"]],
     )
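For context, `diff_sp3_rac` rotates ECI orbit differences into the radial/along-track/cross-track frame. A generic construction of that rotation from a position and velocity, using the standard textbook convention (gnssanalysis' own `eci2rac_rot` may order or sign the axes differently):

```python
# Generic radial/along-track/cross-track (RAC) basis from ECI state.
import numpy as np

def rac_basis(pos_eci: np.ndarray, vel_eci: np.ndarray) -> np.ndarray:
    radial = pos_eci / np.linalg.norm(pos_eci)
    cross = np.cross(pos_eci, vel_eci)
    cross /= np.linalg.norm(cross)            # orbit-normal (cross-track)
    along = np.cross(cross, radial)           # completes right-handed set
    return np.vstack([radial, along, cross])  # rows: R, A, C

pos = np.array([26_560_000.0, 0.0, 0.0])      # ~GPS orbit radius, metres
vel = np.array([0.0, 3_870.0, 0.0])           # ~GPS orbital speed, m/s
diff_eci = np.array([0.5, -0.2, 0.1])         # test-minus-baseline, metres
print(rac_basis(pos, vel) @ diff_eci)         # [ 0.5 -0.2  0.1] for this geometry
```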

