Commit 12c5a9d

Merge pull request #26 from GeoscienceAustralia/NPI-3331-updates-for-new-py-packages

NPI-3331 - Update to gnssanalysis for new packages
ronaldmaj authored May 30, 2024
2 parents 01730d1 + 502b322 commit 12c5a9d
Showing 3 changed files with 54 additions and 52 deletions.
7 changes: 5 additions & 2 deletions gnssanalysis/gn_io/igslog.py
@@ -1,4 +1,5 @@
 """IGS log files parser"""
+
 import glob as _glob
 import re as _re
 from multiprocessing import Pool as _Pool
@@ -297,7 +298,9 @@ def gather_metadata(logs_glob_path="/data/station_logs/station_logs_IGS/*/*.log"
     xyz_norm = (xyz_array[valid_mask] ** 2).sum(axis=1) ** 0.5
     valid_mask[valid_mask] = (xyz_norm > 6000000) & (xyz_norm < 6500000)

-    llh = _gn_transform.xyz2llh(xyz_array[valid_mask], method="heikkinen", ellipsoid=_gn_const.WGS84, latlon_as_deg=True)
+    llh = _gn_transform.xyz2llh(
+        xyz_array[valid_mask], method="heikkinen", ellipsoid=_gn_const.WGS84, latlon_as_deg=True
+    )
     llh_snx = _gn_io.sinex.llh2snxdms(llh)

     llh2 = id_loc_df[~valid_mask][["LAT", "LON", "HEI"]]
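The hunk above only passes coordinates with a plausible Earth-surface radius (6,000–6,500 km) on to the lat/lon/height conversion. A standalone sketch of that sanity check, using plain numpy rather than gnssanalysis' own `xyz2llh` helper (station coordinates here are illustrative):

```python
# Sketch of the radius sanity check above, in plain numpy.
import numpy as np

xyz = np.array([
    [-4052052.7, 4212836.0, -2545104.6],  # plausible station: norm ~6.37e6 m
    [0.0, 0.0, 0.0],                      # bogus entry: norm far too small
])
norm = np.sqrt((xyz ** 2).sum(axis=1))
valid = (norm > 6_000_000) & (norm < 6_500_000)
print(valid)  # [ True False] -- only plausible coordinates go on to xyz2llh
```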
@@ -435,7 +438,7 @@ def meta2sting(id_loc_df, rec_df, ant_df):
         + " "
         + rec_df.END_SNX
         + " "
-        + +rec_df.RECEIVER.str.ljust(20, " ")
+        + rec_df.RECEIVER.str.ljust(20, " ")
         + " "
         + rec_df["S/N"].str.ljust(5, " ")
         + " "
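This fix removes a stray unary `+` that was being applied to the receiver Series before string concatenation. A hypothetical reproduction of the failure (on recent pandas, unary plus on an object-dtype Series raises a TypeError):

```python
# Why the stray unary "+" mattered: a minimal, hypothetical reproduction.
import pandas as pd

rec = pd.Series(["LEICA GR50", "SEPT POLARX5"])
print(" " + rec.str.ljust(20, " "))  # fine: string concatenation

# " " + +rec.str.ljust(20, " ") first applies unary + to an object-dtype
# Series, which recent pandas rejects outright.
try:
    _ = " " + +rec.str.ljust(20, " ")
except TypeError as exc:
    print(exc)  # e.g. "Unary plus expects numeric dtype, not object"
```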
4 changes: 2 additions & 2 deletions gnssanalysis/gn_io/nanu.py
@@ -99,7 +99,7 @@ def get_bad_sv_from_nanu_df(nanu_df: _pd.DataFrame, datetime: _Union[_np.datetim
         "UNUSABLE START TIME ZULU",
         "UNUSABLE START CALENDAR DATE",
         "LAUNCH JDAY",
-        "LAUNCH TIME ZULU"
+        "LAUNCH TIME ZULU",
         # 'DECOMMISSIONING TIME ZULU', 'DECOMMISSIONING CALENDAR DATE',
         # 'DECOMMISSIONING START TIME ZULU',
         # 'DECOMMISSIONING START CALENDAR DATE'
@@ -123,7 +123,7 @@ def get_bad_sv_from_nanu_df(nanu_df: _pd.DataFrame, datetime: _Union[_np.datetim
     hhmm = np_time[na_time_mask].astype("U4").view("<U2").reshape(-1, 2)

     nd = _np.ndarray(shape=np_time.shape, dtype="timedelta64[s]")
-    nd.fill(_np.nan)
+    nd.fill(_np.timedelta64("nat"))
     nd[na_time_mask] = hhmm[:, 0].astype("timedelta64[h]") + hhmm[:, 1].astype("timedelta64[m]")

     dt_df = _pd.concat([df.drop(labels=columns_date + columns_time, axis=1), dates + nd], axis=1)
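The change here swaps a float NaN fill for an explicit NaT. On older NumPy the float NaN happened to cast to the timedelta64 NaT sentinel, but newer NumPy warns or errors on that implicit cast, so `np.timedelta64("nat")` is the portable spelling:

```python
# Filling a timedelta64 array with "no data": the explicit NaT spelling is
# portable across NumPy versions, whereas filling with a float NaN relies on
# an implicit (and now discouraged) float -> timedelta64 cast.
import numpy as np

nd = np.ndarray(shape=(3,), dtype="timedelta64[s]")
nd.fill(np.timedelta64("nat"))
print(nd)            # [NaT NaT NaT]
print(np.isnat(nd))  # [ True  True  True]
```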
95 changes: 47 additions & 48 deletions gnssanalysis/gn_io/sp3.py
@@ -36,11 +36,11 @@
 _RE_SP3_HEAD_FDESCR = _re.compile(rb"\%c[ ]+(\w{1})[ ]+cc[ ](\w{3})")

 # Nodata ie NaN constants for SP3 format
-SP3_CLOCK_NODATA_STRING = " 999999.999999" # Not used for reading, as fractional components are optional
+SP3_CLOCK_NODATA_STRING = " 999999.999999"  # Not used for reading, as fractional components are optional
 SP3_CLOCK_NODATA_NUMERIC = 999999
-SP3_POS_NODATA_STRING = " 0.000000"
-SP3_CLOCK_STD_NODATA = -1000
-SP3_POS_STD_NODATA = -100
+SP3_POS_NODATA_STRING = " 0.000000"
+SP3_CLOCK_STD_NODATA = -1000
+SP3_POS_STD_NODATA = -100


 def sp3_clock_nodata_to_nan(sp3_df):
@@ -159,15 +159,15 @@ def read_sp3(sp3_path, pOnly=True):
     sp3_df.attrs["path"] = sp3_path

     # Check for duplicate epochs, dedupe and log warning
-    if sp3_df.index.has_duplicates: # a literaly free check
-        duplicated_indexes = sp3_df.index.duplicated() # Typically sub ms time. Marks all but first instance as duped.
+    if sp3_df.index.has_duplicates:  # a literaly free check
+        duplicated_indexes = sp3_df.index.duplicated()  # Typically sub ms time. Marks all but first instance as duped.
         first_dupe = sp3_df.index.get_level_values(0)[duplicated_indexes][0]
         logging.warning(
             f"Duplicate epoch(s) found in SP3 ({duplicated_indexes.sum()} additional entries, potentially non-unique). "
             f"First duplicate (as J2000): {first_dupe} (as date): {first_dupe + gn_const.J2000_ORIGIN} "
             f"SP3 path is: '{str(sp3_path)}'. Duplicates will be removed, keeping first."
         )
-        sp3_df = sp3_df[~sp3_df.index.duplicated(keep="first")] # Now dedupe them, keeping the first of any clashes
+        sp3_df = sp3_df[~sp3_df.index.duplicated(keep="first")]  # Now dedupe them, keeping the first of any clashes

     return sp3_df
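The dedupe logic above in isolation, on a toy two-level index (J2000 seconds, PRN) mirroring the shape of the SP3 DataFrame: `index.has_duplicates` is a cheap (cached) check, and `duplicated()` marks every repeat after the first.

```python
# The dedupe pattern on a toy DataFrame with a (J2000, PRN) MultiIndex.
import pandas as pd

df = pd.DataFrame(
    {"CLK": [1.0, 2.0, 3.0]},
    index=pd.MultiIndex.from_tuples(
        [(0, "G01"), (0, "G01"), (900, "G01")], names=["J2000", "PRN"]
    ),
)
if df.index.has_duplicates:
    df = df[~df.index.duplicated(keep="first")]  # keep first of each clash
print(len(df))  # 2
```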

@@ -295,9 +295,7 @@ def gen_sp3_header(sp3_df):
     return "".join(line1 + line2 + sats_header.tolist() + sv_orb_head.tolist() + head_c + head_fi + comment)


-def gen_sp3_content(
-    sp3_df: _pd.DataFrame, sort_outputs: bool = False, buf: Union[None, _io.TextIOBase] = None
-):
+def gen_sp3_content(sp3_df: _pd.DataFrame, sort_outputs: bool = False, buf: Union[None, _io.TextIOBase] = None):
     """
     Organises, formats (including nodata values), then writes out SP3 content to a buffer if provided, or returns
     it otherwise.
@@ -316,25 +314,27 @@ def gen_sp3_content(
     clk_base = 1.025

     def pos_log(x):
-        return _np.minimum( # Cap value at 99
-            _np.nan_to_num( # If there is data, use the following formula. Else return NODATA value.
-                _np.rint(_np.log(x) / _np.log(pos_base)), # Rounded to nearest int
-                nan=SP3_POS_STD_NODATA
-            ),
-            99
-        ).astype(int)
+        return _np.minimum(  # Cap value at 99
+            _np.nan_to_num(  # If there is data, use the following formula. Else return NODATA value.
+                _np.rint(_np.log(x) / _np.log(pos_base)), nan=SP3_POS_STD_NODATA  # Rounded to nearest int
+            ),
+            99,
+        ).astype(int)

     def clk_log(x):
         return _np.minimum(
-            _np.nan_to_num(_np.rint(_np.log(x) / _np.log(clk_base)), nan=SP3_CLOCK_STD_NODATA),
-            999 # Cap at 999
-        ).astype(int)
-
-    std_df = (
-        sp3_df["STD"]
-        .transform({"X": pos_log, "Y": pos_log, "Z": pos_log, "CLK": clk_log})
-        .rename(columns=lambda x: "STD" + x)
-    )
+            _np.nan_to_num(_np.rint(_np.log(x) / _np.log(clk_base)), nan=SP3_CLOCK_STD_NODATA), 999  # Cap at 999
+        ).astype(int)
+
+    std_df = sp3_df["STD"]
+    # Remove attribute data from this shallow copy of the Dataframe.
+    # This works around an apparent bug in Pandas, due to the fact calling == on two Series produces a list
+    # of element-wise comparisons, rather than a single boolean value. This list seems to break Pandas
+    # concat() when it is invoked within transform() and tries to check if attributes match between columns
+    # being concatenated.
+    std_df.attrs = {}
+    std_df = std_df.transform({"X": pos_log, "Y": pos_log, "Z": pos_log, "CLK": clk_log})
+    std_df = std_df.rename(columns=lambda x: "STD_" + x)
     out_df = _pd.concat([out_df, std_df], axis="columns")
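What `pos_log` computes, in isolation: SP3 stores each standard deviation as an integer exponent n with sigma ≈ base ** n, so n = round(log(sigma) / log(base)), with NaN mapped to the nodata constant and the result capped. A sketch assuming pos_base = 1.25 (the conventional SP3 position base; its definition sits just above this hunk and is not shown in the diff):

```python
# pos_log in isolation: encode std-devs as SP3 integer exponents.
import numpy as np

pos_base = 1.25          # assumed value, per SP3 convention
SP3_POS_STD_NODATA = -100

def pos_log(x):
    return np.minimum(
        np.nan_to_num(np.rint(np.log(x) / np.log(pos_base)), nan=SP3_POS_STD_NODATA),
        99,  # cap at the two-character field maximum
    ).astype(int)

sigmas_mm = np.array([1.25, 10.0, np.nan])
print(pos_log(sigmas_mm))  # [  1  10 -100]: 1.25**10 ~ 9.3 mm; NaN -> nodata
```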

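The attrs-clearing workaround above is easiest to see in isolation. A minimal, version-dependent sketch: if `DataFrame.attrs` holds a non-scalar value (the array-valued attribute below is hypothetical), the attrs-equality check inside pandas' concat machinery can produce an elementwise comparison rather than a single bool; whether that actually raises depends on the pandas version, and clearing attrs sidesteps it entirely.

```python
# Sketch of the failure mode the attrs-clearing guards against.
import numpy as np
import pandas as pd

df = pd.DataFrame({"X": [1.0, 2.0], "Y": [3.0, 4.0]})
df.attrs["HEADER"] = np.array([1, 2, 3])  # hypothetical non-scalar attribute

safe = df.copy()
safe.attrs = {}  # the workaround: drop attrs on the copy before transform()
result = safe.transform({"X": np.log, "Y": np.log})
print(result)
```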
     def prn_formatter(x):
@@ -349,16 +349,16 @@ def prn_formatter(x):
     # Longer term we should maybe reimplement this again, maybe just processing groups line by line to format them?

     def pos_formatter(x):
-        if isinstance(x, str): # Presume an inf/NaN value, already formatted as nodata string. Pass through.
-            return x # Expected value " 0.000000"
-        return format(x, "13.6f") # Numeric value, format as usual
+        if isinstance(x, str):  # Presume an inf/NaN value, already formatted as nodata string. Pass through.
+            return x  # Expected value " 0.000000"
+        return format(x, "13.6f")  # Numeric value, format as usual

     def clk_formatter(x):
         # If this value (nominally a numpy float64) is actually a string, moreover containing the mandated part of the
         # clock nodata value (per the SP3 spec), we deduce nodata formatting has already been done, and return as is.
-        if isinstance(x, str) and x.strip(' ').startswith("999999."): # TODO performance: could do just type check
+        if isinstance(x, str) and x.strip(" ").startswith("999999."):  # TODO performance: could do just type check
             return x
-        return format(x, "13.6f") # Not infinite or NaN: proceed with normal formatting
+        return format(x, "13.6f")  # Not infinite or NaN: proceed with normal formatting

     # NOTE: the following formatters are fine, as the nodata value is actually a *numeric value*,
     # so DataFrame.to_string() will invoke them for those values.
@@ -377,14 +377,14 @@ def clk_std_formatter(x):

     formatters = {
         "PRN": prn_formatter,
-        "X": pos_formatter, # pos_formatter() can't handle nodata (Inf / NaN). Handled prior.
+        "X": pos_formatter,  # pos_formatter() can't handle nodata (Inf / NaN). Handled prior.
         "Y": pos_formatter,
         "Z": pos_formatter,
-        "CLK": clk_formatter, # Can't handle CLK nodata (Inf or NaN). Handled prior to invoking DataFrame.to_string()
-        "STDX": pos_std_formatter, # Nodata is represented as an integer, so can be handled here.
+        "CLK": clk_formatter,  # Can't handle CLK nodata (Inf or NaN). Handled prior to invoking DataFrame.to_string()
+        "STDX": pos_std_formatter,  # Nodata is represented as an integer, so can be handled here.
         "STDY": pos_std_formatter,
         "STDZ": pos_std_formatter,
-        "STDCLK": clk_std_formatter, # ditto above
+        "STDCLK": clk_std_formatter,  # ditto above
     }
     for epoch, epoch_vals in out_df.reset_index("PRN").groupby(axis=0, level="J2000"):
         # Format and write out the epoch in the SP3 format
@@ -402,26 +402,25 @@ def clk_std_formatter(x):
         # NOTE: DataFrame.to_string() as called below, takes formatter functions per column. It does not, however
         # invoke them on NaN values!! As such, trying to handle NaNs in the formatter is a fool's errand.
         # Instead, we do it here, and get the formatters to recognise and skip over the already processed nodata values
-
         # POS nodata formatting
         # Fill +/- infinity values with SP3 nodata value for POS columns
-        epoch_vals['X'].replace(to_replace=[_np.inf, _np.NINF], value=SP3_POS_NODATA_STRING, inplace=True)
-        epoch_vals['Y'].replace(to_replace=[_np.inf, _np.NINF], value=SP3_POS_NODATA_STRING, inplace=True)
-        epoch_vals['Z'].replace(to_replace=[_np.inf, _np.NINF], value=SP3_POS_NODATA_STRING, inplace=True)
+        epoch_vals["X"].replace(to_replace=[_np.inf, _np.NINF], value=SP3_POS_NODATA_STRING, inplace=True)
+        epoch_vals["Y"].replace(to_replace=[_np.inf, _np.NINF], value=SP3_POS_NODATA_STRING, inplace=True)
+        epoch_vals["Z"].replace(to_replace=[_np.inf, _np.NINF], value=SP3_POS_NODATA_STRING, inplace=True)
         # Now do the same for NaNs
-        epoch_vals['X'].fillna(value=SP3_POS_NODATA_STRING, inplace=True)
-        epoch_vals['Y'].fillna(value=SP3_POS_NODATA_STRING, inplace=True)
-        epoch_vals['Z'].fillna(value=SP3_POS_NODATA_STRING, inplace=True)
+        epoch_vals["X"].fillna(value=SP3_POS_NODATA_STRING, inplace=True)
+        epoch_vals["Y"].fillna(value=SP3_POS_NODATA_STRING, inplace=True)
+        epoch_vals["Z"].fillna(value=SP3_POS_NODATA_STRING, inplace=True)
        # NOTE: we could use replace() for all this, though fillna() might be faster in some
        # cases: https://stackoverflow.com/a/76225227
        # replace() will also handle other types of nodata constants: https://stackoverflow.com/a/54738894

         # CLK nodata formatting
         # Throw both +/- infinity, and NaN values to the SP3 clock nodata value.
         # See https://stackoverflow.com/a/17478495
-        epoch_vals['CLK'].replace(to_replace=[_np.inf, _np.NINF], value=SP3_CLOCK_NODATA_STRING, inplace=True)
-        epoch_vals['CLK'].fillna(value=SP3_CLOCK_NODATA_STRING, inplace=True)
+        epoch_vals["CLK"].replace(to_replace=[_np.inf, _np.NINF], value=SP3_CLOCK_NODATA_STRING, inplace=True)
+        epoch_vals["CLK"].fillna(value=SP3_CLOCK_NODATA_STRING, inplace=True)

         # Now invoke DataFrame to_string() to write out the values, leveraging our formatting functions for the
         # relevant columns.
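The nodata pre-pass in isolation: ±inf and NaN must be rewritten to the SP3 nodata strings *before* `DataFrame.to_string()`, because `to_string()` never invokes column formatters on NaN cells. The sketch below uses the portable `-np.inf` rather than `_np.NINF` (which NumPy 2.0 removed), and the short nodata string as shown in the constants above; the real constant pads to the full SP3 field width.

```python
# Map +/-inf and NaN to the SP3 nodata string ahead of to_string().
import numpy as np
import pandas as pd

SP3_POS_NODATA_STRING = " 0.000000"  # abbreviated; real value is field-width padded

vals = pd.Series([123.456789, np.inf, -np.inf, np.nan], name="X")
vals = vals.replace(to_replace=[np.inf, -np.inf], value=SP3_POS_NODATA_STRING)
vals = vals.fillna(value=SP3_POS_NODATA_STRING)
print(vals.tolist())
# [123.456789, ' 0.000000', ' 0.000000', ' 0.000000']
```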
@@ -519,9 +518,9 @@ def diff_sp3_rac(
     nd_rac = diff_eci.values[:, _np.newaxis] @ _gn_transform.eci2rac_rot(sp3_baseline_eci_vel)
     df_rac = _pd.DataFrame(
         nd_rac.reshape(-1, 3),
-        index=sp3_baseline.index, # Note that if the test and baseline have different SVs, this index will refer to
-        # data which is missing in the 'test' dataframe (and so is likely to be missing in
-        # the diff too).
+        index=sp3_baseline.index,  # Note that if the test and baseline have different SVs, this index will refer to
+        # data which is missing in the 'test' dataframe (and so is likely to be missing in
+        # the diff too).
         columns=[["EST_RAC"] * 3, ["Radial", "Along-track", "Cross-track"]],
     )
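For context, `diff_sp3_rac` rotates ECI orbit differences into the radial/along-track/cross-track frame. A generic construction of that rotation from a position and velocity, using the standard textbook convention (gnssanalysis' own `eci2rac_rot` may order or sign the axes differently):

```python
# Generic radial/along-track/cross-track (RAC) basis from ECI state.
import numpy as np

def rac_basis(pos_eci: np.ndarray, vel_eci: np.ndarray) -> np.ndarray:
    radial = pos_eci / np.linalg.norm(pos_eci)
    cross = np.cross(pos_eci, vel_eci)
    cross /= np.linalg.norm(cross)            # orbit-normal (cross-track)
    along = np.cross(cross, radial)           # completes right-handed set
    return np.vstack([radial, along, cross])  # rows: R, A, C

pos = np.array([26_560_000.0, 0.0, 0.0])      # ~GPS orbit radius, metres
vel = np.array([0.0, 3_870.0, 0.0])           # ~GPS orbital speed, m/s
diff_eci = np.array([0.5, -0.2, 0.1])         # test-minus-baseline, metres
print(rac_basis(pos, vel) @ diff_eci)         # [ 0.5 -0.2  0.1] for this geometry
```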

