From 2fdaa5655470312261dd4713a692af100fdc2e60 Mon Sep 17 00:00:00 2001 From: Mads Christian Lund Date: Mon, 7 Oct 2024 10:08:34 +0200 Subject: [PATCH 1/6] Created a helper function get_directional_wind_speed * Added unit test for processing wind speed --- src/pypromice/process/L1toL2.py | 54 ++++++++++--------- tests/unit/test_l1tol2.py | 94 +++++++++++++++++++++++++++++++++ 2 files changed, 124 insertions(+), 24 deletions(-) create mode 100644 tests/unit/test_l1tol2.py diff --git a/src/pypromice/process/L1toL2.py b/src/pypromice/process/L1toL2.py index d2316579..8eaf8fe7 100644 --- a/src/pypromice/process/L1toL2.py +++ b/src/pypromice/process/L1toL2.py @@ -94,7 +94,7 @@ def toL2( .ffill().bfill()) mask = (np.abs(ds.gps_alt - baseline_elevation) < 100) & ds.gps_alt.notnull() ds[['gps_alt','gps_lon', 'gps_lat']] = ds[['gps_alt','gps_lon', 'gps_lat']].where(mask) - + # removing dlr and ulr that are missing t_rad # this is done now becasue t_rad can be filtered either manually or with persistence ds['dlr'] = ds.dlr.where(ds.t_rad.notnull()) @@ -113,12 +113,12 @@ def toL2( if ~ds['t_i'].isnull().all(): ds['rh_i_cor'] = correctHumidity(ds['rh_i'], ds['t_i'], T_0, T_100, ews, ei0) - + # Determiune cloud cover for on-ice stations cc = calcCloudCoverage(ds['t_u'], T_0, eps_overcast, eps_clear, # Calculate cloud coverage ds['dlr'], ds.attrs['station_id']) ds['cc'] = (('time'), cc.data) - + # Determine surface temperature ds['t_surf'] = calcSurfaceTemperature(T_0, ds['ulr'], ds['dlr'], # Calculate surface temperature emissivity) @@ -136,7 +136,7 @@ def toL2( else: lat = ds['gps_lat'].mean() lon = ds['gps_lon'].mean() - + # smoothing tilt and rot ds['tilt_x'] = smoothTilt(ds['tilt_x']) ds['tilt_y'] = smoothTilt(ds['tilt_y']) @@ -151,8 +151,8 @@ def toL2( ZenithAngle_rad, ZenithAngle_deg = calcZenith(lat, Declination_rad, # Calculate zenith HourAngle_rad, deg2rad, rad2deg) - - + + # Correct Downwelling shortwave radiation DifFrac = 0.2 + 0.8 * cc CorFac_all = calcCorrectionFactor(Declination_rad, phi_sensor_rad, # Calculate correction @@ -186,9 +186,9 @@ def toL2( TOA_crit_nopass = (ds['dsr_cor'] > (0.9 * isr_toa + 10)) # Determine filter ds['dsr_cor'][TOA_crit_nopass] = np.nan # Apply filter and interpolate ds['usr_cor'][TOA_crit_nopass] = np.nan - - ds['dsr_cor'] = ds.dsr_cor.where(ds.dsr.notnull()) - ds['usr_cor'] = ds.usr_cor.where(ds.usr.notnull()) + + ds['dsr_cor'] = ds.dsr_cor.where(ds.dsr.notnull()) + ds['usr_cor'] = ds.usr_cor.where(ds.usr.notnull()) # # Check sun position # sundown = ZenithAngle_deg >= 90 # _checkSunPos(ds, OKalbedos, sundown, sunonlowerdome, TOA_crit_nopass) @@ -205,27 +205,33 @@ def toL2( ds['precip_l_cor'], ds['precip_l_rate']= correctPrecip(ds['precip_l'], ds['wspd_l']) - # Get directional wind speed - ds['wdir_u'] = ds['wdir_u'].where(ds['wspd_u'] != 0) - ds['wspd_x_u'], ds['wspd_y_u'] = calcDirWindSpeeds(ds['wspd_u'], ds['wdir_u']) + get_directional_wind_speed(ds) # Get directional wind speed + + ds = clip_values(ds, vars_df) + return ds + +def get_directional_wind_speed(ds: xr.Dataset) -> xr.Dataset: + """ + Calculate directional wind speed from wind speed and direction and mutates the dataset + """ + + ds['wdir_u'] = ds['wdir_u'].where(ds['wspd_u'] != 0) + ds['wspd_x_u'], ds['wspd_y_u'] = calcDirWindSpeeds(ds['wspd_u'], ds['wdir_u']) - if ds.attrs['number_of_booms']==2: - ds['wdir_l'] = ds['wdir_l'].where(ds['wspd_l'] != 0) + if ds.attrs['number_of_booms']==2: + ds['wdir_l'] = ds['wdir_l'].where(ds['wspd_l'] != 0) ds['wspd_x_l'], ds['wspd_y_l'] = calcDirWindSpeeds(ds['wspd_l'], ds['wdir_l']) if hasattr(ds, 'wdir_i'): if ~ds['wdir_i'].isnull().all() and ~ds['wspd_i'].isnull().all(): - ds['wdir_i'] = ds['wdir_i'].where(ds['wspd_i'] != 0) - ds['wspd_x_i'], ds['wspd_y_i'] = calcDirWindSpeeds(ds['wspd_i'], ds['wdir_i']) - - - ds = clip_values(ds, vars_df) + ds['wdir_i'] = ds['wdir_i'].where(ds['wspd_i'] != 0) + ds['wspd_x_i'], ds['wspd_y_i'] = calcDirWindSpeeds(ds['wspd_i'], ds['wdir_i']) return ds def calcDirWindSpeeds(wspd, wdir, deg2rad=np.pi/180): '''Calculate directional wind speed from wind speed and direction - + Parameters ---------- wspd : xr.Dataarray @@ -234,16 +240,16 @@ def calcDirWindSpeeds(wspd, wdir, deg2rad=np.pi/180): Wind direction data array deg2rad : float Degree to radians coefficient. The default is np.pi/180 - + Returns ------- wspd_x : xr.Dataarray Wind speed in X direction wspd_y : xr.Datarray Wind speed in Y direction - ''' + ''' wspd_x = wspd * np.sin(wdir * deg2rad) - wspd_y = wspd * np.cos(wdir * deg2rad) + wspd_y = wspd * np.cos(wdir * deg2rad) return wspd_x, wspd_y @@ -328,7 +334,7 @@ def smoothTilt(da: xr.DataArray, threshold=0.2): either X or Y smoothed tilt inclinometer measurements ''' # we calculate the moving standard deviation over a 3-day sliding window - # hourly resampling is necessary to make sure the same threshold can be used + # hourly resampling is necessary to make sure the same threshold can be used # for 10 min and hourly data moving_std_gap_filled = da.to_series().resample('h').median().rolling( 3*24, center=True, min_periods=2 diff --git a/tests/unit/test_l1tol2.py b/tests/unit/test_l1tol2.py new file mode 100644 index 00000000..de1cb136 --- /dev/null +++ b/tests/unit/test_l1tol2.py @@ -0,0 +1,94 @@ +import unittest + +import pandas as pd +import xarray as xr +import numpy as np + +from pypromice.process.L1toL2 import get_directional_wind_speed + + +class DirectionalWindSpeedTestCase(unittest.TestCase): + def test_get_directional_wind_speed_one_boom(self): + n_entries = 4 + + df_in = pd.DataFrame( + data={ + "wspd_u": np.random.rand(n_entries) * 10, + "wspd_i": np.random.rand(n_entries) * 10, + "wdir_u": np.random.rand(n_entries) * 360, + "wdir_i": np.random.rand(n_entries) * 360, + }, + index=pd.date_range("2021-01-01", periods=n_entries, freq="H"), + ) + ds = xr.Dataset(df_in) + ds.attrs["number_of_booms"] = 1 + + ds_out = get_directional_wind_speed(ds.copy()) + + # Convert to dataframe for easier comparison + df_out = ds_out.to_dataframe() + new_columns = set(df_out.columns) - set(df_in.columns) + expected_new_columns = { + "wspd_x_u", + "wspd_y_u", + "wspd_x_i", + "wspd_y_i", + } + self.assertSetEqual(new_columns, expected_new_columns) + + def test_get_directional_wind_speed_two_booms(self): + n_entries = 4 + + df_in = pd.DataFrame( + data={ + "wspd_u": np.random.rand(n_entries) * 10, + "wspd_l": np.random.rand(n_entries) * 10, + "wspd_i": np.random.rand(n_entries) * 10, + "wdir_u": np.random.rand(n_entries) * 360, + "wdir_l": np.random.rand(n_entries) * 360, + "wdir_i": np.random.rand(n_entries) * 360, + }, + index=pd.date_range("2021-01-01", periods=n_entries, freq="H"), + ) + ds = xr.Dataset(df_in) + ds.attrs["number_of_booms"] = 2 + + ds_out = get_directional_wind_speed(ds.copy()) + + # Convert to dataframe for easier comparison + df_out = ds_out.to_dataframe() + new_columns = set(df_out.columns) - set(df_in.columns) + expected_new_columns = { + "wspd_x_u", + "wspd_y_u", + "wspd_x_i", + "wspd_y_i", + "wspd_x_l", + "wspd_y_l", + } + self.assertSetEqual(new_columns, expected_new_columns) + + def test_get_directional_wind_speed_without_instantaneous_data(self): + n_entries = 4 + + df_in = pd.DataFrame( + data={ + "wspd_u": np.random.rand(n_entries) * 10, + "wdir_u": np.random.rand(n_entries) * 360, + }, + index=pd.date_range("2021-01-01", periods=n_entries, freq="H"), + ) + ds = xr.Dataset(df_in) + ds.attrs["number_of_booms"] = 1 + + ds_out = get_directional_wind_speed(ds.copy()) + + # Convert to dataframe for easier comparison + df_out = ds_out.to_dataframe() + new_columns = set(df_out.columns) - set(df_in.columns) + # The function should ignore instantaneous data if it is not present + expected_new_columns = { + "wspd_x_u", + "wspd_y_u", + } + self.assertSetEqual(new_columns, expected_new_columns) From 3177eda8420442f414a86b7e6f3512784a646fa0 Mon Sep 17 00:00:00 2001 From: Mads Christian Lund Date: Mon, 7 Oct 2024 10:10:52 +0200 Subject: [PATCH 2/6] Bufr export: Changed airTemperature rounding to 1 We only of 1 digits precision measurments from out instantaneous values. This violates the bufr schema which expects 2 digits precision. --- src/pypromice/postprocess/bufr_utilities.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/pypromice/postprocess/bufr_utilities.py b/src/pypromice/postprocess/bufr_utilities.py index ed5a4154..e34870a1 100644 --- a/src/pypromice/postprocess/bufr_utilities.py +++ b/src/pypromice/postprocess/bufr_utilities.py @@ -120,7 +120,8 @@ class BUFRVariables: ) # https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_B/012101 # Scale: 2, unit: K - airTemperature: float = attrs.field(converter=round_converter(2)) + # NOTE: The expected scale is 2, but our instantanous data is rounded to 1 decimal. + airTemperature: float = attrs.field(converter=round_converter(1)) # There is also a Dewpoint temperature in this template: 012103 which is currently unused. # https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_B/012103 # Scale: 0, unit: % From eddc1bd42076c35c1c169aa2a69f5820de8dafe2 Mon Sep 17 00:00:00 2001 From: Mads Christian Lund Date: Mon, 7 Oct 2024 10:12:07 +0200 Subject: [PATCH 3/6] Updated variable dependency handling (OOL) OOL contains dependent variables (children) * Added unit test functions for clip_values * Add a helper class, DependencyGraph, for modelling the dependency relations to find the children closure and handle circular dependencies. * Updated clip_values to set children variables to nan whenever a parent is nan. * Fixed a bug in clipValues introduced in commit d1a6ac870976f58a593c0193c99a8b1622713f39, where child variables were not set to nan due to an erroneous change in variable names. Fixed variables.csv * Removed incorrect OOL relations from variables.csv. wspd_x_u, wspd_y_u... had wdir and wspd as children. The relation is the opposite. * Removed double quotes around ool * Removed comma between ool entries in wspeed --- src/pypromice/process/value_clipping.py | 55 ++++-- src/pypromice/resources/variables.csv | 16 +- src/pypromice/utilities/dependency_graph.py | 101 ++++++++++ tests/unit/test_value_clippping.py | 200 ++++++++++++++++++++ 4 files changed, 347 insertions(+), 25 deletions(-) create mode 100644 src/pypromice/utilities/dependency_graph.py create mode 100644 tests/unit/test_value_clippping.py diff --git a/src/pypromice/process/value_clipping.py b/src/pypromice/process/value_clipping.py index 23ed14e0..6ff60c54 100644 --- a/src/pypromice/process/value_clipping.py +++ b/src/pypromice/process/value_clipping.py @@ -1,7 +1,12 @@ +from typing import Dict, Set, Mapping + import numpy as np import pandas +import pandas as pd import xarray +from pypromice.utilities.dependency_graph import DependencyGraph + def clip_values( ds: xarray.Dataset, @@ -26,27 +31,43 @@ def clip_values( """ cols = ["lo", "hi", "OOL"] assert set(cols) <= set(var_configurations.columns) + # TODO: Check if this is necessary + # variable_limits = var_configurations[cols].dropna(how="all") - variable_limits = var_configurations[cols].dropna(how="all") - for var, row in variable_limits.iterrows(): + variable_limits = var_configurations[cols].assign( + dependents=lambda df: df.OOL.fillna("").str.split(), + # Find the closure of dependents using the DependencyGraph class + dependents_closure=lambda df: DependencyGraph.from_child_mapping( + df.dependents + ).child_closure_mapping(), + ) + for var, row in variable_limits.iterrows(): if var not in list(ds.variables): continue + # TODO: Check if this is necessary + # I guess the nan flagging is already handled below + # What if rh_u_cor is nan? + # What if row.lo/hi is nan? + + if var in ["rh_u_cor", "rh_l_cor"]: + ds[var] = ds[var].where(ds[var] >= row.lo, other=0) + ds[var] = ds[var].where(ds[var] <= row.hi, other=100) + + # Mask out invalid corrections based on uncorrected var + var_uncor = var.rstrip("_cor") + ds[var] = ds[var].where(~np.isnan(ds[var_uncor]), other=np.nan) + + else: + if ~np.isnan(row.lo): + ds[var] = ds[var].where(ds[var] >= row.lo) + if ~np.isnan(row.hi): + ds[var] = ds[var].where(ds[var] <= row.hi) - if ~np.isnan(row.lo): - ds[var] = ds[var].where(ds[var] >= row.lo) - if ~np.isnan(row.hi): - ds[var] = ds[var].where(ds[var] <= row.hi) - - other_vars = row.OOL - if isinstance(other_vars, str) and ~ds[var].isnull().all(): - for o in other_vars.split(): - if o not in list(ds.variables): - continue - else: - if ~np.isnan(row.lo): - ds[var] = ds[var].where(ds[var] >= row.lo) - if ~np.isnan(row.hi): - ds[var] = ds[var].where(ds[var] <= row.hi) + # Flag dependents as NaN if parent is NaN + for o in row.dependents_closure: + if o not in list(ds.variables): + continue + ds[o] = ds[o].where(ds[var].notnull()) return ds diff --git a/src/pypromice/resources/variables.csv b/src/pypromice/resources/variables.csv index 7630e813..cc31d35a 100644 --- a/src/pypromice/resources/variables.csv +++ b/src/pypromice/resources/variables.csv @@ -11,15 +11,15 @@ qh_u,specific_humidity,Specific humidity (upper boom),kg/kg,modelResult,time,FAL rh_l,relative_humidity,Relative humidity (lower boom),%,physicalMeasurement,time,FALSE,,0,100,rh_l_cor,two-boom,1,1,1,4 rh_l_cor,relative_humidity_corrected,Relative humidity (lower boom) - corrected,%,modelResult,time,FALSE,L2 or later,0,150,dshf_l dlhf_l qh_l,two-boom,0,1,1,4 qh_l,specific_humidity,Specific humidity (lower boom),kg/kg,modelResult,time,FALSE,L2 or later,0,100,,two-boom,0,1,1,4 -wspd_u,wind_speed,Wind speed (upper boom),m s-1,physicalMeasurement,time,FALSE,,0,100,"wdir_u wspd_x_u wspd_y_u dshf_u dlhf_u qh_u, precip_u",all,1,1,1,4 -wspd_l,wind_speed,Wind speed (lower boom),m s-1,physicalMeasurement,time,FALSE,,0,100,"wdir_l wspd_x_l wspd_y_l dshf_l dlhf_l qh_l , precip_l",two-boom,1,1,1,4 +wspd_u,wind_speed,Wind speed (upper boom),m s-1,physicalMeasurement,time,FALSE,,0,100,wdir_u wspd_x_u wspd_y_u dshf_u dlhf_u qh_u precip_u,all,1,1,1,4 +wspd_l,wind_speed,Wind speed (lower boom),m s-1,physicalMeasurement,time,FALSE,,0,100,wdir_l wspd_x_l wspd_y_l dshf_l dlhf_l qh_l precip_l,two-boom,1,1,1,4 wdir_u,wind_from_direction,Wind from direction (upper boom),degrees,physicalMeasurement,time,FALSE,,1,360,wspd_x_u wspd_y_u,all,1,1,1,4 wdir_std_u,wind_from_direction_standard_deviation,Wind from direction (standard deviation),degrees,qualityInformation,time,FALSE,L0 or L2,,,,one-boom,1,1,0,4 wdir_l,wind_from_direction,Wind from direction (lower boom),degrees,physicalMeasurement,time,FALSE,,1,360,wspd_x_l wspd_y_l,two-boom,1,1,1,4 -wspd_x_u,wind_speed_from_x_direction,Wind speed from x direction (upper boom),m s-1,modelResult,time,FALSE,L0 or L2,-100,100,wdir_u wspd_u,all,0,1,1,4 -wspd_y_u,wind_speed_from_y_direction,Wind speed from y direction (upper boom),m s-1,modelResult,time,FALSE,L0 or L2,-100,100,wdir_u wspd_u,all,0,1,1,4 -wspd_x_l,wind_speed_from_x_direction,Wind speed from x direction (lower boom),m s-1,modelResult,time,FALSE,L0 or L2,-100,100,wdir_l wspd_l,two-boom,0,1,1,4 -wspd_y_l,wind_speed_from_y_direction,Wind speed from y direction (lower boom),m s-1,modelResult,time,FALSE,L0 or L2,-100,100,wdir_l wspd_l,two-boom,0,1,1,4 +wspd_x_u,wind_speed_from_x_direction,Wind speed from x direction (upper boom),m s-1,modelResult,time,FALSE,L0 or L2,-100,100,"",all,0,1,1,4 +wspd_y_u,wind_speed_from_y_direction,Wind speed from y direction (upper boom),m s-1,modelResult,time,FALSE,L0 or L2,-100,100,"",all,0,1,1,4 +wspd_x_l,wind_speed_from_x_direction,Wind speed from x direction (lower boom),m s-1,modelResult,time,FALSE,L0 or L2,-100,100,"",two-boom,0,1,1,4 +wspd_y_l,wind_speed_from_y_direction,Wind speed from y direction (lower boom),m s-1,modelResult,time,FALSE,L0 or L2,-100,100,"",two-boom,0,1,1,4 dsr,surface_downwelling_shortwave_flux,Downwelling shortwave radiation,W m-2,physicalMeasurement,time,FALSE,,-10,1500,albedo dsr_cor usr_cor,all,1,1,1,4 dsr_cor,surface_downwelling_shortwave_flux_corrected,Downwelling shortwave radiation - corrected,W m-2,modelResult,time,FALSE,L2 or later,,,,all,0,1,1,4 usr,surface_upwelling_shortwave_flux,Upwelling shortwave radiation,W m-2,physicalMeasurement,time,FALSE,,-10,1000,albedo dsr_cor usr_cor,all,1,1,1,4 @@ -102,5 +102,5 @@ rh_i,relative_humidity,Relative humidity (instantaneous),%,physicalMeasurement,t rh_i_cor,relative_humidity_corrected,Relative humidity (instantaneous) – corrected,%,modelResult,time,TRUE,L2 or later,0,100,,all,0,1,1,4 wspd_i,wind_speed,Wind speed (instantaneous),m s-1,physicalMeasurement,time,TRUE,,0,100,wdir_i wspd_x_i wspd_y_i,all,1,1,1,4 wdir_i,wind_from_direction,Wind from direction (instantaneous),degrees,physicalMeasurement,time,TRUE,,1,360,wspd_x_i wspd_y_i,all,1,1,1,4 -wspd_x_i,wind_speed_from_x_direction,Wind speed from x direction (instantaneous),m s-1,modelResult,time,TRUE,L2 or later,-100,100,wdir_i wspd_i,all,0,1,1,4 -wspd_y_i,wind_speed_from_y_direction,Wind speed from y direction (instantaneous),m s-1,modelResult,time,TRUE,L2 or later,-100,100,wdir_i wspd_i,all,0,1,1,4 +wspd_x_i,wind_speed_from_x_direction,Wind speed from x direction (instantaneous),m s-1,modelResult,time,TRUE,L2 or later,-100,100,"",all,0,1,1,4 +wspd_y_i,wind_speed_from_y_direction,Wind speed from y direction (instantaneous),m s-1,modelResult,time,TRUE,L2 or later,-100,100,"",all,0,1,1,4 diff --git a/src/pypromice/utilities/dependency_graph.py b/src/pypromice/utilities/dependency_graph.py new file mode 100644 index 00000000..fbd27630 --- /dev/null +++ b/src/pypromice/utilities/dependency_graph.py @@ -0,0 +1,101 @@ +from typing import Mapping, Set, MutableMapping, Optional + +import attr + +__all__ = [ + "DependencyNode", + "DependencyGraph", +] + + +@attr.define +class DependencyNode: + name: str = attr.field() + parents: Set["DependencyNode"] = attr.field(factory=set) + children: Set["DependencyNode"] = attr.field(factory=set) + + def add_child(self, child: "DependencyNode"): + self.children.add(child) + child.parents.add(self) + + def get_children_closure(self, closure: Optional[Set[str]] = None) -> Set[str]: + is_root = closure is None + if closure is None: + closure = set() + if self.name in closure: + return closure + closure.add(self.name) + for child in self.children: + closure |= child.get_children_closure(closure) + + if is_root: + closure.remove(self.name) + return closure + + def get_parents_closure(self, closure: Optional[Set[str]] = None) -> Set[str]: + is_root = closure is None + if closure is None: + closure = set() + if self.name in closure: + return closure + closure.add(self.name) + for parent in self.parents: + closure |= parent.get_parents_closure(closure) + + if is_root: + closure.remove(self.name) + return closure + + def __hash__(self): + return hash(self.name) + + +@attr.define +class DependencyGraph: + nodes: MutableMapping[str, DependencyNode] = attr.field(factory=dict) + + def add_node(self, name: str) -> DependencyNode: + if name not in self.nodes: + self.nodes[name] = DependencyNode(name) + return self.nodes[name] + + def add_edge(self, parent: str, child: str): + parent_node = self.add_node(parent) + child_node = self.add_node(child) + parent_node.add_child(child_node) + + @classmethod + def from_child_mapping(cls, mapping: Mapping[str, Set[str]]) -> "DependencyGraph": + graph = cls() + for var, children in mapping.items(): + graph.add_node(var) + for child in children: + graph.add_edge(var, child) + return graph + + @classmethod + def from_parent_mapping(cls, mapping: Mapping[str, Set[str]]) -> "DependencyGraph": + graph = cls() + for var, parents in mapping.items(): + graph.add_node(var) + for parent in parents: + graph.add_edge(parent, var) + return graph + + def child_mapping(self) -> Mapping[str, Set[str]]: + return { + node.name: {child.name for child in node.children} + for node in self.nodes.values() + } + + def child_closure_mapping(self) -> Mapping[str, Set[str]]: + return {node.name: node.get_children_closure() for node in self.nodes.values()} + + def parent_mapping(self) -> Mapping[str, Set[str]]: + return { + node.name: {parent.name for parent in node.parents} + for node in self.nodes.values() + } + + def parent_closure_mapping(self) -> Mapping[str, Set[str]]: + return {node.name: node.get_parents_closure() for node in self.nodes.values()} diff --git a/tests/unit/test_value_clippping.py b/tests/unit/test_value_clippping.py new file mode 100644 index 00000000..ceb8e2ef --- /dev/null +++ b/tests/unit/test_value_clippping.py @@ -0,0 +1,200 @@ +import unittest + +import numpy as np +import pandas as pd +import xarray as xr + +import pypromice.resources +from pypromice.process.L1toL2 import get_directional_wind_speed +from pypromice.process.value_clipping import clip_values + + +class ClipValuesTestCase(unittest.TestCase): + def test_flag_wdir_on_nan_wspd(self): + n_entries = 4 + df_in = pd.DataFrame( + data={ + "wspd_u": n_entries * [np.nan], + "wspd_i": n_entries * [np.nan], + "wspd_l": n_entries * [np.nan], + "wdir_u": np.random.rand(n_entries) * 360, + "wdir_i": np.random.rand(n_entries) * 360, + "wdir_l": np.random.rand(n_entries) * 360, + }, + index=pd.date_range("2021-01-01", periods=n_entries, freq="H"), + ) + ds = xr.Dataset(df_in) + ds.attrs["number_of_booms"] = 2 + ds_out = get_directional_wind_speed(ds.copy()) + vars = pypromice.resources.load_variables(None) + + ds_out = clip_values(ds_out, vars) + # Convert to dataframe for easier comparison + df_out = ds_out.to_dataframe() + + # Assert all dir values are set nan + self.assertTrue(df_out["wdir_u"].isna().all()) + self.assertTrue(df_out["wdir_l"].isna().all()) + self.assertTrue(df_out["wdir_i"].isna().all()) + + def test_flag_wdir_zero_wspd(self): + n_entries = 4 + df_in = pd.DataFrame( + data={ + "wspd_u": n_entries * [0], + "wspd_i": n_entries * [0], + "wspd_l": n_entries * [0], + "wdir_u": np.random.rand(n_entries) * 360, + "wdir_i": np.random.rand(n_entries) * 360, + "wdir_l": np.random.rand(n_entries) * 360, + }, + index=pd.date_range("2021-01-01", periods=n_entries, freq="H"), + ) + ds = xr.Dataset(df_in) + ds.attrs["number_of_booms"] = 2 + ds_out = get_directional_wind_speed(ds.copy()) + vars = pypromice.resources.load_variables(None) + + ds_out = clip_values(ds_out, vars) + + # Convert to dataframe for easier comparison + df_out = ds_out.to_dataframe() + # Assert all dir values are set nan + self.assertTrue(df_out["wdir_u"].isna().all()) + self.assertTrue(df_out["wdir_l"].isna().all()) + self.assertTrue(df_out["wdir_i"].isna().all()) + + def test_flagging_depended_on_wspd(self): + # This unit test tests variable conditions for flagging wdir values + # Nan, 0, and negative values should be flagged while positive values should not + n_entries = 4 + df_in = pd.DataFrame( + data={ + "wspd_u": [np.nan, 0, 10, -3], + "wdir_u": [0, 180, 90, 270], + }, + index=pd.date_range("2021-01-01", periods=n_entries, freq="H"), + ) + ds = xr.Dataset(df_in) + ds.attrs["number_of_booms"] = 1 + ds_out = get_directional_wind_speed(ds.copy()) + vars = pypromice.resources.load_variables(None) + + ds_out = clip_values(ds_out, vars) + + # Convert to dataframe for easier comparison + df_out = ds_out.to_dataframe() + + expected_dataframe = pd.DataFrame( + data={ + "wspd_u": [np.nan, 0, 10, np.nan], + "wdir_u": [np.nan, np.nan, 90, np.nan], + "wspd_x_u": [np.nan, np.nan, 10, np.nan], + "wspd_y_u": [np.nan, np.nan, 0, np.nan], + }, + index=df_out.index, + ) + self.assertEqual( + df_out.columns.tolist(), + expected_dataframe.columns.tolist(), + ) + pd.testing.assert_frame_equal( + df_out, + expected_dataframe, + check_dtype=True, + check_like=True, + ) + + def test_recursive_flagging(self): + fields = ["a", "b", "c"] + variable_config = pd.DataFrame( + columns=["field", "lo", "hi", "OOL"], + data=[ + ["a", 0, 10, "b"], + ["b", 100, 110, ""], + ["c", 200, 210, "a"], + ], + ).set_index("field") + data_index = pd.RangeIndex(4) + data = pd.DataFrame( + columns=fields, + data=[ + [0, 100, 215], # c is out of range + [5, 115, 200], # b is out of range + [10, 100, 200], # All a withing range + [15, 100, 200], # a is out of range + ], + dtype=float, + index=data_index, + ) + expected_output = pd.DataFrame( + columns=fields, + data=[ + [np.nan, np.nan, np.nan], # c is nan -> a -> b + [5, np.nan, 200], # b is nan + [10, 100, 200], + [np.nan, np.nan, 200], # a is nan -> b + ], + dtype=float, + index=data.index, + ) + + data_set = xr.Dataset(data) + + data_set_out = clip_values(data_set, variable_config) + data_frame_out = data_set_out.to_dataframe() + + pd.testing.assert_frame_equal( + data_frame_out, + expected_output, + check_names=False, + check_dtype=True, + ) + + def test_circular_dependencies(self): + fields = ["a", "b", "c"] + variable_config = pd.DataFrame( + columns=["field", "lo", "hi", "OOL"], + data=[ + ["a", 0, 10, "b"], + ["b", 100, 110, "c"], + ["c", 200, 210, "a"], + ], + ).set_index("field") + data_index = pd.RangeIndex(4) + data = pd.DataFrame( + columns=fields, + data=[ + [0, 100, 215], # c is out of range + [5, 115, 200], # b is out of range + [10, 100, 200], # All a withing range + [15, 100, 200], # a is out of range + ], + dtype=float, + index=data_index, + ) + # All variables a dependent due to circular dependency + expected_output = pd.DataFrame( + columns=fields, + data=[ + [np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan], + [10, 100, 200], + [np.nan, np.nan, np.nan], + ], + dtype=float, + index=data.index, + ) + + data_set = xr.Dataset(data) + + data_set_out = clip_values(data_set, variable_config) + data_frame_out = data_set_out.to_dataframe() + + pd.testing.assert_frame_equal( + data_frame_out, + expected_output, + check_names=False, + check_dtype=True, + ) + From b662e0a27015e95f82f3a124f6b87b79ac67f35b Mon Sep 17 00:00:00 2001 From: Mads Christian Lund Date: Tue, 8 Oct 2024 10:43:39 +0200 Subject: [PATCH 4/6] Updated value_clipping wrt rh corr * The rh corrected clipping were asigning 0 to inpu nan * Added unit test specifically for rh corr --- src/pypromice/process/value_clipping.py | 24 +++----- tests/unit/test_value_clippping.py | 75 +++++++++++++++++++++++++ 2 files changed, 83 insertions(+), 16 deletions(-) diff --git a/src/pypromice/process/value_clipping.py b/src/pypromice/process/value_clipping.py index 6ff60c54..020e7317 100644 --- a/src/pypromice/process/value_clipping.py +++ b/src/pypromice/process/value_clipping.py @@ -1,8 +1,5 @@ -from typing import Dict, Set, Mapping - import numpy as np import pandas -import pandas as pd import xarray from pypromice.utilities.dependency_graph import DependencyGraph @@ -31,8 +28,6 @@ def clip_values( """ cols = ["lo", "hi", "OOL"] assert set(cols) <= set(var_configurations.columns) - # TODO: Check if this is necessary - # variable_limits = var_configurations[cols].dropna(how="all") variable_limits = var_configurations[cols].assign( dependents=lambda df: df.OOL.fillna("").str.split(), @@ -45,19 +40,16 @@ def clip_values( for var, row in variable_limits.iterrows(): if var not in list(ds.variables): continue - # TODO: Check if this is necessary - # I guess the nan flagging is already handled below - # What if rh_u_cor is nan? - # What if row.lo/hi is nan? + # This is a special case for rh_u_cor and rh_l_cor where values are clipped to 0 and 100. if var in ["rh_u_cor", "rh_l_cor"]: - ds[var] = ds[var].where(ds[var] >= row.lo, other=0) - ds[var] = ds[var].where(ds[var] <= row.hi, other=100) - - # Mask out invalid corrections based on uncorrected var - var_uncor = var.rstrip("_cor") - ds[var] = ds[var].where(~np.isnan(ds[var_uncor]), other=np.nan) - + # Nan inputs should stay nan + was_nan = ds[var].isnull() + if ~np.isnan(row.lo): + ds[var] = ds[var].where(ds[var] >= row.lo, other=0) + if ~np.isnan(row.hi): + ds[var] = ds[var].where( ds[var] <= row.hi, other=100) + ds[var] = ds[var].where(~was_nan) else: if ~np.isnan(row.lo): ds[var] = ds[var].where(ds[var] >= row.lo) diff --git a/tests/unit/test_value_clippping.py b/tests/unit/test_value_clippping.py index ceb8e2ef..063242fc 100644 --- a/tests/unit/test_value_clippping.py +++ b/tests/unit/test_value_clippping.py @@ -198,3 +198,78 @@ def test_circular_dependencies(self): check_dtype=True, ) + def test_rh_corrected_case(self): + """ + The rh corrected variables are treated differently in the clipping function. + """ + data_index = pd.RangeIndex(2) + rh_u = pd.Series(index=data_index, data=[0, 54], name="rh_u") + rh_u_cor = pd.Series(index=data_index, data=[0, np.nan], name="rh_u_cor") + rh_l = pd.Series(index=data_index, data=[-20, 54], name="rh_l") + rh_l_cor = pd.Series(index=data_index, data=[0, 254], name="rh_l_cor") + data = pd.concat([rh_u, rh_u_cor, rh_l, rh_l_cor], axis=1) + variable_config = pd.DataFrame( + columns=["field", "lo", "hi", "OOL"], + data=[ + ["rh_u", 0, 100, "rh_u_cor"], + ["rh_u_cor", 0, 150, ""], + ["rh_l_cor", np.nan, np.nan, ""], + ["rh_l", 0, 100, "rh_l_cor"], + ], + ).set_index("field") + data_set = xr.Dataset(data) + + data_set_out = clip_values(data_set, variable_config) + + # Convert to dataframe for easier comparison + data_frame_out = data_set_out.to_dataframe() + # The value of rh_u_cor should be nan since the input was nan + self.assertTrue(np.isnan(data_frame_out.iloc[1]["rh_u_cor"])) + # The value of rh_l_cor should not be changed since the hi threshold is not nan + self.assertEqual(data_frame_out.iloc[1]["rh_l_cor"], 254) + # The value of rh_l_cor should be nan since rh_l is below its threshold + self.assertTrue(np.isnan(data_frame_out.iloc[0]["rh_l_cor"])) + + def test_nan_input(self): + """ + Test that the function handles the case where nan input should cascade to child variables. + """ + fields = ["a", "b"] + variable_config = pd.DataFrame( + columns=["field", "lo", "hi", "OOL"], + data=[ + ["a", 0, 10, "b"], + ["b", 100, 110, ""], + ], + ).set_index("field") + data_index = pd.RangeIndex(2) + data = pd.DataFrame( + columns=fields, + data=[ + [0, 100], # All a withing range + [np.nan, 100], # a is nan + ], + dtype=float, + index=data_index, + ) + expected_output = pd.DataFrame( + columns=fields, + data=[ + [0, 100], + [np.nan, np.nan], # a is nan -> b + ], + dtype=float, + index=data.index, + ) + + data_set = xr.Dataset(data) + + data_set_out = clip_values(data_set, variable_config) + data_frame_out = data_set_out.to_dataframe() + + pd.testing.assert_frame_equal( + data_frame_out, + expected_output, + check_names=False, + check_dtype=True, + ) From a46e96e8e6e428fd3c5bee8bde12128082a596ce Mon Sep 17 00:00:00 2001 From: Mads Christian Lund Date: Wed, 9 Oct 2024 16:49:29 +0200 Subject: [PATCH 5/6] Removed OOL relations for all variables except the wspd and wdir --- src/pypromice/resources/variables.csv | 52 +++++++++++++-------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/src/pypromice/resources/variables.csv b/src/pypromice/resources/variables.csv index cc31d35a..ec6a3b15 100644 --- a/src/pypromice/resources/variables.csv +++ b/src/pypromice/resources/variables.csv @@ -1,18 +1,18 @@ field,standard_name,long_name,units,coverage_content_type,coordinates,instantaneous_hourly,where_to_find,lo,hi,OOL,station_type,L0,L2,L3,max_decimals time,time,Time,yyyy-mm-dd HH:MM:SS,physicalMeasurement,time,,,,,,all,1,1,1, rec,record,Record,-,referenceInformation,time,,L0 or L2,,,,all,1,1,0,0 -p_u,air_pressure,Air pressure (upper boom),hPa,physicalMeasurement,time,FALSE,,650,1100,z_pt z_pt_cor dshf_u dlhf_u qh_u,all,1,1,1,4 -p_l,air_pressure,Air pressure (lower boom),hPa,physicalMeasurement,time,FALSE,,650,1100,dshf_l dlhf_l qh_l,two-boom,1,1,1,4 -t_u,air_temperature,Air temperature (upper boom),degrees_C,physicalMeasurement,time,FALSE,,-80,40,rh_u_cor cc dsr_cor usr_cor z_boom z_stake dshf_u dlhf_u qh_u,all,1,1,1,4 -t_l,air_temperature,Air temperature (lower boom),degrees_C,physicalMeasurement,time,FALSE,,-80,40,rh_l_cor z_boom_l dshf_l dlhf_l qh_l,two-boom,1,1,1,4 -rh_u,relative_humidity,Relative humidity (upper boom),%,physicalMeasurement,time,FALSE,,0,100,rh_u_cor,all,1,1,1,4 -rh_u_cor,relative_humidity_corrected,Relative humidity (upper boom) - corrected,%,modelResult,time,FALSE,L2 or later,0,150,dshf_u dlhf_u qh_u,all,0,1,1,4 -qh_u,specific_humidity,Specific humidity (upper boom),kg/kg,modelResult,time,FALSE,L2 or later,0,100,,all,0,1,1,4 -rh_l,relative_humidity,Relative humidity (lower boom),%,physicalMeasurement,time,FALSE,,0,100,rh_l_cor,two-boom,1,1,1,4 -rh_l_cor,relative_humidity_corrected,Relative humidity (lower boom) - corrected,%,modelResult,time,FALSE,L2 or later,0,150,dshf_l dlhf_l qh_l,two-boom,0,1,1,4 +p_u,air_pressure,Air pressure (upper boom),hPa,physicalMeasurement,time,FALSE,,650,1100,"",all,1,1,1,4 +p_l,air_pressure,Air pressure (lower boom),hPa,physicalMeasurement,time,FALSE,,650,1100,"",two-boom,1,1,1,4 +t_u,air_temperature,Air temperature (upper boom),degrees_C,physicalMeasurement,time,FALSE,,-80,40,"",all,1,1,1,4 +t_l,air_temperature,Air temperature (lower boom),degrees_C,physicalMeasurement,time,FALSE,,-80,40,"",two-boom,1,1,1,4 +rh_u,relative_humidity,Relative humidity (upper boom),%,physicalMeasurement,time,FALSE,,0,100,"",all,1,1,1,4 +rh_u_cor,relative_humidity_corrected,Relative humidity (upper boom) - corrected,%,modelResult,time,FALSE,L2 or later,0,150,"",all,0,1,1,4 +qh_u,specific_humidity,Specific humidity (upper boom),kg/kg,modelResult,time,FALSE,L2 or later,0,100,"",all,0,1,1,4 +rh_l,relative_humidity,Relative humidity (lower boom),%,physicalMeasurement,time,FALSE,,0,100,"",two-boom,1,1,1,4 +rh_l_cor,relative_humidity_corrected,Relative humidity (lower boom) - corrected,%,modelResult,time,FALSE,L2 or later,0,150,"",two-boom,0,1,1,4 qh_l,specific_humidity,Specific humidity (lower boom),kg/kg,modelResult,time,FALSE,L2 or later,0,100,,two-boom,0,1,1,4 -wspd_u,wind_speed,Wind speed (upper boom),m s-1,physicalMeasurement,time,FALSE,,0,100,wdir_u wspd_x_u wspd_y_u dshf_u dlhf_u qh_u precip_u,all,1,1,1,4 -wspd_l,wind_speed,Wind speed (lower boom),m s-1,physicalMeasurement,time,FALSE,,0,100,wdir_l wspd_x_l wspd_y_l dshf_l dlhf_l qh_l precip_l,two-boom,1,1,1,4 +wspd_u,wind_speed,Wind speed (upper boom),m s-1,physicalMeasurement,time,FALSE,,0,100,wdir_u wspd_x_u wspd_y_u,all,1,1,1,4 +wspd_l,wind_speed,Wind speed (lower boom),m s-1,physicalMeasurement,time,FALSE,,0,100,wdir_l wspd_x_l wspd_y_l,two-boom,1,1,1,4 wdir_u,wind_from_direction,Wind from direction (upper boom),degrees,physicalMeasurement,time,FALSE,,1,360,wspd_x_u wspd_y_u,all,1,1,1,4 wdir_std_u,wind_from_direction_standard_deviation,Wind from direction (standard deviation),degrees,qualityInformation,time,FALSE,L0 or L2,,,,one-boom,1,1,0,4 wdir_l,wind_from_direction,Wind from direction (lower boom),degrees,physicalMeasurement,time,FALSE,,1,360,wspd_x_l wspd_y_l,two-boom,1,1,1,4 @@ -20,34 +20,34 @@ wspd_x_u,wind_speed_from_x_direction,Wind speed from x direction (upper boom),m wspd_y_u,wind_speed_from_y_direction,Wind speed from y direction (upper boom),m s-1,modelResult,time,FALSE,L0 or L2,-100,100,"",all,0,1,1,4 wspd_x_l,wind_speed_from_x_direction,Wind speed from x direction (lower boom),m s-1,modelResult,time,FALSE,L0 or L2,-100,100,"",two-boom,0,1,1,4 wspd_y_l,wind_speed_from_y_direction,Wind speed from y direction (lower boom),m s-1,modelResult,time,FALSE,L0 or L2,-100,100,"",two-boom,0,1,1,4 -dsr,surface_downwelling_shortwave_flux,Downwelling shortwave radiation,W m-2,physicalMeasurement,time,FALSE,,-10,1500,albedo dsr_cor usr_cor,all,1,1,1,4 +dsr,surface_downwelling_shortwave_flux,Downwelling shortwave radiation,W m-2,physicalMeasurement,time,FALSE,,-10,1500,"",all,1,1,1,4 dsr_cor,surface_downwelling_shortwave_flux_corrected,Downwelling shortwave radiation - corrected,W m-2,modelResult,time,FALSE,L2 or later,,,,all,0,1,1,4 -usr,surface_upwelling_shortwave_flux,Upwelling shortwave radiation,W m-2,physicalMeasurement,time,FALSE,,-10,1000,albedo dsr_cor usr_cor,all,1,1,1,4 +usr,surface_upwelling_shortwave_flux,Upwelling shortwave radiation,W m-2,physicalMeasurement,time,FALSE,,-10,1000,"",all,1,1,1,4 usr_cor,surface_upwelling_shortwave_flux_corrected,Upwelling shortwave radiation - corrected,W m-2,modelResult,time,FALSE,L2 or later,0,1000,,all,0,1,1,4 albedo,surface_albedo,Albedo,-,modelResult,time,FALSE,L2 or later,,,,all,0,1,1,4 -dlr,surface_downwelling_longwave_flux,Downwelling longwave radiation,W m-2,physicalMeasurement,time,FALSE,,50,500,albedo dsr_cor usr_cor cc t_surf,all,1,1,1,4 -ulr,surface_upwelling_longwave_flux,Upwelling longwave radiation,W m-2,physicalMeasurement,time,FALSE,,50,500,t_surf,all,1,1,1,4 +dlr,surface_downwelling_longwave_flux,Downwelling longwave radiation,W m-2,physicalMeasurement,time,FALSE,,50,500,"",all,1,1,1,4 +ulr,surface_upwelling_longwave_flux,Upwelling longwave radiation,W m-2,physicalMeasurement,time,FALSE,,50,500,"",all,1,1,1,4 cc,cloud_area_fraction,Cloud cover,%,modelResult,time,FALSE,L2 or later,,,,all,0,1,1,4 -t_surf,surface_temperature,Surface temperature,C,modelResult,time,FALSE,L2 or later,-80,40,dshf_u dlhf_u qh_u,all,0,1,1,4 +t_surf,surface_temperature,Surface temperature,C,modelResult,time,FALSE,L2 or later,-80,40,"",all,0,1,1,4 dlhf_u,surface_downward_latent_heat_flux,Latent heat flux (upper boom),W m-2,modelResult,time,FALSE,L3 or later,,,,all,0,0,1,4 dlhf_l,surface_downward_latent_heat_flux,Latent heat flux (lower boom),W m-2,modelResult,time,FALSE,L3 or later,,,,two-boom,0,0,1,4 dshf_u,surface_downward_sensible_heat_flux,Sensible heat flux (upper boom),W m-2,modelResult,time,FALSE,L3 or later,,,,all,0,0,1,4 dshf_l,surface_downward_sensible_heat_flux,Sensible heat flux (lower boom),W m-2,modelResult,time,FALSE,L3 or later,,,,two-boom,0,0,1,4 -z_boom_u,distance_to_surface_from_boom,Upper boom height,m,physicalMeasurement,time,TRUE,,0.3,10,dshf_u dlhf_u qh_u,all,1,1,1,4 +z_boom_u,distance_to_surface_from_boom,Upper boom height,m,physicalMeasurement,time,TRUE,,0.3,10,"",all,1,1,1,4 z_boom_q_u,distance_to_surface_from_boom_quality,Upper boom height (quality),-,qualityInformation,time,TRUE,L0 or L2,,,,all,1,1,0,4 -z_boom_l,distance_to_surface_from_boom,Lower boom height,m,physicalMeasurement,time,TRUE,,0.3,5,dshf_l dlhf_l qh_l,two-boom,1,1,1,4 +z_boom_l,distance_to_surface_from_boom,Lower boom height,m,physicalMeasurement,time,TRUE,,0.3,5,"",two-boom,1,1,1,4 z_boom_q_l,distance_to_surface_from_boom_quality,Lower boom height (quality),-,qualityInformation,time,TRUE,L0 or L2,,,,two-boom,1,1,0,4 z_stake,distance_to_surface_from_stake_assembly,Stake height,m,physicalMeasurement,time,TRUE,,0.3,8,,one-boom,1,1,1,4 z_stake_q,distance_to_surface_from_stake_assembly_quality,Stake height (quality),-,qualityInformation,time,TRUE,L0 or L2,,,,one-boom,1,1,0,4 -z_pt,depth_of_pressure_transducer_in_ice,Depth of pressure transducer in ice,m,physicalMeasurement,time,FALSE,,0,30,z_pt_cor,one-boom,1,1,1,4 +z_pt,depth_of_pressure_transducer_in_ice,Depth of pressure transducer in ice,m,physicalMeasurement,time,FALSE,,0,30,"",one-boom,1,1,1,4 z_pt_cor,depth_of_pressure_transducer_in_ice_corrected,Depth of pressure transducer in ice - corrected,m,modelResult,time,FALSE,L2 or later,0,30,,one-boom,0,1,1,4 z_surf_combined,height_of_surface_combined,"Surface height combined from multiple sensors, relative to ice surface height at installation",m,modelResult,time,FALSE,L3,,,,all,0,0,1,4 z_ice_surf,height_of_ice_surface,"Ice surface height, relative to ice surface height at installation and calculated from pt_cor and z_stake",m,modelResult,time,FALSE,L3,,,,one-boom,0,0,1,4 snow_height,height_of_snow,"Snow surface height, relative to ice surface",m,modelResult,time,FALSE,L3,0,,,one-boom,0,0,1,4 -precip_u,precipitation,Precipitation (upper boom) (cumulative solid & liquid),mm,physicalMeasurement,time,TRUE,,0,,precip_u_cor precip_u_rate,all,1,1,1,4 +precip_u,precipitation,Precipitation (upper boom) (cumulative solid & liquid),mm,physicalMeasurement,time,TRUE,,0,,"",all,1,1,1,4 precip_u_cor,precipitation_corrected,Precipitation (upper boom) (cumulative solid & liquid) – corrected,mm,modelResult,time,TRUE,L2 or later,0,,,all,0,1,1,4 precip_u_rate,precipitation_rate,Precipitation rate (upper boom) (cumulative solid & liquid) – corrected,mm,modelResult,time,TRUE,L2 or later,0,,,all,0,1,1,4 -precip_l,precipitation,Precipitation (lower boom) (cumulative solid & liquid),mm,physicalMeasurement,time,TRUE,,0,,precip_l_cor precip_l_rate,two-boom,1,1,1,4 +precip_l,precipitation,Precipitation (lower boom) (cumulative solid & liquid),mm,physicalMeasurement,time,TRUE,,0,,"",two-boom,1,1,1,4 precip_l_cor,precipitation_corrected,Precipitation (lower boom) (cumulative solid & liquid) – corrected,mm,modelResult,time,TRUE,L2 or later,0,,,two-boom,0,1,1,4 precip_l_rate,precipitation_rate,Precipitation rate (lower boom) (cumulative solid & liquid) – corrected,mm,modelResult,time,TRUE,L2 or later,0,,,two-boom,0,1,1,4 t_i_1,ice_temperature_at_t1,Ice temperature at sensor 1,degrees_C,physicalMeasurement,time,FALSE,,-80,1,,all,1,1,1,4 @@ -73,8 +73,8 @@ d_t_i_9,depth_of_thermistor_9,Depth of thermistor 9,m,modelResult,time,FALSE,L3, d_t_i_10,depth_of_thermistor_10,Depth of thermistor 10,m,modelResult,time,FALSE,L3,-10,100,,two-boom,0,0,1,4 d_t_i_11,depth_of_thermistor_11,Depth of thermistor 11,m,modelResult,time,FALSE,L3,-10,100,,two-boom,0,0,1,4 t_i_10m,10m_subsurface_temperature,10 m subsurface temperature,degrees_C,modelResult,time,FALSE,L3,-70,0,,all,0,0,1,4 -tilt_x,platform_view_angle_x,Tilt to east,degrees,physicalMeasurement,time,FALSE,,-30,30,dsr_cor usr_cor albedo,all,1,1,1,4 -tilt_y,platform_view_angle_y,Tilt to north,degrees,physicalMeasurement,time,FALSE,,-30,30,dsr_cor usr_cor albedo,all,1,1,1,4 +tilt_x,platform_view_angle_x,Tilt to east,degrees,physicalMeasurement,time,FALSE,,-30,30,"",all,1,1,1,4 +tilt_y,platform_view_angle_y,Tilt to north,degrees,physicalMeasurement,time,FALSE,,-30,30,"",all,1,1,1,4 rot,platform_azimuth_angle,Station rotation from true North,degrees,physicalMeasurement,time,FALSE,,0,360,,all,1,1,1,2 gps_lat,gps_latitude,Latitude,degrees_north,physicalMeasurement,time,TRUE,,50,83,,all,1,1,1,6 gps_lon,gps_longitude,Longitude,degrees_east,physicalMeasurement,time,TRUE,,5,70,,all,1,1,1,6 @@ -93,12 +93,12 @@ batt_v_ini,,,-,physicalMeasurement,time,TRUE,L0 or L2,0,30,,,1,1,0,2 batt_v_ss,battery_voltage_at_sample_start,Battery voltage (sample start),V,physicalMeasurement,time,TRUE,L0 or L2,0,30,,,1,1,0,2 fan_dc_u,fan_current,Fan current (upper boom),mA,physicalMeasurement,time,TRUE,L0 or L2,0,200,,all,1,1,0,2 fan_dc_l,fan_current,Fan current (lower boom),mA,physicalMeasurement,time,TRUE,,0,200,,two-boom,1,1,0,2 -freq_vw,frequency_of_precipitation_wire_vibration,Frequency of vibrating wire in precipitation gauge,Hz,physicalMeasurement,time,TRUE,L0 or L2,0,10000,precip_u,,1,1,0, +freq_vw,frequency_of_precipitation_wire_vibration,Frequency of vibrating wire in precipitation gauge,Hz,physicalMeasurement,time,TRUE,L0 or L2,0,10000,"",,1,1,0, t_log,temperature_of_logger,Logger temperature,degrees_C,physicalMeasurement,time,TRUE,,-80,40,,one-boom,1,1,0,4 -t_rad,temperature_of_radiation_sensor,Radiation sensor temperature,degrees_C,physicalMeasurement,time,FALSE,,-80,40,t_surf dlr ulr,all,1,1,1,4 +t_rad,temperature_of_radiation_sensor,Radiation sensor temperature,degrees_C,physicalMeasurement,time,FALSE,,-80,40,"",all,1,1,1,4 p_i,air_pressure,Air pressure (instantaneous) minus 1000,hPa,physicalMeasurement,time,TRUE,,-350,100,,all,1,1,1,4 t_i,air_temperature,Air temperature (instantaneous),degrees_C,physicalMeasurement,time,TRUE,,-80,40,,all,1,1,1,4 -rh_i,relative_humidity,Relative humidity (instantaneous),%,physicalMeasurement,time,TRUE,,0,150,rh_i_cor,all,1,1,1,4 +rh_i,relative_humidity,Relative humidity (instantaneous),%,physicalMeasurement,time,TRUE,,0,150,"",all,1,1,1,4 rh_i_cor,relative_humidity_corrected,Relative humidity (instantaneous) – corrected,%,modelResult,time,TRUE,L2 or later,0,100,,all,0,1,1,4 wspd_i,wind_speed,Wind speed (instantaneous),m s-1,physicalMeasurement,time,TRUE,,0,100,wdir_i wspd_x_i wspd_y_i,all,1,1,1,4 wdir_i,wind_from_direction,Wind from direction (instantaneous),degrees,physicalMeasurement,time,TRUE,,1,360,wspd_x_i wspd_y_i,all,1,1,1,4 From 1dab25b59b781a46836b6f122c2a16494bec363e Mon Sep 17 00:00:00 2001 From: Mads Christian Lund Date: Mon, 14 Oct 2024 14:44:42 +0200 Subject: [PATCH 6/6] Removed special handling for `rh_cor` values in `value_clipping.py` The logic for handling `rh` values has been simplified to align with the general clipping rules. This change removes the exception cases previously applied to `rh`, ensuring consistent value clipping across all variables. --- src/pypromice/process/value_clipping.py | 18 ++------- tests/unit/test_value_clippping.py | 53 +++++++++++++++---------- 2 files changed, 36 insertions(+), 35 deletions(-) diff --git a/src/pypromice/process/value_clipping.py b/src/pypromice/process/value_clipping.py index 020e7317..f65cad52 100644 --- a/src/pypromice/process/value_clipping.py +++ b/src/pypromice/process/value_clipping.py @@ -41,20 +41,10 @@ def clip_values( if var not in list(ds.variables): continue - # This is a special case for rh_u_cor and rh_l_cor where values are clipped to 0 and 100. - if var in ["rh_u_cor", "rh_l_cor"]: - # Nan inputs should stay nan - was_nan = ds[var].isnull() - if ~np.isnan(row.lo): - ds[var] = ds[var].where(ds[var] >= row.lo, other=0) - if ~np.isnan(row.hi): - ds[var] = ds[var].where( ds[var] <= row.hi, other=100) - ds[var] = ds[var].where(~was_nan) - else: - if ~np.isnan(row.lo): - ds[var] = ds[var].where(ds[var] >= row.lo) - if ~np.isnan(row.hi): - ds[var] = ds[var].where(ds[var] <= row.hi) + if ~np.isnan(row.lo): + ds[var] = ds[var].where(ds[var] >= row.lo) + if ~np.isnan(row.hi): + ds[var] = ds[var].where(ds[var] <= row.hi) # Flag dependents as NaN if parent is NaN for o in row.dependents_closure: diff --git a/tests/unit/test_value_clippping.py b/tests/unit/test_value_clippping.py index 063242fc..52f33234 100644 --- a/tests/unit/test_value_clippping.py +++ b/tests/unit/test_value_clippping.py @@ -198,37 +198,48 @@ def test_circular_dependencies(self): check_dtype=True, ) - def test_rh_corrected_case(self): - """ - The rh corrected variables are treated differently in the clipping function. - """ - data_index = pd.RangeIndex(2) - rh_u = pd.Series(index=data_index, data=[0, 54], name="rh_u") - rh_u_cor = pd.Series(index=data_index, data=[0, np.nan], name="rh_u_cor") - rh_l = pd.Series(index=data_index, data=[-20, 54], name="rh_l") - rh_l_cor = pd.Series(index=data_index, data=[0, 254], name="rh_l_cor") - data = pd.concat([rh_u, rh_u_cor, rh_l, rh_l_cor], axis=1) + def test_rh_corrected(self): variable_config = pd.DataFrame( columns=["field", "lo", "hi", "OOL"], data=[ - ["rh_u", 0, 100, "rh_u_cor"], + ["rh_u", 0, 150, "rh_u_cor"], ["rh_u_cor", 0, 150, ""], - ["rh_l_cor", np.nan, np.nan, ""], - ["rh_l", 0, 100, "rh_l_cor"], ], ).set_index("field") - data_set = xr.Dataset(data) + rows_input = [] + rows_expected = [] + # All values are within the expected range + rows_input.append(dict(rh_u=42, rh_u_cor=43)) + rows_expected.append(dict(rh_u=42, rh_u_cor=43)) + # rh_u is below range, but rh_u_cor is within range. Both should be flagged due to the OOL relationship + rows_input.append(dict(rh_u=-10, rh_u_cor=3)) + rows_expected.append(dict(rh_u=np.nan, rh_u_cor=np.nan)) + # rh_u is within range, but rh_u_cor is below range; rh_u_cor should be flagged + rows_input.append(dict(rh_u=54, rh_u_cor=-4)) + rows_expected.append(dict(rh_u=54, rh_u_cor=np.nan)) + # rh_u is above range, but rh_u_cor is within range. Both should be flagged due to the OOL relationship + rows_input.append(dict(rh_u=160, rh_u_cor=120)) + rows_expected.append(dict(rh_u=np.nan, rh_u_cor=np.nan)) + # rh_u is within range, but rh_u_cor is above range; rh_u_cor should be flagged + rows_input.append(dict(rh_u=100, rh_u_cor=255)) + rows_expected.append(dict(rh_u=100, rh_u_cor=np.nan)) + + # Prepare the data + df_input = pd.DataFrame(rows_input, dtype=float) + df_expected = pd.DataFrame(rows_expected, dtype=float) + data_set = xr.Dataset(df_input) + + # Run the function data_set_out = clip_values(data_set, variable_config) - # Convert to dataframe for easier comparison data_frame_out = data_set_out.to_dataframe() - # The value of rh_u_cor should be nan since the input was nan - self.assertTrue(np.isnan(data_frame_out.iloc[1]["rh_u_cor"])) - # The value of rh_l_cor should not be changed since the hi threshold is not nan - self.assertEqual(data_frame_out.iloc[1]["rh_l_cor"], 254) - # The value of rh_l_cor should be nan since rh_l is below its threshold - self.assertTrue(np.isnan(data_frame_out.iloc[0]["rh_l_cor"])) + pd.testing.assert_frame_equal( + data_frame_out, + df_expected, + check_names=False, + check_dtype=True, + ) def test_nan_input(self): """