Skip to content

Commit

Permalink
Updated variable dependency handling (OOL)
Browse files Browse the repository at this point in the history
OOL contains dependent variables (children)
* Added unit test functions for clip_values
* Add a helper class, DependencyGraph, for modelling the dependency relations to find the children closure and handle circular dependencies.
* Updated clip_values to set children variables to nan whenever a parent is nan.
* Fixed a bug in clipValues introduced in commit d1a6ac8, where child variables were not set to nan due to an erroneous change in variable names.

Fixed variables.csv
* Removed incorrect OOL relations from variables.csv. wspd_x_u, wspd_y_u... had wdir and wspd as children. The relation is the opposite.
* Removed double quotes around ool
* Removed comma between ool entries in wspeed
  • Loading branch information
ladsmund committed Oct 8, 2024
1 parent 3177eda commit eddc1bd
Show file tree
Hide file tree
Showing 4 changed files with 347 additions and 25 deletions.
55 changes: 38 additions & 17 deletions src/pypromice/process/value_clipping.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
from typing import Dict, Set, Mapping

import numpy as np
import pandas
import pandas as pd
import xarray

from pypromice.utilities.dependency_graph import DependencyGraph


def clip_values(
ds: xarray.Dataset,
Expand All @@ -26,27 +31,43 @@ def clip_values(
"""
cols = ["lo", "hi", "OOL"]
assert set(cols) <= set(var_configurations.columns)
# TODO: Check if this is necessary
# variable_limits = var_configurations[cols].dropna(how="all")

variable_limits = var_configurations[cols].dropna(how="all")
for var, row in variable_limits.iterrows():
variable_limits = var_configurations[cols].assign(
dependents=lambda df: df.OOL.fillna("").str.split(),
# Find the closure of dependents using the DependencyGraph class
dependents_closure=lambda df: DependencyGraph.from_child_mapping(
df.dependents
).child_closure_mapping(),
)

for var, row in variable_limits.iterrows():
if var not in list(ds.variables):
continue
# TODO: Check if this is necessary
# I guess the nan flagging is already handled below
# What if rh_u_cor is nan?
# What if row.lo/hi is nan?

if var in ["rh_u_cor", "rh_l_cor"]:
ds[var] = ds[var].where(ds[var] >= row.lo, other=0)
ds[var] = ds[var].where(ds[var] <= row.hi, other=100)

# Mask out invalid corrections based on uncorrected var
var_uncor = var.rstrip("_cor")
ds[var] = ds[var].where(~np.isnan(ds[var_uncor]), other=np.nan)

else:
if ~np.isnan(row.lo):
ds[var] = ds[var].where(ds[var] >= row.lo)
if ~np.isnan(row.hi):
ds[var] = ds[var].where(ds[var] <= row.hi)

if ~np.isnan(row.lo):
ds[var] = ds[var].where(ds[var] >= row.lo)
if ~np.isnan(row.hi):
ds[var] = ds[var].where(ds[var] <= row.hi)

other_vars = row.OOL
if isinstance(other_vars, str) and ~ds[var].isnull().all():
for o in other_vars.split():
if o not in list(ds.variables):
continue
else:
if ~np.isnan(row.lo):
ds[var] = ds[var].where(ds[var] >= row.lo)
if ~np.isnan(row.hi):
ds[var] = ds[var].where(ds[var] <= row.hi)
# Flag dependents as NaN if parent is NaN
for o in row.dependents_closure:
if o not in list(ds.variables):
continue
ds[o] = ds[o].where(ds[var].notnull())

return ds
16 changes: 8 additions & 8 deletions src/pypromice/resources/variables.csv
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,15 @@ qh_u,specific_humidity,Specific humidity (upper boom),kg/kg,modelResult,time,FAL
rh_l,relative_humidity,Relative humidity (lower boom),%,physicalMeasurement,time,FALSE,,0,100,rh_l_cor,two-boom,1,1,1,4
rh_l_cor,relative_humidity_corrected,Relative humidity (lower boom) - corrected,%,modelResult,time,FALSE,L2 or later,0,150,dshf_l dlhf_l qh_l,two-boom,0,1,1,4
qh_l,specific_humidity,Specific humidity (lower boom),kg/kg,modelResult,time,FALSE,L2 or later,0,100,,two-boom,0,1,1,4
wspd_u,wind_speed,Wind speed (upper boom),m s-1,physicalMeasurement,time,FALSE,,0,100,"wdir_u wspd_x_u wspd_y_u dshf_u dlhf_u qh_u, precip_u",all,1,1,1,4
wspd_l,wind_speed,Wind speed (lower boom),m s-1,physicalMeasurement,time,FALSE,,0,100,"wdir_l wspd_x_l wspd_y_l dshf_l dlhf_l qh_l , precip_l",two-boom,1,1,1,4
wspd_u,wind_speed,Wind speed (upper boom),m s-1,physicalMeasurement,time,FALSE,,0,100,wdir_u wspd_x_u wspd_y_u dshf_u dlhf_u qh_u precip_u,all,1,1,1,4
wspd_l,wind_speed,Wind speed (lower boom),m s-1,physicalMeasurement,time,FALSE,,0,100,wdir_l wspd_x_l wspd_y_l dshf_l dlhf_l qh_l precip_l,two-boom,1,1,1,4
wdir_u,wind_from_direction,Wind from direction (upper boom),degrees,physicalMeasurement,time,FALSE,,1,360,wspd_x_u wspd_y_u,all,1,1,1,4
wdir_std_u,wind_from_direction_standard_deviation,Wind from direction (standard deviation),degrees,qualityInformation,time,FALSE,L0 or L2,,,,one-boom,1,1,0,4
wdir_l,wind_from_direction,Wind from direction (lower boom),degrees,physicalMeasurement,time,FALSE,,1,360,wspd_x_l wspd_y_l,two-boom,1,1,1,4
wspd_x_u,wind_speed_from_x_direction,Wind speed from x direction (upper boom),m s-1,modelResult,time,FALSE,L0 or L2,-100,100,wdir_u wspd_u,all,0,1,1,4
wspd_y_u,wind_speed_from_y_direction,Wind speed from y direction (upper boom),m s-1,modelResult,time,FALSE,L0 or L2,-100,100,wdir_u wspd_u,all,0,1,1,4
wspd_x_l,wind_speed_from_x_direction,Wind speed from x direction (lower boom),m s-1,modelResult,time,FALSE,L0 or L2,-100,100,wdir_l wspd_l,two-boom,0,1,1,4
wspd_y_l,wind_speed_from_y_direction,Wind speed from y direction (lower boom),m s-1,modelResult,time,FALSE,L0 or L2,-100,100,wdir_l wspd_l,two-boom,0,1,1,4
wspd_x_u,wind_speed_from_x_direction,Wind speed from x direction (upper boom),m s-1,modelResult,time,FALSE,L0 or L2,-100,100,"",all,0,1,1,4
wspd_y_u,wind_speed_from_y_direction,Wind speed from y direction (upper boom),m s-1,modelResult,time,FALSE,L0 or L2,-100,100,"",all,0,1,1,4
wspd_x_l,wind_speed_from_x_direction,Wind speed from x direction (lower boom),m s-1,modelResult,time,FALSE,L0 or L2,-100,100,"",two-boom,0,1,1,4
wspd_y_l,wind_speed_from_y_direction,Wind speed from y direction (lower boom),m s-1,modelResult,time,FALSE,L0 or L2,-100,100,"",two-boom,0,1,1,4
dsr,surface_downwelling_shortwave_flux,Downwelling shortwave radiation,W m-2,physicalMeasurement,time,FALSE,,-10,1500,albedo dsr_cor usr_cor,all,1,1,1,4
dsr_cor,surface_downwelling_shortwave_flux_corrected,Downwelling shortwave radiation - corrected,W m-2,modelResult,time,FALSE,L2 or later,,,,all,0,1,1,4
usr,surface_upwelling_shortwave_flux,Upwelling shortwave radiation,W m-2,physicalMeasurement,time,FALSE,,-10,1000,albedo dsr_cor usr_cor,all,1,1,1,4
Expand Down Expand Up @@ -102,5 +102,5 @@ rh_i,relative_humidity,Relative humidity (instantaneous),%,physicalMeasurement,t
rh_i_cor,relative_humidity_corrected,Relative humidity (instantaneous) – corrected,%,modelResult,time,TRUE,L2 or later,0,100,,all,0,1,1,4
wspd_i,wind_speed,Wind speed (instantaneous),m s-1,physicalMeasurement,time,TRUE,,0,100,wdir_i wspd_x_i wspd_y_i,all,1,1,1,4
wdir_i,wind_from_direction,Wind from direction (instantaneous),degrees,physicalMeasurement,time,TRUE,,1,360,wspd_x_i wspd_y_i,all,1,1,1,4
wspd_x_i,wind_speed_from_x_direction,Wind speed from x direction (instantaneous),m s-1,modelResult,time,TRUE,L2 or later,-100,100,wdir_i wspd_i,all,0,1,1,4
wspd_y_i,wind_speed_from_y_direction,Wind speed from y direction (instantaneous),m s-1,modelResult,time,TRUE,L2 or later,-100,100,wdir_i wspd_i,all,0,1,1,4
wspd_x_i,wind_speed_from_x_direction,Wind speed from x direction (instantaneous),m s-1,modelResult,time,TRUE,L2 or later,-100,100,"",all,0,1,1,4
wspd_y_i,wind_speed_from_y_direction,Wind speed from y direction (instantaneous),m s-1,modelResult,time,TRUE,L2 or later,-100,100,"",all,0,1,1,4
101 changes: 101 additions & 0 deletions src/pypromice/utilities/dependency_graph.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
from typing import Mapping, Set, MutableMapping, Optional

import attr

__all__ = [
"DependencyNode",
"DependencyGraph",
]


@attr.define
class DependencyNode:
name: str = attr.field()
parents: Set["DependencyNode"] = attr.field(factory=set)
children: Set["DependencyNode"] = attr.field(factory=set)

def add_child(self, child: "DependencyNode"):
self.children.add(child)
child.parents.add(self)

def get_children_closure(self, closure: Optional[Set[str]] = None) -> Set[str]:
is_root = closure is None
if closure is None:
closure = set()
if self.name in closure:
return closure
closure.add(self.name)
for child in self.children:
closure |= child.get_children_closure(closure)

if is_root:
closure.remove(self.name)
return closure

def get_parents_closure(self, closure: Optional[Set[str]] = None) -> Set[str]:
is_root = closure is None
if closure is None:
closure = set()
if self.name in closure:
return closure
closure.add(self.name)
for parent in self.parents:
closure |= parent.get_parents_closure(closure)

if is_root:
closure.remove(self.name)
return closure

def __hash__(self):
return hash(self.name)


@attr.define
class DependencyGraph:
nodes: MutableMapping[str, DependencyNode] = attr.field(factory=dict)

def add_node(self, name: str) -> DependencyNode:
if name not in self.nodes:
self.nodes[name] = DependencyNode(name)
return self.nodes[name]

def add_edge(self, parent: str, child: str):
parent_node = self.add_node(parent)
child_node = self.add_node(child)
parent_node.add_child(child_node)

@classmethod
def from_child_mapping(cls, mapping: Mapping[str, Set[str]]) -> "DependencyGraph":
graph = cls()
for var, children in mapping.items():
graph.add_node(var)
for child in children:
graph.add_edge(var, child)
return graph

@classmethod
def from_parent_mapping(cls, mapping: Mapping[str, Set[str]]) -> "DependencyGraph":
graph = cls()
for var, parents in mapping.items():
graph.add_node(var)
for parent in parents:
graph.add_edge(parent, var)
return graph

def child_mapping(self) -> Mapping[str, Set[str]]:
return {
node.name: {child.name for child in node.children}
for node in self.nodes.values()
}

def child_closure_mapping(self) -> Mapping[str, Set[str]]:
return {node.name: node.get_children_closure() for node in self.nodes.values()}

def parent_mapping(self) -> Mapping[str, Set[str]]:
return {
node.name: {parent.name for parent in node.parents}
for node in self.nodes.values()
}

def parent_closure_mapping(self) -> Mapping[str, Set[str]]:
return {node.name: node.get_parents_closure() for node in self.nodes.values()}
Loading

0 comments on commit eddc1bd

Please sign in to comment.