From 7f99782a2382a3a276061491046c3573d20a869b Mon Sep 17 00:00:00 2001 From: justin-richling Date: Thu, 11 Jan 2024 14:52:37 -0700 Subject: [PATCH 1/3] Add sympy equation parsing and xarray calcs --- lib/adf_diag.py | 158 +++++++++++++++++++++++++++++------------------- 1 file changed, 95 insertions(+), 63 deletions(-) diff --git a/lib/adf_diag.py b/lib/adf_diag.py index 2dff57f85..075e0b4f8 100644 --- a/lib/adf_diag.py +++ b/lib/adf_diag.py @@ -634,7 +634,7 @@ def call_ncrcat(cmd): if vars_to_derive: self.derive_variables( - vars_to_derive=vars_to_derive, ts_dir=ts_dir[case_idx] + res=res, vars_to_derive=vars_to_derive, ts_dir=ts_dir[case_idx] ) # End with @@ -1008,75 +1008,107 @@ def setup_run_cvdp(self): ######### - def derive_variables(self, vars_to_derive=None, ts_dir=None, overwrite=None): + def derive_variables(self, res=None, vars_to_derive=None, ts_dir=None, overwrite=None): """ Derive variables acccording to steps given here. Since derivations will depend on the variable, each variable to derive will need its own set of steps below. - Caution: this method assumes that there will be one time series file per variable - If the file for the derived variable exists, the kwarg `overwrite` determines whether to overwrite the file (true) or exit with a warning message. + NOTE: This is not usable for variables with vertical levels yet - JR """ + import sympy as sp for var in vars_to_derive: - if var == "PRECT": - # PRECT can be found by simply adding PRECL and PRECC - # grab file names for the PRECL and PRECC files from the case ts directory - if glob.glob(os.path.join(ts_dir, "*PRECC*")) and glob.glob( - os.path.join(ts_dir, "*PRECL*") - ): - constit_files = sorted(glob.glob(os.path.join(ts_dir, "*PREC*"))) - else: - ermsg = "PRECC and PRECL were not both present; PRECT cannot be calculated." - ermsg += " Please remove PRECT from diag_var_list or find the relevant CAM files." - raise FileNotFoundError(ermsg) - # create new file name for PRECT - prect_file = constit_files[0].replace("PRECC", "PRECT") - if Path(prect_file).is_file(): - if overwrite: - Path(prect_file).unlink() - else: - print( - f"[{__name__}] Warning: PRECT file was found and overwrite is False. Will use existing file." - ) - continue - # append PRECC to the file containing PRECL - os.system(f"ncks -A -v PRECC {constit_files[0]} {constit_files[1]}") - # create new file with the sum of PRECC and PRECL - os.system( - f"ncap2 -s 'PRECT=(PRECC+PRECL)' {constit_files[1]} {prect_file}" - ) - if var == "RESTOM": - # RESTOM = FSNT-FLNT - # Have to be more precise than with PRECT because FSNTOA, FSTNC, etc are valid variables - if glob.glob(os.path.join(ts_dir, "*.FSNT.*")) and glob.glob( - os.path.join(ts_dir, "*.FLNT.*") - ): - input_files = [ - sorted(glob.glob(os.path.join(ts_dir, f"*.{v}.*"))) - for v in ["FLNT", "FSNT"] - ] - constit_files = [] - for elem in input_files: - constit_files += elem + print(f"\t - derived time series for {var}") + + #Check whether there are parts to derive from and if there is an associated equation + vres = res.get(var, {}) + if "derivable_from" in vres: + constit_list = vres['derivable_from'] + else: + print("WARNING: No constituents listed in defaults config file, moving on") + pass + #Define the string equation involving the constituent variables + if "derived_eq" in vres: + der_eq = vres['derived_eq'] + else: + print("WARNING: No derived equation in defaults config file, moving on") + pass + + # continue + #else: + # msg = f"WARNING: {var} is not in the file {hist_files[0]}." + # msg += " No time series will be generated." + # print(msg) + # continue + + constit_files = [] + for constit in constit_list: + if glob.glob(os.path.join(ts_dir, f"*.{constit}.*.nc")): + constit_files.append(glob.glob(os.path.join(ts_dir, f"*.{constit}.*"))[0]) + + + #Check if all the constituent files were found + #if len(constit_files) == len(constit_list): + + print("constit_files:",constit_files) + #Check if all the constituent files were found + if len(constit_files) != len(constit_list): + ermsg = f"Not all constituent files present; {var} cannot be calculated." + ermsg += f" Please remove {var} from diag_var_list or find the relevant CAM files." + raise FileNotFoundError(ermsg) + + #Open a new dataset with all the constituent files/variables + ds = xr.open_mfdataset(constit_files) + + # create new file name for derived variable + derived_file = constit_files[0].replace(constit_list[0], var) + if Path(derived_file).is_file(): + if overwrite: + Path(derived_file).unlink() else: - ermsg = "FSNT and FLNT were not both present; RESTOM cannot be calculated." - ermsg += " Please remove RESTOM from diag_var_list or find the relevant CAM files." - raise FileNotFoundError(ermsg) - # create new file name for RESTOM - derived_file = constit_files[0].replace("FLNT", "RESTOM") - if Path(derived_file).is_file(): - if overwrite: - Path(derived_file).unlink() - else: - print( - f"[{__name__}] Warning: RESTOM file was found and overwrite is False. Will use existing file." - ) - continue - # append FSNT to the file containing FLNT - os.system(f"ncks -A -v FLNT {constit_files[0]} {constit_files[1]}") - # create new file with the difference of FLNT and FSNT - os.system( - f"ncap2 -s 'RESTOM=(FSNT-FLNT)' {constit_files[1]} {derived_file}" - ) + print( + f"[{__name__}] Warning: '{var}' file was found and overwrite is False. Will use existing file." + ) + return None + + variables = {} + for i in constit_list: + variables[i] = ds[constit_list[0]].dims + + #Get coordinate values from the first constituent file + coords={'lat': ds[constit_list[0]].lat.values, 'lon': ds[constit_list[0]].lon.values, + "time": ds[constit_list[0]].time.values} + + #Create data arrays from each constituent + #These variables will all be added to one file that will eventually contain the + #derived variable as well + #NOTE: This has to be done via xarray dataArrays + # - There might be a way with xarray dataSets but none that have worked thus far + data_arrays = [] + for var_const, dims in variables.items(): + values = ds[var_const].values + da = xr.DataArray(values, coords=coords, dims=dims) + data_arrays.append(da) + + # Convert the string equation to a SymPy expression + symbolic_expression = sp.sympify(der_eq) + + # Create a list of SymPy symbols based on the variable names + symbolic_vars = [sp.symbols(var_const) for var_const in variables] + + # Use lambdify to create a function that can handle symbolic and numeric evaluations + numeric_function = sp.lambdify(symbolic_vars, symbolic_expression, 'numpy') + + # Define a function to convert SymPy expressions to NumPy functions + def sympy_function(*args): + return numeric_function(*args) + + # Apply the symbolic function to the list of xarray arrays + result_da = xr.apply_ufunc(sympy_function, *data_arrays, + dask='parallelized', output_dtypes=[float]) + + # Add new derived vairable to the dataset and save file + ds[var] = result_da + ds.to_netcdf(derived_file, unlimited_dims='time', mode='w') From 0b7fc588cc29817b84065bf11b309e916038fd14 Mon Sep 17 00:00:00 2001 From: justin-richling Date: Thu, 11 Jan 2024 14:53:14 -0700 Subject: [PATCH 2/3] Add derived equations --- lib/adf_variable_defaults.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/lib/adf_variable_defaults.yaml b/lib/adf_variable_defaults.yaml index e223fa695..d1ddf3a81 100644 --- a/lib/adf_variable_defaults.yaml +++ b/lib/adf_variable_defaults.yaml @@ -482,6 +482,7 @@ PRECT: obs_var_name: "PRECT" category: "Hydrologic cycle" derivable_from: ['PRECL','PRECC'] + derived_eq: "PRECL+PRECC" QFLX: category: "Hydrologic cycle" @@ -726,6 +727,7 @@ RESTOM: label : "W m$^{-2}$" category: "TOA energy flux" derivable_from: ['FLNT','FSNT'] + derived_eq: "FSNT-FLNT" SWCF: colormap: "Blues" @@ -1085,5 +1087,19 @@ utendwtem: obs_name: "ERA5" obs_var_name: "utendwtem" + +#----------- + +MADEUPVAR: + colormap: "Blues" + diff_colormap: "seismic" + new_unit: "unit" + mpl: + colorbar: + label : "unit" + category: "Super Duper" + derivable_from: ['PRECL','PRECC',"PRECSC"] + derived_eq: "PRECC-(0.5*PRECL)+PRECSC" + #----------- #End of File From dd62d0828aeeff1436c44bd570699a44d825ef68 Mon Sep 17 00:00:00 2001 From: justin-richling Date: Thu, 11 Jan 2024 15:13:42 -0700 Subject: [PATCH 3/3] Update adf_diag.py --- lib/adf_diag.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/lib/adf_diag.py b/lib/adf_diag.py index 075e0b4f8..1fef2d773 100644 --- a/lib/adf_diag.py +++ b/lib/adf_diag.py @@ -1036,23 +1036,11 @@ def derive_variables(self, res=None, vars_to_derive=None, ts_dir=None, overwrite print("WARNING: No derived equation in defaults config file, moving on") pass - # continue - #else: - # msg = f"WARNING: {var} is not in the file {hist_files[0]}." - # msg += " No time series will be generated." - # print(msg) - # continue - constit_files = [] for constit in constit_list: if glob.glob(os.path.join(ts_dir, f"*.{constit}.*.nc")): constit_files.append(glob.glob(os.path.join(ts_dir, f"*.{constit}.*"))[0]) - - #Check if all the constituent files were found - #if len(constit_files) == len(constit_list): - - print("constit_files:",constit_files) #Check if all the constituent files were found if len(constit_files) != len(constit_list): ermsg = f"Not all constituent files present; {var} cannot be calculated."