From 4ca961876f5201575f48ffdbe64075900b01cf0c Mon Sep 17 00:00:00 2001 From: aradhakrishnanGFDL Date: Wed, 20 Nov 2024 12:38:39 -0500 Subject: [PATCH 1/3] likely a bug but revealed with cesm testing --- catalogbuilder/intakebuilder/gfdlcrawler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/catalogbuilder/intakebuilder/gfdlcrawler.py b/catalogbuilder/intakebuilder/gfdlcrawler.py index d5e0816..a2b3c12 100644 --- a/catalogbuilder/intakebuilder/gfdlcrawler.py +++ b/catalogbuilder/intakebuilder/gfdlcrawler.py @@ -79,7 +79,8 @@ def crawlLocal(projectdir, dictFilter,dictFilterIgnore,logger,configyaml,slow): continue #if our filename expectations are not met compared to the output_file_path_template in config, skip the loop. TODO revisit for statics if ("static" not in filename): - if ((len(filename.split('.'))-1) != len(set_ftemplate)): + #A set removes duplicates (and is unordered), so its length does not serve the purpose when there is more than one NA, as in CESM; compare against the list instead + if ((len(filename.split('.'))-1) != len(list_ftemplate)): logger.debug("Skipping "+filename) continue logger.debug(dirpath+"/"+filename) From cac733091c1d3d796958c88d84183f6a30801312 Mon Sep 17 00:00:00 2001 From: aradhakrishnanGFDL Date: Wed, 20 Nov 2024 12:53:39 -0500 Subject: [PATCH 2/3] cesm catalog generated with exception to tweaking realm and freq --- catalogbuilder/intakebuilder/getinfo.py | 3 ++- catalogbuilder/intakebuilder/gfdlcrawler.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/catalogbuilder/intakebuilder/getinfo.py b/catalogbuilder/intakebuilder/getinfo.py index 6f85b03..8a79843 100644 --- a/catalogbuilder/intakebuilder/getinfo.py +++ b/catalogbuilder/intakebuilder/getinfo.py @@ -227,8 +227,9 @@ def getInfoFromVarAtts(fname,variable_id,dictInfo,att="standard_name",filexra=No :return: dictInfo with all variable atts ''' #try: + filexr,filexra = return_xr(fname) - #print("Variable atts from file:",filexr[variable_id]) + #print("look up Variable atts from 
file:",filexr[variable_id]) if (dictInfo[att] == "na"): try: cfname = filexr[variable_id].attrs["standard_name"] diff --git a/catalogbuilder/intakebuilder/gfdlcrawler.py b/catalogbuilder/intakebuilder/gfdlcrawler.py index cdea5a1..d3e7666 100644 --- a/catalogbuilder/intakebuilder/gfdlcrawler.py +++ b/catalogbuilder/intakebuilder/gfdlcrawler.py @@ -141,7 +141,7 @@ def crawlLocal(projectdir, dictFilter,dictFilterIgnore,logger,configyaml,slow): if qualities in unique_datasets.keys(): standard_name=unique_datasets[qualities] dictInfo["standard_name"]=standard_name - + print("test..",qualities,standard_name) else: logger.info("Retrieving standard_name from "+ (str)(filename)) getinfo.getInfoFromVarAtts(dictInfo["path"],dictInfo["variable_id"],dictInfo) From 751c3ef89dd7f45c8c89dd352b30a27bbfd2dc03 Mon Sep 17 00:00:00 2001 From: aradhakrishnanGFDL Date: Wed, 20 Nov 2024 14:58:34 -0500 Subject: [PATCH 3/3] works with cam now --- catalogbuilder/intakebuilder/dat/gfdlcmipfreq.yaml | 2 ++ catalogbuilder/intakebuilder/getinfo.py | 11 +++++++++++ catalogbuilder/intakebuilder/gfdlcrawler.py | 3 +-- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/catalogbuilder/intakebuilder/dat/gfdlcmipfreq.yaml b/catalogbuilder/intakebuilder/dat/gfdlcmipfreq.yaml index 240c103..d60c268 100644 --- a/catalogbuilder/intakebuilder/dat/gfdlcmipfreq.yaml +++ b/catalogbuilder/intakebuilder/dat/gfdlcmipfreq.yaml @@ -1,5 +1,7 @@ monthly: frequency: mon +h0: + frequency: mon daily: frequency: day hourly: diff --git a/catalogbuilder/intakebuilder/getinfo.py b/catalogbuilder/intakebuilder/getinfo.py index 8a79843..68b8506 100644 --- a/catalogbuilder/intakebuilder/getinfo.py +++ b/catalogbuilder/intakebuilder/getinfo.py @@ -134,6 +134,13 @@ def getInfoFromGFDLFilename(filename,dictInfo,logger,configyaml): dictInfo["table_id"] = "fx" return dictInfo +def getRealm(dictInfo): + realm = "" + if (dictInfo["source_id"] == "cam"): + realm = "atmos" + dictInfo["realm"] = realm + return(dictInfo) + 
def getInfoFromGFDLDRS(dirpath,projectdir,dictInfo,configyaml,variable_id,logger): ''' Returns info from project directory and the DRS path to the file @@ -193,6 +200,10 @@ def getInfoFromGFDLDRS(dirpath,projectdir,dictInfo,configyaml,variable_id,logger print("This is likely static") dictInfo["cell_methods"] = "" dictInfo["member_id"] = "" + #CAM ESM: If realm has not been set at this point, use the getRealm helper utility to populate it + + if("realm" not in dictInfo.keys()): + dictInfo = getRealm(dictInfo) return dictInfo def getInfoFromDRS(dirpath,projectdir,dictInfo): diff --git a/catalogbuilder/intakebuilder/gfdlcrawler.py b/catalogbuilder/intakebuilder/gfdlcrawler.py index d3e7666..210e438 100644 --- a/catalogbuilder/intakebuilder/gfdlcrawler.py +++ b/catalogbuilder/intakebuilder/gfdlcrawler.py @@ -58,7 +58,6 @@ def crawlLocal(projectdir, dictFilter,dictFilterIgnore,logger,configyaml,slow): missingcols = [col for col in diffcols if col not in set_ftemplate] missingcols.remove("path") #because we get this anyway logger.debug("Missing cols from metadata sources:"+ (str)(missingcols)) - #Creating a dictionary to track the unique datasets we come across when using slow mode #The keys are the standard names and the values are lists tracking var_id,realm,etc.. unique_datasets = {'':''} @@ -156,6 +155,6 @@ def crawlLocal(projectdir, dictFilter,dictFilterIgnore,logger,configyaml,slow): cmipfreq = getinfo.getFreqFromYAML(yamlfile,gfdlfreq=dictInfo['frequency']) if(cmipfreq is not None): dictInfo['frequency'] = cmipfreq - #print("Adjusting frequency from ", gfdlfreq ," to ",cmipfreq) + print("Adjusting frequency from ", gfdlfreq ," to ",cmipfreq) listfiles.append(dictInfo) return listfiles