diff --git a/catalogbuilder/intakebuilder/dat/gfdlcmipfreq.yaml b/catalogbuilder/intakebuilder/dat/gfdlcmipfreq.yaml index 240c103..d60c268 100644 --- a/catalogbuilder/intakebuilder/dat/gfdlcmipfreq.yaml +++ b/catalogbuilder/intakebuilder/dat/gfdlcmipfreq.yaml @@ -1,5 +1,7 @@ monthly: frequency: mon +h0: + frequency: mon daily: frequency: day hourly: diff --git a/catalogbuilder/intakebuilder/getinfo.py b/catalogbuilder/intakebuilder/getinfo.py index 6f85b03..68b8506 100644 --- a/catalogbuilder/intakebuilder/getinfo.py +++ b/catalogbuilder/intakebuilder/getinfo.py @@ -134,6 +134,13 @@ def getInfoFromGFDLFilename(filename,dictInfo,logger,configyaml): dictInfo["table_id"] = "fx" return dictInfo +def getRealm(dictInfo): + realm = "" + if (dictInfo["source_id"] == "cam"): + realm = "atmos" + dictInfo["realm"] = realm + return(dictInfo) + def getInfoFromGFDLDRS(dirpath,projectdir,dictInfo,configyaml,variable_id,logger): ''' Returns info from project directory and the DRS path to the file @@ -193,6 +200,10 @@ def getInfoFromGFDLDRS(dirpath,projectdir,dictInfo,configyaml,variable_id,logger print("This is likely static") dictInfo["cell_methods"] = "" dictInfo["member_id"] = "" + #CAM ESM: if the realm key is missing from dictInfo, call the getRealm helper to populate it + + if("realm" not in dictInfo.keys()): + dictInfo = getRealm(dictInfo) return dictInfo def getInfoFromDRS(dirpath,projectdir,dictInfo): @@ -227,8 +238,9 @@ def getInfoFromVarAtts(fname,variable_id,dictInfo,att="standard_name",filexra=No :return: dictInfo with all variable atts ''' #try: + filexr,filexra = return_xr(fname) - #print("Variable atts from file:",filexr[variable_id]) + #print("look up Variable atts from file:",filexr[variable_id]) if (dictInfo[att] == "na"): try: cfname = filexr[variable_id].attrs["standard_name"] diff --git a/catalogbuilder/intakebuilder/gfdlcrawler.py b/catalogbuilder/intakebuilder/gfdlcrawler.py index a02920b..210e438 100644 --- a/catalogbuilder/intakebuilder/gfdlcrawler.py +++ 
b/catalogbuilder/intakebuilder/gfdlcrawler.py @@ -58,7 +58,6 @@ def crawlLocal(projectdir, dictFilter,dictFilterIgnore,logger,configyaml,slow): missingcols = [col for col in diffcols if col not in set_ftemplate] missingcols.remove("path") #because we get this anyway logger.debug("Missing cols from metadata sources:"+ (str)(missingcols)) - #Creating a dictionary to track the unique datasets we come across when using slow mode #The keys are the standard names and the values are lists tracking var_id,realm,etc.. unique_datasets = {'':''} @@ -82,7 +81,8 @@ def crawlLocal(projectdir, dictFilter,dictFilterIgnore,logger,configyaml,slow): continue #if our filename expectations are not met compared to the output_file_path_template in config, skip the loop. TODO revisit for statics if ("static" not in filename): - if ((len(filename.split('.'))-1) != len(set_ftemplate)): + #A set drops duplicates and is unordered, so its length is wrong when the template has more than one NA (as in CESM); compare against the list instead + if ((len(filename.split('.'))-1) != len(list_ftemplate)): logger.debug("Skipping "+filename) continue logger.debug(dirpath+"/"+filename) @@ -140,7 +140,7 @@ def crawlLocal(projectdir, dictFilter,dictFilterIgnore,logger,configyaml,slow): if qualities in unique_datasets.keys(): standard_name=unique_datasets[qualities] dictInfo["standard_name"]=standard_name - + logger.debug("Reusing cached standard_name "+ (str)(standard_name) +" for "+ (str)(qualities)) else: logger.info("Retrieving standard_name from "+ (str)(filename)) getinfo.getInfoFromVarAtts(dictInfo["path"],dictInfo["variable_id"],dictInfo) @@ -155,6 +155,6 @@ def crawlLocal(projectdir, dictFilter,dictFilterIgnore,logger,configyaml,slow): cmipfreq = getinfo.getFreqFromYAML(yamlfile,gfdlfreq=dictInfo['frequency']) if(cmipfreq is not None): dictInfo['frequency'] = cmipfreq - #print("Adjusting frequency from ", gfdlfreq ," to ",cmipfreq) + logger.debug("Adjusting frequency to "+ (str)(cmipfreq)) listfiles.append(dictInfo) return listfiles