Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

53 static #75

Merged
merged 15 commits into from
Oct 30, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion catalogbuilder/intakebuilder/builderconfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
headerlist = ["activity_id", "institution_id", "source_id", "experiment_id",
"frequency", "realm", "table_id",
"member_id", "grid_label", "variable_id",
"time_range", "chunk_freq","grid_label","platform","dimensions","cell_methods","standard_name","path"]
"time_range", "chunk_freq","platform","dimensions","cell_methods","standard_name","path"]
aradhakrishnanGFDL marked this conversation as resolved.
Show resolved Hide resolved

#what kind of directory structure to expect?
#For a directory structure like /archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp
Expand Down
27 changes: 23 additions & 4 deletions catalogbuilder/intakebuilder/getinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,13 @@ def getInfoFromGFDLFilename(filename,dictInfo,logger,configyaml):
output_file_template = builderconfig.output_file_template
except:
sys.exit("No output_path_template found. Check configuration.")
if( "static" in filename ):
aradhakrishnanGFDL marked this conversation as resolved.
Show resolved Hide resolved
## Static files are handled differently. The expected GFDL PP filename pattern is e.g. atmos.static.nc
#TODO error checking as needed
output_file_template = ['realm']
dictInfo["variable_id"] = "fixed"
dictInfo["frequency"] = "fx"
dictInfo["table_id"] = "fx"
aradhakrishnanGFDL marked this conversation as resolved.
Show resolved Hide resolved
nlen = len(output_file_template)
for i in range(nlen-1,-1,-1): #nlen = 3
try:
Expand All @@ -122,13 +129,15 @@ def getInfoFromGFDLFilename(filename,dictInfo,logger,configyaml):
sys.exit("oops in getInfoFromGFDLFilename"+str(i)+str(j)+output_file_template[i]+stemdir[j])
j = j - 1
cnt = cnt + 1
print(dictInfo)
return dictInfo

def getInfoFromGFDLDRS(dirpath,projectdir,dictInfo,configyaml):
def getInfoFromGFDLDRS(dirpath,projectdir,dictInfo,configyaml,variable_id):
'''
Returns info from project directory and the DRS path to the file
:param dirpath:
:param drsstructure:
:param variable_id: used to check whether this is a static dataset
:return:
'''
# we need these dict keys "project", "institute", "model", "experiment_id",
Expand All @@ -151,7 +160,9 @@ def getInfoFromGFDLDRS(dirpath,projectdir,dictInfo,configyaml):
output_path_template = builderconfig.output_path_template
except:
sys.exit("No output_path_template found in builderconfig.py. Check configuration.")

#If variable_id is "fixed", this is a GFDL PP static dataset: its path has no ts, frequency or time-chunk directories, so the last three entries of the output path template from config are dropped.
if(variable_id == "fixed"):
output_path_template = output_path_template[:-3 or None]
nlen = len(output_path_template)
for i in range(nlen-1,0,-1):
try:
Expand All @@ -168,10 +179,18 @@ def getInfoFromGFDLDRS(dirpath,projectdir,dictInfo,configyaml):
# We do not want to work with anything
# that's not time series
#TODO have verbose option to print message
#TODO Make this elegant and intuitive
#TODO logger messages, not print
if "cell_methods" in dictInfo.keys():
if (dictInfo["cell_methods"] != "ts"):
#print("Skipping non-timeseries data")
if (dictInfo["cell_methods"] == "av"):
print("Skipping time-average data")
return {}
elif (dictInfo["cell_methods"] == "ts"):
print("time-series data")
else:
print("This is likely static")
dictInfo["cell_methods"] = ""
dictInfo["member_id"] = ""
return dictInfo

def getInfoFromDRS(dirpath,projectdir,dictInfo):
Expand Down
11 changes: 8 additions & 3 deletions catalogbuilder/intakebuilder/gfdlcrawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def crawlLocal(projectdir, dictFilter,dictFilterIgnore,logger,configyaml,slow):
if ( len(set_ftemplate) > 0 ):
missingcols = [col for col in diffcols if col not in set_ftemplate]
missingcols.remove("path") #because we get this anyway
logger.debug("Missing cols from metadata sources:", missingcols)
logger.debug("Missing cols from metadata sources:"+ (str)(missingcols))


#TODO INCLUDE filter in traversing through directories at the top
Expand Down Expand Up @@ -92,7 +92,12 @@ def crawlLocal(projectdir, dictFilter,dictFilterIgnore,logger,configyaml,slow):
dictInfo = getinfo.getInfoFromFilename(filename,dictInfo, logger)
else:
dictInfo = getinfo.getInfoFromGFDLFilename(filename,dictInfo, logger,configyaml)
dictInfo = getinfo.getInfoFromGFDLDRS(dirpath, projectdir, dictInfo,configyaml)
if "variable_id" in dictInfo.keys():
if dictInfo["variable_id"] is not None:
variable_id = dictInfo["variable_id"]
else:
variable_id = ""
dictInfo = getinfo.getInfoFromGFDLDRS(dirpath, projectdir, dictInfo,configyaml,variable_id)
list_bad_modellabel = ["","piControl","land-hist","piClim-SO2","abrupt-4xCO2","hist-piAer","hist-piNTCF","piClim-ghg","piClim-OC","hist-GHG","piClim-BC","1pctCO2"]
list_bad_chunklabel = ['DO_NOT_USE']
if "source_id" in dictInfo:
Expand Down Expand Up @@ -129,6 +134,6 @@ def crawlLocal(projectdir, dictFilter,dictFilterIgnore,logger,configyaml,slow):
cmipfreq = getinfo.getFreqFromYAML(yamlfile,gfdlfreq=dictInfo['frequency'])
if(cmipfreq is not None):
dictInfo['frequency'] = cmipfreq
#print("Adjusting frequency from ", gfdlfreq ," to ",cmipfreq)
#print("Adjusting frequency from ", gfdlfreq ," to ",cmipfreq)
listfiles.append(dictInfo)
return listfiles
4 changes: 2 additions & 2 deletions configs/config-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ headerlist: ["activity_id", "institution_id", "source_id", "experiment_id",
#this is a valid value in headerlist as well.
#The fourth directory is am5f3b1r0, which does not map to an existing header value. So we simply use NA in output_path_template
#for the fourth value.

output_path_template: ['NA','NA','source_id','NA','experiment_id','platform','custom_pp','realm','cell_methods','frequency','chunk_freq']
#/archive/a1r/fre/FMS2024.02_OM5_20240724/CM4.5v01_om5b06_piC_noBLING/gfdl.ncrc5-intel23-prod-openmp/pp/ocean_monthly/ocean_monthly.static.nc
output_path_template: ['NA','NA','NA','source_id','experiment_id','platform','custom_pp','realm','cell_methods','frequency','chunk_freq']

output_file_template: ['realm','time_range','variable_id']

Expand Down