From 5b94458a8e55c07a9421e42b54be61f82bbf1658 Mon Sep 17 00:00:00 2001 From: aradhakrishnanGFDL Date: Tue, 22 Oct 2024 11:07:29 -0400 Subject: [PATCH] the other columns for static seem ok for the first pass --- catalogbuilder/intakebuilder/getinfo.py | 10 ++++++++-- catalogbuilder/intakebuilder/gfdlcrawler.py | 7 ++++++- configs/config-template.yaml | 4 ++-- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/catalogbuilder/intakebuilder/getinfo.py b/catalogbuilder/intakebuilder/getinfo.py index a82f2e3..d30f6d6 100644 --- a/catalogbuilder/intakebuilder/getinfo.py +++ b/catalogbuilder/intakebuilder/getinfo.py @@ -112,8 +112,10 @@ def getInfoFromGFDLFilename(filename,dictInfo,logger,configyaml): if( "static" in filename ): ## For static we handle this differently . The GFDL PP expected pattern is atmos.static.nc #TODO figure out better ways to set this and use fixed for frequency and table_id - output_file_template = ['realm','frequency'] + output_file_template = ['realm'] dictInfo["variable_id"] = "fixed" #TODO verify if variable_id is a key + dictInfo["frequency"] = "fixed" + dictInfo["table_id"] = "fixed" ## nlen = len(output_file_template) for i in range(nlen-1,-1,-1): #nlen = 3 @@ -132,11 +134,12 @@ def getInfoFromGFDLFilename(filename,dictInfo,logger,configyaml): print(dictInfo) return dictInfo -def getInfoFromGFDLDRS(dirpath,projectdir,dictInfo,configyaml): +def getInfoFromGFDLDRS(dirpath,projectdir,dictInfo,configyaml,variable_id): ''' Returns info from project directory and the DRS path to the file :param dirpath: :param drsstructure: + :param variable_id to check for static :return: ''' # we need thise dict keys "project", "institute", "model", "experiment_id", @@ -159,6 +162,9 @@ def getInfoFromGFDLDRS(dirpath,projectdir,dictInfo,configyaml): output_path_template = builderconfig.output_path_template except: sys.exit("No output_path_template found in builderconfig.py. Check configuration.") + #If variable_id is fixed, it's a GFDL PP static dataset and the output path template in config is aligned only up to a particular directory structure as this does not have the ts and frequency or time chunks + if(variable_id == "fixed"): + output_path_template = output_path_template[:-3 or None] nlen = len(output_path_template) for i in range(nlen-1,0,-1): try: diff --git a/catalogbuilder/intakebuilder/gfdlcrawler.py b/catalogbuilder/intakebuilder/gfdlcrawler.py index c1117c9..919ae5a 100644 --- a/catalogbuilder/intakebuilder/gfdlcrawler.py +++ b/catalogbuilder/intakebuilder/gfdlcrawler.py @@ -95,7 +95,12 @@ def crawlLocal(projectdir, dictFilter,dictFilterIgnore,logger,configyaml,slow): else: dictInfo = getinfo.getInfoFromGFDLFilename(filename,dictInfo, logger,configyaml) print("1. ", dictInfo) - dictInfo = getinfo.getInfoFromGFDLDRS(dirpath, projectdir, dictInfo,configyaml) + if "variable_id" in dictInfo.keys(): + if dictInfo["variable_id"] is not None: + variable_id = dictInfo["variable_id"] + else: + variable_id = "" + dictInfo = getinfo.getInfoFromGFDLDRS(dirpath, projectdir, dictInfo,configyaml,variable_id) print("2.", dictInfo) list_bad_modellabel = ["","piControl","land-hist","piClim-SO2","abrupt-4xCO2","hist-piAer","hist-piNTCF","piClim-ghg","piClim-OC","hist-GHG","piClim-BC","1pctCO2"] list_bad_chunklabel = ['DO_NOT_USE'] diff --git a/configs/config-template.yaml b/configs/config-template.yaml index 85417ad..d1863e2 100644 --- a/configs/config-template.yaml +++ b/configs/config-template.yaml @@ -26,8 +26,8 @@ headerlist: ["activity_id", "institution_id", "source_id", "experiment_id", #this is a valid value in headerlist as well. #The fourth directory is am5f3b1r0 which does not map to an existing header value. So we simply NA in output_path_template #for the fourth value. - -output_path_template: ['NA','NA','source_id','NA','experiment_id','platform','custom_pp','realm','cell_methods','frequency','chunk_freq'] +#/archive/a1r/fre/FMS2024.02_OM5_20240724/CM4.5v01_om5b06_piC_noBLING/gfdl.ncrc5-intel23-prod-openmp/pp/ocean_monthly/ocean_monthly.static.nc +output_path_template: ['NA','NA','NA','source_id','experiment_id','platform','custom_pp','realm','cell_methods','frequency','chunk_freq'] output_file_template: ['realm','time_range','variable_id']