From f602aca2eb06509a1588883f76c21b4129c77aa4 Mon Sep 17 00:00:00 2001 From: aradhakrishnanGFDL Date: Thu, 26 Sep 2024 10:30:21 -0400 Subject: [PATCH 1/2] bugfix - when we skip extraneous files, ensure the default config pathway also works. There were some assumptions, and that's taken care of --- catalogbuilder/intakebuilder/gfdlcrawler.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/catalogbuilder/intakebuilder/gfdlcrawler.py b/catalogbuilder/intakebuilder/gfdlcrawler.py index 4071040..00455f2 100644 --- a/catalogbuilder/intakebuilder/gfdlcrawler.py +++ b/catalogbuilder/intakebuilder/gfdlcrawler.py @@ -25,7 +25,6 @@ def crawlLocal(projectdir, dictFilter,dictFilterIgnore,logger,configyaml,slow): headerlist = configyaml.headerlist else: headerlist = builderconfig.headerlist - #For those columns that we cannot find in output path template or output file template from config yaml, we have hooks #now to look up the netcdf dataset if slow option is True #todo catch exceptions upon furhter testing @@ -38,8 +37,18 @@ def crawlLocal(projectdir, dictFilter,dictFilterIgnore,logger,configyaml,slow): if (configyaml.output_path_template is not None) & (configyaml.output_file_template is not None) : list_ptemplate = configyaml.output_path_template list_ftemplate = configyaml.output_file_template - set_ptemplate = set(list_ptemplate) - set_ftemplate = set(list_ftemplate) + else: + #if it is none, the user is likely using default config which may be phased out, or redesigned to use a config template json rather than builderconfig + try: + list_ptemplate = builderconfig.output_path_template + except: + sys.exit("output_path_template is not set. Check your configuration") + try: + list_ftemplate = builderconfig.output_file_template + except: + sys.exit("output_file_template is not set.
Check your configuration") + set_ptemplate = set(list_ptemplate) + set_ftemplate = set(list_ftemplate) #print(headerlist) #print(list_ptemplate) #print(list_ftemplate) From 34a6e3e986399e33c3e91a6f339b709ce6859209 Mon Sep 17 00:00:00 2001 From: aradhakrishnanGFDL Date: Thu, 26 Sep 2024 10:31:26 -0400 Subject: [PATCH 2/2] adding standard_name here. TODO eventually we need to streamline the code to ensure --config is used internally with a GFDL PP config template instead of using builderconfig --- catalogbuilder/intakebuilder/builderconfig.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/catalogbuilder/intakebuilder/builderconfig.py b/catalogbuilder/intakebuilder/builderconfig.py index 0ca71ae..d6c9ad1 100644 --- a/catalogbuilder/intakebuilder/builderconfig.py +++ b/catalogbuilder/intakebuilder/builderconfig.py @@ -15,7 +15,7 @@ headerlist = ["activity_id", "institution_id", "source_id", "experiment_id", "frequency", "realm", "table_id", "member_id", "grid_label", "variable_id", - "time_range", "chunk_freq","grid_label","platform","dimensions","cell_methods","path"] + "time_range", "chunk_freq","grid_label","platform","dimensions","cell_methods","standard_name","path"] #what kind of directory structure to expect? #For a directory structure like /archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp