From f80c7f011cbbaf20de5a55299d04d57d52ddce74 Mon Sep 17 00:00:00 2001 From: Kristopher Rand Date: Thu, 24 Oct 2024 09:54:29 -0400 Subject: [PATCH 1/5] #36 Accommodate filenames that do not contain periods --- catalogbuilder/intakebuilder/gfdlcrawler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/catalogbuilder/intakebuilder/gfdlcrawler.py b/catalogbuilder/intakebuilder/gfdlcrawler.py index b2e29c9..e560191 100644 --- a/catalogbuilder/intakebuilder/gfdlcrawler.py +++ b/catalogbuilder/intakebuilder/gfdlcrawler.py @@ -79,7 +79,8 @@ def crawlLocal(projectdir, dictFilter,dictFilterIgnore,logger,configyaml,slow): continue #if our filename expectations are not met compared to the output_file_path_template in config, skip the loop. TODO revisit for statics if ("static" not in filename): - if ((len(filename.split('.'))-1) != len(set_ftemplate)): + splitchar = "_" if "/uda" in filename else "." + if ((len(filename.split(splitchar))-1) != len(set_ftemplate)): logger.debug("Skipping "+filename) continue logger.debug(dirpath+"/"+filename) From 8b3a2c9fed891ae8f5a95af1258d61dd63388161 Mon Sep 17 00:00:00 2001 From: Kristopher Rand Date: Thu, 24 Oct 2024 10:15:05 -0400 Subject: [PATCH 2/5] #36 small adjustment --- catalogbuilder/intakebuilder/gfdlcrawler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/catalogbuilder/intakebuilder/gfdlcrawler.py b/catalogbuilder/intakebuilder/gfdlcrawler.py index e560191..5c1a7d1 100644 --- a/catalogbuilder/intakebuilder/gfdlcrawler.py +++ b/catalogbuilder/intakebuilder/gfdlcrawler.py @@ -79,7 +79,7 @@ def crawlLocal(projectdir, dictFilter,dictFilterIgnore,logger,configyaml,slow): continue #if our filename expectations are not met compared to the output_file_path_template in config, skip the loop. TODO revisit for statics if ("static" not in filename): - splitchar = "_" if "/uda" in filename else "." + splitchar = "_" if "/uda" in filepath else "." 
if ((len(filename.split(splitchar))-1) != len(set_ftemplate)): logger.debug("Skipping "+filename) continue From c4cc716b0496dcb116d68d69e2a6f00e56f2dfe1 Mon Sep 17 00:00:00 2001 From: Kristopher Rand Date: Mon, 28 Oct 2024 09:18:21 -0400 Subject: [PATCH 3/5] #36 commenting out getinfo calls to make things work --- catalogbuilder/intakebuilder/gfdlcrawler.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/catalogbuilder/intakebuilder/gfdlcrawler.py b/catalogbuilder/intakebuilder/gfdlcrawler.py index 5c1a7d1..7b098b1 100644 --- a/catalogbuilder/intakebuilder/gfdlcrawler.py +++ b/catalogbuilder/intakebuilder/gfdlcrawler.py @@ -79,20 +79,20 @@ def crawlLocal(projectdir, dictFilter,dictFilterIgnore,logger,configyaml,slow): continue #if our filename expectations are not met compared to the output_file_path_template in config, skip the loop. TODO revisit for statics if ("static" not in filename): - splitchar = "_" if "/uda" in filepath else "." - if ((len(filename.split(splitchar))-1) != len(set_ftemplate)): + #splitchar = "_" if "/uda" in filepath else "." 
+ if ((len(filename.split('.'))) != len(set_ftemplate)): logger.debug("Skipping "+filename) - continue + continue logger.debug(dirpath+"/"+filename) dictInfo = {} dictInfo = getinfo.getProject(projectdir, dictInfo) # get info from filename #filepath = os.path.join(dirpath,filename) # 1 AR: Bugfix: this needs to join dirpath and filename to get the full path to the file dictInfo["path"]=filepath - if (op.countOf(filename,".") == 1): - dictInfo = getinfo.getInfoFromFilename(filename,dictInfo, logger) - else: - dictInfo = getinfo.getInfoFromGFDLFilename(filename,dictInfo, logger,configyaml) + #if (op.countOf(filename,".") == 1): + # dictInfo = getinfo.getInfoFromFilename(filename,dictInfo, logger) + #else: + # dictInfo = getinfo.getInfoFromGFDLFilename(filename,dictInfo, logger,configyaml) dictInfo = getinfo.getInfoFromGFDLDRS(dirpath, projectdir, dictInfo,configyaml) list_bad_modellabel = ["","piControl","land-hist","piClim-SO2","abrupt-4xCO2","hist-piAer","hist-piNTCF","piClim-ghg","piClim-OC","hist-GHG","piClim-BC","1pctCO2"] list_bad_chunklabel = ['DO_NOT_USE'] From 1aedfb6c800ed87a9e040296d6dc97deda7cbd5d Mon Sep 17 00:00:00 2001 From: Kristopher Rand Date: Wed, 30 Oct 2024 09:14:45 -0400 Subject: [PATCH 4/5] #36 Insert 'UDA' as activity_id if in the path --- catalogbuilder/intakebuilder/gfdlcrawler.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/catalogbuilder/intakebuilder/gfdlcrawler.py b/catalogbuilder/intakebuilder/gfdlcrawler.py index 7b098b1..8f8fc6d 100644 --- a/catalogbuilder/intakebuilder/gfdlcrawler.py +++ b/catalogbuilder/intakebuilder/gfdlcrawler.py @@ -86,6 +86,8 @@ def crawlLocal(projectdir, dictFilter,dictFilterIgnore,logger,configyaml,slow): logger.debug(dirpath+"/"+filename) dictInfo = {} dictInfo = getinfo.getProject(projectdir, dictInfo) + if "/uda" in filepath: + dictInfo["activity_id"] = "UDA" # get info from filename #filepath = os.path.join(dirpath,filename) # 1 AR: Bugfix: this needs to join dirpath and filename to get the full 
path to the file dictInfo["path"]=filepath From d9def0d68bfa9ae94a1e9d1e8f644500c84835d4 Mon Sep 17 00:00:00 2001 From: Kristopher Rand Date: Thu, 31 Oct 2024 09:42:46 -0400 Subject: [PATCH 5/5] #36 Extend conditional for UDA activity_id naming --- catalogbuilder/intakebuilder/gfdlcrawler.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/catalogbuilder/intakebuilder/gfdlcrawler.py b/catalogbuilder/intakebuilder/gfdlcrawler.py index 8f8fc6d..7cd8651 100644 --- a/catalogbuilder/intakebuilder/gfdlcrawler.py +++ b/catalogbuilder/intakebuilder/gfdlcrawler.py @@ -77,17 +77,21 @@ def crawlLocal(projectdir, dictFilter,dictFilterIgnore,logger,configyaml,slow): if not filename.endswith(".nc"): logger.debug("FILE does not end with .nc. Skipping "+ filepath) continue - #if our filename expectations are not met compared to the output_file_path_template in config, skip the loop. TODO revisit for statics - if ("static" not in filename): - #splitchar = "_" if "/uda" in filepath else "." - if ((len(filename.split('.'))) != len(set_ftemplate)): - logger.debug("Skipping "+filename) - continue - logger.debug(dirpath+"/"+filename) dictInfo = {} - dictInfo = getinfo.getProject(projectdir, dictInfo) if "/uda" in filepath: + if len(filename.split('.')) != len(set_ftemplate): + logger.debug("Skipping "+filename) + continue dictInfo["activity_id"] = "UDA" + #if our filename expectations are not met compared to the output_file_path_template in config, skip the loop. TODO revisit for statics + else: + if ("static" not in filename): + if ((len(filename.split('.'))-1) != len(set_ftemplate)): + logger.debug("Skipping "+filename) + continue + + logger.debug(dirpath+"/"+filename) + dictInfo = getinfo.getProject(projectdir, dictInfo) # get info from filename #filepath = os.path.join(dirpath,filename) # 1 AR: Bugfix: this needs to join dirpath and filename to get the full path to the file dictInfo["path"]=filepath