From c4e45205e3e54b2d2aa287cb00ae5bae8a98231d Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Tue, 20 Aug 2024 11:12:54 +0200 Subject: [PATCH 1/6] passing adjustment_dir to L2toL3.py --- src/pypromice/process/L2toL3.py | 12 ++++++---- src/pypromice/process/get_l2.py | 31 +++++++++++++++++--------- src/pypromice/process/get_l2tol3.py | 27 ++++++++++++++++++---- src/pypromice/qc/github_data_issues.py | 2 +- 4 files changed, 52 insertions(+), 20 deletions(-) diff --git a/src/pypromice/process/L2toL3.py b/src/pypromice/process/L2toL3.py index 004cd615..5c50ee93 100755 --- a/src/pypromice/process/L2toL3.py +++ b/src/pypromice/process/L2toL3.py @@ -8,11 +8,15 @@ from sklearn.linear_model import LinearRegression from pypromice.qc.github_data_issues import adjustData from scipy.interpolate import interp1d +from pathlib import Path import logging logger = logging.getLogger(__name__) -def toL3(L2, station_config={}, T_0=273.15): +def toL3(L2, + data_adjustments_dir: Path, + station_config={}, + T_0=273.15): '''Process one Level 2 (L2) product to Level 3 (L3) meaning calculating all derived variables: - Turbulent fluxes @@ -109,7 +113,7 @@ def toL3(L2, station_config={}, T_0=273.15): # processing continuous surface height, ice surface height, snow height try: - ds = process_surface_height(ds, station_config) + ds = process_surface_height(ds, data_adjustments_dir, station_config) except Exception as e: logger.error("Error processing surface height at %s"%L2.attrs['station_id']) logging.error(e, exc_info=True) @@ -130,7 +134,7 @@ def toL3(L2, station_config={}, T_0=273.15): return ds -def process_surface_height(ds, station_config={}): +def process_surface_height(ds, data_adjustments_dir, station_config={}): """ Process surface height data for different site types and create surface height variables. 
@@ -180,7 +184,7 @@ def process_surface_height(ds, station_config={}): ds.z_boom_l.sel(time=first_valid_index) - ds['z_boom_l']) # Adjust data for the created surface height variables - ds = adjustData(ds, var_list=['z_surf_1', 'z_surf_2', 'z_ice_surf']) + ds = adjustData(ds, data_adjustments_dir, var_list=['z_surf_1', 'z_surf_2', 'z_ice_surf']) # Convert to dataframe and combine surface height variables df_in = ds[[v for v in ['z_surf_1', 'z_surf_2', 'z_ice_surf'] if v in ds.data_vars]].to_dataframe() diff --git a/src/pypromice/process/get_l2.py b/src/pypromice/process/get_l2.py index dd82f806..69a18317 100644 --- a/src/pypromice/process/get_l2.py +++ b/src/pypromice/process/get_l2.py @@ -31,10 +31,27 @@ def get_l2(config_file, inpath, outpath, variables, metadata, data_issues_path: # Define input path station_name = config_file.split('/')[-1].split('.')[0] station_path = os.path.join(inpath, station_name) + + # checking that data_issues_path is valid + if data_issues_path is None: + data_issues_path = Path("../PROMICE-AWS-data-issues") + if data_issues_path.exists(): + logging.warning(f"data_issues_path is missing. Using default data issues path: {data_issues_path}") + else: + raise ValueError("data_issues_path is missing. 
Please provide a valid path to the data issues repository") + if os.path.exists(station_path): - aws = AWS(config_file, station_path, data_issues_repository=data_issues_path, var_file=variables, meta_file=metadata) + aws = AWS(config_file, + station_path, + data_issues_repository=data_issues_path, + var_file=variables, + meta_file=metadata) else: - aws = AWS(config_file, inpath, data_issues_repository=data_issues_path, var_file=variables, meta_file=metadata) + aws = AWS(config_file, + inpath, + data_issues_repository=data_issues_path, + var_file=variables, + meta_file=metadata) # Perform level 1 and 2 processing aws.getL1() @@ -58,21 +75,13 @@ def main(): stream=sys.stdout, ) - data_issues_path = args.data_issues_path - if data_issues_path is None: - data_issues_path = Path("../PROMICE-AWS-data-issues") - if data_issues_path.exists(): - logging.warning(f"data_issues_path is missing. Using default data issues path: {data_issues_path}") - else: - raise ValueError(f"data_issues_path is missing. 
Please provide a valid path to the data issues repository") - _ = get_l2( args.config_file, args.inpath, args.outpath, args.variables, args.metadata, - data_issues_path=data_issues_path, + args.data_issues_path, ) diff --git a/src/pypromice/process/get_l2tol3.py b/src/pypromice/process/get_l2tol3.py index b68a29ba..d1202a18 100644 --- a/src/pypromice/process/get_l2tol3.py +++ b/src/pypromice/process/get_l2tol3.py @@ -25,11 +25,13 @@ def parse_arguments_l2tol3(debug_args=None): required=False, help='File path to variables look-up table') parser.add_argument('-m', '--metadata', default=None, type=str, required=False, help='File path to metadata') + parser.add_argument('--data_issues_path', '--issues', default=None, help="Path to data issues repository") + args = parser.parse_args(args=debug_args) return args -def get_l2tol3(config_folder: Path|str, inpath, outpath, variables, metadata): +def get_l2tol3(config_folder: Path|str, inpath, outpath, variables, metadata, data_issues_path: Path): if isinstance(config_folder, str): config_folder = Path(config_folder) @@ -68,9 +70,18 @@ def get_l2tol3(config_folder: Path|str, inpath, outpath, variables, metadata): "project": "PROMICE", "location_type": "ice sheet", } - + + # checking that the adjustment directory is properly given + if data_issues_path is None: + data_issues_path = Path("../PROMICE-AWS-data-issues") + if data_issues_path.exists(): + logging.warning(f"data_issues_path is missing. Using default data issues path: {data_issues_path}") + else: + raise ValueError("data_issues_path is missing. 
Please provide a valid path to the data issues repository") + data_adjustments_dir = data_issues_path / "adjustments" + # Perform Level 3 processing - l3 = toL3(l2, station_config) + l3 = toL3(l2, data_adjustments_dir, station_config) # Write Level 3 dataset to file if output directory given v = pypromice.resources.load_variables(variables) @@ -83,7 +94,15 @@ def get_l2tol3(config_folder: Path|str, inpath, outpath, variables, metadata): def main(): args = parse_arguments_l2tol3() - _ = get_l2tol3(args.config_folder, args.inpath, args.outpath, args.variables, args.metadata) + + + + _ = get_l2tol3(args.config_folder, + args.inpath, + args.outpath, + args.variables, + args.metadata, + args.data_issues_path) if __name__ == "__main__": main() diff --git a/src/pypromice/qc/github_data_issues.py b/src/pypromice/qc/github_data_issues.py index 1119b979..30469418 100644 --- a/src/pypromice/qc/github_data_issues.py +++ b/src/pypromice/qc/github_data_issues.py @@ -308,7 +308,7 @@ def _getDF(flag_file): ).dropna(how='all', axis='rows') else: df=None - logger.info(f"No {flag_file.split('/')[-2][:-1]} file to read.") + logger.info(f"No {flag_file} file to read.") return df From 167846591c5e9ba2aeb507370d1dbaee5e0b444e Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Tue, 20 Aug 2024 11:17:09 +0200 Subject: [PATCH 2/6] fixing attributes in join_l3 - station_attribute containing info from merged dataset was lost when concatenating the datasets - The key "source" is not present in the attributes of the old GC-Net files so `station_source = json.loads(station_attributes["source"])` was throwing an error --- src/pypromice/process/join_l3.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/pypromice/process/join_l3.py b/src/pypromice/process/join_l3.py index 2ff6cebd..b550a8c1 100644 --- a/src/pypromice/process/join_l3.py +++ b/src/pypromice/process/join_l3.py @@ -493,7 +493,9 @@ 
def join_l3(config_folder, site, folder_l3, folder_gcnet, outpath, variables, me l3_merged.z_ice_surf.to_series(), l3.z_ice_surf.to_series() ), ) - + + # saves attributes + attrs = l3_merged.attrs # merging by time block l3_merged = xr.concat( ( @@ -504,6 +506,9 @@ def join_l3(config_folder, site, folder_l3, folder_gcnet, outpath, variables, me ), dim="time", ) + + # restoring attributes + l3_merged.attrs = attrs # Assign site id if not l3_merged: @@ -519,13 +524,15 @@ def join_l3(config_folder, site, folder_l3, folder_gcnet, outpath, variables, me site_config_source_hash=get_commit_hash_and_check_dirty(config_folder), gcnet_source_hash=get_commit_hash_and_check_dirty(folder_gcnet), ) + for stid, station_attributes in l3_merged.attrs["stations_attributes"].items(): - station_source = json.loads(station_attributes["source"]) - for k, v in station_source.items(): - if k in site_source and site_source[k] != v: - site_source[k] = "multiple" - else: - site_source[k] = v + if "source" in station_attributes.keys(): + station_source = json.loads(station_attributes["source"]) + for k, v in station_source.items(): + if k in site_source and site_source[k] != v: + site_source[k] = "multiple" + else: + site_source[k] = v l3_merged.attrs["source"] = json.dumps(site_source) v = pypromice.resources.load_variables(variables) From b840bb8bcc9c8035b607d3a43a90217ede3bef9d Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Tue, 20 Aug 2024 11:35:02 +0200 Subject: [PATCH 3/6] give data_issues_path to get_l2tol3 in test_process --- tests/e2e/test_process.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/e2e/test_process.py b/tests/e2e/test_process.py index 597cf87f..2fd2d16c 100644 --- a/tests/e2e/test_process.py +++ b/tests/e2e/test_process.py @@ -153,6 +153,7 @@ def test_full_e2e(self): outpath=output_l3.as_posix(), variables=None, metadata=None, + data_issues_path=data_issues_path, ) # Part 4 Join L3: Merge Current 
data and historical GC-Net and convert to site From 9ddacb99b6aa5a56c26970c210b058050a58e386 Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Tue, 20 Aug 2024 11:39:15 +0200 Subject: [PATCH 4/6] using data_adjustments_dir as input in AWS.getL3 --- src/pypromice/process/aws.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pypromice/process/aws.py b/src/pypromice/process/aws.py index e82da6e0..28a1a90f 100644 --- a/src/pypromice/process/aws.py +++ b/src/pypromice/process/aws.py @@ -153,7 +153,7 @@ def getL3(self): """Perform L2 to L3 data processing, including resampling and metadata and attribute population""" logger.info("Level 3 processing...") - self.L3 = toL3(self.L2) + self.L3 = toL3(self.L2, data_adjustments_dir=self.data_issues_repository / "adjustments") def writeArr(self, dataset, outpath, t=None): """Write L3 data to .nc and .csv hourly and daily files From c1bb4599549b477fdd325741778969fc07a6e0c6 Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Tue, 20 Aug 2024 11:48:59 +0200 Subject: [PATCH 5/6] adding path to dummy data_issues folder to process_test --- .github/workflows/process_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/process_test.yml b/.github/workflows/process_test.yml index f6e0fbbb..914cd275 100644 --- a/.github/workflows/process_test.yml +++ b/.github/workflows/process_test.yml @@ -40,7 +40,7 @@ jobs: mkdir $GITHUB_WORKSPACE/out/L0toL2/ mkdir $GITHUB_WORKSPACE/data_issues for i in $(echo ${{ env.TEST_STATION }} | tr ' ' '\n'); do - python3 $GITHUB_WORKSPACE/main/src/pypromice/process/get_l2.py -c $GITHUB_WORKSPACE/aws-l0/tx/config/$i.toml -i $GITHUB_WORKSPACE/aws-l0/tx --issues $GITHUB_WORKSPACE/data_issues -o $GITHUB_WORKSPACE/out/L0toL2/ + python3 $GITHUB_WORKSPACE/main/src/pypromice/process/get_l2.py -c $GITHUB_WORKSPACE/aws-l0/tx/config/$i.toml 
-i $GITHUB_WORKSPACE/aws-l0/tx --issues $GITHUB_WORKSPACE/data_issues -o $GITHUB_WORKSPACE/out/L0toL2/ --data_issues_path $GITHUB_WORKSPACE/data_issues done - name: Run L2 to L3 processing env: @@ -50,7 +50,7 @@ jobs: mkdir $GITHUB_WORKSPACE/out/L2toL3/ for i in $(echo ${{ env.TEST_STATION }} | tr ' ' '\n'); do echo ${i}_hour.nc - python3 $GITHUB_WORKSPACE/main/src/pypromice/process/get_l2tol3.py -c $GITHUB_WORKSPACE/aws-l0/metadata/station_configurations/ -i $GITHUB_WORKSPACE/out/L0toL2/${i}/${i}_hour.nc -o $GITHUB_WORKSPACE/out/L2toL3/ + python3 $GITHUB_WORKSPACE/main/src/pypromice/process/get_l2tol3.py -c $GITHUB_WORKSPACE/aws-l0/metadata/station_configurations/ -i $GITHUB_WORKSPACE/out/L0toL2/${i}/${i}_hour.nc -o $GITHUB_WORKSPACE/out/L2toL3/ --data_issues_path $GITHUB_WORKSPACE/data_issues done - name: Upload test output uses: actions/upload-artifact@v3 From 20283401429cc1ff83a430aec26c3c8a143a8a3e Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Tue, 20 Aug 2024 11:55:40 +0200 Subject: [PATCH 6/6] making sure data_issues_path is Path in get_l2tol3 --- src/pypromice/process/get_l2tol3.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/pypromice/process/get_l2tol3.py b/src/pypromice/process/get_l2tol3.py index d1202a18..35f547c9 100644 --- a/src/pypromice/process/get_l2tol3.py +++ b/src/pypromice/process/get_l2tol3.py @@ -31,7 +31,7 @@ def parse_arguments_l2tol3(debug_args=None): args = parser.parse_args(args=debug_args) return args -def get_l2tol3(config_folder: Path|str, inpath, outpath, variables, metadata, data_issues_path: Path): +def get_l2tol3(config_folder: Path|str, inpath, outpath, variables, metadata, data_issues_path: Path|str): if isinstance(config_folder, str): config_folder = Path(config_folder) @@ -78,6 +78,9 @@ def get_l2tol3(config_folder: Path|str, inpath, outpath, variables, metadata, da logging.warning(f"data_issues_path is missing. 
Using default data issues path: {data_issues_path}") else: raise ValueError("data_issues_path is missing. Please provide a valid path to the data issues repository") + else: + data_issues_path = Path(data_issues_path) + data_adjustments_dir = data_issues_path / "adjustments" # Perform Level 3 processing