From f540cc32dd3e3a7f8c7aab91b5f172792fa9d27a Mon Sep 17 00:00:00 2001 From: Mads Christian Lund Date: Fri, 23 Aug 2024 13:15:11 +0200 Subject: [PATCH 01/16] Updated the `attrs['source']` hash strings: * Now includes the git commit ID in the source string when the repository has uncommitted changes for reference. * Removed assumptions about relative paths and repository roots in the input file paths. The previous approach assumed the data issues path was already the root, causing the function to fail. --- src/pypromice/process/aws.py | 11 ++++++++++- src/pypromice/utilities/git.py | 14 ++++++++------ tests/e2e/test_get_l2.py | 6 +++--- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/src/pypromice/process/aws.py b/src/pypromice/process/aws.py index 28a1a90f..1d203ad4 100644 --- a/src/pypromice/process/aws.py +++ b/src/pypromice/process/aws.py @@ -55,7 +55,15 @@ def __init__( """ assert os.path.isfile(config_file), "cannot find " + config_file assert os.path.isdir(inpath), "cannot find " + inpath - logger.info("AWS object initialising...") + logger.info( + "AWS(" + f"config_file={config_file}," + f" inpath={inpath}," + f" data_issues_repository={data_issues_repository}," + f" var_file={var_file}," + f" meta_file={meta_file}" + ")" + ) # Load config, variables CSF standards, and L0 files self.config = self.loadConfig(config_file, inpath) @@ -73,6 +81,7 @@ def __init__( l0_data_root=inpath_hash, data_issues=data_issues_hash, ) + logger.debug('Source information: %s', source_dict) self.meta["source"] = json.dumps(source_dict) # Load config file diff --git a/src/pypromice/utilities/git.py b/src/pypromice/utilities/git.py index 1bff997c..2949019e 100644 --- a/src/pypromice/utilities/git.py +++ b/src/pypromice/utilities/git.py @@ -7,12 +7,16 @@ logger = logging.getLogger(__name__) -def get_commit_hash_and_check_dirty(file_path) -> str: - repo_path = Path(file_path).parent +def get_commit_hash_and_check_dirty(file_path: str | Path) -> str: + if 
isinstance(file_path, str): + file_path = Path(file_path) + if file_path.is_dir(): + repo_path = file_path + else: + repo_path = file_path.parent try: # Ensure the file path is relative to the repository - relative_file_path = os.path.relpath(file_path, repo_path) # Get the latest commit hash for the file commit_hash = ( @@ -25,8 +29,6 @@ def get_commit_hash_and_check_dirty(file_path) -> str: "-n", "1", "--pretty=format:%H", - #"--", - #relative_file_path, ], stderr=subprocess.STDOUT, ) @@ -49,7 +51,7 @@ def get_commit_hash_and_check_dirty(file_path) -> str: if is_dirty: logger.warning(f"Warning: The file {file_path} is dirty compared to the last commit. {commit_hash}") - return 'unknown' + return f'{commit_hash} (dirty)' if commit_hash == "": logger.warning(f"Warning: The file {file_path} is not under version control.") return 'unknown' diff --git a/tests/e2e/test_get_l2.py b/tests/e2e/test_get_l2.py index d825b74f..2796358f 100644 --- a/tests/e2e/test_get_l2.py +++ b/tests/e2e/test_get_l2.py @@ -129,6 +129,6 @@ def test_get_l2_raw(self): ) data_root_hash = source_decoded["l0_data_root"] data_issues_hash = source_decoded["data_issues"] - self.assertNotEquals(config_hash, 'unknown', 'This test will fail while the commit is dirty') - self.assertNotEquals(data_root_hash, 'unknown', 'This test will fail while the commit is dirty') - self.assertNotEquals(data_issues_hash, 'unknown', 'This test will fail while the commit is dirty') + self.assertFalse(config_hash.endswith(" (dirty)"), 'This test will fail while the commit is dirty') + self.assertFalse(data_root_hash.endswith(" (dirty)"), 'This test will fail while the commit is dirty') + self.assertFalse(data_issues_hash.endswith(" (dirty)"), 'This test will fail while the commit is dirty') From f4827bbdb15252b54e62253584501533422530fb Mon Sep 17 00:00:00 2001 From: Mads Christian Lund Date: Fri, 23 Aug 2024 13:34:25 +0200 Subject: [PATCH 02/16] Updated bufr_to_csv.py to allow multiple input files --- 
src/pypromice/postprocess/bufr_to_csv.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/pypromice/postprocess/bufr_to_csv.py b/src/pypromice/postprocess/bufr_to_csv.py index d80f99a3..914698d1 100644 --- a/src/pypromice/postprocess/bufr_to_csv.py +++ b/src/pypromice/postprocess/bufr_to_csv.py @@ -1,15 +1,22 @@ import argparse from pathlib import Path +import pandas as pd + from pypromice.postprocess.bufr_utilities import read_bufr_file def main(): parser = argparse.ArgumentParser("BUFR to CSV converter") - parser.add_argument("path", type=Path) + parser.add_argument("path", type=Path, nargs='+') args = parser.parse_args() - print(read_bufr_file(args.path).to_csv()) + paths = [] + for path in args.path: + paths += list(path.parent.glob(path.name)) + + df = pd.concat([read_bufr_file(path) for path in paths]) + print(df.to_csv()) if __name__ == "__main__": From 48967c876daf812217b0b76d08efe76f3f556c0e Mon Sep 17 00:00:00 2001 From: Mads Christian Lund Date: Fri, 23 Aug 2024 14:51:13 +0200 Subject: [PATCH 03/16] Removed print statement --- src/pypromice/utilities/git.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/pypromice/utilities/git.py b/src/pypromice/utilities/git.py index 2949019e..ec87b291 100644 --- a/src/pypromice/utilities/git.py +++ b/src/pypromice/utilities/git.py @@ -56,7 +56,6 @@ def get_commit_hash_and_check_dirty(file_path: str | Path) -> str: logger.warning(f"Warning: The file {file_path} is not under version control.") return 'unknown' - print(f"Commit hash: {commit_hash}") return commit_hash except subprocess.CalledProcessError as e: logger.warning(f"Error: {e.output.decode('utf-8')}") From 8e6192340350be952f84a543823bcbeeb45f4b14 Mon Sep 17 00:00:00 2001 From: Penny How Date: Wed, 21 Aug 2024 09:49:59 -0100 Subject: [PATCH 04/16] Minimum Python version updated --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 4f126237..3b02996d 100644 --- a/setup.py 
+++ b/setup.py @@ -29,7 +29,7 @@ package_dir={"": "src"}, include_package_data = True, packages=setuptools.find_packages(where="src"), - python_requires=">=3.8", + python_requires=">=3.10", package_data={ "pypromice.tx": ["payload_formats.csv", "payload_types.csv"], "pypromice.qc.percentiles": ["thresholds.csv"], From f856a722e8ccbf3d20cac56d3632dffc2a5c2bf0 Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Tue, 10 Sep 2024 15:01:28 +0200 Subject: [PATCH 05/16] update resample so that it copies the 10 min data into the hourly files --- src/pypromice/process/resample.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/src/pypromice/process/resample.py b/src/pypromice/process/resample.py index 698a5fab..b67b2b57 100644 --- a/src/pypromice/process/resample.py +++ b/src/pypromice/process/resample.py @@ -34,6 +34,15 @@ def resample_dataset(ds_h, t): ''' df_d = ds_h.to_dataframe().resample(t).mean() + # taking the 10 min data and using it as instantaneous values: + if (t == '60min') and (ds_h.time.diff(dim='time').isel(time=0).dt.total_seconds() == 600): + cols_to_update = ['p_i', 't_i', 'rh_i', 'rh_i_cor', 'wspd_i', 'wdir_i','wspd_x_i','wspd_y_i'] + for col in cols_to_update: + df_d[col] = ds_h.reindex(time=df_d.index)[col.replace('_i','_u')].values + if col == 'p_i': + df_d[col] = df_d[col].values-1000 + + # recalculating wind direction from averaged directional wind speeds for var in ['wdir_u','wdir_l']: boom = var.split('_')[1] @@ -60,9 +69,19 @@ def resample_dataset(ds_h, t): if var+'_cor' in df_d.keys(): df_d[var+'_cor'] = (p_vap.to_series().resample(t).mean() \ / es_cor.to_series().resample(t).mean())*100 + + # passing each variable attribute to the resampled dataset + vals = [] + for c in df_d.columns: + if c in ds_h.data_vars: + vals.append(xr.DataArray( + data=df_d[c], dims=['time'], + coords={'time':df_d.index}, attrs=ds_h[c].attrs)) + else: + 
vals.append(xr.DataArray( + data=df_d[c], dims=['time'], + coords={'time':df_d.index}, attrs=None)) - vals = [xr.DataArray(data=df_d[c], dims=['time'], - coords={'time':df_d.index}, attrs=ds_h[c].attrs) for c in df_d.columns] ds_d = xr.Dataset(dict(zip(df_d.columns,vals)), attrs=ds_h.attrs) return ds_d From 6351282499b805a711949b30a0a7b65dfbe730da Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Tue, 10 Sep 2024 15:02:02 +0200 Subject: [PATCH 06/16] minor edit in surface height processing --- src/pypromice/process/L2toL3.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/pypromice/process/L2toL3.py b/src/pypromice/process/L2toL3.py index 5c50ee93..8cd0ea5c 100755 --- a/src/pypromice/process/L2toL3.py +++ b/src/pypromice/process/L2toL3.py @@ -254,6 +254,7 @@ def process_surface_height(ds, data_adjustments_dir, station_config={}): ds['z_surf_combined'] = np.maximum(ds['z_surf_combined'], ds['z_ice_surf']) ds['snow_height'] = np.maximum(0, ds['z_surf_combined'] - ds['z_ice_surf']) + ds['z_ice_surf'] = ds['z_ice_surf'].where(ds.snow_height.notnull()) elif ds.attrs['site_type'] in ['accumulation', 'bedrock']: # Handle accumulation and bedrock site types ds['z_ice_surf'] = ('time', ds['z_surf_1'].data * np.nan) From 79ed274c37b3ebe4406e6ec60de304f552e8d4cb Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Wed, 11 Sep 2024 15:45:34 +0200 Subject: [PATCH 07/16] update version number --- docs/conf.py | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 3578886d..dc8d750f 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -22,7 +22,7 @@ author = 'GEUS Glaciology and Climate' # The full version, including alpha/beta/rc tags -release = '1.3.6' +release = '1.4.1' # -- General configuration --------------------------------------------------- diff --git a/setup.py b/setup.py index 
3b02996d..1f886ec0 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="pypromice", - version="1.4.0", + version="1.4.1", author="GEUS Glaciology and Climate", description="PROMICE/GC-Net data processing toolbox", long_description=long_description, From 1dcd80dff419b44861115e71a3238ce5dd6c35d2 Mon Sep 17 00:00:00 2001 From: BaptisteVandecrux Date: Wed, 11 Sep 2024 23:55:29 +0200 Subject: [PATCH 08/16] extracting only *_i for timestamps with time.diff() == 10 min --- src/pypromice/process/resample.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/pypromice/process/resample.py b/src/pypromice/process/resample.py index b67b2b57..746a9b0b 100644 --- a/src/pypromice/process/resample.py +++ b/src/pypromice/process/resample.py @@ -35,10 +35,18 @@ def resample_dataset(ds_h, t): df_d = ds_h.to_dataframe().resample(t).mean() # taking the 10 min data and using it as instantaneous values: - if (t == '60min') and (ds_h.time.diff(dim='time').isel(time=0).dt.total_seconds() == 600): + msk = (ds_h.time.diff(dim='time') / np.timedelta64(1, 's') == 600) + if (t == '60min') and msk.any(): cols_to_update = ['p_i', 't_i', 'rh_i', 'rh_i_cor', 'wspd_i', 'wdir_i','wspd_x_i','wspd_y_i'] + timestamp_10min = ds_h.time.where(msk, drop=True).to_index() + timestamp_hour = df_d.index + for col in cols_to_update: - df_d[col] = ds_h.reindex(time=df_d.index)[col.replace('_i','_u')].values + if col not in df_d.columns: + df_d[col] = np.nan + df_d.loc[timestamp_hour.intersection(timestamp_10min), col] = ds_h.reindex( + time= timestamp_hour.intersection(timestamp_10min) + )[col.replace('_i','_u')].values if col == 'p_i': df_d[col] = df_d[col].values-1000 From 63cada55b27e22115e2f2af544cbf263046b2a79 Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Thu, 12 Sep 2024 09:13:37 +0200 Subject: [PATCH 09/16] only updated timestamp need to be subtracted 1000 --- 
src/pypromice/process/resample.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/pypromice/process/resample.py b/src/pypromice/process/resample.py index 746a9b0b..7e69a00b 100644 --- a/src/pypromice/process/resample.py +++ b/src/pypromice/process/resample.py @@ -40,15 +40,16 @@ def resample_dataset(ds_h, t): cols_to_update = ['p_i', 't_i', 'rh_i', 'rh_i_cor', 'wspd_i', 'wdir_i','wspd_x_i','wspd_y_i'] timestamp_10min = ds_h.time.where(msk, drop=True).to_index() timestamp_hour = df_d.index + timestamp_to_update = timestamp_hour.intersection(timestamp_10min) for col in cols_to_update: if col not in df_d.columns: df_d[col] = np.nan - df_d.loc[timestamp_hour.intersection(timestamp_10min), col] = ds_h.reindex( - time= timestamp_hour.intersection(timestamp_10min) + df_d.loc[timestamp_to_update, col] = ds_h.reindex( + time= timestamp_to_update )[col.replace('_i','_u')].values if col == 'p_i': - df_d[col] = df_d[col].values-1000 + df_d.loc[timestamp_to_update, col] = df_d.loc[timestamp_to_update, col].values-1000 # recalculating wind direction from averaged directional wind speeds From 1f12ecf9ea74588b8840ff4b93939616dd117c6c Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Thu, 12 Sep 2024 10:55:40 +0200 Subject: [PATCH 10/16] adding criteria preserving inst. 
values already there --- src/pypromice/process/resample.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/pypromice/process/resample.py b/src/pypromice/process/resample.py index 7e69a00b..3fde1b71 100644 --- a/src/pypromice/process/resample.py +++ b/src/pypromice/process/resample.py @@ -35,16 +35,22 @@ def resample_dataset(ds_h, t): df_d = ds_h.to_dataframe().resample(t).mean() # taking the 10 min data and using it as instantaneous values: - msk = (ds_h.time.diff(dim='time') / np.timedelta64(1, 's') == 600) - if (t == '60min') and msk.any(): + is_10_minutes_timestamp = (ds_h.time.diff(dim='time') / np.timedelta64(1, 's') == 600) + if (t == '60min') and is_10_minutes_timestamp.any(): cols_to_update = ['p_i', 't_i', 'rh_i', 'rh_i_cor', 'wspd_i', 'wdir_i','wspd_x_i','wspd_y_i'] - timestamp_10min = ds_h.time.where(msk, drop=True).to_index() - timestamp_hour = df_d.index - timestamp_to_update = timestamp_hour.intersection(timestamp_10min) + timestamp_10min = ds_h.time.where(is_10_minutes_timestamp, drop=True).to_index() + timestamp_round_hour = df_d.index + timestamp_to_update = timestamp_round_hour.intersection(timestamp_10min) for col in cols_to_update: if col not in df_d.columns: df_d[col] = np.nan + else: + # if there are already instantaneous values in the dataset + # we want to keep them as they are + # removing timestamps where there is already t_i filled from a TX file + missing_instantaneous = ds_h.reindex(time=timestamp_to_update)[col].isnull() + timestamp_to_update = timestamp_to_update[missing_instantaneous] df_d.loc[timestamp_to_update, col] = ds_h.reindex( time= timestamp_to_update )[col.replace('_i','_u')].values From 15fa7b6a2adef6eac5abd6e8d29b923f1a3ab121 Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Thu, 12 Sep 2024 13:16:42 +0200 Subject: [PATCH 11/16] explicit handling of non numeric columns --- src/pypromice/process/resample.py | 16 
+++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/pypromice/process/resample.py b/src/pypromice/process/resample.py index 3fde1b71..2280901e 100644 --- a/src/pypromice/process/resample.py +++ b/src/pypromice/process/resample.py @@ -32,8 +32,22 @@ def resample_dataset(ds_h, t): ds_d : xarray.Dataset L3 AWS dataset resampled to the frequency defined by t ''' - df_d = ds_h.to_dataframe().resample(t).mean() + # Convert dataset to DataFrame + df_d = ds_h.to_dataframe() + # Identify non-numeric columns + non_numeric_cols = df_d.select_dtypes(exclude=['number']).columns + + # Log a warning and drop non-numeric columns + if len(non_numeric_cols) > 0: + for col in non_numeric_cols: + unique_values = df_d[col].unique() + logger.warning(f"Dropping column '{col}' because it is of type '{df_d[col].dtype}' and contains unique values: {unique_values}") + + df_d = df_d.drop(columns=non_numeric_cols) + # Resample the DataFrame + df_d = df_d.resample(t).mean() + # taking the 10 min data and using it as instantaneous values: is_10_minutes_timestamp = (ds_h.time.diff(dim='time') / np.timedelta64(1, 's') == 600) if (t == '60min') and is_10_minutes_timestamp.any(): From 01665fc488c26c4091ddd5fe1d72833bc074b13d Mon Sep 17 00:00:00 2001 From: BaptisteVandecrux Date: Wed, 11 Sep 2024 23:55:29 +0200 Subject: [PATCH 12/16] extracting only *_i for timestamps with time.diff() == 10 min --- src/pypromice/process/resample.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/pypromice/process/resample.py b/src/pypromice/process/resample.py index b67b2b57..746a9b0b 100644 --- a/src/pypromice/process/resample.py +++ b/src/pypromice/process/resample.py @@ -35,10 +35,18 @@ def resample_dataset(ds_h, t): df_d = ds_h.to_dataframe().resample(t).mean() # taking the 10 min data and using it as instantaneous values: - if (t == '60min') and (ds_h.time.diff(dim='time').isel(time=0).dt.total_seconds() == 600): + msk = 
(ds_h.time.diff(dim='time') / np.timedelta64(1, 's') == 600) + if (t == '60min') and msk.any(): cols_to_update = ['p_i', 't_i', 'rh_i', 'rh_i_cor', 'wspd_i', 'wdir_i','wspd_x_i','wspd_y_i'] + timestamp_10min = ds_h.time.where(msk, drop=True).to_index() + timestamp_hour = df_d.index + for col in cols_to_update: - df_d[col] = ds_h.reindex(time=df_d.index)[col.replace('_i','_u')].values + if col not in df_d.columns: + df_d[col] = np.nan + df_d.loc[timestamp_hour.intersection(timestamp_10min), col] = ds_h.reindex( + time= timestamp_hour.intersection(timestamp_10min) + )[col.replace('_i','_u')].values if col == 'p_i': df_d[col] = df_d[col].values-1000 From eb64b9d60c128db7246ddb7c2501377b707a0411 Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Thu, 12 Sep 2024 09:13:37 +0200 Subject: [PATCH 13/16] only updated timestamp need to be subtracted 1000 --- src/pypromice/process/resample.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/pypromice/process/resample.py b/src/pypromice/process/resample.py index 746a9b0b..7e69a00b 100644 --- a/src/pypromice/process/resample.py +++ b/src/pypromice/process/resample.py @@ -40,15 +40,16 @@ def resample_dataset(ds_h, t): cols_to_update = ['p_i', 't_i', 'rh_i', 'rh_i_cor', 'wspd_i', 'wdir_i','wspd_x_i','wspd_y_i'] timestamp_10min = ds_h.time.where(msk, drop=True).to_index() timestamp_hour = df_d.index + timestamp_to_update = timestamp_hour.intersection(timestamp_10min) for col in cols_to_update: if col not in df_d.columns: df_d[col] = np.nan - df_d.loc[timestamp_hour.intersection(timestamp_10min), col] = ds_h.reindex( - time= timestamp_hour.intersection(timestamp_10min) + df_d.loc[timestamp_to_update, col] = ds_h.reindex( + time= timestamp_to_update )[col.replace('_i','_u')].values if col == 'p_i': - df_d[col] = df_d[col].values-1000 + df_d.loc[timestamp_to_update, col] = df_d.loc[timestamp_to_update, col].values-1000 # recalculating wind 
direction from averaged directional wind speeds From d093a572f7f3021a5161da07c213e05ed9f66cc4 Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Thu, 12 Sep 2024 10:55:40 +0200 Subject: [PATCH 14/16] adding criteria preserving inst. values already there --- src/pypromice/process/resample.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/pypromice/process/resample.py b/src/pypromice/process/resample.py index 7e69a00b..3fde1b71 100644 --- a/src/pypromice/process/resample.py +++ b/src/pypromice/process/resample.py @@ -35,16 +35,22 @@ def resample_dataset(ds_h, t): df_d = ds_h.to_dataframe().resample(t).mean() # taking the 10 min data and using it as instantaneous values: - msk = (ds_h.time.diff(dim='time') / np.timedelta64(1, 's') == 600) - if (t == '60min') and msk.any(): + is_10_minutes_timestamp = (ds_h.time.diff(dim='time') / np.timedelta64(1, 's') == 600) + if (t == '60min') and is_10_minutes_timestamp.any(): cols_to_update = ['p_i', 't_i', 'rh_i', 'rh_i_cor', 'wspd_i', 'wdir_i','wspd_x_i','wspd_y_i'] - timestamp_10min = ds_h.time.where(msk, drop=True).to_index() - timestamp_hour = df_d.index - timestamp_to_update = timestamp_hour.intersection(timestamp_10min) + timestamp_10min = ds_h.time.where(is_10_minutes_timestamp, drop=True).to_index() + timestamp_round_hour = df_d.index + timestamp_to_update = timestamp_round_hour.intersection(timestamp_10min) for col in cols_to_update: if col not in df_d.columns: df_d[col] = np.nan + else: + # if there are already instantaneous values in the dataset + # we want to keep them as they are + # removing timestamps where there is already t_i filled from a TX file + missing_instantaneous = ds_h.reindex(time=timestamp_to_update)[col].isnull() + timestamp_to_update = timestamp_to_update[missing_instantaneous] df_d.loc[timestamp_to_update, col] = ds_h.reindex( time= timestamp_to_update )[col.replace('_i','_u')].values From 
6615eb3c86c9b4098dc2c938d4da00ebe6fb0a3d Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Thu, 12 Sep 2024 13:16:42 +0200 Subject: [PATCH 15/16] explicit handling of non numeric columns --- src/pypromice/process/resample.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/pypromice/process/resample.py b/src/pypromice/process/resample.py index 3fde1b71..2280901e 100644 --- a/src/pypromice/process/resample.py +++ b/src/pypromice/process/resample.py @@ -32,8 +32,22 @@ def resample_dataset(ds_h, t): ds_d : xarray.Dataset L3 AWS dataset resampled to the frequency defined by t ''' - df_d = ds_h.to_dataframe().resample(t).mean() + # Convert dataset to DataFrame + df_d = ds_h.to_dataframe() + # Identify non-numeric columns + non_numeric_cols = df_d.select_dtypes(exclude=['number']).columns + + # Log a warning and drop non-numeric columns + if len(non_numeric_cols) > 0: + for col in non_numeric_cols: + unique_values = df_d[col].unique() + logger.warning(f"Dropping column '{col}' because it is of type '{df_d[col].dtype}' and contains unique values: {unique_values}") + + df_d = df_d.drop(columns=non_numeric_cols) + # Resample the DataFrame + df_d = df_d.resample(t).mean() + # taking the 10 min data and using it as instantaneous values: is_10_minutes_timestamp = (ds_h.time.diff(dim='time') / np.timedelta64(1, 's') == 600) if (t == '60min') and is_10_minutes_timestamp.any(): From bcaf79ea789ed57d26345d590131364435fe0437 Mon Sep 17 00:00:00 2001 From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com> Date: Thu, 12 Sep 2024 13:59:35 +0200 Subject: [PATCH 16/16] version bump --- docs/conf.py | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index dc8d750f..63e3f6ad 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -22,7 +22,7 @@ author = 'GEUS Glaciology and Climate' # The full version, including 
alpha/beta/rc tags -release = '1.4.1' +release = '1.4.2' # -- General configuration --------------------------------------------------- diff --git a/setup.py b/setup.py index 1f886ec0..8c6db22d 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="pypromice", - version="1.4.1", + version="1.4.2", author="GEUS Glaciology and Climate", description="PROMICE/GC-Net data processing toolbox", long_description=long_description,