From f540cc32dd3e3a7f8c7aab91b5f172792fa9d27a Mon Sep 17 00:00:00 2001
From: Mads Christian Lund <maclu@geus.dk>
Date: Fri, 23 Aug 2024 13:15:11 +0200
Subject: [PATCH 01/16] Updated the `attrs['source']` hash strings:

* The source string now includes the git commit ID even when the repository has uncommitted changes (marked with a ` (dirty)` suffix), so the base commit can still be referenced.
* Removed assumptions about relative paths and repository roots in the input file paths. The previous approach assumed the data issues path was already the root, causing the function to fail.
---
 src/pypromice/process/aws.py   | 11 ++++++++++-
 src/pypromice/utilities/git.py | 14 ++++++++------
 tests/e2e/test_get_l2.py       |  6 +++---
 3 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/src/pypromice/process/aws.py b/src/pypromice/process/aws.py
index 28a1a90f..1d203ad4 100644
--- a/src/pypromice/process/aws.py
+++ b/src/pypromice/process/aws.py
@@ -55,7 +55,15 @@ def __init__(
         """
         assert os.path.isfile(config_file), "cannot find " + config_file
         assert os.path.isdir(inpath), "cannot find " + inpath
-        logger.info("AWS object initialising...")
+        logger.info(
+            "AWS("
+            f"config_file={config_file},"
+            f" inpath={inpath},"
+            f" data_issues_repository={data_issues_repository},"
+            f" var_file={var_file},"
+            f" meta_file={meta_file}"
+            ")"
+        )
 
         # Load config, variables CSF standards, and L0 files
         self.config = self.loadConfig(config_file, inpath)
@@ -73,6 +81,7 @@ def __init__(
             l0_data_root=inpath_hash,
             data_issues=data_issues_hash,
         )
+        logger.debug('Source information: %s', source_dict)
         self.meta["source"] = json.dumps(source_dict)
 
         # Load config file
diff --git a/src/pypromice/utilities/git.py b/src/pypromice/utilities/git.py
index 1bff997c..2949019e 100644
--- a/src/pypromice/utilities/git.py
+++ b/src/pypromice/utilities/git.py
@@ -7,12 +7,16 @@
 logger = logging.getLogger(__name__)
 
 
-def get_commit_hash_and_check_dirty(file_path) -> str:
-    repo_path = Path(file_path).parent
+def get_commit_hash_and_check_dirty(file_path: str | Path) -> str:
+    if isinstance(file_path, str):
+        file_path = Path(file_path)
+    if file_path.is_dir():
+        repo_path = file_path
+    else:
+        repo_path = file_path.parent
 
     try:
         # Ensure the file path is relative to the repository
-        relative_file_path = os.path.relpath(file_path, repo_path)
 
         # Get the latest commit hash for the file
         commit_hash = (
@@ -25,8 +29,6 @@ def get_commit_hash_and_check_dirty(file_path) -> str:
                     "-n",
                     "1",
                     "--pretty=format:%H",
-                    #"--",
-                    #relative_file_path,
                 ],
                 stderr=subprocess.STDOUT,
             )
@@ -49,7 +51,7 @@ def get_commit_hash_and_check_dirty(file_path) -> str:
 
         if is_dirty:
             logger.warning(f"Warning: The file {file_path} is dirty compared to the last commit. {commit_hash}")
-            return 'unknown'
+            return f'{commit_hash} (dirty)'
         if commit_hash == "":
             logger.warning(f"Warning: The file {file_path} is not under version control.")
             return 'unknown'
diff --git a/tests/e2e/test_get_l2.py b/tests/e2e/test_get_l2.py
index d825b74f..2796358f 100644
--- a/tests/e2e/test_get_l2.py
+++ b/tests/e2e/test_get_l2.py
@@ -129,6 +129,6 @@ def test_get_l2_raw(self):
                 )
                 data_root_hash = source_decoded["l0_data_root"]
                 data_issues_hash = source_decoded["data_issues"]
-                self.assertNotEquals(config_hash, 'unknown', 'This test will fail while the commit is dirty')
-                self.assertNotEquals(data_root_hash, 'unknown', 'This test will fail while the commit is dirty')
-                self.assertNotEquals(data_issues_hash, 'unknown', 'This test will fail while the commit is dirty')
+                self.assertFalse(config_hash.endswith(" (dirty)"), 'This test will fail while the commit is dirty')
+                self.assertFalse(data_root_hash.endswith(" (dirty)"), 'This test will fail while the commit is dirty')
+                self.assertFalse(data_issues_hash.endswith(" (dirty)"), 'This test will fail while the commit is dirty')

From f4827bbdb15252b54e62253584501533422530fb Mon Sep 17 00:00:00 2001
From: Mads Christian Lund <maclu@geus.dk>
Date: Fri, 23 Aug 2024 13:34:25 +0200
Subject: [PATCH 02/16] Updated bufr_to_csv.py to allow multiple input files

---
 src/pypromice/postprocess/bufr_to_csv.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/pypromice/postprocess/bufr_to_csv.py b/src/pypromice/postprocess/bufr_to_csv.py
index d80f99a3..914698d1 100644
--- a/src/pypromice/postprocess/bufr_to_csv.py
+++ b/src/pypromice/postprocess/bufr_to_csv.py
@@ -1,15 +1,22 @@
 import argparse
 from pathlib import Path
 
+import pandas as pd
+
 from pypromice.postprocess.bufr_utilities import read_bufr_file
 
 
 def main():
     parser = argparse.ArgumentParser("BUFR to CSV converter")
-    parser.add_argument("path", type=Path)
+    parser.add_argument("path", type=Path, nargs='+')
     args = parser.parse_args()
 
-    print(read_bufr_file(args.path).to_csv())
+    paths = []
+    for path in args.path:
+        paths += list(path.parent.glob(path.name))
+
+    df = pd.concat([read_bufr_file(path) for path in paths])
+    print(df.to_csv())
 
 
 if __name__ == "__main__":

From 48967c876daf812217b0b76d08efe76f3f556c0e Mon Sep 17 00:00:00 2001
From: Mads Christian Lund <maclu@geus.dk>
Date: Fri, 23 Aug 2024 14:51:13 +0200
Subject: [PATCH 03/16] Removed print statement

---
 src/pypromice/utilities/git.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/pypromice/utilities/git.py b/src/pypromice/utilities/git.py
index 2949019e..ec87b291 100644
--- a/src/pypromice/utilities/git.py
+++ b/src/pypromice/utilities/git.py
@@ -56,7 +56,6 @@ def get_commit_hash_and_check_dirty(file_path: str | Path) -> str:
             logger.warning(f"Warning: The file {file_path} is not under version control.")
             return 'unknown'
 
-        print(f"Commit hash: {commit_hash}")
         return commit_hash
     except subprocess.CalledProcessError as e:
         logger.warning(f"Error: {e.output.decode('utf-8')}")

From 8e6192340350be952f84a543823bcbeeb45f4b14 Mon Sep 17 00:00:00 2001
From: Penny How <pho@geus.dk>
Date: Wed, 21 Aug 2024 09:49:59 -0100
Subject: [PATCH 04/16] Minimum Python version updated

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 4f126237..3b02996d 100644
--- a/setup.py
+++ b/setup.py
@@ -29,7 +29,7 @@
     package_dir={"": "src"},
     include_package_data = True,
     packages=setuptools.find_packages(where="src"),
-    python_requires=">=3.8",
+    python_requires=">=3.10",
     package_data={
     	"pypromice.tx": ["payload_formats.csv", "payload_types.csv"],
         "pypromice.qc.percentiles": ["thresholds.csv"],

From f856a722e8ccbf3d20cac56d3632dffc2a5c2bf0 Mon Sep 17 00:00:00 2001
From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com>
Date: Tue, 10 Sep 2024 15:01:28 +0200
Subject: [PATCH 05/16] update resample so that it copies the 10 min data into
 the hourly files

---
 src/pypromice/process/resample.py | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/src/pypromice/process/resample.py b/src/pypromice/process/resample.py
index 698a5fab..b67b2b57 100644
--- a/src/pypromice/process/resample.py
+++ b/src/pypromice/process/resample.py
@@ -34,6 +34,15 @@ def resample_dataset(ds_h, t):
     '''
     df_d = ds_h.to_dataframe().resample(t).mean()
     
+    # taking the 10 min data and using it as instantaneous values:
+    if (t == '60min') and (ds_h.time.diff(dim='time').isel(time=0).dt.total_seconds() == 600):
+        cols_to_update = ['p_i', 't_i', 'rh_i', 'rh_i_cor', 'wspd_i', 'wdir_i','wspd_x_i','wspd_y_i']
+        for col in cols_to_update:
+            df_d[col] = ds_h.reindex(time=df_d.index)[col.replace('_i','_u')].values
+            if col == 'p_i':
+                df_d[col] = df_d[col].values-1000
+            
+
     # recalculating wind direction from averaged directional wind speeds
     for var in ['wdir_u','wdir_l']:
         boom = var.split('_')[1]
@@ -60,9 +69,19 @@ def resample_dataset(ds_h, t):
                 if var+'_cor' in df_d.keys():
                     df_d[var+'_cor'] = (p_vap.to_series().resample(t).mean() \
                                / es_cor.to_series().resample(t).mean())*100
+    
+    # passing each variable's attributes to the resampled dataset
+    vals = []
+    for c in df_d.columns:
+        if c in ds_h.data_vars:
+            vals.append(xr.DataArray(
+                data=df_d[c], dims=['time'],
+               coords={'time':df_d.index}, attrs=ds_h[c].attrs))
+        else:
+            vals.append(xr.DataArray(
+                data=df_d[c], dims=['time'],
+               coords={'time':df_d.index}, attrs=None))
             
-    vals = [xr.DataArray(data=df_d[c], dims=['time'],
-           coords={'time':df_d.index}, attrs=ds_h[c].attrs) for c in df_d.columns]
     ds_d = xr.Dataset(dict(zip(df_d.columns,vals)), attrs=ds_h.attrs)
     return ds_d
 

From 6351282499b805a711949b30a0a7b65dfbe730da Mon Sep 17 00:00:00 2001
From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com>
Date: Tue, 10 Sep 2024 15:02:02 +0200
Subject: [PATCH 06/16] minor edit in surface height processing

---
 src/pypromice/process/L2toL3.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/pypromice/process/L2toL3.py b/src/pypromice/process/L2toL3.py
index 5c50ee93..8cd0ea5c 100755
--- a/src/pypromice/process/L2toL3.py
+++ b/src/pypromice/process/L2toL3.py
@@ -254,6 +254,7 @@ def process_surface_height(ds, data_adjustments_dir, station_config={}):
         
         ds['z_surf_combined'] = np.maximum(ds['z_surf_combined'], ds['z_ice_surf'])
         ds['snow_height'] = np.maximum(0, ds['z_surf_combined'] - ds['z_ice_surf'])
+        ds['z_ice_surf'] = ds['z_ice_surf'].where(ds.snow_height.notnull())
     elif ds.attrs['site_type'] in ['accumulation', 'bedrock']:
         # Handle accumulation and bedrock site types
         ds['z_ice_surf'] = ('time', ds['z_surf_1'].data * np.nan)

From 79ed274c37b3ebe4406e6ec60de304f552e8d4cb Mon Sep 17 00:00:00 2001
From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com>
Date: Wed, 11 Sep 2024 15:45:34 +0200
Subject: [PATCH 07/16] update version number

---
 docs/conf.py | 2 +-
 setup.py     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/conf.py b/docs/conf.py
index 3578886d..dc8d750f 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -22,7 +22,7 @@
 author = 'GEUS Glaciology and Climate'
 
 # The full version, including alpha/beta/rc tags
-release = '1.3.6'
+release = '1.4.1'
 
 
 # -- General configuration ---------------------------------------------------
diff --git a/setup.py b/setup.py
index 3b02996d..1f886ec0 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@
 
 setuptools.setup(
     name="pypromice",
-    version="1.4.0",
+    version="1.4.1",
     author="GEUS Glaciology and Climate",
     description="PROMICE/GC-Net data processing toolbox",
     long_description=long_description,

From 1dcd80dff419b44861115e71a3238ce5dd6c35d2 Mon Sep 17 00:00:00 2001
From: BaptisteVandecrux <b.vandecrux@gmail.com>
Date: Wed, 11 Sep 2024 23:55:29 +0200
Subject: [PATCH 08/16] extracting only *_i for timestamps with time.diff() ==
 10 min

---
 src/pypromice/process/resample.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/pypromice/process/resample.py b/src/pypromice/process/resample.py
index b67b2b57..746a9b0b 100644
--- a/src/pypromice/process/resample.py
+++ b/src/pypromice/process/resample.py
@@ -35,10 +35,18 @@ def resample_dataset(ds_h, t):
     df_d = ds_h.to_dataframe().resample(t).mean()
     
     # taking the 10 min data and using it as instantaneous values:
-    if (t == '60min') and (ds_h.time.diff(dim='time').isel(time=0).dt.total_seconds() == 600):
+    msk = (ds_h.time.diff(dim='time') / np.timedelta64(1, 's') == 600)
+    if (t == '60min') and msk.any():
         cols_to_update = ['p_i', 't_i', 'rh_i', 'rh_i_cor', 'wspd_i', 'wdir_i','wspd_x_i','wspd_y_i']
+        timestamp_10min = ds_h.time.where(msk, drop=True).to_index()
+        timestamp_hour = df_d.index
+        
         for col in cols_to_update:
-            df_d[col] = ds_h.reindex(time=df_d.index)[col.replace('_i','_u')].values
+            if col not in df_d.columns:
+                df_d[col] = np.nan
+            df_d.loc[timestamp_hour.intersection(timestamp_10min), col] = ds_h.reindex(
+                time= timestamp_hour.intersection(timestamp_10min)
+                )[col.replace('_i','_u')].values
             if col == 'p_i':
                 df_d[col] = df_d[col].values-1000
             

From 63cada55b27e22115e2f2af544cbf263046b2a79 Mon Sep 17 00:00:00 2001
From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com>
Date: Thu, 12 Sep 2024 09:13:37 +0200
Subject: [PATCH 09/16] only the updated timestamps need 1000 subtracted

---
 src/pypromice/process/resample.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/pypromice/process/resample.py b/src/pypromice/process/resample.py
index 746a9b0b..7e69a00b 100644
--- a/src/pypromice/process/resample.py
+++ b/src/pypromice/process/resample.py
@@ -40,15 +40,16 @@ def resample_dataset(ds_h, t):
         cols_to_update = ['p_i', 't_i', 'rh_i', 'rh_i_cor', 'wspd_i', 'wdir_i','wspd_x_i','wspd_y_i']
         timestamp_10min = ds_h.time.where(msk, drop=True).to_index()
         timestamp_hour = df_d.index
+        timestamp_to_update = timestamp_hour.intersection(timestamp_10min)
         
         for col in cols_to_update:
             if col not in df_d.columns:
                 df_d[col] = np.nan
-            df_d.loc[timestamp_hour.intersection(timestamp_10min), col] = ds_h.reindex(
-                time= timestamp_hour.intersection(timestamp_10min)
+            df_d.loc[timestamp_to_update, col] = ds_h.reindex(
+                time= timestamp_to_update
                 )[col.replace('_i','_u')].values
             if col == 'p_i':
-                df_d[col] = df_d[col].values-1000
+                df_d.loc[timestamp_to_update, col] = df_d.loc[timestamp_to_update, col].values-1000
             
 
     # recalculating wind direction from averaged directional wind speeds

From 1f12ecf9ea74588b8840ff4b93939616dd117c6c Mon Sep 17 00:00:00 2001
From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com>
Date: Thu, 12 Sep 2024 10:55:40 +0200
Subject: [PATCH 10/16] adding criteria preserving inst. values already there

---
 src/pypromice/process/resample.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/pypromice/process/resample.py b/src/pypromice/process/resample.py
index 7e69a00b..3fde1b71 100644
--- a/src/pypromice/process/resample.py
+++ b/src/pypromice/process/resample.py
@@ -35,16 +35,22 @@ def resample_dataset(ds_h, t):
     df_d = ds_h.to_dataframe().resample(t).mean()
     
     # taking the 10 min data and using it as instantaneous values:
-    msk = (ds_h.time.diff(dim='time') / np.timedelta64(1, 's') == 600)
-    if (t == '60min') and msk.any():
+    is_10_minutes_timestamp = (ds_h.time.diff(dim='time') / np.timedelta64(1, 's') == 600)
+    if (t == '60min') and is_10_minutes_timestamp.any():
         cols_to_update = ['p_i', 't_i', 'rh_i', 'rh_i_cor', 'wspd_i', 'wdir_i','wspd_x_i','wspd_y_i']
-        timestamp_10min = ds_h.time.where(msk, drop=True).to_index()
-        timestamp_hour = df_d.index
-        timestamp_to_update = timestamp_hour.intersection(timestamp_10min)
+        timestamp_10min = ds_h.time.where(is_10_minutes_timestamp, drop=True).to_index()
+        timestamp_round_hour = df_d.index
+        timestamp_to_update = timestamp_round_hour.intersection(timestamp_10min)
         
         for col in cols_to_update:
             if col not in df_d.columns:
                 df_d[col] = np.nan
+            else:
+                # if there are already instantaneous values in the dataset
+                # we want to keep them as they are
+                # removing timestamps where there is already t_i filled from a TX file
+                missing_instantaneous = ds_h.reindex(time=timestamp_to_update)[col].isnull()
+                timestamp_to_update = timestamp_to_update[missing_instantaneous]
             df_d.loc[timestamp_to_update, col] = ds_h.reindex(
                 time= timestamp_to_update
                 )[col.replace('_i','_u')].values

From 15fa7b6a2adef6eac5abd6e8d29b923f1a3ab121 Mon Sep 17 00:00:00 2001
From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com>
Date: Thu, 12 Sep 2024 13:16:42 +0200
Subject: [PATCH 11/16] explicit handling of non numeric columns

---
 src/pypromice/process/resample.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/pypromice/process/resample.py b/src/pypromice/process/resample.py
index 3fde1b71..2280901e 100644
--- a/src/pypromice/process/resample.py
+++ b/src/pypromice/process/resample.py
@@ -32,8 +32,22 @@ def resample_dataset(ds_h, t):
     ds_d : xarray.Dataset
         L3 AWS dataset resampled to the frequency defined by t
     '''
-    df_d = ds_h.to_dataframe().resample(t).mean()
+    # Convert dataset to DataFrame
+    df_d = ds_h.to_dataframe()
     
+    # Identify non-numeric columns
+    non_numeric_cols = df_d.select_dtypes(exclude=['number']).columns
+    
+    # Log a warning and drop non-numeric columns
+    if len(non_numeric_cols) > 0:
+        for col in non_numeric_cols:
+            unique_values = df_d[col].unique()
+            logger.warning(f"Dropping column '{col}' because it is of type '{df_d[col].dtype}' and contains unique values: {unique_values}")
+
+        df_d = df_d.drop(columns=non_numeric_cols)
+    # Resample the DataFrame
+    df_d = df_d.resample(t).mean()
+
     # taking the 10 min data and using it as instantaneous values:
     is_10_minutes_timestamp = (ds_h.time.diff(dim='time') / np.timedelta64(1, 's') == 600)
     if (t == '60min') and is_10_minutes_timestamp.any():

From 01665fc488c26c4091ddd5fe1d72833bc074b13d Mon Sep 17 00:00:00 2001
From: BaptisteVandecrux <b.vandecrux@gmail.com>
Date: Wed, 11 Sep 2024 23:55:29 +0200
Subject: [PATCH 12/16] extracting only *_i for timestamps with time.diff() ==
 10 min

---
 src/pypromice/process/resample.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/pypromice/process/resample.py b/src/pypromice/process/resample.py
index b67b2b57..746a9b0b 100644
--- a/src/pypromice/process/resample.py
+++ b/src/pypromice/process/resample.py
@@ -35,10 +35,18 @@ def resample_dataset(ds_h, t):
     df_d = ds_h.to_dataframe().resample(t).mean()
     
     # taking the 10 min data and using it as instantaneous values:
-    if (t == '60min') and (ds_h.time.diff(dim='time').isel(time=0).dt.total_seconds() == 600):
+    msk = (ds_h.time.diff(dim='time') / np.timedelta64(1, 's') == 600)
+    if (t == '60min') and msk.any():
         cols_to_update = ['p_i', 't_i', 'rh_i', 'rh_i_cor', 'wspd_i', 'wdir_i','wspd_x_i','wspd_y_i']
+        timestamp_10min = ds_h.time.where(msk, drop=True).to_index()
+        timestamp_hour = df_d.index
+        
         for col in cols_to_update:
-            df_d[col] = ds_h.reindex(time=df_d.index)[col.replace('_i','_u')].values
+            if col not in df_d.columns:
+                df_d[col] = np.nan
+            df_d.loc[timestamp_hour.intersection(timestamp_10min), col] = ds_h.reindex(
+                time= timestamp_hour.intersection(timestamp_10min)
+                )[col.replace('_i','_u')].values
             if col == 'p_i':
                 df_d[col] = df_d[col].values-1000
             

From eb64b9d60c128db7246ddb7c2501377b707a0411 Mon Sep 17 00:00:00 2001
From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com>
Date: Thu, 12 Sep 2024 09:13:37 +0200
Subject: [PATCH 13/16] only the updated timestamps need 1000 subtracted

---
 src/pypromice/process/resample.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/pypromice/process/resample.py b/src/pypromice/process/resample.py
index 746a9b0b..7e69a00b 100644
--- a/src/pypromice/process/resample.py
+++ b/src/pypromice/process/resample.py
@@ -40,15 +40,16 @@ def resample_dataset(ds_h, t):
         cols_to_update = ['p_i', 't_i', 'rh_i', 'rh_i_cor', 'wspd_i', 'wdir_i','wspd_x_i','wspd_y_i']
         timestamp_10min = ds_h.time.where(msk, drop=True).to_index()
         timestamp_hour = df_d.index
+        timestamp_to_update = timestamp_hour.intersection(timestamp_10min)
         
         for col in cols_to_update:
             if col not in df_d.columns:
                 df_d[col] = np.nan
-            df_d.loc[timestamp_hour.intersection(timestamp_10min), col] = ds_h.reindex(
-                time= timestamp_hour.intersection(timestamp_10min)
+            df_d.loc[timestamp_to_update, col] = ds_h.reindex(
+                time= timestamp_to_update
                 )[col.replace('_i','_u')].values
             if col == 'p_i':
-                df_d[col] = df_d[col].values-1000
+                df_d.loc[timestamp_to_update, col] = df_d.loc[timestamp_to_update, col].values-1000
             
 
     # recalculating wind direction from averaged directional wind speeds

From d093a572f7f3021a5161da07c213e05ed9f66cc4 Mon Sep 17 00:00:00 2001
From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com>
Date: Thu, 12 Sep 2024 10:55:40 +0200
Subject: [PATCH 14/16] adding criteria preserving inst. values already there

---
 src/pypromice/process/resample.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/pypromice/process/resample.py b/src/pypromice/process/resample.py
index 7e69a00b..3fde1b71 100644
--- a/src/pypromice/process/resample.py
+++ b/src/pypromice/process/resample.py
@@ -35,16 +35,22 @@ def resample_dataset(ds_h, t):
     df_d = ds_h.to_dataframe().resample(t).mean()
     
     # taking the 10 min data and using it as instantaneous values:
-    msk = (ds_h.time.diff(dim='time') / np.timedelta64(1, 's') == 600)
-    if (t == '60min') and msk.any():
+    is_10_minutes_timestamp = (ds_h.time.diff(dim='time') / np.timedelta64(1, 's') == 600)
+    if (t == '60min') and is_10_minutes_timestamp.any():
         cols_to_update = ['p_i', 't_i', 'rh_i', 'rh_i_cor', 'wspd_i', 'wdir_i','wspd_x_i','wspd_y_i']
-        timestamp_10min = ds_h.time.where(msk, drop=True).to_index()
-        timestamp_hour = df_d.index
-        timestamp_to_update = timestamp_hour.intersection(timestamp_10min)
+        timestamp_10min = ds_h.time.where(is_10_minutes_timestamp, drop=True).to_index()
+        timestamp_round_hour = df_d.index
+        timestamp_to_update = timestamp_round_hour.intersection(timestamp_10min)
         
         for col in cols_to_update:
             if col not in df_d.columns:
                 df_d[col] = np.nan
+            else:
+                # if there are already instantaneous values in the dataset
+                # we want to keep them as they are
+                # removing timestamps where there is already t_i filled from a TX file
+                missing_instantaneous = ds_h.reindex(time=timestamp_to_update)[col].isnull()
+                timestamp_to_update = timestamp_to_update[missing_instantaneous]
             df_d.loc[timestamp_to_update, col] = ds_h.reindex(
                 time= timestamp_to_update
                 )[col.replace('_i','_u')].values

From 6615eb3c86c9b4098dc2c938d4da00ebe6fb0a3d Mon Sep 17 00:00:00 2001
From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com>
Date: Thu, 12 Sep 2024 13:16:42 +0200
Subject: [PATCH 15/16] explicit handling of non numeric columns

---
 src/pypromice/process/resample.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/pypromice/process/resample.py b/src/pypromice/process/resample.py
index 3fde1b71..2280901e 100644
--- a/src/pypromice/process/resample.py
+++ b/src/pypromice/process/resample.py
@@ -32,8 +32,22 @@ def resample_dataset(ds_h, t):
     ds_d : xarray.Dataset
         L3 AWS dataset resampled to the frequency defined by t
     '''
-    df_d = ds_h.to_dataframe().resample(t).mean()
+    # Convert dataset to DataFrame
+    df_d = ds_h.to_dataframe()
     
+    # Identify non-numeric columns
+    non_numeric_cols = df_d.select_dtypes(exclude=['number']).columns
+    
+    # Log a warning and drop non-numeric columns
+    if len(non_numeric_cols) > 0:
+        for col in non_numeric_cols:
+            unique_values = df_d[col].unique()
+            logger.warning(f"Dropping column '{col}' because it is of type '{df_d[col].dtype}' and contains unique values: {unique_values}")
+
+        df_d = df_d.drop(columns=non_numeric_cols)
+    # Resample the DataFrame
+    df_d = df_d.resample(t).mean()
+
     # taking the 10 min data and using it as instantaneous values:
     is_10_minutes_timestamp = (ds_h.time.diff(dim='time') / np.timedelta64(1, 's') == 600)
     if (t == '60min') and is_10_minutes_timestamp.any():

From bcaf79ea789ed57d26345d590131364435fe0437 Mon Sep 17 00:00:00 2001
From: Baptiste Vandecrux <35140661+BaptisteVandecrux@users.noreply.github.com>
Date: Thu, 12 Sep 2024 13:59:35 +0200
Subject: [PATCH 16/16] version bump

---
 docs/conf.py | 2 +-
 setup.py     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/conf.py b/docs/conf.py
index dc8d750f..63e3f6ad 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -22,7 +22,7 @@
 author = 'GEUS Glaciology and Climate'
 
 # The full version, including alpha/beta/rc tags
-release = '1.4.1'
+release = '1.4.2'
 
 
 # -- General configuration ---------------------------------------------------
diff --git a/setup.py b/setup.py
index 1f886ec0..8c6db22d 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@
 
 setuptools.setup(
     name="pypromice",
-    version="1.4.1",
+    version="1.4.2",
     author="GEUS Glaciology and Climate",
     description="PROMICE/GC-Net data processing toolbox",
     long_description=long_description,