GEUS-Glaciology-and-Climate · BaptisteVandecrux · Sep 12, 2024 · Aug 23, 2024 · Aug 23, 2024 · Aug 23, 2024
diff --git a/docs/conf.py b/docs/conf.py
@@ -22,7 +22,7 @@
 author = 'GEUS Glaciology and Climate'
 
 # The full version, including alpha/beta/rc tags
-release = '1.4.1'
+release = '1.4.2'
 
 
 # -- General configuration ---------------------------------------------------

diff --git a/setup.py b/setup.py
@@ -5,7 +5,7 @@
 
 setuptools.setup(
     name="pypromice",
-    version="1.4.1",
+    version="1.4.2",
     author="GEUS Glaciology and Climate",
     description="PROMICE/GC-Net data processing toolbox",
     long_description=long_description,

diff --git a/src/pypromice/process/resample.py b/src/pypromice/process/resample.py
@@ -32,15 +32,44 @@ def resample_dataset(ds_h, t):
     ds_d : xarray.Dataset
         L3 AWS dataset resampled to the frequency defined by t
     '''
-    df_d = ds_h.to_dataframe().resample(t).mean()
+    # Convert dataset to DataFrame
+    df_d = ds_h.to_dataframe()
 
+    # Identify non-numeric columns
+    non_numeric_cols = df_d.select_dtypes(exclude=['number']).columns
+
+    # Log a warning and drop non-numeric columns
+    if len(non_numeric_cols) > 0:
+        for col in non_numeric_cols:
+            unique_values = df_d[col].unique()
+            logger.warning(f"Dropping column '{col}' because it is of type '{df_d[col].dtype}' and contains unique values: {unique_values}")
+
+        df_d = df_d.drop(columns=non_numeric_cols)
+    # Resample the DataFrame
+    df_d = df_d.resample(t).mean()
+
     # taking the 10 min data and using it as instantaneous values:
-    if (t == '60min') and (ds_h.time.diff(dim='time').isel(time=0).dt.total_seconds() == 600):
+    is_10_minutes_timestamp = (ds_h.time.diff(dim='time') / np.timedelta64(1, 's') == 600)
+    if (t == '60min') and is_10_minutes_timestamp.any():
         cols_to_update = ['p_i', 't_i', 'rh_i', 'rh_i_cor', 'wspd_i', 'wdir_i','wspd_x_i','wspd_y_i']
+        timestamp_10min = ds_h.time.where(is_10_minutes_timestamp, drop=True).to_index()
+        timestamp_round_hour = df_d.index
+        timestamp_to_update = timestamp_round_hour.intersection(timestamp_10min)
+
         for col in cols_to_update:
-            df_d[col] = ds_h.reindex(time=df_d.index)[col.replace('_i','_u')].values
+            if col not in df_d.columns:
+                df_d[col] = np.nan
+            else:
+                # Keep instantaneous values already present in the dataset for this column
+                # (e.g. filled from a TX file): drop their timestamps from the set to update.
+                # NOTE(review): this also narrows timestamp_to_update for later columns - confirm intended.
+                missing_instantaneous = ds_h.reindex(time=timestamp_to_update)[col].isnull()
+                timestamp_to_update = timestamp_to_update[missing_instantaneous]
+            df_d.loc[timestamp_to_update, col] = ds_h.reindex(
+                time= timestamp_to_update
+                )[col.replace('_i','_u')].values
             if col == 'p_i':
-                df_d[col] = df_d[col].values-1000
+                df_d.loc[timestamp_to_update, col] = df_d.loc[timestamp_to_update, col].values-1000
 
 
     # recalculating wind direction from averaged directional wind speeds