diff --git a/docs/conf.py b/docs/conf.py index dc8d750f..63e3f6ad 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -22,7 +22,7 @@ author = 'GEUS Glaciology and Climate' # The full version, including alpha/beta/rc tags -release = '1.4.1' +release = '1.4.2' # -- General configuration --------------------------------------------------- diff --git a/setup.py b/setup.py index 1f886ec0..8c6db22d 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="pypromice", - version="1.4.1", + version="1.4.2", author="GEUS Glaciology and Climate", description="PROMICE/GC-Net data processing toolbox", long_description=long_description, diff --git a/src/pypromice/process/resample.py b/src/pypromice/process/resample.py index b67b2b57..2280901e 100644 --- a/src/pypromice/process/resample.py +++ b/src/pypromice/process/resample.py @@ -32,15 +32,44 @@ def resample_dataset(ds_h, t): ds_d : xarray.Dataset L3 AWS dataset resampled to the frequency defined by t ''' - df_d = ds_h.to_dataframe().resample(t).mean() + # Convert dataset to DataFrame + df_d = ds_h.to_dataframe() + # Identify non-numeric columns + non_numeric_cols = df_d.select_dtypes(exclude=['number']).columns + + # Log a warning and drop non-numeric columns + if len(non_numeric_cols) > 0: + for col in non_numeric_cols: + unique_values = df_d[col].unique() + logger.warning(f"Dropping column '{col}' because it is of type '{df_d[col].dtype}' and contains unique values: {unique_values}") + + df_d = df_d.drop(columns=non_numeric_cols) + # Resample the DataFrame + df_d = df_d.resample(t).mean() + # taking the 10 min data and using it as instantaneous values: - if (t == '60min') and (ds_h.time.diff(dim='time').isel(time=0).dt.total_seconds() == 600): + is_10_minutes_timestamp = (ds_h.time.diff(dim='time') / np.timedelta64(1, 's') == 600) + if (t == '60min') and is_10_minutes_timestamp.any(): cols_to_update = ['p_i', 't_i', 'rh_i', 'rh_i_cor', 'wspd_i', 'wdir_i','wspd_x_i','wspd_y_i'] + timestamp_10min = ds_h.time.where(is_10_minutes_timestamp, drop=True).to_index() + timestamp_round_hour = df_d.index + timestamp_to_update = timestamp_round_hour.intersection(timestamp_10min) + for col in cols_to_update: - df_d[col] = ds_h.reindex(time=df_d.index)[col.replace('_i','_u')].values + if col not in df_d.columns: + df_d[col] = np.nan + else: + # if there are already instantaneous values in the dataset + # we want to keep them as they are + # removing timestamps where there is already t_i filled from a TX file + missing_instantaneous = ds_h.reindex(time=timestamp_to_update)[col].isnull() + timestamp_to_update = timestamp_to_update[missing_instantaneous] + df_d.loc[timestamp_to_update, col] = ds_h.reindex( + time= timestamp_to_update + )[col.replace('_i','_u')].values if col == 'p_i': - df_d[col] = df_d[col].values-1000 + df_d.loc[timestamp_to_update, col] = df_d.loc[timestamp_to_update, col].values-1000 # recalculating wind direction from averaged directional wind speeds