"""Trigonometric (sin/cos) encodings of date and time-of-day for a NumpyBatch.

This is the content of ``ocf_data_sampler/numpy_batch/datetime_features.py``
followed by its test (``tests/numpy_batch/test_datetime_features.py``),
reformatted from the collapsed patch.
"""

import numpy as np
import pandas as pd
from numpy.typing import NDArray

# --- ocf_data_sampler/numpy_batch/__init__.py --------------------------------
# The patch re-exports the new helper next to the existing converters:
#     from .datetime_features import make_datetime_numpy_batch
#
# --- ocf_data_sampler/torch_datasets/process_and_combine.py ------------------
# The patch adds `make_datetime_numpy_batch` to the list of names imported
# from `ocf_data_sampler.numpy_batch`.

# Denominators used to scale a timestamp onto one full turn of the unit circle.
_MINUTES_PER_DAY = 24 * 60
_DAYS_PER_YEAR = 365


def _get_date_time_in_pi(
    dt: pd.DatetimeIndex,
) -> tuple[NDArray[np.float64], NDArray[np.float64]]:
    """Convert datetimes into date and time-of-day angles, in radians.

    Args:
        dt: Datetimes to encode. Anything accepted by the ``pd.DatetimeIndex``
            constructor (e.g. a ``datetime64`` array) is coerced first.

    Returns:
        ``(date_in_pi, time_in_pi)``: two float64 NumPy arrays with one entry
        per datetime. ``date_in_pi`` is ``2*pi * day_of_year / 365`` and
        ``time_in_pi`` is ``2*pi * minute_of_day / 1440``.
    """
    dt = pd.DatetimeIndex(dt)

    # Arithmetic on DatetimeIndex fields yields pandas Index objects, and NumPy
    # ufuncs applied to an Index return an Index again. Convert explicitly so
    # callers really receive ndarrays, as the annotation promises.
    day_of_year = dt.dayofyear.to_numpy(dtype=np.float64)
    minute_of_day = (dt.minute + dt.hour * 60).to_numpy(dtype=np.float64)

    # Positions on the sin-cos circle.
    time_in_pi = (2 * np.pi) * (minute_of_day / _MINUTES_PER_DAY)
    # NOTE: the divisor is fixed at 365, so day 366 of a leap year lands
    # slightly past one full turn (it wraps around under sin/cos).
    date_in_pi = (2 * np.pi) * (day_of_year / _DAYS_PER_YEAR)

    return date_in_pi, time_in_pi


def make_datetime_numpy_batch(
    datetimes: pd.DatetimeIndex,
    key_prefix: str = "wind",
) -> dict[str, NDArray[np.float64]]:
    """Make a dictionary of sin/cos datetime features.

    Args:
        datetimes: Datetimes to encode.
        key_prefix: Prefix placed in front of every key of the result.

    Returns:
        A dict with the keys ``<key_prefix>_date_sin``, ``<key_prefix>_date_cos``,
        ``<key_prefix>_time_sin`` and ``<key_prefix>_time_cos`` (in that order),
        each mapping to a float64 NumPy array with one value per datetime.
    """
    date_in_pi, time_in_pi = _get_date_time_in_pi(datetimes)

    return {
        f"{key_prefix}_date_sin": np.sin(date_in_pi),
        f"{key_prefix}_date_cos": np.cos(date_in_pi),
        f"{key_prefix}_time_sin": np.sin(time_in_pi),
        f"{key_prefix}_time_cos": np.cos(time_in_pi),
    }


# --- tests/numpy_batch/test_datetime_features.py ------------------------------
def test_make_datetime_numpy_batch():
    """Check the keys, lengths, types and value range of the datetime features."""
    # Three half-hourly timestamps on 2024-06-20
    datetimes = pd.to_datetime(["2024-06-20 12:00", "2024-06-20 12:30", "2024-06-20 13:00"])

    # Calculate the datetime features
    datetime_features = make_datetime_numpy_batch(datetimes)

    assert len(datetime_features) == 4

    assert len(datetime_features["wind_date_sin"]) == len(datetimes)
    assert (datetime_features["wind_date_cos"] != datetime_features["wind_date_sin"]).all()

    for key in ("wind_date_sin", "wind_date_cos", "wind_time_sin", "wind_time_cos"):
        feature = datetime_features[key]
        # A numpy batch must hold NumPy arrays, not pandas objects
        assert isinstance(feature, np.ndarray)
        # All values are between -1 and 1
        assert (np.abs(feature) <= 1).all()