diff --git a/pvnet_app/data/satellite.py b/pvnet_app/data/satellite.py
index 8c77d30..2c8c0cf 100644
--- a/pvnet_app/data/satellite.py
+++ b/pvnet_app/data/satellite.py
@@ -2,6 +2,7 @@
 import pandas as pd
 import xarray as xr
 import logging
+from typing import Optional
 import os
 import fsspec
 import ocf_blosc2
@@ -214,7 +215,7 @@ def interpolate_missing_satellite_timestamps(max_gap: pd.Timedelta) -> None:
         ds_sat.to_zarr(sat_path)
 
 
-def extend_satellite_data_with_nans(t0: pd.Timestamp) -> None:
+def extend_satellite_data_with_nans(t0: pd.Timestamp, satellite_data_path: Optional[str] = sat_path) -> None:
     """Fill the satellite data with NaNs out to time t0
 
     Args:
@@ -222,7 +223,7 @@ def extend_satellite_data_with_nans(t0: pd.Timestamp) -> None:
     """
 
     # Find how delayed the satellite data is
-    ds_sat = xr.open_zarr(sat_path)
+    ds_sat = xr.open_zarr(satellite_data_path)
     sat_max_time = pd.to_datetime(ds_sat.time).max()
     delay = t0 - sat_max_time
 
@@ -244,8 +245,8 @@ def extend_satellite_data_with_nans(t0: pd.Timestamp) -> None:
         ds_sat = ds_sat.reindex(time=np.concatenate([ds_sat.time, fill_times]), fill_value=np.nan)
 
         # Re-save inplace
-        os.system(f"rm -rf {sat_path}")
-        ds_sat.to_zarr(sat_path)
+        os.system(f"rm -rf {satellite_data_path}")
+        ds_sat.to_zarr(satellite_data_path)
 
 
 def check_model_satellite_inputs_available(
diff --git a/tests/data/test_satellite.py b/tests/data/test_satellite.py
index 5a367bc..b43c753 100644
--- a/tests/data/test_satellite.py
+++ b/tests/data/test_satellite.py
@@ -26,6 +26,7 @@
     sat_path,
     sat_5_path,
     sat_15_path,
+    extend_satellite_data_with_nans
 )
 
 
@@ -183,7 +184,6 @@ def test_preprocess_old_sat_5_data(sat_5_data_delayed, sat_15_data, test_t0):
         check_timesteps(sat_path, expected_freq_mins=5)
 
 
-
 def test_check_model_satellite_inputs_available(config_filename):
 
     t0 = datetime(2023,1,1)
@@ -194,3 +194,46 @@ def test_check_model_satellite_inputs_available(config_filename):
     assert check_model_satellite_inputs_available(config_filename, t0, sat_datetime_1)
     assert check_model_satellite_inputs_available(config_filename, t0, sat_datetime_2)
     assert not check_model_satellite_inputs_available(config_filename, t0, sat_datetime_3)
+
+
+def test_extend_satellite_data_with_nans(sat_5_data, test_t0):
+
+    # make temporary directory
+    with tempfile.TemporaryDirectory() as tmpdirname:
+
+        # Change to temporary working directory
+        os.chdir(tmpdirname)
+
+        # save sat to zarr
+        filename = "sat_5_data.zarr"
+        sat_5_data.to_zarr(filename)
+
+        time = sat_5_data.time.values
+        t0 = pd.to_datetime(sat_5_data.time).max()
+        extend_satellite_data_with_nans(t0=t0, satellite_data_path=filename)
+
+        # load new file
+        ds = xr.open_zarr(filename)
+        assert (ds.time.values == time).all()
+
+
+def test_extend_satellite_data_with_nans_over_3_hours(sat_5_data, test_t0):
+
+    # make temporary directory
+    with tempfile.TemporaryDirectory() as tmpdirname:
+
+        # Change to temporary working directory
+        os.chdir(tmpdirname)
+
+        # save sat to zarr
+        filename = "sat_5_data.zarr"
+        sat_5_data.to_zarr(filename)
+
+        time = sat_5_data.time.values
+        t0 = pd.to_datetime(sat_5_data.time).max() + pd.Timedelta(hours=4)
+        extend_satellite_data_with_nans(t0=t0, satellite_data_path=filename)
+
+        # load new file
+        ds = xr.open_zarr(filename)
+        assert len(time) + 3*12 == len(ds.time)
+        assert ds.time.values[-1] == t0