Skip to content

Commit

Permalink
Merge pull request #136 from openclimatefix/max-nan-infill
Browse files (browse the repository at this point in the history)
Only infill last 3 hours #135
  • Loading branch information
peterdudfield authored Oct 14, 2024
2 parents 2e4e70a + e100a1b commit 5ae2d0f
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 6 deletions.
17 changes: 12 additions & 5 deletions pvnet_app/data/satellite.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import pandas as pd
import xarray as xr
import logging
from typing import Optional
import os
import fsspec
import ocf_blosc2
Expand Down Expand Up @@ -214,20 +215,26 @@ def interpolate_missing_satellite_timestamps(max_gap: pd.Timedelta) -> None:
ds_sat.to_zarr(sat_path)


def extend_satellite_data_with_nans(t0: pd.Timestamp) -> None:
def extend_satellite_data_with_nans(t0: pd.Timestamp, satellite_data_path: Optional[str] = sat_path) -> None:
"""Fill the satellite data with NaNs out to time t0
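If the satellite data is delayed by more than 3 hours, only the last 3 hours
before t0 are infilled.

Args:
t0: The init-time of the forecast
satellite_data_path: Path of the satellite zarr store, which is opened and then
re-saved in place. Defaults to `sat_path`.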
"""

# Find how delayed the satellite data is
ds_sat = xr.open_zarr(sat_path)
delay = t0 - pd.to_datetime(ds_sat.time).max()
ds_sat = xr.open_zarr(satellite_data_path)
sat_max_time = pd.to_datetime(ds_sat.time).max()
delay = t0 - sat_max_time

if delay > pd.Timedelta(0):
logger.info(f"Filling most recent {delay} with NaNs")

if delay > pd.Timedelta("3h"):
logger.warning("The satellite data is delayed by more than 3 hours. "
"Will only infill last 3 hours.")
delay = pd.Timedelta("3h")

# Load into memory so we can delete it on disk
ds_sat = ds_sat.compute()

Expand All @@ -238,8 +245,8 @@ def extend_satellite_data_with_nans(t0: pd.Timestamp) -> None:
ds_sat = ds_sat.reindex(time=np.concatenate([ds_sat.time, fill_times]), fill_value=np.nan)

# Re-save inplace
os.system(f"rm -rf {sat_path}")
ds_sat.to_zarr(sat_path)
os.system(f"rm -rf {satellite_data_path}")
ds_sat.to_zarr(satellite_data_path)


def check_model_satellite_inputs_available(
Expand Down
45 changes: 44 additions & 1 deletion tests/data/test_satellite.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
sat_path,
sat_5_path,
sat_15_path,
extend_satellite_data_with_nans
)


Expand Down Expand Up @@ -183,7 +184,6 @@ def test_preprocess_old_sat_5_data(sat_5_data_delayed, sat_15_data, test_t0):
check_timesteps(sat_path, expected_freq_mins=5)



def test_check_model_satellite_inputs_available(config_filename):

t0 = datetime(2023,1,1)
Expand All @@ -194,3 +194,46 @@ def test_check_model_satellite_inputs_available(config_filename):
assert check_model_satellite_inputs_available(config_filename, t0, sat_datetime_1)
assert check_model_satellite_inputs_available(config_filename, t0, sat_datetime_2)
assert not check_model_satellite_inputs_available(config_filename, t0, sat_datetime_3)


def test_extend_satellite_data_with_nans(sat_5_data, test_t0):
    """Check that the satellite zarr is not extended when it already reaches t0.

    The forecast init-time is set to the latest timestamp in the satellite data,
    so the delay is zero and the time coordinate on disk must be unchanged after
    calling `extend_satellite_data_with_nans`.

    Args:
        sat_5_data: Fixture providing the satellite dataset written to disk here
        test_t0: Fixture requested for parity with the other tests; not used directly
    """

    # Address the store by absolute path inside the temporary directory instead of
    # calling os.chdir: changing the process-wide working directory and never
    # restoring it leaves later code running in a directory that has been deleted.
    with tempfile.TemporaryDirectory() as tmpdirname:

        # save sat to zarr
        filename = os.path.join(tmpdirname, "sat_5_data.zarr")
        sat_5_data.to_zarr(filename)

        original_times = sat_5_data.time.values

        # t0 equal to the most recent satellite timestamp means there is no delay
        t0 = pd.to_datetime(sat_5_data.time).max()
        extend_satellite_data_with_nans(t0=t0, satellite_data_path=filename)

        # load new file
        ds = xr.open_zarr(filename)

        # Compare lengths first so a shape mismatch fails with a clear assertion
        # rather than inside the element-wise comparison
        assert len(ds.time) == len(original_times)
        assert (ds.time.values == original_times).all()


def test_extend_satellite_data_with_nans_over_3_hours(sat_5_data, test_t0):
    """Check that at most the last 3 hours are infilled when the data is older.

    The forecast init-time is set 4 hours after the latest satellite timestamp.
    Only 3 hours of 5-minutely timestamps should be appended, and the final
    timestamp on disk must equal t0.

    Args:
        sat_5_data: Fixture providing the satellite dataset written to disk here
        test_t0: Fixture requested for parity with the other tests; not used directly
    """

    # Address the store by absolute path inside the temporary directory instead of
    # calling os.chdir: changing the process-wide working directory and never
    # restoring it leaves later code running in a directory that has been deleted.
    with tempfile.TemporaryDirectory() as tmpdirname:

        # save sat to zarr
        filename = os.path.join(tmpdirname, "sat_5_data.zarr")
        sat_5_data.to_zarr(filename)

        original_times = sat_5_data.time.values

        # Make the satellite data 4 hours stale relative to t0
        t0 = pd.to_datetime(sat_5_data.time).max() + pd.Timedelta(hours=4)
        extend_satellite_data_with_nans(t0=t0, satellite_data_path=filename)

        # load new file
        ds = xr.open_zarr(filename)

        # 3 hours of 5-minutely steps -> 36 extra timestamps
        expected_new_steps = pd.Timedelta(hours=3) // pd.Timedelta(minutes=5)
        assert len(original_times) + expected_new_steps == len(ds.time)
        assert ds.time.values[-1] == t0

0 comments on commit 5ae2d0f

Please sign in to comment.