Skip to content

Commit

Permalink
remove any nans in the satellite data
Browse files Browse the repository at this point in the history
  • Loading branch information
peterdudfield committed Dec 17, 2024
1 parent 96c5041 commit b6c9761
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 1 deletion.
6 changes: 6 additions & 0 deletions pvnet_app/consts.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# Local zarr store paths used by the app for each input data source
sat_path = "sat.zarr"
nwp_ukv_path = "nwp_ukv.zarr"
nwp_ecmwf_path = "nwp_ecmwf.zarr"


# Bounding box used to crop satellite data to the UK. Each entry is a
# [start, stop] pair, consumed as slice bounds along that axis.
# NOTE(review): values are presumably geostationary projection metres - confirm
uk_box = {
    "x_geostationary": [-996_133.85, -480_064.6],
    "y_geostationary": [4_512_606.3, 5_058_679.8],
}
36 changes: 35 additions & 1 deletion pvnet_app/data/satellite.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import ocf_blosc2
from ocf_datapipes.config.load import load_yaml_configuration

from pvnet_app.consts import sat_path
from pvnet_app.consts import sat_path, uk_box

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -312,6 +312,37 @@ def check_model_satellite_inputs_available(
return available


def remove_any_nans_in_satellite():
    """Drop every satellite timestamp that contains a NaN inside the UK box.

    The zarr store at `sat_path` is opened and cropped to `uk_box`. Any
    timestamp for which at least one value in that crop is NaN is removed
    from the full (uncropped) dataset, which is then written back to
    `sat_path`, replacing the previous store. When no timestamp is dropped
    the store on disk is left untouched.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list
    import shutil

    # Load the data into memory up front. `xr.open_zarr` returns lazily
    # loaded arrays backed by the store on disk, and that store is deleted
    # below before the dataset is written back to the same path.
    ds_sat = xr.open_zarr(sat_path).compute()

    # Slice to the UK box; NaNs outside this region are ignored
    x_start, x_stop = uk_box["x_geostationary"]
    y_start, y_stop = uk_box["y_geostationary"]
    ds_sat_uk = ds_sat.sel(x=slice(x_start, x_stop), y=slice(y_start, y_stop))

    # Keep only the timestamps whose UK crop is completely NaN-free
    ds_sat_uk = ds_sat_uk.dropna(dim="time", how="any")

    # Work out which timestamps were dropped
    dropped_timestamps = np.setdiff1d(ds_sat.time, ds_sat_uk.time)
    if dropped_timestamps.size == 0:
        logger.info("No NaNs found in satellite data.")
        return

    logger.info(
        "Removing NaNs from satellite data."
        " The following timestamps have been dropped: %s",
        dropped_timestamps,
    )

    # Remove the dropped timestamps from the original (uncropped) dataset
    ds_sat = ds_sat.sel(time=~ds_sat.time.isin(dropped_timestamps))

    # Replace the store on disk. shutil.rmtree avoids spawning a shell and
    # raises on failure instead of silently ignoring a non-zero exit status.
    shutil.rmtree(sat_path)
    ds_sat.to_zarr(sat_path)


def preprocess_sat_data(t0: pd.Timestamp, use_legacy: bool = False) -> pd.DatetimeIndex:
"""Combine and 5- and 15-minutely satellite data and extend to t0 if required
Expand All @@ -327,6 +358,9 @@ def preprocess_sat_data(t0: pd.Timestamp, use_legacy: bool = False) -> pd.Dateti
# Deal with switching between the 5 and 15 minutely satellite data
combine_5_and_15_sat_data()

# check for any nans in the satellite data
remove_any_nans_in_satellite()

# Interpolate missing satellite timestamps
interpolate_missing_satellite_timestamps(pd.Timedelta("15min"))

Expand Down

0 comments on commit b6c9761

Please sign in to comment.