From b6c9761c9357ac54ea70e01bd41cf01d29d17e84 Mon Sep 17 00:00:00 2001
From: peterdudfield
Date: Tue, 17 Dec 2024 13:26:28 +0000
Subject: [PATCH] remove any nans in the satellite data

---
Reviewer revision: the filtered dataset is now loaded into memory before the
zarr store at sat_path is deleted (open_zarr is lazy, so the original wrote from
a store it had just removed). Hunk sizes are unchanged; the index blob ids
below predate this revision.

 pvnet_app/consts.py         |  6 ++++++
 pvnet_app/data/satellite.py | 36 +++++++++++++++++++++++++++++++++++-
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/pvnet_app/consts.py b/pvnet_app/consts.py
index 0a32205..7ca8af6 100644
--- a/pvnet_app/consts.py
+++ b/pvnet_app/consts.py
@@ -1,3 +1,9 @@
 sat_path = "sat.zarr"
 nwp_ukv_path = "nwp_ukv.zarr"
 nwp_ecmwf_path = "nwp_ecmwf.zarr"
+
+
+uk_box = dict(
+    x_geostationary=[-996_133.85, -480_064.6],
+    y_geostationary=[4_512_606.3, 5_058_679.8],
+)
diff --git a/pvnet_app/data/satellite.py b/pvnet_app/data/satellite.py
index 7f95d9b..032a427 100644
--- a/pvnet_app/data/satellite.py
+++ b/pvnet_app/data/satellite.py
@@ -8,7 +8,7 @@ import ocf_blosc2
 
 from ocf_datapipes.config.load import load_yaml_configuration
 
-from pvnet_app.consts import sat_path
+from pvnet_app.consts import sat_path, uk_box
 
 logger = logging.getLogger(__name__)
 
@@ -312,6 +312,37 @@ def check_model_satellite_inputs_available(
     return available
 
 
+def remove_any_nans_in_satellite():
+    """Drop satellite timestamps that contain NaNs in the UK box and re-save.
+
+    The zarr store at `sat_path` is rewritten only if a timestamp is dropped.
+    """
+    ds_sat = xr.open_zarr(sat_path)
+
+    # only the UK box is checked; a timestamp with any NaN in it is dropped
+    ds_sat_uk = ds_sat.sel(
+        x=slice(*uk_box["x_geostationary"]),
+        y=slice(*uk_box["y_geostationary"]),
+    ).dropna(dim="time", how="any")
+
+    # see which timestamps have been dropped
+    dropped_timestamps = np.setdiff1d(ds_sat.time, ds_sat_uk.time)
+    if len(dropped_timestamps) == 0:
+        logger.info("No NaNs found in satellite data.")
+        return
+
+    logger.info("Removing NaNs from satellite data."
+                f" The following timestamps have been dropped: {dropped_timestamps}")
+
+    # open_zarr is lazy, so load the kept timestamps into memory before deleting
+    # the store they are read from; the write below could not read them otherwise
+    ds_sat = ds_sat.sel(time=~ds_sat.time.isin(dropped_timestamps)).compute()
+
+    # save
+    os.system(f"rm -rf {sat_path}")
+    ds_sat.to_zarr(sat_path)
+
+
 def preprocess_sat_data(t0: pd.Timestamp, use_legacy: bool = False) -> pd.DatetimeIndex:
     """Combine and 5- and 15-minutely satellite data and extend to t0 if required
 
@@ -327,6 +358,9 @@ def preprocess_sat_data(t0: pd.Timestamp, use_legacy: bool = False) -> pd.Dateti
     # Deal with switching between the 5 and 15 minutely satellite data
     combine_5_and_15_sat_data()
 
+    # check for any nans in the satellite data
+    remove_any_nans_in_satellite()
+
     # Interpolate missing satellite timestamps
     interpolate_missing_satellite_timestamps(pd.Timedelta("15min"))
 