diff --git a/pvnet_app/consts.py b/pvnet_app/consts.py
index 0a32205..7ca8af6 100644
--- a/pvnet_app/consts.py
+++ b/pvnet_app/consts.py
@@ -1,3 +1,9 @@
 sat_path = "sat.zarr"
 nwp_ukv_path = "nwp_ukv.zarr"
 nwp_ecmwf_path = "nwp_ecmwf.zarr"
+
+
+uk_box = dict(
+    x_geostationary=[-996_133.85, -480_064.6],
+    y_geostationary=[4_512_606.3, 5_058_679.8],
+)
diff --git a/pvnet_app/data/satellite.py b/pvnet_app/data/satellite.py
index 7f95d9b..db278c7 100644
--- a/pvnet_app/data/satellite.py
+++ b/pvnet_app/data/satellite.py
@@ -327,6 +327,9 @@ def preprocess_sat_data(t0: pd.Timestamp, use_legacy: bool = False) -> pd.Dateti
     # Deal with switching between the 5 and 15 minutely satellite data
     combine_5_and_15_sat_data()
 
+    # Check for nans in the satellite data
+    check_for_constant_values(value=np.nan, threshold=0)
+
     # Interpolate missing satellite timestamps
     interpolate_missing_satellite_timestamps(pd.Timedelta("15min"))
 
@@ -343,30 +346,47 @@ def preprocess_sat_data(t0: pd.Timestamp, use_legacy: bool = False) -> pd.Dateti
     extend_satellite_data_with_nans(t0)
 
     # Check for zeros in the satellite data
-    check_for_zeros()
+    check_for_constant_values()
 
     return sat_timestamps
 
 
-def check_for_zeros():
-    """Check the satellite data for zeros and raise an exception
+def check_for_constant_values(
+    value: float = 0,
+    threshold: float = ERROR_ZERO_PERCENTAGE,
+) -> None:
+    """Check the satellite data for constant values and raise an exception
 
     This sometimes happen when the satellite data is corrupt
 
     Note that in the UK, even at night, the values are not zero.
+
+    Args:
+        value: The value to search for. NaN is supported and is matched with np.isnan
+        threshold: The fraction (between 0 and 1) of data points in a single time step
+            that may equal `value` before an exception is raised
+
+    Raises:
+        Exception: If, for any time step, the fraction of data points equal to `value`
+            is greater than `threshold`
     """
-    # check satellite for zeros
-    logger.info("Checking satellite data for zeros")
+    logger.info(f"Checking satellite data for constant value ({value})")
+
+    # NaN never compares equal to itself, so `data == np.nan` is False everywhere and
+    # NaNs have to be counted with np.isnan instead
+    check_nan = value is not None and bool(np.isnan(value))
+
     ds_sat = xr.open_zarr(sat_path)
     shape = ds_sat.data.shape
     n_data_points_per_timestep = shape[1] * shape[2] * shape[3]
     n_time_steps = shape[0]
     for i in range(n_time_steps):
         data = ds_sat.data[i].values
-        if (data == 0).sum() / n_data_points_per_timestep > ERROR_ZERO_PERCENTAGE:
+        matches = np.isnan(data) if check_nan else (data == value)
+        if matches.sum() / n_data_points_per_timestep > threshold:
             time = ds_sat.time[i].values
             message = (
-                f"Satellite data contains zeros (greater than {ERROR_ZERO_PERCENTAGE}), "
+                f"Satellite data contains values equal to {value} (greater than {threshold}), "
                 f"This is for time step {time}"
             )
             raise Exception(message)
diff --git a/tests/data/test_satellite.py b/tests/data/test_satellite.py
index 8b1d550..14d0ceb 100644
--- a/tests/data/test_satellite.py
+++ b/tests/data/test_satellite.py
@@ -28,7 +28,7 @@
     sat_path,
     sat_5_path,
     sat_15_path,
-    extend_satellite_data_with_nans
+    extend_satellite_data_with_nans,
 )
 
 
@@ -242,7 +242,7 @@ def test_extend_satellite_data_with_nans_over_3_hours(sat_5_data, test_t0):
 
 
 def test_zeros_in_sat_data(sat_15_data_small, test_t0):
-    """Download and process only the 15 minute satellite data"""
+    """Check error is made if data has zeros"""
 
     # make temporary directory
     with tempfile.TemporaryDirectory() as tmpdirname:
@@ -262,3 +262,24 @@ def test_zeros_in_sat_data(sat_15_data_small, test_t0):
         # check an error is made
         with pytest.raises(Exception):
             preprocess_sat_data(test_t0)
+
+
+def test_remove_satellite_data(sat_15_data_small, test_t0):
+    """Check error is made if data has nans"""
+    # make temporary directory
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        # Change to temporary working directory
+        os.chdir(tmpdirname)
+
+        # make half the values nans
+        sat_15_data_small.data[::2] = np.nan
+
+        # Make 15-minutely satellite data available
+        save_to_zarr_zip(sat_15_data_small, filename="latest.zarr.zip")
+
+        os.environ["SATELLITE_ZARR_PATH"] = "latest.zarr.zip"
+        download_all_sat_data()
+
+        # check an error is made
+        with pytest.raises(Exception):
+            preprocess_sat_data(test_t0)