Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

No satellite nans #159

Merged
merged 5 commits into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions pvnet_app/consts.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# File names of the zarr stores for the satellite and NWP (UKV / ECMWF) inputs.
sat_path = "sat.zarr"
nwp_ukv_path = "nwp_ukv.zarr"
nwp_ecmwf_path = "nwp_ecmwf.zarr"


# Extent of the UK in geostationary x/y coordinates, one two-element list per
# axis. NOTE(review): presumably [min, max] bounds in the satellite
# projection's native units -- confirm against the code that consumes it.
uk_box = dict(
    x_geostationary=[-996_133.85, -480_064.6],
    y_geostationary=[4_512_606.3, 5_058_679.8],
)
17 changes: 10 additions & 7 deletions pvnet_app/data/satellite.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import ocf_blosc2
from ocf_datapipes.config.load import load_yaml_configuration

from pvnet_app.consts import sat_path
from pvnet_app.consts import sat_path, uk_box

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -327,6 +327,9 @@ def preprocess_sat_data(t0: pd.Timestamp, use_legacy: bool = False) -> pd.Dateti
# Deal with switching between the 5 and 15 minutely satellite data
combine_5_and_15_sat_data()

# Check for nans in the satellite data
check_for_constant_values(value=np.nan, threshold=0)

# Interpolate missing satellite timestamps
interpolate_missing_satellite_timestamps(pd.Timedelta("15min"))

Expand All @@ -343,30 +346,30 @@ def preprocess_sat_data(t0: pd.Timestamp, use_legacy: bool = False) -> pd.Dateti
extend_satellite_data_with_nans(t0)

# Check for zeros in the satellite data
check_for_zeros()
check_for_constant_values()
peterdudfield marked this conversation as resolved.
Show resolved Hide resolved

return sat_timestamps


def check_for_zeros():
"""Check the satellite data for zeros and raise an exception
def check_for_constant_values(value: Optional[float] = 0, threshold: Optional[float] = ERROR_ZERO_PERCENTAGE) -> None:
    """Check the satellite data for a constant value and raise an exception.

    This sometimes happens when the satellite data is corrupt.

    Note that in the UK, even at night, the values are not zero.

    Args:
        value: The value to look for in each time step. NaN is supported: it
            is matched with ``np.isnan`` because ``nan == nan`` is always
            False, so a plain equality test would never find it.
        threshold: The largest tolerated fraction of data points in a single
            time step that may be equal to ``value``. The check fails only
            when the fraction is strictly greater than this.

    Raises:
        Exception: If, for any time step, the fraction of data points equal
            to ``value`` is greater than ``threshold``.
    """
    logger.info(f"Checking satellite data for constant value ({value})")
    ds_sat = xr.open_zarr(sat_path)
    shape = ds_sat.data.shape
    n_data_points_per_timestep = shape[1] * shape[2] * shape[3]
    n_time_steps = shape[0]

    # NaN never compares equal to anything (including itself), so it needs a
    # dedicated test. Guard against None, which np.isnan cannot handle.
    look_for_nan = value is not None and np.isnan(value)

    for i in range(n_time_steps):
        data = ds_sat.data[i].values
        matches = np.isnan(data) if look_for_nan else (data == value)
        if matches.sum() / n_data_points_per_timestep > threshold:
            time = ds_sat.time[i].values
            message = (
                f"Satellite data contains too many values equal to {value} "
                f"(fraction greater than {threshold}), "
                f"This is for time step {time}"
            )
            raise Exception(message)
Expand Down
25 changes: 23 additions & 2 deletions tests/data/test_satellite.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
sat_path,
sat_5_path,
sat_15_path,
extend_satellite_data_with_nans
extend_satellite_data_with_nans,
)


Expand Down Expand Up @@ -242,7 +242,7 @@ def test_extend_satellite_data_with_nans_over_3_hours(sat_5_data, test_t0):


def test_zeros_in_sat_data(sat_15_data_small, test_t0):
"""Download and process only the 15 minute satellite data"""
"""Check error is made if data has zeros"""

# make temporary directory
with tempfile.TemporaryDirectory() as tmpdirname:
Expand All @@ -262,3 +262,24 @@ def test_zeros_in_sat_data(sat_15_data_small, test_t0):
# check an error is made
with pytest.raises(Exception):
preprocess_sat_data(test_t0)


def test_remove_satellite_data(sat_15_data_small, test_t0):
    """Check error is made if data has nans"""
    # make temporary directory
    with tempfile.TemporaryDirectory() as tmpdirname:
        # Change to temporary working directory
        os.chdir(tmpdirname)

        # set every other slice along the first axis of the data to nan
        sat_15_data_small.data[::2] = np.nan

        # Make 15-minutely satellite data available
        save_to_zarr_zip(sat_15_data_small, filename="latest.zarr.zip")

        os.environ["SATELLITE_ZARR_PATH"] = "latest.zarr.zip"
        download_all_sat_data()

        # check an error is made
        with pytest.raises(Exception):
            preprocess_sat_data(test_t0)
Loading