Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add satellite quality control #28

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 48 additions & 7 deletions pvnet_app/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,38 @@ def regrid_nwp_data(nwp_path):
ds_regridded.chunk(dict(step=12, x=100, y=100)).to_zarr(nwp_path)

return


def sat_data_qualtiy_control(sat_path):
"""This function loads the satellite data, removes timestamps which don't pass some quality
checks and saves it back out. The data is resaved in-place.
"""
# Need to set this high enough not to catch the zeros in the 2 visible spectrum channels at
# night which is valid. 2 out of a total of 11 channels + 10% of 9 others will trigger
zero_frac_limit = (2 + 0.1*9)/11

# Pull the raw data into RAM
ds = xr.open_zarr(sat_path).compute()

# Check fraction of zeros at each time step
frac_zeros = (ds.data==0).mean(dim=("y_geostationary", "x_geostationary", "variable"))

logger.info(
f"Found zeros fractions in each timestamp:\n"
f"{frac_zeros.to_dataframe().rename({'data':'zero_fraction'}, axis=1)}"
)

if (frac_zeros<=zero_frac_limit).all():
logger.info("No sat quality issues - skipping this step")

else:
bad_timestamp_mask = frac_zeros>zero_frac_limit
bad_timestamps = frac_zeros.where(bad_timestamp_mask, drop=True).time.values
logger.info(f"Removing timestamps: {bad_timestamps}")
ds = ds.where(~bad_timestamp_mask, drop=True)
os.system(f"rm -fr {sat_path}")
ds.to_zarr(sat_path)



def populate_data_config_sources(input_path, output_path):
Expand Down Expand Up @@ -323,10 +355,9 @@ def app(
# ---------------------------------------------------------------------------
# 1. Prepare data sources

# Make pands Series of most recent GSP effective capacities

# ------------ GSP
logger.info("Loading GSP metadata")

ds_gsp = next(iter(OpenGSPFromDatabase()))

# Get capacities from the database
Expand All @@ -341,24 +372,34 @@ def app(

# Set up ID location query object
gsp_id_to_loc = GSPLocationLookup(ds_gsp.x_osgb, ds_gsp.y_osgb)


# ------------ SATELLITE

# Download satellite data
logger.info("Downloading zipped satellite data")
fs = fsspec.open(os.environ["SATELLITE_ZARR_PATH"]).fs
fs.get(os.environ["SATELLITE_ZARR_PATH"], "sat.zarr.zip")

# Satellite data quality control step
sat_data_qualtiy_control("sat.zarr.zip")

# Also download 15-minute satellite if it exists
sat_latest_15 = os.environ["SATELLITE_ZARR_PATH"].replace(".zarr.zip", "_15.zarr.zip")
if fs.exists(sat_latest_15):
logger.info("Downloading 15-minute satellite data")
fs.get(sat_latest_15, "sat_15.zarr.zip")

# Download nwp data

# 15-min satellite data quality control step
sat_data_qualtiy_control("sat_15.zarr.zip")

# ------------ NWP

# Download NWP data
logger.info("Downloading nwp data")
fs = fsspec.open(os.environ["NWP_ZARR_PATH"]).fs
fs.get(os.environ["NWP_ZARR_PATH"], "nwp.zarr", recursive=True)

# Regrid the nwp data if needed
# Regrid the NWP data if needed
regrid_nwp_data("nwp.zarr")

# ---------------------------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def sat_data():

# Add data to dataset
ds["data"] = xr.DataArray(
np.zeros([len(ds[c]) for c in ds.coords]),
np.ones([len(ds[c]) for c in ds.coords]),
coords=ds.coords,
)

Expand Down
Loading