From f1119afb8230df155d98dce864e1de569dd2ce69 Mon Sep 17 00:00:00 2001
From: peterdudfield
Date: Mon, 14 Oct 2024 17:11:19 +0100
Subject: [PATCH] add option to only download and process some nwp data

---
Review note: the app.py call sites pass `not use_ecmwf_only`. On a Python
bool, `~` is integer bitwise inversion (~True == -2, ~False == -1) and both
results are truthy, so with `~` the UKV download/preprocess steps could never
be skipped. This assumes `use_ecmwf_only` is a plain bool; its definition is
outside this patch and should be confirmed.

 pvnet_app/app.py      |  4 ++--
 pvnet_app/data/nwp.py | 54 +++++++++++++++++++++++++++----------------
 2 files changed, 36 insertions(+), 22 deletions(-)

diff --git a/pvnet_app/app.py b/pvnet_app/app.py
index 3f48c49..9450cd5 100644
--- a/pvnet_app/app.py
+++ b/pvnet_app/app.py
@@ -186,10 +186,10 @@ def app(
 
     # Download NWP data
     logger.info("Downloading NWP data")
-    download_all_nwp_data()
+    download_all_nwp_data(download_ukv=not use_ecmwf_only)
 
     # Preprocess the NWP data
-    preprocess_nwp_data()
+    preprocess_nwp_data(use_ukv=not use_ecmwf_only)
 
     # ---------------------------------------------------------------------------
     # 2. Set up models
diff --git a/pvnet_app/data/nwp.py b/pvnet_app/data/nwp.py
index eb2f3a9..11d3ac2 100644
--- a/pvnet_app/data/nwp.py
+++ b/pvnet_app/data/nwp.py
@@ -2,6 +2,7 @@
 import xarray as xr
 import xesmf as xe
 import logging
+from typing import Optional
 import os
 import fsspec
 
@@ -20,10 +21,16 @@ def _download_nwp_data(source, destination):
     fs.get(source, destination, recursive=True)
 
 
-def download_all_nwp_data():
+def download_all_nwp_data(download_ukv: Optional[bool] = True, download_ecmwf: Optional[bool] = True):
     """Download the NWP data"""
-    _download_nwp_data(os.environ["NWP_UKV_ZARR_PATH"], nwp_ukv_path)
-    _download_nwp_data(os.environ["NWP_ECMWF_ZARR_PATH"], nwp_ecmwf_path)
+    if download_ukv:
+        _download_nwp_data(os.environ["NWP_UKV_ZARR_PATH"], nwp_ukv_path)
+    else:
+        logger.info("Skipping download of UKV data")
+    if download_ecmwf:
+        _download_nwp_data(os.environ["NWP_ECMWF_ZARR_PATH"], nwp_ecmwf_path)
+    else:
+        logger.info("Skipping download of ECMWF data")
 
 
 def regrid_nwp_data(nwp_zarr, target_coords_path, method):
@@ -129,24 +136,31 @@ def fix_ukv_data():
     ds.to_zarr(nwp_ukv_path)
 
 
-def preprocess_nwp_data():
+def preprocess_nwp_data(use_ukv: Optional[bool] = True, use_ecmwf: Optional[bool] = True):
 
-    # Regrid the UKV data
-    regrid_nwp_data(
-        nwp_zarr=nwp_ukv_path,
-        target_coords_path=f"{this_dir}/../../data/nwp_ukv_target_coords.nc",
-        method="bilinear",
-    )
+    if use_ukv:
+        # Regrid the UKV data
+        regrid_nwp_data(
+            nwp_zarr=nwp_ukv_path,
+            target_coords_path=f"{this_dir}/../../data/nwp_ukv_target_coords.nc",
+            method="bilinear",
+        )
 
-    # Regrid the ECMWF data
-    regrid_nwp_data(
-        nwp_zarr=nwp_ecmwf_path,
-        target_coords_path=f"{this_dir}/../../data/nwp_ecmwf_target_coords.nc",
-        method="conservative",  # this is needed to avoid zeros around edges of ECMWF data
-    )
+        # UKV data must be float16 to allow overflow to inf like in training
+        fix_ukv_data()
+    else:
+        logger.info("Skipping UKV data preprocessing")
+
+    if use_ecmwf:
+        # Regrid the ECMWF data
+        regrid_nwp_data(
+            nwp_zarr=nwp_ecmwf_path,
+            target_coords_path=f"{this_dir}/../../data/nwp_ecmwf_target_coords.nc",
+            method="conservative",  # this is needed to avoid zeros around edges of ECMWF data
+        )
 
-    # UKV data must be float16 to allow overflow to inf like in training
-    fix_ukv_data()
+        # Names need to be aligned between training and prod, and we need to infill the shetlands
+        fix_ecmwf_data()
+    else:
+        logger.info("Skipping ECMWF data preprocessing")
 
-    # Names need to be aligned between training and prod, and we need to infill the shetlands
-    fix_ecmwf_data()