Skip to content

Commit

Permalink
update to run more efficiently in app
Browse files Browse the repository at this point in the history
  • Loading branch information
peterdudfield committed Nov 29, 2024
1 parent fbfc6e8 commit 034ceaf
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 24 deletions.
29 changes: 18 additions & 11 deletions india_forecast_app/data/nwp.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,16 @@
logger = logging.getLogger(__name__)


def regrid_nwp_data(nwp_zarr: str, target_coords_path: str, nwp_zarr_save: str):
def regrid_nwp_data(nwp_ds: xr.Dataset, target_coords_path: str) -> xr.Dataset:
"""This function loads the NWP data, then regrids and saves it back out if the data is not
on the same grid as expected. The data is resaved in-place.
method can be 'conservative' or 'bilinear'
"""

logger.info(f"Regridding NWP data {nwp_zarr} to expected grid to {target_coords_path}")
logger.info(f"Regridding NWP data to expected grid to {target_coords_path}")

ds_raw = xr.open_zarr(nwp_zarr)
ds_raw = nwp_ds

# These are the coords we are aiming for
ds_target_coords = xr.load_dataset(target_coords_path)
Expand All @@ -26,21 +26,26 @@ def regrid_nwp_data(nwp_zarr: str, target_coords_path: str, nwp_zarr_save: str):
)

if not needs_regridding:
logger.info(f"No NWP regridding required for {nwp_zarr} - skipping this step")
logger.info(f"No NWP regridding required - skipping this step")
return

logger.info(f"Regridding NWP {nwp_zarr} to expected grid")
# flip latitude so it's in ascending order
if ds_raw.latitude[0] > ds_raw.latitude[-1]:
ds_raw = ds_raw.reindex(latitude=ds_raw.latitude[::-1])

# Pull the raw data into RAM
ds_raw = ds_raw.compute()
# clip to India coordinates
ds_raw = ds_raw.sel(
latitude=slice(0, 40),
longitude=slice(65, 100),
)

# regrid
logger.info(f"Regridding NWP to expected grid")
ds_regridded = ds_raw.interp(
latitude=ds_target_coords.latitude, longitude=ds_target_coords.longitude
)

# Re-save - including rechunking
os.system(f"rm -rf {nwp_zarr_save}")
# rechunking
ds_regridded["variable"] = ds_regridded["variable"].astype(str)

# Rechunk to these dimensions when saving
Expand All @@ -52,6 +57,8 @@ def regrid_nwp_data(nwp_zarr: str, target_coords_path: str, nwp_zarr_save: str):
"y": 100,
}

ds_regridded.chunk(
ds_regridded = ds_regridded.chunk(
{k: save_chunk_dict[k] for k in list(ds_raw.xindexes) if k in save_chunk_dict}
).to_zarr(nwp_zarr_save)
)

return ds_regridded
19 changes: 13 additions & 6 deletions india_forecast_app/models/pvnet/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,8 @@ def process_and_cache_nwp(nwp_config: NWPProcessAndCacheConfig):
source_nwp_path = nwp_config.source_nwp_path
dest_nwp_path = nwp_config.dest_nwp_path

log.info(f"Processing and caching NWP data for {source_nwp_path} and saving to {dest_nwp_path}")
log.info(f"Processing and caching NWP data for {source_nwp_path} "
f"and saving to {dest_nwp_path} for {nwp_config.source}")

if os.path.exists(dest_nwp_path):
log.info(f"File already exists at {dest_nwp_path}")
Expand Down Expand Up @@ -149,15 +150,20 @@ def process_and_cache_nwp(nwp_config: NWPProcessAndCacheConfig):
ds = data_var.to_dataset(dim="variable")
ds = ds.rename({"t2m": "t"})

# Save destination path
ds.to_zarr(dest_nwp_path, mode="a")

if nwp_config.source == "mo_global":

# only select the variables we need
ds = ds.sel(variable=["temperature_sl", "wind_u_component_10m", "wind_v_component_10m"])

# regrid data
regrid_nwp_data(
dest_nwp_path, "india_forecast_app/data/mo_global/india_coords.nc", dest_nwp_path
ds = regrid_nwp_data(
ds, "india_forecast_app/data/mo_global/india_coords.nc"
)

# Save destination path
log.info(f"Saving NWP data to {dest_nwp_path}")
ds.to_zarr(dest_nwp_path, mode="a")


def download_satellite_data(satellite_source_file_path: str) -> None:
"""Download the sat data"""
Expand Down Expand Up @@ -225,6 +231,7 @@ def save_batch(batch, i: int, model_name, site_uuid, save_batches_dir: Optional[
save_batches_dir: The directory to save the batch to,
defaults to environment variable SAVE_BATCHES_DIR
"""
return

if save_batches_dir is None:
save_batches_dir = os.getenv("SAVE_BATCHES_DIR", None)
Expand Down
10 changes: 3 additions & 7 deletions tests/data/test_nwp.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,10 @@ def test_regrid_nwp_data(nwp_mo_global_data):
nwp_zarr = os.environ["NWP_MO_GLOBAL_ZARR_PATH"]

# regrid the data
nwp_zarr_save = f"{temp_dir}/nwp_regrid.zarr"
regrid_nwp_data(
nwp_zarr, "india_forecast_app/data/mo_global/india_coords.nc", nwp_zarr_save
)

# open the regridded data
nwp_xr = xr.open_zarr(nwp_zarr)
nwp_xr_regridded = xr.open_zarr(nwp_zarr_save)
nwp_xr_regridded = regrid_nwp_data(
nwp_xr, "india_forecast_app/data/mo_global/india_coords.nc"
)

# check the data is different in latitude and longitude
assert not nwp_xr_regridded.latitude.equals(nwp_xr.latitude)
Expand Down

0 comments on commit 034ceaf

Please sign in to comment.