From d021915b748ad71b32ea42aad1c67dcf077b4928 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Fri, 29 Nov 2024 15:03:37 +0000 Subject: [PATCH 01/11] add regridding --- .../data/mo_global/india_coords.nc | Bin 0 -> 3224 bytes india_forecast_app/data/nwp.py | 57 ++++++++++++++++++ india_forecast_app/models/pvnet/model.py | 42 +++++++++---- india_forecast_app/models/pvnet/utils.py | 31 +++++++--- tests/data/test_nwp.py | 34 +++++++++++ 5 files changed, 142 insertions(+), 22 deletions(-) create mode 100644 india_forecast_app/data/mo_global/india_coords.nc create mode 100644 india_forecast_app/data/nwp.py create mode 100644 tests/data/test_nwp.py diff --git a/india_forecast_app/data/mo_global/india_coords.nc b/india_forecast_app/data/mo_global/india_coords.nc new file mode 100644 index 0000000000000000000000000000000000000000..b97874c4aa754768fff0db989cfd7f161ebad112 GIT binary patch literal 3224 zcmb7GX>b%}5DkYR1QZBYIAZRDBaqEzvpIINZ$+Yjz|K8|$Yngh0Lr0Uf=XG62%?rC z2#9hk2wI2~DvIEVfPyz7pi)2y3Zk4|Uw_^H@t;#QH8Z<2{dK>7@AcQ~+S-Ome59%i z-KW$KpD=R5#JXG5{SkFGtsgt)wts(6&-2y!@6k-%{P&v%@@I2(rTf&OwIl26-Qo2U zDPmGmvN{{gxS;Rlxk-vV9ah)>u17KG{2%ceZ&dHAqsj66^ff6dw&VPsuiHJ;Ew*2- z2hVQq{oF^}V?W6A@!#KhY;e)J*eUt_%%so$JiECzc2@pBclQUID*cwR3-aFO32%&= ze(^v|y+m=4r2ZVye?J=VcWAi9`02r`x1bS!k1N9c-dp|4QY3R6G@;Mlo{1EWhtyDP z+NsBHM>CF#=Cs;~0k~4)w+QM3EoGgSR87O?XeH~Vr9Y{@oPji1w>71+?^m>z_1i3% zGw@J#9okABZNE&7-nslsw3B?=>u`P1Uh+zJ8&gE2OMV@mx<5JZU{7?EJUbFRy`M)X z$+vTxDMzk*sSP?y-Wir&7#WiP)s)`!OURTuGWVTXd->RCWJx_)vna9C$dbCUi)i64 z$d>xDHT)xxBX#E7K5k>y&c(=)dUI0(1t%tbj$Elbcl9qzUMao`xl(_gN^Z)L>yalo z5QC=?6s#mlLX6(|<`d#GYnis&JI=%LeVkDk(xo-@2K*_1y{fK5tKd$OR^{+g3|9GD;+`UdvLE}`=AU2 zrT=A_+o+Q$6CRX}C3?6h6F!u^Me-YuGT}w2#bBxgA>l`8s0!D49YVsB&<53=yFWxo z_!9c-;(?q`j-p(6Q$9cggL2_d`BEw*6Xn9A@}Kp>2n(OWHTp$_g;(LZM5Kub3%|nO zX`-M)cvjIv69E;%w~BkI(YsI~ysOx)8;J_hZ)FCR+ZdI?!^+W*E2OhgDSWJaQ@KHE z6qUlusw>GZ2BAv$S#=Z9@g7tOPpj6eXz5R)O88p!hk|e1MpO%Lt8e&N9lKF2{H!yCL=0*k4~k= z%|}#tAKj&aMpXD;(}{rDiyF~E%^g(8Nz{lQYF^h1qegTQYep~TBPRNY-Ke@d>>9*G zC$SgFE`}f`dWl^k`Qpq9-h2d?C8RyX*%Lec=@29T%O& z+pzz}MQ`y4;~y8@#p~EV;-bIAb?k=;mwhd9i!Kz23H)ASCJ}iB5>Kcy6A7W ziH)q=#9OSN#2)5x;vn-WagzI*xXkz(sY(_fY=Y5Yp*j}9C?bGozzFmFF=|ObePK8} zPmDUA8^(B^55@z^h6WyjL2Od~V=QAIFgCCrjcu$8V>jbze9JsBe&T)^muRe|!feX( z#7tK-QFFm8WPdlyiJ;41_GWxdTN@C}o7w-&an#ljOr2gQnDg`w0_IZIx4E8mX1>9A zo4c5I<~Q7b^90$zFqjue7`3pP=zRyQcG~!0O&r-vKw9 zgt!K73C|%{%X2*30qo1}P3(_uJ^Pe9mHonfL@hh*XSfS#5bcM%hWCB$YrNlc-_wQ* z_jA^}`y&~`0JvwhvA}Ccw%h?;E1sub7SA=0U{>?Rt6`t_`m_GM!R&k99qd2eWTMNp z@E+#)ap2I`>Di&izTObN?aMxj&zE?yqE>` None: """Download the sat data""" diff --git a/tests/data/test_nwp.py b/tests/data/test_nwp.py new file mode 100644 index 0000000..4c9617b --- /dev/null +++ b/tests/data/test_nwp.py @@ -0,0 +1,34 @@ +""" Tests for the nwp regridding module """ +import os +import tempfile + +import xarray as xr + +from india_forecast_app.data.nwp import regrid_nwp_data + + +def test_regrid_nwp_data(nwp_mo_global_data): + """Test the regridding of the nwp data""" + + # create a temporary dir + with tempfile.TemporaryDirectory() as temp_dir: + + # save mo data to zarr + nwp_zarr = os.environ["NWP_MO_GLOBAL_ZARR_PATH"] + + # regrid the data + nwp_zarr_save = f"{temp_dir}/nwp_regrid.zarr" + regrid_nwp_data( + nwp_zarr, "india_forecast_app/data/mo_global/india_coords.nc", nwp_zarr_save + ) + + # open the regridded data + nwp_xr = xr.open_zarr(nwp_zarr) + nwp_xr_regridded = xr.open_zarr(nwp_zarr_save) + + # check the data is different in latitude and longitude + assert not nwp_xr_regridded.latitude.equals(nwp_xr.latitude) + assert not nwp_xr_regridded.longitude.equals(nwp_xr.longitude) + + assert len(nwp_xr_regridded.latitude) == 225 + assert len(nwp_xr_regridded.longitude) == 150 From 7b80cc5d344d1a5eec0356579aaae66606f18e58 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Fri, 29 Nov 2024 15:10:57 +0000 Subject: [PATCH 02/11] lint --- india_forecast_app/models/pvnet/model.py | 2 +- india_forecast_app/models/pvnet/utils.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/india_forecast_app/models/pvnet/model.py b/india_forecast_app/models/pvnet/model.py index 47444f7..dd42471 100644 --- a/india_forecast_app/models/pvnet/model.py +++ b/india_forecast_app/models/pvnet/model.py @@ -35,8 +35,8 @@ wind_path, ) from .utils import ( - download_satellite_data, NWPProcessAndCacheConfig, + download_satellite_data, populate_data_config_sources, process_and_cache_nwp, save_batch, diff --git a/india_forecast_app/models/pvnet/utils.py b/india_forecast_app/models/pvnet/utils.py index f6e023e..c17a4a9 100644 --- a/india_forecast_app/models/pvnet/utils.py +++ b/india_forecast_app/models/pvnet/utils.py @@ -28,6 +28,7 @@ class NWPProcessAndCacheConfig(BaseModel): + """ Configuration for processing and caching NWP data""" source_nwp_path: str dest_nwp_path: str source: str From fbfc6e8bd578a6e7c4c66efbc51fb1fd70cb3158 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Fri, 29 Nov 2024 15:15:34 +0000 Subject: [PATCH 03/11] lint --- india_forecast_app/models/pvnet/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/india_forecast_app/models/pvnet/utils.py b/india_forecast_app/models/pvnet/utils.py index c17a4a9..e7e436e 100644 --- a/india_forecast_app/models/pvnet/utils.py +++ b/india_forecast_app/models/pvnet/utils.py @@ -13,6 +13,7 @@ from pydantic import BaseModel from india_forecast_app.data.nwp import regrid_nwp_data + from .consts import ( nwp_ecmwf_path, nwp_gfs_path, @@ -28,7 +29,8 @@ class NWPProcessAndCacheConfig(BaseModel): - """ Configuration for processing and caching NWP data""" + """Configuration for processing and caching NWP data""" + source_nwp_path: str dest_nwp_path: str source: str From 034ceaf4a3c90bed0a1af489565ccd6ad97e8d7d Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Fri, 29 Nov 2024 16:01:45 +0000 Subject: [PATCH 04/11] update to run more effeciently in app --- india_forecast_app/data/nwp.py | 29 +++++++++++++++--------- india_forecast_app/models/pvnet/utils.py | 19 +++++++++++----- tests/data/test_nwp.py | 10 +++----- 3 files changed, 34 insertions(+), 24 deletions(-) diff --git a/india_forecast_app/data/nwp.py b/india_forecast_app/data/nwp.py index de68896..2472875 100644 --- a/india_forecast_app/data/nwp.py +++ b/india_forecast_app/data/nwp.py @@ -5,16 +5,16 @@ logger = logging.getLogger(__name__) -def regrid_nwp_data(nwp_zarr: str, target_coords_path: str, nwp_zarr_save: str): +def regrid_nwp_data(nwp_ds: xr.Dataset, target_coords_path: str) -> xr.Dataset: """This function loads the NWP data, then regrids and saves it back out if the data is not on the same grid as expected. The data is resaved in-place. method can be 'conservative' or 'bilinear' """ - logger.info(f"Regridding NWP data {nwp_zarr} to expected grid to {target_coords_path}") + logger.info(f"Regridding NWP data to expected grid to {target_coords_path}") - ds_raw = xr.open_zarr(nwp_zarr) + ds_raw = nwp_ds # These are the coords we are aiming for ds_target_coords = xr.load_dataset(target_coords_path) @@ -26,21 +26,26 @@ def regrid_nwp_data(nwp_zarr: str, target_coords_path: str, nwp_zarr_save: str): ) if not needs_regridding: - logger.info(f"No NWP regridding required for {nwp_zarr} - skipping this step") + logger.info(f"No NWP regridding required - skipping this step") return - logger.info(f"Regridding NWP {nwp_zarr} to expected grid") + # flip latitude, so its in ascending order + if ds_raw.latitude[0] > ds_raw.latitude[-1]: + ds_raw = ds_raw.reindex(latitude=ds_raw.latitude[::-1]) - # Pull the raw data into RAM - ds_raw = ds_raw.compute() + # clip to india coordindates + ds_raw = ds_raw.sel( + latitude=slice(0, 40), + longitude=slice(65, 100), + ) # regrid + logger.info(f"Regridding NWP to expected grid") ds_regridded = ds_raw.interp( latitude=ds_target_coords.latitude, longitude=ds_target_coords.longitude ) - # Re-save - including rechunking - os.system(f"rm -rf {nwp_zarr_save}") + # rechunking ds_regridded["variable"] = ds_regridded["variable"].astype(str) # Rechunk to these dimensions when saving @@ -52,6 +57,8 @@ def regrid_nwp_data(nwp_zarr: str, target_coords_path: str, nwp_zarr_save: str): "y": 100, } - ds_regridded.chunk( + ds_regridded = ds_regridded.chunk( {k: save_chunk_dict[k] for k in list(ds_raw.xindexes) if k in save_chunk_dict} - ).to_zarr(nwp_zarr_save) + ) + + return ds_regridded diff --git a/india_forecast_app/models/pvnet/utils.py b/india_forecast_app/models/pvnet/utils.py index e7e436e..a5a622d 100644 --- a/india_forecast_app/models/pvnet/utils.py +++ b/india_forecast_app/models/pvnet/utils.py @@ -109,7 +109,8 @@ def process_and_cache_nwp(nwp_config: NWPProcessAndCacheConfig): source_nwp_path = nwp_config.source_nwp_path dest_nwp_path = nwp_config.dest_nwp_path - log.info(f"Processing and caching NWP data for {source_nwp_path} and saving to {dest_nwp_path}") + log.info(f"Processing and caching NWP data for {source_nwp_path} " + f"and saving to {dest_nwp_path} for {nwp_config.source}") if os.path.exists(dest_nwp_path): log.info(f"File already exists at {dest_nwp_path}") @@ -149,15 +150,20 @@ def process_and_cache_nwp(nwp_config: NWPProcessAndCacheConfig): ds = data_var.to_dataset(dim="variable") ds = ds.rename({"t2m": "t"}) - # Save destination path - ds.to_zarr(dest_nwp_path, mode="a") - if nwp_config.source == "mo_global": + + # only select the variables we need + ds = ds.sel(variable=["temperature_sl", "wind_u_component_10m", "wind_v_component_10m"]) + # regrid data - regrid_nwp_data( - dest_nwp_path, "india_forecast_app/data/mo_global/india_coords.nc", dest_nwp_path + ds = regrid_nwp_data( + ds, "india_forecast_app/data/mo_global/india_coords.nc" ) + # Save destination path + log.info(f"Saving NWP data to {dest_nwp_path}") + ds.to_zarr(dest_nwp_path, mode="a") + def download_satellite_data(satellite_source_file_path: str) -> None: """Download the sat data""" @@ -225,6 +231,7 @@ def save_batch(batch, i: int, model_name, site_uuid, save_batches_dir: Optional[ save_batches_dir: The directory to save the batch to, defaults to environment variable SAVE_BATCHES_DIR """ + return if save_batches_dir is None: save_batches_dir = os.getenv("SAVE_BATCHES_DIR", None) diff --git a/tests/data/test_nwp.py b/tests/data/test_nwp.py index 4c9617b..1e7b794 100644 --- a/tests/data/test_nwp.py +++ b/tests/data/test_nwp.py @@ -17,14 +17,10 @@ def test_regrid_nwp_data(nwp_mo_global_data): nwp_zarr = os.environ["NWP_MO_GLOBAL_ZARR_PATH"] # regrid the data - nwp_zarr_save = f"{temp_dir}/nwp_regrid.zarr" - regrid_nwp_data( - nwp_zarr, "india_forecast_app/data/mo_global/india_coords.nc", nwp_zarr_save - ) - - # open the regridded data nwp_xr = xr.open_zarr(nwp_zarr) - nwp_xr_regridded = xr.open_zarr(nwp_zarr_save) + nwp_xr_regridded = regrid_nwp_data( + nwp_xr, "india_forecast_app/data/mo_global/india_coords.nc" + ) # check the data is different in latitude and longitude assert not nwp_xr_regridded.latitude.equals(nwp_xr.latitude) From c6fe94446226675119ee1d0494d81d1bd8f40f8e Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Fri, 29 Nov 2024 16:07:57 +0000 Subject: [PATCH 05/11] fix --- india_forecast_app/models/pvnet/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/india_forecast_app/models/pvnet/utils.py b/india_forecast_app/models/pvnet/utils.py index a5a622d..3a02bf3 100644 --- a/india_forecast_app/models/pvnet/utils.py +++ b/india_forecast_app/models/pvnet/utils.py @@ -231,7 +231,6 @@ def save_batch(batch, i: int, model_name, site_uuid, save_batches_dir: Optional[ save_batches_dir: The directory to save the batch to, defaults to environment variable SAVE_BATCHES_DIR """ - return if save_batches_dir is None: save_batches_dir = os.getenv("SAVE_BATCHES_DIR", None) From 64517bdc75afe9c01f73a0beed3b6b185de3cbdf Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Mon, 2 Dec 2024 11:55:55 +0000 Subject: [PATCH 06/11] remove hard coding --- india_forecast_app/models/pvnet/model.py | 1 + india_forecast_app/models/pvnet/utils.py | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/india_forecast_app/models/pvnet/model.py b/india_forecast_app/models/pvnet/model.py index dd42471..b509bbb 100644 --- a/india_forecast_app/models/pvnet/model.py +++ b/india_forecast_app/models/pvnet/model.py @@ -243,6 +243,7 @@ def _prepare_data_sources(self): source_nwp_path=os.environ["NWP_MO_GLOBAL_ZARR_PATH"], dest_nwp_path=nwp_mo_global_path, source="mo_global", + config=self.config["input_data"]["nwp"]["ecmwf"] ) ) diff --git a/india_forecast_app/models/pvnet/utils.py b/india_forecast_app/models/pvnet/utils.py index 3a02bf3..a4d3a3a 100644 --- a/india_forecast_app/models/pvnet/utils.py +++ b/india_forecast_app/models/pvnet/utils.py @@ -10,6 +10,7 @@ import yaml from ocf_datapipes.batch import BatchKey from ocf_datapipes.utils.consts import ELEVATION_MEAN, ELEVATION_STD +from ocf_datapipes.config.model import NWP from pydantic import BaseModel from india_forecast_app.data.nwp import regrid_nwp_data @@ -34,6 +35,7 @@ class NWPProcessAndCacheConfig(BaseModel): source_nwp_path: str dest_nwp_path: str source: str + config: Optional[NWP] = None def worker_init_fn(worker_id): @@ -153,7 +155,8 @@ def process_and_cache_nwp(nwp_config: NWPProcessAndCacheConfig): if nwp_config.source == "mo_global": # only select the variables we need - ds = ds.sel(variable=["temperature_sl", "wind_u_component_10m", "wind_v_component_10m"]) + nwp_channels = nwp_config.config.nwp_channels + ds = ds.sel(variable=nwp_channels) # regrid data ds = regrid_nwp_data( From be7a556a03a7ac1a9a4111a92791bcb1a0bb862f Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Mon, 2 Dec 2024 11:58:13 +0000 Subject: [PATCH 07/11] lint --- india_forecast_app/data/nwp.py | 4 +--- india_forecast_app/models/pvnet/utils.py | 3 +-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/india_forecast_app/data/nwp.py b/india_forecast_app/data/nwp.py index 2472875..3b12107 100644 --- a/india_forecast_app/data/nwp.py +++ b/india_forecast_app/data/nwp.py @@ -8,8 +8,6 @@ def regrid_nwp_data(nwp_ds: xr.Dataset, target_coords_path: str) -> xr.Dataset: """This function loads the NWP data, then regrids and saves it back out if the data is not on the same grid as expected. The data is resaved in-place. - - method can be 'conservative' or 'bilinear' """ logger.info(f"Regridding NWP data to expected grid to {target_coords_path}") @@ -27,7 +25,7 @@ def regrid_nwp_data(nwp_ds: xr.Dataset, target_coords_path: str) -> xr.Dataset: if not needs_regridding: logger.info(f"No NWP regridding required - skipping this step") - return + return ds_raw # flip latitude, so its in ascending order if ds_raw.latitude[0] > ds_raw.latitude[-1]: diff --git a/india_forecast_app/models/pvnet/utils.py b/india_forecast_app/models/pvnet/utils.py index a4d3a3a..1b2c4c0 100644 --- a/india_forecast_app/models/pvnet/utils.py +++ b/india_forecast_app/models/pvnet/utils.py @@ -9,12 +9,11 @@ import xarray as xr import yaml from ocf_datapipes.batch import BatchKey -from ocf_datapipes.utils.consts import ELEVATION_MEAN, ELEVATION_STD from ocf_datapipes.config.model import NWP +from ocf_datapipes.utils.consts import ELEVATION_MEAN, ELEVATION_STD from pydantic import BaseModel from india_forecast_app.data.nwp import regrid_nwp_data - from .consts import ( nwp_ecmwf_path, nwp_gfs_path, From 0344965720c3213caa78cf6b05afa0a3470ffa20 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Mon, 2 Dec 2024 12:02:29 +0000 Subject: [PATCH 08/11] lint --- india_forecast_app/models/pvnet/utils.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/india_forecast_app/models/pvnet/utils.py b/india_forecast_app/models/pvnet/utils.py index 1b2c4c0..efabd1a 100644 --- a/india_forecast_app/models/pvnet/utils.py +++ b/india_forecast_app/models/pvnet/utils.py @@ -110,8 +110,10 @@ def process_and_cache_nwp(nwp_config: NWPProcessAndCacheConfig): source_nwp_path = nwp_config.source_nwp_path dest_nwp_path = nwp_config.dest_nwp_path - log.info(f"Processing and caching NWP data for {source_nwp_path} " - f"and saving to {dest_nwp_path} for {nwp_config.source}") + log.info( + f"Processing and caching NWP data for {source_nwp_path} " + f"and saving to {dest_nwp_path} for {nwp_config.source}" + ) if os.path.exists(dest_nwp_path): log.info(f"File already exists at {dest_nwp_path}") @@ -158,9 +160,7 @@ def process_and_cache_nwp(nwp_config: NWPProcessAndCacheConfig): ds = ds.sel(variable=nwp_channels) # regrid data - ds = regrid_nwp_data( - ds, "india_forecast_app/data/mo_global/india_coords.nc" - ) + ds = regrid_nwp_data(ds, "india_forecast_app/data/mo_global/india_coords.nc") # Save destination path log.info(f"Saving NWP data to {dest_nwp_path}") From 5b7d186706b539d13ea089064a3388fe204e9eb7 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Mon, 2 Dec 2024 12:05:24 +0000 Subject: [PATCH 09/11] lint --- india_forecast_app/models/pvnet/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/india_forecast_app/models/pvnet/utils.py b/india_forecast_app/models/pvnet/utils.py index efabd1a..8b570e3 100644 --- a/india_forecast_app/models/pvnet/utils.py +++ b/india_forecast_app/models/pvnet/utils.py @@ -14,6 +14,7 @@ from pydantic import BaseModel from india_forecast_app.data.nwp import regrid_nwp_data + from .consts import ( nwp_ecmwf_path, nwp_gfs_path, From 5d38b4103a1e66deb987bf6f96041371dc7d4882 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Mon, 2 Dec 2024 12:21:17 +0000 Subject: [PATCH 10/11] fix bug --- india_forecast_app/models/pvnet/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/india_forecast_app/models/pvnet/model.py b/india_forecast_app/models/pvnet/model.py index b509bbb..829eb55 100644 --- a/india_forecast_app/models/pvnet/model.py +++ b/india_forecast_app/models/pvnet/model.py @@ -243,7 +243,7 @@ def _prepare_data_sources(self): source_nwp_path=os.environ["NWP_MO_GLOBAL_ZARR_PATH"], dest_nwp_path=nwp_mo_global_path, source="mo_global", - config=self.config["input_data"]["nwp"]["ecmwf"] + config=self.config["input_data"]["nwp"]["mo_global"] ) ) From 8d67aae866c8581fdb71980cdf0b1d7f4d6f7891 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Mon, 2 Dec 2024 12:35:42 +0000 Subject: [PATCH 11/11] fix --- india_forecast_app/models/pvnet/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/india_forecast_app/models/pvnet/utils.py b/india_forecast_app/models/pvnet/utils.py index 8b570e3..59e4fb8 100644 --- a/india_forecast_app/models/pvnet/utils.py +++ b/india_forecast_app/models/pvnet/utils.py @@ -157,7 +157,7 @@ def process_and_cache_nwp(nwp_config: NWPProcessAndCacheConfig): if nwp_config.source == "mo_global": # only select the variables we need - nwp_channels = nwp_config.config.nwp_channels + nwp_channels = list(nwp_config.config.nwp_channels) ds = ds.sel(variable=nwp_channels) # regrid data