From 3ec49680a254907e1bd340058299ff01e2b3b9c1 Mon Sep 17 00:00:00 2001 From: James Fulton Date: Tue, 31 Oct 2023 16:25:29 +0000 Subject: [PATCH 1/4] pull data config from huggingface --- pvnet_app/app.py | 52 +++++++++++++++++++++++++++++++++++++++++++----- requirements.txt | 2 +- 2 files changed, 48 insertions(+), 6 deletions(-) diff --git a/pvnet_app/app.py b/pvnet_app/app.py index 21b1a75..c9c2a5f 100644 --- a/pvnet_app/app.py +++ b/pvnet_app/app.py @@ -8,6 +8,7 @@ import logging import os +import tempfile import warnings from datetime import datetime, timedelta, timezone @@ -51,8 +52,6 @@ # TODO: Host data config alongside model? this_dir = os.path.dirname(os.path.abspath(__file__)) -data_config_filename = f"{this_dir}/../configs/data_configuration.yaml" - # Model will use GPU if available device = torch.device("cuda" if torch.cuda.is_available() else "cpu") @@ -67,7 +66,7 @@ # Huggingfacehub model repo and commit for PVNet (GSP-level model) default_model_name = "openclimatefix/pvnet_v2" -default_model_version = "ca621fcb8e275bdcdc586b1f971e82fc65e02735" +default_model_version = "805ca9b2ee3120592b0b70b7c75a454e2b4e4bec" # Huggingfacehub model repo and commit for PVNet summation (GSP sum to national model) # If summation_model_name is set to None, a simple sum is computed instead @@ -102,6 +101,39 @@ # --------------------------------------------------------------------------- # HELPER FUNCTIONS +def populate_data_config_sources(input_path, output_path): + """Resave the data config and replace the source filepaths + + Args: + input_path: Path to input datapipes configuration file + output_path: Location to save the output configuration file + """ + with open(input_path) as infile: + config = yaml.load(infile, Loader=yaml.FullLoader) + + production_paths = { + "gsp": os.environ["DB_URL"], + "nwp": "nwp.zarr", + "satellite": "sat.zarr.zip", + # TODO: include hrvsatellite + } + + # Replace data sources + for source in ["gsp", "nwp", "satellite", "hrvsatellite"]: + if source in config["input_data"]: + assert source in production_paths, f"Missing production path: {source}" + # If not empty - i.e. if used + if config["input_data"][source][f"{source}_zarr_path"]!="": + config["input_data"][source][f"{source}_zarr_path"] = ( + f"{production_paths[source]}.zarr" + ) + + # We do not need to set PV path right now. This currently done through datapipes + # TODO - Move the PV path to here + + with open(output_path, 'w') as outfile: + yaml.dump(config, outfile, default_flow_style=False) + def convert_dataarray_to_forecasts( forecast_values_dataarray: xr.DataArray, session: Session, model_name: str, version: str @@ -291,6 +323,16 @@ def app( # --------------------------------------------------------------------------- # 2. Set up data loader logger.info("Creating DataLoader") + + # Pull the data config from huggingface + data_config_filename = PVNetBaseModel.get_data_config( + model_name, + revision=model_version, + ) + # Populate the data config with production data paths + temp_dir = tempfile.TemporaryDirectory() + populated_data_config_filename = f"{temp_dir.name}/data_config.yaml" + populate_data_config_sources(data_config_filename, populated_data_config_filename) # Location and time datapipes location_pipe = IterableWrapper([gsp_id_to_loc(gsp_id) for gsp_id in gsp_ids]) @@ -302,7 +344,7 @@ def app( # Batch datapipe batch_datapipe = ( construct_sliced_data_pipeline( - config_filename=data_config_filename, + config_filename=populated_data_config_filename, location_pipe=location_pipe, t0_datapipe=t0_datapipe, production=True, @@ -552,7 +594,7 @@ def app( apply_adjuster=False, ) - + temp_dir.cleanup() logger.info("Finished forecast") diff --git a/requirements.txt b/requirements.txt index 4262540..a6879ec 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ torch[cpu]>=2.0 PVNet-summation>=0.0.8 -pvnet>=2.2.1 +pvnet>=2.3.0 ocf_datapipes>=2.0.6 nowcasting_datamodel>=1.5.10 fsspec[s3] From bc28c713171e66e376f06aab3957bf8d2dcab4f6 Mon Sep 17 00:00:00 2001 From: James Fulton Date: Tue, 31 Oct 2023 17:06:23 +0000 Subject: [PATCH 2/4] bug fixes --- pvnet_app/app.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/pvnet_app/app.py b/pvnet_app/app.py index c9c2a5f..faa297e 100644 --- a/pvnet_app/app.py +++ b/pvnet_app/app.py @@ -8,6 +8,7 @@ import logging import os +import yaml import tempfile import warnings from datetime import datetime, timedelta, timezone @@ -121,12 +122,10 @@ def populate_data_config_sources(input_path, output_path): # Replace data sources for source in ["gsp", "nwp", "satellite", "hrvsatellite"]: if source in config["input_data"]: - assert source in production_paths, f"Missing production path: {source}" # If not empty - i.e. if used if config["input_data"][source][f"{source}_zarr_path"]!="": - config["input_data"][source][f"{source}_zarr_path"] = ( - f"{production_paths[source]}.zarr" - ) + assert source in production_paths, f"Missing production path: {source}" + config["input_data"][source][f"{source}_zarr_path"] = production_paths[source] # We do not need to set PV path right now. This currently done through datapipes # TODO - Move the PV path to here @@ -382,13 +381,12 @@ def app( or summation_model.pvnet_model_version != model_version ): warnings.warn( - f"The PVNet version running in this app is " - f"{model_name}/{model_version}." - f"The summation model running in this app was trained on outputs from PVNet " - f"version {summation_model.model_name}/{summation_model.model_version}. " - f"Combining these models may lead to an error if the shape of PVNet output doesn't " - f"match the expected shape of the summation model. Combining may lead to " - f"unreliable results even if the shapes match." + f"The PVNet version running in this app is {model_name}/{model_version}. " + "The summation model running in this app was trained on outputs from PVNet version " + f"{summation_model.pvnet_model_name}/{summation_model.pvnet_model_version}. " + "Combining these models may lead to an error if the shape of PVNet output doesn't " + "match the expected shape of the summation model. Combining may lead to unreliable " + "results even if the shapes match." ) # 4. Make prediction From a222bdb43be5a1ea28dc1560c7c5741950e41175 Mon Sep 17 00:00:00 2001 From: James Fulton Date: Tue, 31 Oct 2023 17:08:36 +0000 Subject: [PATCH 3/4] remove unnecessary data config --- configs/data_configuration.yaml | 61 --------------------------------- 1 file changed, 61 deletions(-) delete mode 100644 configs/data_configuration.yaml diff --git a/configs/data_configuration.yaml b/configs/data_configuration.yaml deleted file mode 100644 index b7b92ff..0000000 --- a/configs/data_configuration.yaml +++ /dev/null @@ -1,61 +0,0 @@ -general: - description: Config for producing batches in production - name: app_pvnet - -input_data: - default_history_minutes: 120 - default_forecast_minutes: 480 - - gsp: - gsp_zarr_path: !ENV ${DB_URL} - history_minutes: 120 - forecast_minutes: 480 - time_resolution_minutes: 30 - start_datetime: "1900-01-01T00:00:00" - end_datetime: "2100-01-01T00:00:00" - metadata_only: false - - nwp: - nwp_zarr_path: nwp.zarr - history_minutes: 120 - forecast_minutes: 480 - time_resolution_minutes: 60 - nwp_channels: - - t # live = t2m - - dswrf - nwp_image_size_pixels_height: 24 - nwp_image_size_pixels_width: 24 - - satellite: - satellite_zarr_path: sat.zarr.zip - history_minutes: 90 - forecast_minutes: 0 - live_delay_minutes: 60 - time_resolution_minutes: 5 - satellite_channels: - - IR_016 - - IR_039 - - IR_087 - - IR_097 - - IR_108 - - IR_120 - - IR_134 - - VIS006 - - VIS008 - - WV_062 - - WV_073 - satellite_image_size_pixels_height: 24 - satellite_image_size_pixels_width: 24 - - hrvsatellite: - hrvsatellite_zarr_path: "" - history_minutes: 60 - forecast_minutes: 0 - time_resolution_minutes: 5 - hrvsatellite_channels: - - HRV - hrvsatellite_image_size_pixels_height: 24 - hrvsatellite_image_size_pixels_width: 24 - -output_data: - filepath: "not-needed" From 8645e93f66d9a9555e79b25938e8e6e439ab64b4 Mon Sep 17 00:00:00 2001 From: James Fulton Date: Tue, 31 Oct 2023 17:13:13 +0000 Subject: [PATCH 4/4] remove config copy from dockerfile --- Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 83fb51b..cb6a6c4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -23,7 +23,6 @@ RUN pip install -r app/requirements.txt # copy library files COPY pvnet_app/ app/pvnet_app/ COPY tests/ app/tests/ -COPY configs/ app/configs/ COPY scripts/ app/scripts/ # change to app folder