From dbc9931b555412bae7f6eb1ae40c65cc54b4d102 Mon Sep 17 00:00:00 2001 From: Alexandra Udaltsova <43303448+AUdaltsova@users.noreply.github.com> Date: Mon, 3 Jun 2024 13:39:08 +0100 Subject: [PATCH 1/8] fix 60 min hardcode NWP resolution in multimodal.py NWP resolution fixed to pull form config; if config values not available, default 60 min resolution supplied for backward compatibility --- pvnet/models/multimodal/multimodal.py | 32 ++++++++++++++++----------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/pvnet/models/multimodal/multimodal.py b/pvnet/models/multimodal/multimodal.py index 8167ac9f..5af9ae2c 100644 --- a/pvnet/models/multimodal/multimodal.py +++ b/pvnet/models/multimodal/multimodal.py @@ -2,6 +2,7 @@ from collections import OrderedDict from typing import Optional +from omegaconf import DictConfig import torch from ocf_datapipes.batch import BatchKey, NWPBatchKey @@ -52,8 +53,8 @@ def __init__( history_minutes: int = 60, sat_history_minutes: Optional[int] = None, min_sat_delay_minutes: Optional[int] = 30, - nwp_forecast_minutes: Optional[int] = None, - nwp_history_minutes: Optional[int] = None, + nwp_forecast_minutes: Optional[DictConfig] = None, + nwp_history_minutes: Optional[DictConfig] = None, pv_history_minutes: Optional[int] = None, wind_history_minutes: Optional[int] = None, sensor_history_minutes: Optional[int] = None, @@ -61,6 +62,7 @@ def __init__( optimizer: AbstractOptimizer = pvnet.optimizers.Adam(), target_key: str = "gsp", interval_minutes: int = 30, + nwp_interval_minutes: Optional[DictConfig] = None, pv_interval_minutes: int = 5, sat_interval_minutes: int = 5, sensor_interval_minutes: int = 30, @@ -80,16 +82,16 @@ def __init__( - for example if `m` is a regular function. Args: - output_network: A partially instatiated pytorch Module class used to combine the 1D + output_network: A partially instantiated pytorch Module class used to combine the 1D features to produce the forecast. output_quantiles: A list of float (0.0, 1.0) quantiles to predict values for. If set to None the output is a single value. - nwp_encoders_dict: A dictionary of partially instatiated pytorch Module class used to - encode the NWP data from 4D into an 1D feature vector from different sources. - sat_encoder: A partially instatiated pytorch Module class used to encode the satellite - data from 4D into an 1D feature vector. - pv_encoder: A partially instatiated pytorch Module class used to encode the site-level - PV data from 2D into an 1D feature vector. + nwp_encoders_dict: A dictionary of partially instantiated pytorch Module class used to + encode the NWP data from 4D into a 1D feature vector from different sources. + sat_encoder: A partially instantiated pytorch Module class used to encode the satellite + data from 4D into a 1D feature vector. + pv_encoder: A partially instantiated pytorch Module class used to encode the site-level + PV data from 2D into a 1D feature vector. add_image_embedding_channel: Add a channel to the NWP and satellite data with the embedding of the GSP ID. include_gsp_yield_history: Include GSP yield data. @@ -106,7 +108,7 @@ def __init__( `forecast_minutes` if not provided. nwp_history_minutes: Period of historical NWP forecast used as input. Defaults to `history_minutes` if not provided. - pv_history_minutes: Length of recent site-level PV data data used as + pv_history_minutes: Length of recent site-level PV data used as input. Defaults to `history_minutes` if not provided. optimizer: Optimizer factory function used for network. 
target_key: The key of the target variable in the batch. @@ -114,6 +116,7 @@ def __init__( wind_interval_minutes: The interval between each sample of the wind data wind_encoder: Encoder for wind data wind_history_minutes: Length of recent wind data used as input. + nwp_interval_minutes: DIctionary of the intervals between each sample of the NWP data for each source pv_interval_minutes: The interval between each sample of the PV data sat_interval_minutes: The interval between each sample of the satellite data sensor_interval_minutes: The interval between each sample of the sensor data @@ -153,7 +156,7 @@ def __init__( ) # Number of features expected by the output_network - # Add to this as network pices are constructed + # Add to this as network pieces are constructed fusion_input_features = 0 if self.include_sat: @@ -186,14 +189,17 @@ def __init__( assert set(nwp_encoders_dict.keys()) == set(nwp_forecast_minutes.keys()) assert set(nwp_encoders_dict.keys()) == set(nwp_history_minutes.keys()) + if nwp_interval_minutes is None: + nwp_interval_minutes = dict.fromkeys(nwp_encoders_dict.keys(), 60) + self.nwp_encoders_dict = torch.nn.ModuleDict() if add_image_embedding_channel: self.nwp_embed_dict = torch.nn.ModuleDict() for nwp_source in nwp_encoders_dict.keys(): nwp_sequence_len = ( - nwp_history_minutes[nwp_source] // 60 - + nwp_forecast_minutes[nwp_source] // 60 + nwp_history_minutes[nwp_source] // nwp_interval_minutes[nwp_source] + + nwp_forecast_minutes[nwp_source] // nwp_interval_minutes[nwp_source] + 1 ) From bb75f6dda361b0363ae7e47a422f0d045b6d64d2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 3 Jun 2024 12:46:32 +0000 Subject: [PATCH 2/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pvnet/models/multimodal/multimodal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pvnet/models/multimodal/multimodal.py b/pvnet/models/multimodal/multimodal.py index 5af9ae2c..358cdddb 100644 --- a/pvnet/models/multimodal/multimodal.py +++ b/pvnet/models/multimodal/multimodal.py @@ -2,10 +2,10 @@ from collections import OrderedDict from typing import Optional -from omegaconf import DictConfig import torch from ocf_datapipes.batch import BatchKey, NWPBatchKey +from omegaconf import DictConfig from torch import nn import pvnet From 8dc4e26a63ab7f6cae188ccd433d3bdb57a2a2f8 Mon Sep 17 00:00:00 2001 From: Alexandra Udaltsova <43303448+AUdaltsova@users.noreply.github.com> Date: Mon, 3 Jun 2024 13:58:00 +0100 Subject: [PATCH 3/8] Add nwp_interval_minutes to multimodal.yaml Update example configs to accommodate setting different time resolution for NWPs --- configs.example/model/multimodal.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/configs.example/model/multimodal.yaml b/configs.example/model/multimodal.yaml index 650a2649..56955d8d 100644 --- a/configs.example/model/multimodal.yaml +++ b/configs.example/model/multimodal.yaml @@ -89,6 +89,10 @@ nwp_history_minutes: nwp_forecast_minutes: ukv: 480 ecmwf: 480 +# This dictionary is optional, but make sure to include if using data with different time resolution as defaults to 60 +nwp_interval_minutes: + ukv: 60 + ecmwf: 60 # ---------------------------------------------- # Optimizer From f5a3e094c6965b5dcd2d9998d8b2ff403605d518 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 3 Jun 2024 12:58:46 +0000 Subject: [PATCH 4/8] 
[pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- configs.example/model/multimodal.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs.example/model/multimodal.yaml b/configs.example/model/multimodal.yaml index 56955d8d..d267d9ab 100644 --- a/configs.example/model/multimodal.yaml +++ b/configs.example/model/multimodal.yaml @@ -89,7 +89,7 @@ nwp_history_minutes: nwp_forecast_minutes: ukv: 480 ecmwf: 480 -# This dictionary is optional, but make sure to include if using data with different time resolution as defaults to 60 +# This dictionary is optional, but make sure to include if using data with different time resolution as defaults to 60 nwp_interval_minutes: ukv: 60 ecmwf: 60 From 91243e6127e55c092c7258d698f7e860511e35bd Mon Sep 17 00:00:00 2001 From: Alexandra Udaltsova <43303448+AUdaltsova@users.noreply.github.com> Date: Mon, 3 Jun 2024 14:07:32 +0100 Subject: [PATCH 5/8] Update multimodal.py --- pvnet/models/multimodal/multimodal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pvnet/models/multimodal/multimodal.py b/pvnet/models/multimodal/multimodal.py index 358cdddb..4f23808d 100644 --- a/pvnet/models/multimodal/multimodal.py +++ b/pvnet/models/multimodal/multimodal.py @@ -116,7 +116,7 @@ def __init__( wind_interval_minutes: The interval between each sample of the wind data wind_encoder: Encoder for wind data wind_history_minutes: Length of recent wind data used as input. - nwp_interval_minutes: DIctionary of the intervals between each sample of the NWP data for each source + nwp_interval_minutes: Dictionary of intervals between each sample of NWP data for each source pv_interval_minutes: The interval between each sample of the PV data sat_interval_minutes: The interval between each sample of the satellite data sensor_interval_minutes: The interval between each sample of the sensor data From 69705a0414d06da40dd32adf7ffe4b3920a10b8c Mon Sep 17 00:00:00 2001 From: Alexandra Udaltsova <43303448+AUdaltsova@users.noreply.github.com> Date: Mon, 3 Jun 2024 14:09:16 +0100 Subject: [PATCH 6/8] Update multimodal.py --- pvnet/models/multimodal/multimodal.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pvnet/models/multimodal/multimodal.py b/pvnet/models/multimodal/multimodal.py index 4f23808d..ad4c11d6 100644 --- a/pvnet/models/multimodal/multimodal.py +++ b/pvnet/models/multimodal/multimodal.py @@ -116,7 +116,8 @@ def __init__( wind_interval_minutes: The interval between each sample of the wind data wind_encoder: Encoder for wind data wind_history_minutes: Length of recent wind data used as input. 
- nwp_interval_minutes: Dictionary of intervals between each sample of NWP data for each source + nwp_interval_minutes: Dictionary of the intervals between each sample of the NWP + data for each source pv_interval_minutes: The interval between each sample of the PV data sat_interval_minutes: The interval between each sample of the satellite data sensor_interval_minutes: The interval between each sample of the sensor data From 1bf3550ad7d9ba80fa5c4f4a88f31895885ce1b5 Mon Sep 17 00:00:00 2001 From: Alexandra Udaltsova <43303448+AUdaltsova@users.noreply.github.com> Date: Wed, 5 Jun 2024 17:21:36 +0100 Subject: [PATCH 7/8] Update README.md --- README.md | 212 +++++++++++------------------------------------------- 1 file changed, 42 insertions(+), 170 deletions(-) diff --git a/README.md b/README.md index 6cdee8e6..c8b65cc7 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,13 @@ # PVNet 2.1 -[![test-release](https://github.com/openclimatefix/PVNet/actions/workflows/test-release.yml/badge.svg)](https://github.com/openclimatefix/PVNet/actions/workflows/test-release.yml) +[![Python Bump Version & release](https://github.com/openclimatefix/PVNet/actions/workflows/release.yml/badge.svg)](https://github.com/openclimatefix/PVNet/actions/workflows/release.yml) -This project is used for training PVNet and running PVnet on live data. +This project is used for training PVNet and running PVNet on live data. PVNet2 is a multi-modal late-fusion model that largely inherits the same architecture from -[PVNet1.0](https://github.com/openclimatefix/predict_pv_yield). The NWP and +[PVNet1.0](https://github.com/openclimatefix/predict_pv_yield). The NWP (Numerical Weather Prediction) and satellite data are sent through some neural network which encodes them down to -1D intermediate representations. These are concatenated together with the GSP +1D intermediate representations. These are concatenated together with the GSP (Grid Supply Point) output history, the calculated solar coordinates (azimuth and elevation) and the GSP ID which has been put through an embedding layer. 
This 1D concatenated feature vector is put through an output network which outputs predictions of the @@ -56,7 +56,7 @@ pip install ".[dev]" ## Getting started with running PVNet -Before running any code in within PVNet, copy the example configuration to a +Before running any code in PVNet, copy the example configuration to a configs directory: ``` @@ -74,14 +74,14 @@ suggested locations for downloading such datasets below: **GSP (Grid Supply Point)** - Regional PV generation data\ The University of Sheffield provides API access to download this data: -https://www.solar.sheffield.ac.uk/pvlive/api/ +https://www.solar.sheffield.ac.uk/api/ Documentation for querying generation data aggregated by GSP region can be found here: https://docs.google.com/document/d/e/2PACX-1vSDFb-6dJ2kIFZnsl-pBQvcH4inNQCA4lYL9cwo80bEHQeTK8fONLOgDf6Wm4ze_fxonqK3EVBVoAIz/pub#h.9d97iox3wzmd **NWP (Numerical weather predictions)**\ -OCF maintains a Zarr formatted version the German Weather Service's (DWD) +OCF maintains a Zarr formatted version of the German Weather Service's (DWD) ICON-EU NWP model here: https://huggingface.co/datasets/openclimatefix/dwd-icon-eu which includes the UK @@ -121,66 +121,56 @@ cp -r configs.example configs ### Set up and config example for batch creation -We will use the example of creating batches using data from gcp: -`/PVNet/configs/datamodule/configuration/gcp_configuration.yaml` -Ensure that the file paths are set to the correct locations in -`gcp_configuration.yaml`. +We will use the following example config file for creating batches: `/PVNet/configs/datamodule/configuration/example_configuration.yaml`. Ensure that the file paths are set to the correct locations in `example_configuration.yaml`: search for `PLACEHOLDER` to find where to input the location of the files. You will need to comment out or delete the parts of `example_configuration.yaml` pertaining to the data you are not using. -`PLACEHOLDER` is used to indcate where to input the location of the files. -For OCF use cases, file locations can be found in `template_configuration.yaml` located alongside `gcp_configuration.yaml`. +When creating batches, an additional datamodule config located in `PVNet/configs/datamodule` is passed into the batch creation script: `streamed_batches.yaml`. Like before, a placeholder variable is used when specifying which configuration to use: -In these configurations you can update the train, val & test periods to cover the data you have access to. - - -With your configuration in place, you can proceed to create batches. PVNet uses -[hydra](https://hydra.cc/) which enables us to pass variables via the command -line that will override the configuration defined in the `./configs` directory. - -When creating batches, an additional config is used which is passed into the batch creation script. This is the datamodule config located `PVNet/configs/datamodule`. - -For this example we will be using the `streamed_batches.yaml` config. 
Like before, a placeholder variable is used when specifing which configuration to use: - -`configuration: "PLACEHOLDER.yaml"` +```yaml +configuration: "PLACEHOLDER.yaml" +``` -This should be given the whole path to the config on your local machine, such as for our example it should be changed to: +This should be given the whole path to the config on your local machine, for example: -`configuration: "/FULL-PATH-TO-REPO/PVNet/configs/datamodule/configuration/gcp_configuration.yaml"` -` +```yaml +configuration: "/FULL-PATH-TO-REPO/PVNet/configs/datamodule/configuration/example_configuration.yaml" +``` Where `FULL-PATH-TO-REPO` represent the whole path to the PVNet repo on your local machine. +This is also where you can update the train, val & test periods to cover the data you have access to. + ### Running the batch creation script -Run the save_batches.py script to create batches if setting parameters in the datamodule config (`streamed_batches.yaml` in this example): +Run the `save_batches.py` script to create batches with the parameters specified in the datamodule config (`streamed_batches.yaml` in this example): -``` +```bash python scripts/save_batches.py ``` -or with the following example arguments to override config: +PVNet uses +[hydra](https://hydra.cc/) which enables us to pass variables via the command +line that will override the configuration defined in the `./configs` directory, like this: -``` +```bash python scripts/save_batches.py datamodule=streamed_batches datamodule.batch_output_dir="./output" datamodule.num_train_batches=10 datamodule.num_val_batches=5 ``` -In this function the datamodule argument looks for a config under `PVNet/configs/datamodule`. The examples here are either to use "premade_batches" or "streamed_batches". - -Its important that the dates set for the training, validation and testing in the datamodule (`streamed_batches.yaml`) config are within the ranges of the dates set for the input features in the configuration (`gcp_configuration.yaml`). +`scripts/save_batches.py` needs a config under `PVNet/configs/datamodule`. You can adapt `streamed_batches.yaml` or create your own in the same folder. -If downloading private data from a gcp bucket make sure to authenticate gcloud (the public satellite data does not need authentication): +If downloading private data from a GCP bucket make sure to authenticate gcloud (the public satellite data does not need authentication): ``` gcloud auth login ``` -For files stored in multiple locations they can be added as list. For example from the gcp_configuration.yaml file we can change from satellite data stored on a bucket: +Files stored in multiple locations can be added as a list. For example, in the `example_configuration.yaml` file we can supply a path to satellite data stored on a bucket: ```yaml satellite: satellite_zarr_path: gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr ``` -To satellite data hosted by Google: +Or to satellite data hosted by Google: ```yaml satellite: @@ -188,145 +178,27 @@ satellite: - "gs://public-datasets-eumetsat-solar-forecasting/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr" - "gs://public-datasets-eumetsat-solar-forecasting/satellite/EUMETSAT/SEVIRI_RSS/v4/2021_nonhrv.zarr" ``` -Datapipes is currently set up to use 11 channels from the satellite data, the 12th of which is HRV and is not included in these. + +Datapipes are currently set up to use 11 channels from the satellite data, the 12th of which is HRV and is not included in these. 
### Training PVNet How PVNet is run is determined by the extensive configuration in the config -files. The following configs have been tested to work using batches of data -created using the steps and batch creation config mentioned above. - -You should create the following configs before trying to train a model locally, -as so: - -In `configs/datamodule/local_premade_batches.yaml`: - -```yaml -_target_: pvnet.data.datamodule.DataModule -configuration: null -batch_dir: "./output" # where the batches are saved -num_workers: 20 -prefetch_factor: 2 -batch_size: 8 -``` - -In `configs/model/local_multimodal.yaml`: +files. The configs stored in `PVNet/configs.example` should work with batches created using the steps and batch creation config mentioned above. -```yaml -_target_: pvnet.models.multimodal.multimodal.Model - -output_quantiles: [0.02, 0.1, 0.25, 0.5, 0.75, 0.9, 0.98] - -#-------------------------------------------- -# NWP encoder -#-------------------------------------------- - -nwp_encoders_dict: - ukv: - _target_: pvnet.models.multimodal.encoders.encoders3d.DefaultPVNet - _partial_: True - in_channels: 10 - out_features: 256 - number_of_conv3d_layers: 6 - conv3d_channels: 32 - image_size_pixels: 24 - -#-------------------------------------------- -# Sat encoder settings -#-------------------------------------------- - -# Ignored as premade batches were created without satellite data -# sat_encoder: -# _target_: pvnet.models.multimodal.encoders.encoders3d.DefaultPVNet -# _partial_: True -# in_channels: 11 -# out_features: 256 -# number_of_conv3d_layers: 6 -# conv3d_channels: 32 -# image_size_pixels: 24 - -add_image_embedding_channel: False - -#-------------------------------------------- -# PV encoder settings -#-------------------------------------------- - -pv_encoder: - _target_: pvnet.models.multimodal.site_encoders.encoders.SingleAttentionNetwork - _partial_: True - num_sites: 349 - out_features: 40 - num_heads: 4 - kdim: 40 - pv_id_embed_dim: 20 - -#-------------------------------------------- -# Tabular network settings -#-------------------------------------------- - -output_network: - _target_: pvnet.models.multimodal.linear_networks.networks.ResFCNet2 - _partial_: True - fc_hidden_features: 128 - n_res_blocks: 6 - res_block_layers: 2 - dropout_frac: 0.0 - -embedding_dim: 16 -include_sun: True -include_gsp_yield_history: False - -#-------------------------------------------- -# Times -#-------------------------------------------- - -# Foreast and time settings -history_minutes: 60 -forecast_minutes: 120 - -min_sat_delay_minutes: 60 - -sat_history_minutes: 90 -pv_history_minutes: 60 - -# These must be set for each NWP encoder -nwp_history_minutes: - ukv: 60 -nwp_forecast_minutes: - ukv: 120 - -# ---------------------------------------------- -# Optimizer -# ---------------------------------------------- -optimizer: - _target_: pvnet.optimizers.EmbAdamWReduceLROnPlateau - lr: 0.0001 - weight_decay: 0.01 - amsgrad: True - patience: 5 - factor: 0.1 - threshold: 0.002 -``` +Make sure to update the following config files before training your model: -In `configs/local_trainer.yaml`: - -```yaml -_target_: lightning.pytorch.trainer.trainer.Trainer - -accelerator: cpu # Important if running on a system without a supported GPU -devices: auto - -min_epochs: null -max_epochs: null -reload_dataloaders_every_n_epochs: 0 -num_sanity_val_steps: 8 -fast_dev_run: false -accumulate_grad_batches: 4 -log_every_n_steps: 50 -``` +1. 
In `configs/datamodule/local_premade_batches.yaml`: + - update `batch_dir` to point to the directory you stored your batches in during batch creation +2. In `configs/model/local_multimodal.yaml`: + - update the list of encoders to reflect the data sources you are using. If you are using different NWP sources, the encoders for these should follow the same structure with two important updates: + - `in_channels`: number of variables your NWP source supplies + - `image_size_pixels`: spatial crop of your NWP data. It depends on the spatial resolution of your NWP; should match `nwp_image_size_pixels_height` and/or `nwp_image_size_pixels_width` in `datamodule/example_configs.yaml`, unless transformations such as coarsening was applied (e. g. as for ECMWF data) +3. In `configs/local_trainer.yaml`: + - set `accelerator: 0` if running on a system without a supported GPU -And finally update `defaults` in the main `./configs/config.yaml` file to use +If creating copies of the config files instead of modifying existing ones, update `defaults` in the main `./configs/config.yaml` file to use your customised config files: ```yaml @@ -350,7 +222,7 @@ python run.py ## Backtest -If you have succesfully trained a PVNet model and have a saved model checkpoint you can create a backtest using this, e.g. forecasts on historical data to evaluate forecast accuracy/skill. This can be done by running one of the scripts in this repo such as [the UK gsp backtest script](scripts/backtest_uk_gsp.py) or the [the pv site backtest script](scripts/backtest_sites.py), further info on how to run these are in each backtest file. +If you have successfully trained a PVNet model and have a saved model checkpoint you can create a backtest using this, e.g. forecasts on historical data to evaluate forecast accuracy/skill. This can be done by running one of the scripts in this repo such as [the UK GSP backtest script](scripts/backtest_uk_gsp.py) or the [the pv site backtest script](scripts/backtest_sites.py), further info on how to run these are in each backtest file. ## Testing From d5de95e44d7f2936bf86152e2101238c275b3ad5 Mon Sep 17 00:00:00 2001 From: Alexandra Udaltsova <43303448+AUdaltsova@users.noreply.github.com> Date: Wed, 5 Jun 2024 17:27:47 +0100 Subject: [PATCH 8/8] Update comments multimodal.yaml --- configs.example/model/multimodal.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs.example/model/multimodal.yaml b/configs.example/model/multimodal.yaml index d267d9ab..ec06a5b6 100644 --- a/configs.example/model/multimodal.yaml +++ b/configs.example/model/multimodal.yaml @@ -89,7 +89,7 @@ nwp_history_minutes: nwp_forecast_minutes: ukv: 480 ecmwf: 480 -# This dictionary is optional, but make sure to include if using data with different time resolution as defaults to 60 +# Optional; defaults to 60, so must be set for data with different time resolution nwp_interval_minutes: ukv: 60 ecmwf: 60
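
For reference, below is a minimal sketch of the per-source sequence-length calculation that PATCH 1 introduces, assuming plain Python dicts in place of omegaconf `DictConfig` and hypothetical history/forecast values; when `nwp_interval_minutes` is omitted, every source falls back to the previous 60-minute behaviour. The `nwp_interval_minutes` block added to `configs.example/model/multimodal.yaml` in PATCH 3 and PATCH 8 supplies the same dictionary.

```python
# Sketch of the per-source NWP sequence-length logic from PATCH 1.
# Plain dicts stand in for omegaconf DictConfig; the minute values are
# hypothetical and only chosen to show the effect of nwp_interval_minutes.

nwp_history_minutes = {"ukv": 120, "ecmwf": 120}
nwp_forecast_minutes = {"ukv": 480, "ecmwf": 480}
nwp_interval_minutes = None  # omit to keep the old 60-minute behaviour

if nwp_interval_minutes is None:
    # Backwards-compatible default: assume 60-minute resolution for each source
    nwp_interval_minutes = dict.fromkeys(nwp_history_minutes.keys(), 60)

for nwp_source in nwp_history_minutes:
    nwp_sequence_len = (
        nwp_history_minutes[nwp_source] // nwp_interval_minutes[nwp_source]
        + nwp_forecast_minutes[nwp_source] // nwp_interval_minutes[nwp_source]
        + 1
    )
    # With the values above: 120 // 60 + 480 // 60 + 1 = 11 time steps per source
    print(nwp_source, nwp_sequence_len)
```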