diff --git a/README.md b/README.md index 2974756d..e50dd56c 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ This project is used for training PVNet and running PVnet on live data. -PVNet2 largely inherits the same architecture from +PVNet2 is a multi-modal late-fusion model that largely inherits the same architecture from [PVNet1.0](https://github.com/openclimatefix/predict_pv_yield). The NWP and satellite data are sent through some neural network which encodes them down to 1D intermediate representations. These are concatenated together with the GSP @@ -14,6 +14,12 @@ feature vector is put through an output network which outputs predictions of the future GSP yield. National forecasts are made by adding all the GSP forecasts together. + +## Experiments + +Some quite rough working notes on experiments training this model and running it in production are +[here](https://docs.google.com/document/d/1fbkfkBzp16WbnCg7RDuRDvgzInA6XQu3xh4NCjV-WDA/edit?usp=sharing). + ## Setup / Installation ```bash @@ -37,7 +43,8 @@ configs directory: cp -r configs.example configs ``` -You will be making local amendments to these configs +You will be making local amendments to these configs. See the README in +`configs.example` for more info. 
### Datasets @@ -108,16 +115,16 @@ input_data: forecast_minutes: 120 time_resolution_minutes: 60 nwp_channels: # comment out channels as appropriate - - t # live = t2m - - dswrf - - dlwrf - - hcc - - MCC - - lcc - - vis - - r # live = r2 - - prate # live ~= rprate - - si10 # 10-metre wind speed | live = unknown + - t # 2-metre temperature + - dswrf # downwards short-wave radiation flux + - dlwrf # downwards long-wave radiation flux + - hcc # high cloud cover + - mcc # medium cloud cover + - lcc # low cloud cover + - vis # visibility + - r # relative humidity + - prate # precipitation rate + - si10 # 10-metre wind speed | live = unknown nwp_image_size_pixels_height: 24 nwp_image_size_pixels_width: 24 nwp_provider: ukv @@ -332,8 +339,3 @@ python run.py ## Testing You can use `python -m pytest tests` to run tests - - ## Experiments - - Notes on these experiments are - [here](https://docs.google.com/document/d/1fbkfkBzp16WbnCg7RDuRDvgzInA6XQu3xh4NCjV-WDA/edit?usp=sharing). diff --git a/configs.example/callbacks/default.yaml b/configs.example/callbacks/default.yaml index a832d219..999a5bf8 100644 --- a/configs.example/callbacks/default.yaml +++ b/configs.example/callbacks/default.yaml @@ -1,13 +1,3 @@ -#pretrain_early_stopping: -# _target_: pvnet.callbacks.PretrainEarlyStopping -# monitor: "MAE/val" # name of the logged metric which determines when model is improving -# mode: "min" # can be "max" or "min" -# patience: 10 # how many epochs (or val check periods) of not improving until training stops -# min_delta: 0.001 # minimum change in the monitored metric needed to qualify as an improvement - -#pretrain_encoder_freezing: -# _target_: pvnet.callbacks.PretrainFreeze - early_stopping: _target_: pvnet.callbacks.MainEarlyStopping # name of the logged metric which determines when model is improving @@ -34,9 +24,7 @@ model_checkpoint: every_n_epochs: 1 verbose: False filename: "epoch={epoch}-step={step}" - dirpath: "checkpoints/pvnet2.1/${model_name}" #${..model_name} + # The 
path to where the model checkpoints will be stored + dirpath: "PLACEHOLDER/${model_name}" #${..model_name} auto_insert_metric_name: False save_on_train_epoch_end: False -#device_stats_monitor: -# _target_: lightning.pytorch.callbacks.DeviceStatsMonitor -# cpu_stats: True diff --git a/configs.example/config.yaml b/configs.example/config.yaml index 1af74879..9ad3457b 100644 --- a/configs.example/config.yaml +++ b/configs.example/config.yaml @@ -2,6 +2,7 @@ # specify here default training configuration defaults: + - _self_ - trainer: default.yaml - model: multimodal.yaml - datamodule: ocf_datapipes.yaml diff --git a/configs.example/datamodule/configuration/gcp_configuration.yaml b/configs.example/datamodule/configuration/gcp_configuration.yaml index 27f00990..f297df10 100644 --- a/configs.example/datamodule/configuration/gcp_configuration.yaml +++ b/configs.example/datamodule/configuration/gcp_configuration.yaml @@ -1,23 +1,29 @@ general: - description: Config for producing batches on GCP - name: gcp_pvnet + description: Example data config for creating PVNet batches + name: example_pvnet input_data: default_history_minutes: 120 default_forecast_minutes: 480 gsp: - gsp_zarr_path: /mnt/disks/nwp_rechunk/pv_gsp_temp.zarr + # Path to the GSP data. This should be a zarr file + # e.g. gs://solar-pv-nowcasting-data/PV/GSP/v7/pv_gsp.zarr + gsp_zarr_path: PLACEHOLDER.zarr history_minutes: 120 forecast_minutes: 480 time_resolution_minutes: 30 - metadata_only: false pv: pv_files_groups: - label: solar_sheffield_passiv - pv_filename: /mnt/disks/nwp_rechunk/passive/v1.1/passiv.netcdf - pv_metadata_filename: /mnt/disks/nwp_rechunk/passive/v0/system_metadata_OCF_ONLY.csv + # Path to the site-level PV data. This should be a netcdf + # e.g gs://solar-pv-nowcasting-data/PV/Passive/ocf_formatted/v0/passiv.netcdf + pv_filename: PLACEHOLDER.netcdf + # Path to the site-level PV metadata. 
This should be a csv + # e.g. gs://solar-pv-nowcasting-data/PV/Passive/ocf_formatted/v0/system_metadata.csv + pv_metadata_filename: PLACEHOLDER.csv + # This is the list of pv_ml_ids to be sliced from the PV site level data + pv_ml_ids: [ 154, @@ -377,33 +383,46 @@ input_data: nwp: ukv: nwp_zarr_path: - - /mnt/disks/nwp_rechunk/UKV_intermediate_version_7.1.zarr - - /mnt/disks/nwp_rechunk/UKV_2021_NWP_missing_chunked.zarr - - /mnt/disks/nwp_rechunk/UKV_2022_NWP_chunked.zarr - - /mnt/disks/nwp_rechunk/UKV_2023_chunked.zarr + # Path(s) to UKV NWP data in zarr format + # e.g. gs://solar-pv-nowcasting-data/NWP/UK_Met_Office/UKV_intermediate_version_7.zarr + - PLACEHOLDER.zarr history_minutes: 120 forecast_minutes: 480 time_resolution_minutes: 60 nwp_channels: - - t # live = t2m - - dswrf - #- lcc - #- mcc - #- hcc - #- dlwrf + - t # 2-metre temperature + - dswrf # downwards short-wave radiation flux + - dlwrf # downwards long-wave radiation flux + - hcc # high cloud cover + - mcc # medium cloud cover + - lcc # low cloud cover nwp_image_size_pixels_height: 24 nwp_image_size_pixels_width: 24 nwp_provider: ukv + ecmwf: + # Path to ECMWF NWP data in zarr format + # n.b. It is not necessary to use multiple or any NWP data. 
These entries can be removed + nwp_zarr_path: PLACEHOLDER.zarr + history_minutes: 120 + forecast_minutes: 480 + time_resolution_minutes: 60 + nwp_channels: + - t2m # 2-metre temperature + - dswrf # downwards short-wave radiation flux + - dlwrf # downwards long-wave radiation flux + - hcc # high cloud cover + - mcc # medium cloud cover + - lcc # low cloud cover + nwp_image_size_pixels_height: 12 # roughly equivalent to ukv 24 pixels + nwp_image_size_pixels_width: 12 + nwp_provider: ecmwf + satellite: satellite_zarr_path: - - /mnt/disks/nwp_rechunk/filled_sat/2017_nonhrv.zarr - - /mnt/disks/nwp_rechunk/filled_sat/2018_nonhrv.zarr - - /mnt/disks/nwp_rechunk/filled_sat/2019_nonhrv.zarr - - /mnt/disks/nwp_rechunk/filled_sat/2020_nonhrv.zarr - - /mnt/disks/nwp_rechunk/filled_sat/2021_nonhrv.zarr - - /mnt/disks/nwp_rechunk/filled_sat/2022_nonhrv.zarr - - /mnt/disks/nwp_rechunk/filled_sat/2023_nonhrv.zarr + # Path(s) to non-HRV satellite data in zarr format + # e.g. gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr + - PLACEHOLDER.zarr history_minutes: 90 forecast_minutes: 0 live_delay_minutes: 30 diff --git a/configs.example/datamodule/configuration/template_configuration.yaml b/configs.example/datamodule/configuration/template_configuration.yaml index 4163ddba..559f9d7f 100644 --- a/configs.example/datamodule/configuration/template_configuration.yaml +++ b/configs.example/datamodule/configuration/template_configuration.yaml @@ -1,5 +1,5 @@ general: - description: Template including all settings + description: Template including all possible settings name: template input_data: @@ -13,7 +13,6 @@ input_data: time_resolution_minutes: 30 start_datetime: "2020-01-01T00:00:00" end_datetime: "2021-09-01T00:00:00" - metadata_only: false nwp: nwp_zarr_path: gs://solar-pv-nowcasting-data/NWP/UK_Met_Office/UKV_intermediate_version_7.zarr @@ -71,20 +70,5 @@ input_data: satellite_image_size_pixels_height: 24 satellite_image_size_pixels_width: 24 - 
hrvsatellite: - hrvsatellite_zarr_path: gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_hrv.zarr - history_minutes: 60 - forecast_minutes: 0 - time_resolution_minutes: 5 - hrvsatellite_channels: - - HRV - hrvsatellite_image_size_pixels_height: 128 - hrvsatellite_image_size_pixels_width: 256 - - topographic: - topographic_filename: gs://solar-pv-nowcasting-data/Topographic/europe_dem_1km_osgb.tif - topographic_image_size_pixels_width: 300 - topographic_image_size_pixels_height: 300 - output_data: filepath: "not-needed" diff --git a/configs.example/datamodule/ocf_datapipes.yaml b/configs.example/datamodule/ocf_datapipes.yaml deleted file mode 100644 index 1fe1a2dd..00000000 --- a/configs.example/datamodule/ocf_datapipes.yaml +++ /dev/null @@ -1,14 +0,0 @@ -_target_: pvnet.data.datamodule.DataModule -configuration: "/home/jamesfulton/repos/PVNet/configs/datamodule/configuration/gcp_configuration.yaml" -num_workers: 20 -prefetch_factor: 2 -batch_size: 8 -train_period: - - null - - 2020-12-31 -val_period: - - 2021-01-01 - - 2021-12-31 -test_period: - - 2021-01-01 - - 2021-12-31 diff --git a/configs.example/datamodule/premade_batches.yaml b/configs.example/datamodule/premade_batches.yaml index d89de16c..f08f5af2 100644 --- a/configs.example/datamodule/premade_batches.yaml +++ b/configs.example/datamodule/premade_batches.yaml @@ -1,6 +1,8 @@ _target_: pvnet.data.datamodule.DataModule configuration: null -batch_dir: "/mnt/disks/batches2/batches_v3.1" -num_workers: 20 +# The batch_dir is the location batches were saved to using the save_batches.py script +# The batch_dir should contain train and val subdirectories with batches +batch_dir: "PLACEHOLDER" +num_workers: 10 prefetch_factor: 2 batch_size: 8 diff --git a/configs.example/datamodule/streamed_batches.yaml b/configs.example/datamodule/streamed_batches.yaml new file mode 100644 index 00000000..1b05f323 --- /dev/null +++ b/configs.example/datamodule/streamed_batches.yaml @@ -0,0 +1,16 @@ +_target_: 
pvnet.data.datamodule.DataModule +# Path to the data configuration yaml file. You can find examples in the configuration subdirectory +# in configs.example/datamodule/configuration +configuration: "PLACEHOLDER.yaml" +num_workers: 20 +prefetch_factor: 2 +batch_size: 8 +train_period: + - null + - "2022-05-07" +val_period: + - "2022-05-08" + - "2023-05-08" +test_period: + - "2022-05-08" + - "2023-05-08" diff --git a/configs.example/hydra/default.yaml b/configs.example/hydra/default.yaml index a086d12f..fb8e779d 100644 --- a/configs.example/hydra/default.yaml +++ b/configs.example/hydra/default.yaml @@ -1,8 +1,10 @@ # output paths for hydra logs run: - dir: logs/runs/${now:%Y-%m-%d}/${now:%H-%M-%S} + # Local log directory for hydra + dir: PLACEHOLDER/runs/${now:%Y-%m-%d}/${now:%H-%M-%S} sweep: - dir: logs/multiruns/${now:%Y-%m-%d_%H-%M-%S} + # Local log directory for hydra + dir: PLACEHOLDER/multiruns/${now:%Y-%m-%d_%H-%M-%S} subdir: ${hydra.job.num} # you can set here environment variables that are universal for all users diff --git a/configs.example/logger/comet.yaml b/configs.example/logger/comet.yaml deleted file mode 100644 index 07d2f5d5..00000000 --- a/configs.example/logger/comet.yaml +++ /dev/null @@ -1,7 +0,0 @@ -# https://www.comet.ml - -comet: - _target_: pytorch_lightning.loggers.comet.CometLogger - api_key: ${oc.env:COMET_API_TOKEN} # api key is laoded from environment variable - project_name: "template-tests" - experiment_name: null diff --git a/configs.example/logger/csv.yaml b/configs.example/logger/csv.yaml index 85a78a2a..784883d2 100644 --- a/configs.example/logger/csv.yaml +++ b/configs.example/logger/csv.yaml @@ -2,7 +2,8 @@ csv: _target_: pytorch_lightning.loggers.csv_logs.CSVLogger - save_dir: "." 
+ # local path to log training process + save_dir: "PLACEHOLDER" name: "csv/" version: null prefix: "" diff --git a/configs.example/logger/many_loggers.yaml b/configs.example/logger/many_loggers.yaml index cb5bc0c4..5ab9ca8c 100644 --- a/configs.example/logger/many_loggers.yaml +++ b/configs.example/logger/many_loggers.yaml @@ -1,10 +1,7 @@ # train with many loggers at once defaults: - # - aim.yaml - # - comet.yaml - csv.yaml - # - mlflow.yaml # - neptune.yaml # - tensorboard.yaml - wandb.yaml diff --git a/configs.example/logger/mlflow.yaml b/configs.example/logger/mlflow.yaml deleted file mode 100644 index c7b4fdab..00000000 --- a/configs.example/logger/mlflow.yaml +++ /dev/null @@ -1,10 +0,0 @@ -# https://mlflow.org - -mlflow: - _target_: pytorch_lightning.loggers.mlflow.MLFlowLogger - experiment_name: default - tracking_uri: null - tags: null - save_dir: ./mlruns - prefix: "" - artifact_location: null diff --git a/configs.example/logger/neptune.yaml b/configs.example/logger/neptune.yaml index 00d2dfa0..abc78b31 100644 --- a/configs.example/logger/neptune.yaml +++ b/configs.example/logger/neptune.yaml @@ -3,5 +3,6 @@ neptune: _target_: pytorch_lightning.loggers.NeptuneLogger api_key: ${oc.env:NEPTUNE_API_TOKEN} # api key is loaded from environment variable - project: OpenClimateFix/predict-pv-yield + # Neptune project placeholder + project: PLACEHOLDER prefix: "" diff --git a/configs.example/logger/tensorboard.yaml b/configs.example/logger/tensorboard.yaml index ff21d176..036c9559 100644 --- a/configs.example/logger/tensorboard.yaml +++ b/configs.example/logger/tensorboard.yaml @@ -2,7 +2,8 @@ tensorboard: _target_: pytorch_lightning.loggers.tensorboard.TensorBoardLogger - save_dir: "/mnt/disks/batches/tensorboard" + # Path to use for tensorboard logs + save_dir: "PLACEHOLDER" name: "default" version: "${model_name}" log_graph: False diff --git a/configs.example/logger/wandb.yaml b/configs.example/logger/wandb.yaml index bfd4d574..0fe80aa1 100644 --- 
a/configs.example/logger/wandb.yaml +++ b/configs.example/logger/wandb.yaml @@ -2,9 +2,11 @@ wandb: _target_: lightning.pytorch.loggers.wandb.WandbLogger - project: "pvnet2.1" + # wandb project to log to + project: "PLACEHOLDER" name: "${model_name}" - save_dir: "/mnt/disks/batches/" + # location to store the wandb local logs + save_dir: "PLACEHOLDER" offline: False # set True to store all logs only locally id: null # pass correct id to resume experiment! # entity: "" # set to name of your wandb team or just remove it diff --git a/configs.example/readme.md b/configs.example/readme.md index 13cbbf26..36bddee5 100644 --- a/configs.example/readme.md +++ b/configs.example/readme.md @@ -1,7 +1,8 @@ -The following folders how the configuration files - -This idea is copied from -https://github.com/ashleve/lightning-hydra-template/blob/main/configs/experiment/example_simple.yaml +This directory contains example configuration files for the PVNet project. Many paths will need to +be set by each user. You can find these paths by searching for PLACEHOLDER within these configs. Not all of +the values with a placeholder need to be set. For example in the logger subdirectory there are +many different loggers with PLACEHOLDERS. If only one logger is used, then only that placeholder +need be set. run experiments by: `python run.py experiment=example_simple ` diff --git a/configs.example/trainer/default.yaml b/configs.example/trainer/default.yaml index 5f051088..965c274c 100644 --- a/configs.example/trainer/default.yaml +++ b/configs.example/trainer/default.yaml @@ -1,7 +1,7 @@ _target_: lightning.pytorch.trainer.trainer.Trainer # set `1` to train on GPU, `0` to train on CPU only -accelerator: gpu +accelerator: auto devices: auto min_epochs: null @@ -9,9 +9,6 @@ max_epochs: null reload_dataloaders_every_n_epochs: 0 num_sanity_val_steps: 8 fast_dev_run: false -#profiler: 'simple' accumulate_grad_batches: 4 -#val_check_interval: 800 -#limit_val_batches: 800 log_every_n_steps: 50