update configs and readme
dfulu committed Jun 18, 2024
1 parent 2d1e993 commit 4656597
Showing 9 changed files with 240 additions and 3 deletions.
86 changes: 83 additions & 3 deletions README.md
@@ -1,10 +1,90 @@
# PVNet summation
This project is used for training a model to sum the GSP predictions of [PVNet](https://github.com/openclimatefix/PVNet) into a national estimate.

Using this model to sum the GSP predictions, rather than taking a simple sum, increases the accuracy of the national predictions, and it can be configured to produce estimates of the uncertainty range of the national estimate. See the [PVNet](https://github.com/openclimatefix/PVNet) repo and our paper for more details.


## Setup / Installation

```bash
git clone https://github.com/openclimatefix/PVNet_summation
cd PVNet_summation
pip install -r requirements.txt
pip install git+https://github.com/SheffieldSolar/PV_Live-API
pip install .
```

### Additional development dependencies

```bash
pip install ".[dev]"
```

## Getting started with running PVNet summation

In order to run PVNet summation, we assume that you are already set up with
[PVNet](https://github.com/openclimatefix/PVNet) and have met all the requirements there.

Before running any code, copy the example configuration to a
configs directory:

```bash
cp -r configs.example configs
```

You will be making local amendments to these configs.

### Datasets

The datasets required are the same as documented in
[PVNet](https://github.com/openclimatefix/PVNet). The only addition is that you will also need
PVLive data for the national sum, i.e. GSP ID 0.
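
As a quick sanity check that your PVLive zarr includes the national total (GSP ID 0), you can open
it and select that ID. This is only a sketch: the path and the `gsp_id` coordinate name are
assumptions, so adjust them to match your data.

```bash
# Hypothetical path and coordinate name -- adjust to match your PVLive dataset
python -c "import xarray as xr; print(xr.open_zarr('PLACEHOLDER/pvlive_gsp.zarr').sel(gsp_id=0))"
```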


## Generating pre-made concurrent batches of data for PVNet

It is required that you pre-prepare batches using the `save_concurrent_batches.py` script from
PVNet. This saves the batches in the form required by the PVNet model to make predictions for all
GSPs for a single forecast init time. See the PVNet package for more details on this.
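
As a rough sketch, running that step might look something like the following. The script path and
working directory are assumptions here; see the PVNet repository for the authoritative location
and its configuration options.

```bash
# Assumed layout: a PVNet checkout alongside this repo, with the script under scripts/.
# The script is configured via its own config files within PVNet.
cd ../PVNet
python scripts/save_concurrent_batches.py
```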


### Set up and config example for batch creation


The concurrent batches created in the step above will be augmented with a few additional pieces of
data required for the summation model. Within your copy of `PVNet_summation/configs`, make sure you
have replaced all of the items marked with `PLACEHOLDER`.
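
A quick way to list any placeholders you still need to fill in (just a convenience, not part of
the project's tooling):

```bash
# List every config line still containing a PLACEHOLDER value
grep -rn "PLACEHOLDER" configs/
```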

### Training PVNet_summation

How PVNet_summation is run is determined by the extensive configuration in the config files. The
configs stored in `PVNet_summation/configs.example` should work with batches created using the
steps and batch creation config mentioned above.

Make sure to update the following config files before training your model (a command-line
alternative is sketched after this list):

1. In `configs/datamodule/default.yaml`:
    - update `batch_dir` to point to the directory where you stored your concurrent batches during
      batch creation.
    - update `gsp_zarr_path` to point to the PVLive data containing the national estimate.
2. In `configs/model/default.yaml`:
    - update the PVNet model for which you are training a summation model. A new summation model
      should be trained for each PVNet model.
    - update the hyperparameters and structure of the summation model.
3. In `configs/trainer/default.yaml`:
    - set `accelerator: cpu` if running on a system without a supported GPU.
4. In `configs/config.yaml`:
    - It is recommended that you set `presave_pvnet_outputs` to `True`. With this setting, the
      concurrent batches you created are run through the PVNet model only once, before training,
      and their outputs saved, rather than being run through PVNet on the fly for every batch
      throughout training. This can speed up training significantly.
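
If you prefer not to edit the files directly, these values can also be overridden on the command
line via Hydra. The paths below are examples only, and the override keys assume the config layout
shown in this commit:

```bash
# Hydra overrides corresponding to the steps above (paths are examples only)
python run.py \
    datamodule.batch_dir=/path/to/concurrent_batches \
    datamodule.gsp_zarr_path=/path/to/pvlive_gsp.zarr \
    trainer.accelerator=cpu \
    presave_pvnet_outputs=True
```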


Assuming you have updated the configs, you should now be able to run:

```bash
python run.py
```


## Testing

You can use `python -m pytest tests` to run the tests.
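
For example, to stop at the first failure and get more verbose output (these are standard pytest
flags, nothing specific to this repo):

```bash
python -m pytest tests -x -v
```
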
27 changes: 27 additions & 0 deletions configs.example/callbacks/default.yaml
@@ -0,0 +1,27 @@
learning_rate_monitor:
  _target_: lightning.pytorch.callbacks.LearningRateMonitor
  logging_interval: "epoch"

model_summary:
  _target_: lightning.pytorch.callbacks.ModelSummary
  max_depth: 3

model_checkpoint:
  _target_: lightning.pytorch.callbacks.ModelCheckpoint
  # name of the logged metric which determines when model is improving
  monitor: "${resolve_monitor_loss:${model.output_quantiles}}"
  mode: "min" # can be "max" or "min"
  save_top_k: 1 # save k best models (determined by above metric)
  save_last: True # additionally always save model from last epoch
  every_n_epochs: 1
  verbose: False
  filename: "epoch={epoch}-step={step}"
  dirpath: "PLACEHOLDER/${model_name}"
  auto_insert_metric_name: False
  save_on_train_epoch_end: False

#stochastic_weight_averaging:
#  _target_: pvnet_summation.callbacks.StochasticWeightAveraging
#  swa_lrs: 0.0000001
#  swa_epoch_start: 0.8
#  annealing_epochs: 5
44 changes: 44 additions & 0 deletions configs.example/config.yaml
@@ -0,0 +1,44 @@
# @package _global_

# specify here default training configuration
defaults:
  - trainer: default.yaml
  - model: default.yaml
  - datamodule: default.yaml
  - callbacks: default.yaml # set this to null if you don't want to use callbacks
  - logger: wandb.yaml # set logger here or use command line (e.g. `python run.py logger=wandb`)
  - hydra: default.yaml

# Whether to loop through the PVNet outputs and save them out before training
presave_pvnet_outputs: True

# enable color logging
# - override hydra/hydra_logging: colorlog
# - override hydra/job_logging: colorlog

# path to original working directory
# hydra hijacks working directory by changing it to the current log directory,
# so it's useful to have this path as a special variable
# learn more here: https://hydra.cc/docs/next/tutorials/basic/running_your_app/working_directory
work_dir: ${hydra:runtime.cwd}

model_name: "default"

# use `python run.py debug=true` for easy debugging!
# this will run 1 train, val and test loop with only 1 batch
# equivalent to running `python run.py trainer.fast_dev_run=true`
# (this is placed here just for easier access from command line)
debug: False

# pretty print config at the start of the run using Rich library
print_config: True

# disable python warnings if they annoy you
ignore_warnings: True

# check performance on test set, using the best model achieved during training
# lightning chooses best model based on metric specified in checkpoint callback
test_after_training: False

seed: 2727831
6 changes: 6 additions & 0 deletions configs.example/datamodule/default.yaml
@@ -0,0 +1,6 @@
_target_: pvnet_summation.data.datamodule.DataModule
batch_dir: "PLACEHOLDER"
gsp_zarr_path: "PLACEHOLDER"
batch_size: 32
num_workers: 20
prefetch_factor: 2
12 changes: 12 additions & 0 deletions configs.example/hydra/default.yaml
@@ -0,0 +1,12 @@
# output paths for hydra logs
run:
  dir: logs/runs/${now:%Y-%m-%d}/${now:%H-%M-%S}
sweep:
  dir: logs/multiruns/${now:%Y-%m-%d_%H-%M-%S}
  subdir: ${hydra.job.num}

# you can set here environment variables that are universal for all users
# for system specific variables (like data paths) it's better to use .env file!
job:
  env_set:
    EXAMPLE_VAR: "example_value"
15 changes: 15 additions & 0 deletions configs.example/logger/wandb.yaml
@@ -0,0 +1,15 @@
# https://wandb.ai

wandb:
  _target_: lightning.pytorch.loggers.wandb.WandbLogger
  project: "PLACEHOLDER"
  name: "${model_name}"
  save_dir: "PLACEHOLDER"
  offline: False # set True to store all logs only locally
  id: null # pass correct id to resume experiment!
  # entity: "" # set to name of your wandb team or just remove it
  log_model: False
  prefix: ""
  job_type: "train"
  group: ""
  tags: []
31 changes: 31 additions & 0 deletions configs.example/model/default.yaml
@@ -0,0 +1,31 @@
_target_: pvnet_summation.models.model.Model

output_quantiles: null

model_name: "openclimatefix/pvnet_v2"
model_version: "898630f3f8cd4e8506525d813dd61c6d8de86144"

#--------------------------------------------
# Tabular network settings
#--------------------------------------------

output_network:
  _target_: pvnet.models.multimodal.linear_networks.networks.ResFCNet2
  _partial_: True
output_network_kwargs:
  fc_hidden_features: 128
  n_res_blocks: 2
  res_block_layers: 2
  dropout_frac: 0.0
predict_difference_from_sum: False

# ----------------------------------------------

optimizer:
  _target_: pvnet.optimizers.AdamWReduceLROnPlateau
  lr: 0.0001
  weight_decay: 0.25
  amsgrad: True
  patience: 20
  factor: 0.1
  threshold: 0.00
7 changes: 7 additions & 0 deletions configs.example/readme.md
@@ -0,0 +1,7 @@
The following folders hold the configuration files.

This idea is copied from
https://github.com/ashleve/lightning-hydra-template/blob/main/configs/experiment/example_simple.yaml

Run experiments with:
`python run.py experiment=example_simple`
15 changes: 15 additions & 0 deletions configs.example/trainer/default.yaml
@@ -0,0 +1,15 @@
_target_: lightning.pytorch.trainer.trainer.Trainer

accelerator: gpu
devices: auto

min_epochs: null
max_epochs: 100
reload_dataloaders_every_n_epochs: 0
num_sanity_val_steps: 8
fast_dev_run: false

#accumulate_grad_batches: 4
#val_check_interval: 800
#limit_val_batches: 800
log_every_n_steps: 50
