Skip to content

Commit

Permalink
Config updates finalised
Browse files Browse the repository at this point in the history
  • Loading branch information
felix-e-h-p committed Dec 18, 2024
1 parent 1c168e3 commit dc6f6cd
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 130 deletions.
194 changes: 74 additions & 120 deletions configs.example/datamodule/configuration/example_configuration.yaml
Original file line number Diff line number Diff line change
@@ -1,124 +1,48 @@
general:
description: Example data config for creating PVNet batches
name: example_pvnet
description: Example config for producing PVNet samples for a renewable generation site
name: site_example_config

input_data:
default_history_minutes: 120
default_forecast_minutes: 480

# Either use Site OR GSP configuration
site:
# Path to Site data in NetCDF format
file_path: PLACEHOLDER.nc
# Path to metadata in CSV format
metadata_file_path: PLACEHOLDER.csv
time_resolution_minutes: 15
interval_start_minutes: -60
# Specified for intraday currently
interval_end_minutes: 480
dropout_timedeltas_minutes: null
dropout_fraction: 0 # Fraction of samples with dropout

gsp:
# Path to the GSP data. This should be a zarr file
# Path to GSP data in zarr format
# e.g. gs://solar-pv-nowcasting-data/PV/GSP/v7/pv_gsp.zarr
gsp_zarr_path: PLACEHOLDER.zarr
history_minutes: 120
forecast_minutes: 480
zarr_path: PLACEHOLDER.zarr
interval_start_minutes: -60
# Specified for intraday currently
interval_end_minutes: 480
time_resolution_minutes: 30
# A random value from the list below will be chosen as the delay when dropout is used
# Random value from the list below will be chosen as the delay when dropout is used
# If set to null no dropout is applied. Only values before t0 are dropped out for GSP.
# Values after t0 are assumed as targets and cannot be dropped.
dropout_timedeltas_minutes: null
dropout_fraction: 0 # Fraction of samples with dropout

pv:
pv_files_groups:
- label: solar_sheffield_passiv
# Path to the site-level PV data. This should be a netcdf
# e.g gs://solar-pv-nowcasting-data/PV/Passive/ocf_formatted/v0/passiv.netcdf
pv_filename: PLACEHOLDER.netcdf
# Path to the site-level PV metadata. This should be a csv
# e.g gs://solar-pv-nowcasting-data/PV/Passive/ocf_formatted/v0/system_metadata.csv
pv_metadata_filename: PLACEHOLDER.csv
# This is the list of pv_ml_ids to be sliced from the PV site level data
# The IDs below are 349 of the PV systems which have very little NaN data in the historic data
# and which are still reporting live (as of Oct 2023)
pv_ml_ids:
[
154, 155, 156, 158, 159, 160, 162, 164, 165, 166, 167, 168, 169, 171, 173, 177, 178, 179,
181, 182, 185, 186, 187, 188, 189, 190, 191, 192, 193, 197, 198, 199, 200, 202, 204, 205,
206, 208, 209, 211, 214, 215, 216, 217, 218, 219, 220, 221, 225, 229, 230, 232, 233, 234,
236, 242, 243, 245, 252, 254, 255, 256, 257, 258, 260, 261, 262, 265, 267, 268, 272, 273,
275, 276, 277, 280, 281, 282, 283, 287, 289, 291, 292, 293, 294, 295, 296, 297, 298, 301,
302, 303, 304, 306, 307, 309, 310, 311, 317, 318, 319, 320, 321, 322, 323, 325, 326, 329,
332, 333, 335, 336, 338, 340, 342, 344, 345, 346, 348, 349, 352, 354, 355, 356, 357, 360,
362, 363, 368, 369, 370, 371, 372, 374, 375, 376, 378, 380, 382, 384, 385, 388, 390, 391,
393, 396, 397, 398, 399, 400, 401, 403, 404, 405, 406, 407, 409, 411, 412, 413, 414, 415,
416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 429, 431, 435, 437, 438, 440,
441, 444, 447, 450, 451, 453, 456, 457, 458, 459, 464, 465, 466, 467, 468, 470, 471, 473,
474, 476, 477, 479, 480, 481, 482, 485, 486, 488, 490, 491, 492, 493, 496, 498, 501, 503,
506, 507, 508, 509, 510, 511, 512, 513, 515, 516, 517, 519, 520, 521, 522, 524, 526, 527,
528, 531, 532, 536, 537, 538, 540, 541, 542, 543, 544, 545, 549, 550, 551, 552, 553, 554,
556, 557, 560, 561, 563, 566, 568, 571, 572, 575, 576, 577, 579, 580, 581, 582, 584, 585,
588, 590, 594, 595, 597, 600, 602, 603, 604, 606, 611, 613, 614, 616, 618, 620, 622, 623,
624, 625, 626, 628, 629, 630, 631, 636, 637, 638, 640, 641, 642, 644, 645, 646, 650, 651,
652, 653, 654, 655, 657, 660, 661, 662, 663, 666, 667, 668, 670, 675, 676, 679, 681, 683,
684, 685, 687, 696, 698, 701, 702, 703, 704, 706, 710, 722, 723, 724, 725, 727, 728, 729,
730, 732, 733, 734, 735, 736, 737
]
history_minutes: 180
forecast_minutes: 0
time_resolution_minutes: 5
# A random value from the list below will be chosen as the delay when dropout is used.
# If set to null no dropout is applied. All PV systems are dropped together with this setting.
dropout_timedeltas_minutes: null
dropout_fraction: 0 # Fraction of samples with dropout
# A random value from the list below will be chosen as the delay when system dropout is used.
# If set to null no dropout is applied. All PV systems are dropped independently with this setting.
system_dropout_timedeltas_minutes: null
# For each sample a different dropout probability is used which is uniformly sampled from the min
# and max below
system_dropout_fraction_min: 0
system_dropout_fraction_max: 0

nwp:
ukv:
nwp_provider: ukv
nwp_zarr_path:
# Path(s) to UKV NWP data in zarr format
# e.g. gs://solar-pv-nowcasting-data/NWP/UK_Met_Office/UKV_intermediate_version_7.zarr
- PLACEHOLDER.zarr
history_minutes: 120
forecast_minutes: 480
time_resolution_minutes: 60
nwp_channels:
# These variables exist in the CEDA training set and in the live MetOffice service
- t # 2-metre temperature
- dswrf # downwards short-wave radiation flux
- dlwrf # downwards long-wave radiation flux
- hcc # high cloud cover
- mcc # medium cloud cover
- lcc # low cloud cover
- sde # snow depth water equivalent
- r # relative humidity
- vis # visibility
- si10 # 10-metre wind speed
- wdir10 # 10-metre wind direction
- prate # precipitation rate
# These variables exist in CEDA training data but not in the live MetOffice service
- hcct # height of convective cloud top, meters above surface. NaN if no clouds
- cdcb # height of lowest cloud base > 3 oktas
- dpt # dew point temperature
- prmsl # mean sea level pressure
- h # geometrical? (maybe geopotential?) height
nwp_image_size_pixels_height: 24
nwp_image_size_pixels_width: 24
# A random value from the list below will be chosen as the delay when dropout is used
# If set to null no dropout is applied. Values must be negative.
dropout_timedeltas_minutes: [-180]
# Dropout applied with this probability
dropout_fraction: 1.0
# How long after the NWP init-time are we still willing to use this forecast
# If null we use each init-time for all steps it covers
max_staleness_minutes: null

ecmwf:
nwp_provider: ecmwf
provider: ecmwf
# Path to ECMWF NWP data in zarr format
# n.b. It is not necessary to use multiple or any NWP data. These entries can be removed
nwp_zarr_path: PLACEHOLDER.zarr
history_minutes: 120
forecast_minutes: 480
zarr_path: PLACEHOLDER.zarr
interval_start_minutes: -60
# Specified for intraday currently
interval_end_minutes: 480
time_resolution_minutes: 60
nwp_channels:
channels:
- t2m # 2-metre temperature
- dswrf # downwards short-wave radiation flux
- dlwrf # downwards long-wave radiation flux
Expand All @@ -136,23 +60,55 @@ input_data:
- v10 # 10-metre V component of wind speed
- v100 # 100-metre V component of wind speed
- v200 # 200-metre V component of wind speed
nwp_image_size_pixels_height: 12 # roughly equivalent to UKV 24 pixels
nwp_image_size_pixels_width: 12
dropout_timedeltas_minutes: [-180]
dropout_fraction: 1.0
image_size_pixels_height: 24
image_size_pixels_width: 24
dropout_timedeltas_minutes: [-360]
dropout_fraction: 1.0 # Fraction of samples with dropout
max_staleness_minutes: null

ukv:
provider: ukv
# Path to UKV NWP data in zarr format
# e.g. gs://solar-pv-nowcasting-data/NWP/UK_Met_Office/UKV_intermediate_version_7.zarr
# n.b. It is not necessary to use multiple or any NWP data. These entries can be removed
zarr_path: PLACEHOLDER.zarr
interval_start_minutes: -60
# Specified for intraday currently
interval_end_minutes: 480
time_resolution_minutes: 60
channels:
- t # 2-metre temperature
- dswrf # downwards short-wave radiation flux
- dlwrf # downwards long-wave radiation flux
- hcc # high cloud cover
- mcc # medium cloud cover
- lcc # low cloud cover
- sde # snow depth water equivalent
- r # relative humidity
- vis # visibility
- si10 # 10-metre wind speed
- wdir10 # 10-metre wind direction
- prate # precipitation rate
# These variables exist in CEDA training data but not in the live MetOffice service
- hcct # height of convective cloud top, meters above surface. NaN if no clouds
- cdcb # height of lowest cloud base > 3 oktas
- dpt # dew point temperature
- prmsl # mean sea level pressure
- h # geometrical? (maybe geopotential?) height
image_size_pixels_height: 24
image_size_pixels_width: 24
dropout_timedeltas_minutes: [-360]
dropout_fraction: 1.0 # Fraction of samples with dropout
max_staleness_minutes: null

satellite:
satellite_zarr_path:
# Path(s) to non-HRV satellite data in zarr format
# e.g. gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr
- PLACEHOLDER.zarr
history_minutes: 90
forecast_minutes: 0 # Deprecated for most use cases
live_delay_minutes: 60 # Only data up to time t0-60 minutes is included in slice
# Path to Satellite data (non-HRV) in zarr format
# e.g. gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr
zarr_path: PLACEHOLDER.zarr
interval_start_minutes: -30
interval_end_minutes: 0
time_resolution_minutes: 5
satellite_channels:
# Uses for each channel taken from https://resources.eumetrain.org/data/3/311/bsc_s4.pdf
channels:
- IR_016 # Surface, cloud phase
- IR_039 # Surface, clouds, wind fields
- IR_087 # Surface, clouds, atmospheric instability
Expand All @@ -164,9 +120,7 @@ input_data:
- VIS008 # Surface, clouds, wind fields
- WV_062 # Water vapor, high level clouds, upper air analysis
- WV_073 # Water vapor, atmospheric instability, upper-level dynamics
satellite_image_size_pixels_height: 24
satellite_image_size_pixels_width: 24
# A random value from the list below will be chosen as the delay when dropout is used
# If set to null no dropout is applied. Values must be negative.
image_size_pixels_height: 24
image_size_pixels_width: 24
dropout_timedeltas_minutes: null
dropout_fraction: 0 # Fraction of samples with dropout
8 changes: 5 additions & 3 deletions configs.example/datamodule/premade_batches.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
_target_: pvnet.data.datamodule.DataModule
configuration: null
# The batch_dir is the location batches were saved to using the save_batches.py script
# The batch_dir should contain train and val subdirectories with batches
batch_dir: "PLACEHOLDER"

# The sample_dir is the location batches were saved to using the save_batches.py script
# The sample_dir should contain train and val subdirectories with batches

sample_dir: "PLACEHOLDER"
num_workers: 10
prefetch_factor: 2
batch_size: 8
10 changes: 4 additions & 6 deletions configs.example/datamodule/streamed_batches.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,19 @@ _target_: pvnet.data.datamodule.DataModule
# Path to the data configuration yaml file. You can find examples in the configuration subdirectory
# in configs.example/datamodule/configuration
# Use the full local path such as: /FULL/PATH/PVNet/configs/datamodule/configuration/gcp_configuration.yaml

configuration: "PLACEHOLDER.yaml"
num_workers: 20
prefetch_factor: 2
batch_size: 8
batch_output_dir: "PLACEHOLDER"
num_train_batches: 2
num_val_batches: 1

sample_output_dir: "PLACEHOLDER"
num_train_samples: 2
num_val_samples: 1

train_period:
- null
- "2022-05-07"
val_period:
- "2022-05-08"
- "2023-05-08"
test_period:
- "2022-05-08"
- "2023-05-08"
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ dynamic = ["version", "readme"]
license={file="LICENCE"}

dependencies = [
"ocf_data_sampler==0.0.26",
"ocf_data_sampler==0.0.32",
"ocf_datapipes>=3.3.34",
"ocf_ml_metrics>=0.0.11",
"numpy",
Expand Down

0 comments on commit dc6f6cd

Please sign in to comment.