Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update and reformat of site_example_configuration #292

Merged
merged 5 commits into from
Dec 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -140,3 +140,4 @@ dmypy.json

# Pyre type checker
.pyre/
.DS_Store
200 changes: 80 additions & 120 deletions configs.example/datamodule/configuration/example_configuration.yaml
Original file line number Diff line number Diff line change
@@ -1,124 +1,48 @@
general:
description: Example data config for creating PVNet batches
name: example_pvnet
description: Example config for producing PVNet samples
name: example_config

input_data:
default_history_minutes: 120
default_forecast_minutes: 480

# Either use Site OR GSP configuration
site:
# Path to Site data in NetCDF format
file_path: PLACEHOLDER.nc
# Path to metadata in CSV format
metadata_file_path: PLACEHOLDER.csv
time_resolution_minutes: 15
interval_start_minutes: -60
# Specified for intraday currently
interval_end_minutes: 480
dropout_timedeltas_minutes: null
dropout_fraction: 0 # Fraction of samples with dropout

gsp:
# Path to the GSP data. This should be a zarr file
# Path to GSP data in zarr format
# e.g. gs://solar-pv-nowcasting-data/PV/GSP/v7/pv_gsp.zarr
gsp_zarr_path: PLACEHOLDER.zarr
history_minutes: 120
forecast_minutes: 480
zarr_path: PLACEHOLDER.zarr
interval_start_minutes: -60
# Specified for intraday currently
interval_end_minutes: 480
time_resolution_minutes: 30
# A random value from the list below will be chosen as the delay when dropout is used
# Random value from the list below will be chosen as the delay when dropout is used
# If set to null no dropout is applied. Only values before t0 are dropped out for GSP.
# Values after t0 are assumed as targets and cannot be dropped.
dropout_timedeltas_minutes: null
dropout_fraction: 0 # Fraction of samples with dropout

pv:
pv_files_groups:
- label: solar_sheffield_passiv
# Path to the site-level PV data. This should be a netcdf
# e.g. gs://solar-pv-nowcasting-data/PV/Passive/ocf_formatted/v0/passiv.netcdf
pv_filename: PLACEHOLDER.netcdf
# Path to the site-level PV metadata. This should be a csv
# e.g. gs://solar-pv-nowcasting-data/PV/Passive/ocf_formatted/v0/system_metadata.csv
pv_metadata_filename: PLACEHOLDER.csv
# This is the list of pv_ml_ids to be sliced from the PV site level data
# The IDs below are 349 of the PV systems which have very little NaN data in the historic data
# and which are still reporting live (as of Oct 2023)
pv_ml_ids:
[
154, 155, 156, 158, 159, 160, 162, 164, 165, 166, 167, 168, 169, 171, 173, 177, 178, 179,
181, 182, 185, 186, 187, 188, 189, 190, 191, 192, 193, 197, 198, 199, 200, 202, 204, 205,
206, 208, 209, 211, 214, 215, 216, 217, 218, 219, 220, 221, 225, 229, 230, 232, 233, 234,
236, 242, 243, 245, 252, 254, 255, 256, 257, 258, 260, 261, 262, 265, 267, 268, 272, 273,
275, 276, 277, 280, 281, 282, 283, 287, 289, 291, 292, 293, 294, 295, 296, 297, 298, 301,
302, 303, 304, 306, 307, 309, 310, 311, 317, 318, 319, 320, 321, 322, 323, 325, 326, 329,
332, 333, 335, 336, 338, 340, 342, 344, 345, 346, 348, 349, 352, 354, 355, 356, 357, 360,
362, 363, 368, 369, 370, 371, 372, 374, 375, 376, 378, 380, 382, 384, 385, 388, 390, 391,
393, 396, 397, 398, 399, 400, 401, 403, 404, 405, 406, 407, 409, 411, 412, 413, 414, 415,
416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 429, 431, 435, 437, 438, 440,
441, 444, 447, 450, 451, 453, 456, 457, 458, 459, 464, 465, 466, 467, 468, 470, 471, 473,
474, 476, 477, 479, 480, 481, 482, 485, 486, 488, 490, 491, 492, 493, 496, 498, 501, 503,
506, 507, 508, 509, 510, 511, 512, 513, 515, 516, 517, 519, 520, 521, 522, 524, 526, 527,
528, 531, 532, 536, 537, 538, 540, 541, 542, 543, 544, 545, 549, 550, 551, 552, 553, 554,
556, 557, 560, 561, 563, 566, 568, 571, 572, 575, 576, 577, 579, 580, 581, 582, 584, 585,
588, 590, 594, 595, 597, 600, 602, 603, 604, 606, 611, 613, 614, 616, 618, 620, 622, 623,
624, 625, 626, 628, 629, 630, 631, 636, 637, 638, 640, 641, 642, 644, 645, 646, 650, 651,
652, 653, 654, 655, 657, 660, 661, 662, 663, 666, 667, 668, 670, 675, 676, 679, 681, 683,
684, 685, 687, 696, 698, 701, 702, 703, 704, 706, 710, 722, 723, 724, 725, 727, 728, 729,
730, 732, 733, 734, 735, 736, 737
]
history_minutes: 180
forecast_minutes: 0
time_resolution_minutes: 5
# A random value from the list below will be chosen as the delay when dropout is used.
# If set to null no dropout is applied. All PV systems are dropped together with this setting.
dropout_timedeltas_minutes: null
dropout_fraction: 0 # Fraction of samples with dropout
# A random value from the list below will be chosen as the delay when system dropout is used.
# If set to null no dropout is applied. All PV systems are dropped out independently with this setting.
system_dropout_timedeltas_minutes: null
# For each sample a different dropout probability is used, which is uniformly sampled from the min
# and max below
system_dropout_fraction_min: 0
system_dropout_fraction_max: 0

nwp:
ukv:
nwp_provider: ukv
nwp_zarr_path:
# Path(s) to UKV NWP data in zarr format
# e.g. gs://solar-pv-nowcasting-data/NWP/UK_Met_Office/UKV_intermediate_version_7.zarr
- PLACEHOLDER.zarr
history_minutes: 120
forecast_minutes: 480
time_resolution_minutes: 60
nwp_channels:
# These variables exist in the CEDA training set and in the live MetOffice service
- t # 2-metre temperature
- dswrf # downwards short-wave radiation flux
- dlwrf # downwards long-wave radiation flux
- hcc # high cloud cover
- mcc # medium cloud cover
- lcc # low cloud cover
- sde # snow depth water equivalent
- r # relative humidity
- vis # visibility
- si10 # 10-metre wind speed
- wdir10 # 10-metre wind direction
- prate # precipitation rate
# These variables exist in CEDA training data but not in the live MetOffice service
- hcct # height of convective cloud top, meters above surface. NaN if no clouds
- cdcb # height of lowest cloud base > 3 oktas
- dpt # dew point temperature
- prmsl # mean sea level pressure
- h # geometrical? (maybe geopotential?) height
nwp_image_size_pixels_height: 24
nwp_image_size_pixels_width: 24
# A random value from the list below will be chosen as the delay when dropout is used
# If set to null no dropout is applied. Values must be negative.
dropout_timedeltas_minutes: [-180]
# Dropout applied with this probability
dropout_fraction: 1.0
# How long after the NWP init-time are we still willing to use this forecast
# If null we use each init-time for all steps it covers
max_staleness_minutes: null

ecmwf:
nwp_provider: ecmwf
provider: ecmwf
# Path to ECMWF NWP data in zarr format
# n.b. It is not necessary to use multiple or any NWP data. These entries can be removed
nwp_zarr_path: PLACEHOLDER.zarr
history_minutes: 120
forecast_minutes: 480
zarr_path: PLACEHOLDER.zarr
interval_start_minutes: -60
# Specified for intraday currently
interval_end_minutes: 480
time_resolution_minutes: 60
nwp_channels:
channels:
- t2m # 2-metre temperature
- dswrf # downwards short-wave radiation flux
- dlwrf # downwards long-wave radiation flux
Expand All @@ -136,23 +60,61 @@ input_data:
- v10 # 10-metre V component of wind speed
- v100 # 100-metre V component of wind speed
- v200 # 200-metre V component of wind speed
nwp_image_size_pixels_height: 12 # roughly equivalent to UKV 24 pixels
nwp_image_size_pixels_width: 12
dropout_timedeltas_minutes: [-180]
dropout_fraction: 1.0
# The following channels are accumulated and need to be diffed
accum_channels:
- dswrf # downwards short-wave radiation flux
- dlwrf # downwards long-wave radiation flux
- sr # direct solar radiation
- duvrs # downwards UV radiation at surface
image_size_pixels_height: 24
image_size_pixels_width: 24
dropout_timedeltas_minutes: [-360]
dropout_fraction: 1.0 # Fraction of samples with dropout
max_staleness_minutes: null

ukv:
provider: ukv
# Path to UKV NWP data in zarr format
# e.g. gs://solar-pv-nowcasting-data/NWP/UK_Met_Office/UKV_intermediate_version_7.zarr
# n.b. It is not necessary to use multiple or any NWP data. These entries can be removed
zarr_path: PLACEHOLDER.zarr
interval_start_minutes: -60
# Specified for intraday currently
interval_end_minutes: 480
time_resolution_minutes: 60
channels:
- t # 2-metre temperature
- dswrf # downwards short-wave radiation flux
- dlwrf # downwards long-wave radiation flux
- hcc # high cloud cover
- mcc # medium cloud cover
- lcc # low cloud cover
- sde # snow depth water equivalent
- r # relative humidity
- vis # visibility
- si10 # 10-metre wind speed
- wdir10 # 10-metre wind direction
- prate # precipitation rate
# These variables exist in CEDA training data but not in the live MetOffice service
- hcct # height of convective cloud top, meters above surface. NaN if no clouds
- cdcb # height of lowest cloud base > 3 oktas
- dpt # dew point temperature
- prmsl # mean sea level pressure
- h # geometrical? (maybe geopotential?) height
image_size_pixels_height: 24
image_size_pixels_width: 24
dropout_timedeltas_minutes: [-360]
dropout_fraction: 1.0 # Fraction of samples with dropout
max_staleness_minutes: null

satellite:
satellite_zarr_path:
# Path(s) to non-HRV satellite data in zarr format
# e.g. gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr
- PLACEHOLDER.zarr
history_minutes: 90
forecast_minutes: 0 # Deprecated for most use cases
live_delay_minutes: 60 # Only data up to time t0 - 60 minutes is included in the slice
# Path to Satellite data (non-HRV) in zarr format
# e.g. gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr
zarr_path: PLACEHOLDER.zarr
interval_start_minutes: -30
interval_end_minutes: 0
time_resolution_minutes: 5
satellite_channels:
# Uses for each channel taken from https://resources.eumetrain.org/data/3/311/bsc_s4.pdf
channels:
- IR_016 # Surface, cloud phase
- IR_039 # Surface, clouds, wind fields
- IR_087 # Surface, clouds, atmospheric instability
Expand All @@ -164,9 +126,7 @@ input_data:
- VIS008 # Surface, clouds, wind fields
- WV_062 # Water vapor, high level clouds, upper air analysis
- WV_073 # Water vapor, atmospheric instability, upper-level dynamics
satellite_image_size_pixels_height: 24
satellite_image_size_pixels_width: 24
# A random value from the list below will be chosen as the delay when dropout is used
# If set to null no dropout is applied. Values must be negative.
image_size_pixels_height: 24
image_size_pixels_width: 24
dropout_timedeltas_minutes: null
dropout_fraction: 0 # Fraction of samples with dropout
8 changes: 5 additions & 3 deletions configs.example/datamodule/premade_batches.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
_target_: pvnet.data.datamodule.DataModule
configuration: null
# The batch_dir is the location batches were saved to using the save_batches.py script
# The batch_dir should contain train and val subdirectories with batches
batch_dir: "PLACEHOLDER"

# The sample_dir is the location batches were saved to using the save_batches.py script
# The sample_dir should contain train and val subdirectories with batches

sample_dir: "PLACEHOLDER"
num_workers: 10
prefetch_factor: 2
batch_size: 8
10 changes: 4 additions & 6 deletions configs.example/datamodule/streamed_batches.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,19 @@ _target_: pvnet.data.datamodule.DataModule
# Path to the data configuration yaml file. You can find examples in the configuration subdirectory
# in configs.example/datamodule/configuration
# Use the full local path such as: /FULL/PATH/PVNet/configs/datamodule/configuration/gcp_configuration.yaml

configuration: "PLACEHOLDER.yaml"
num_workers: 20
prefetch_factor: 2
batch_size: 8
batch_output_dir: "PLACEHOLDER"
num_train_batches: 2
num_val_batches: 1

sample_output_dir: "PLACEHOLDER"
num_train_samples: 2
num_val_samples: 1

train_period:
- null
- "2022-05-07"
val_period:
- "2022-05-08"
- "2023-05-08"
test_period:
- "2022-05-08"
- "2023-05-08"
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ dynamic = ["version", "readme"]
license={file="LICENCE"}

dependencies = [
"ocf_data_sampler==0.0.26",
"ocf_data_sampler==0.0.32",
"ocf_datapipes>=3.3.34",
"ocf_ml_metrics>=0.0.11",
"numpy",
Expand Down
Loading