Skip to content

Commit

Permalink
Merge pull request #139 from openclimatefix/add_nwp_var_longnames
Browse files Browse the repository at this point in the history
update example data configs
  • Loading branch information
dfulu authored Feb 15, 2024
2 parents d35e6ab + a2b1377 commit ff801d6
Show file tree
Hide file tree
Showing 6 changed files with 175 additions and 523 deletions.
1 change: 1 addition & 0 deletions .prettierignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
configs.example
1 change: 1 addition & 0 deletions .ruff.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ unfixable = []

# Exclude a variety of commonly ignored directories.
exclude = [
"configs.example",
".bzr",
".direnv",
".eggs",
Expand Down
172 changes: 172 additions & 0 deletions configs.example/datamodule/configuration/example_configuration.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
general:
description: Example data config for creating PVNet batches
name: example_pvnet

input_data:
default_history_minutes: 120
default_forecast_minutes: 480

gsp:
# Path to the GSP data. This should be a zarr file
# e.g. gs://solar-pv-nowcasting-data/PV/GSP/v7/pv_gsp.zarr
gsp_zarr_path: PLACEHOLDER.zarr
history_minutes: 120
forecast_minutes: 480
time_resolution_minutes: 30
# A random value from the list below will be chosen as the delay when dropout is used
# If set to null no dropout is applied. Only values before t0 are dropped out for GSP.
# Values after t0 are assumed as targets and cannot be dropped.
dropout_timedeltas_minutes: null
dropout_fraction: 0 # Fraction of samples with dropout

pv:
pv_files_groups:
- label: solar_sheffield_passiv
# Path to the site-level PV data. This should be a netcdf
# e.g gs://solar-pv-nowcasting-data/PV/Passive/ocf_formatted/v0/passiv.netcdf
pv_filename: PLACEHOLDER.netcdf
# Path to the site-level PV metadata. This choudl be a csv
# e.g gs://solar-pv-nowcasting-data/PV/Passive/ocf_formatted/v0/system_metadata.csv
pv_metadata_filename: PLACEHOLDER.csv
# This is the list of pv_ml_ids to be sliced from the PV site level data
# The IDs below are 349 of the PV systems which have very little NaN data in the historic data
# and which are still reporting live (as of Oct 2023)
pv_ml_ids:
[
154, 155, 156, 158, 159, 160, 162, 164, 165, 166, 167, 168, 169, 171, 173, 177, 178, 179,
181, 182, 185, 186, 187, 188, 189, 190, 191, 192, 193, 197, 198, 199, 200, 202, 204, 205,
206, 208, 209, 211, 214, 215, 216, 217, 218, 219, 220, 221, 225, 229, 230, 232, 233, 234,
236, 242, 243, 245, 252, 254, 255, 256, 257, 258, 260, 261, 262, 265, 267, 268, 272, 273,
275, 276, 277, 280, 281, 282, 283, 287, 289, 291, 292, 293, 294, 295, 296, 297, 298, 301,
302, 303, 304, 306, 307, 309, 310, 311, 317, 318, 319, 320, 321, 322, 323, 325, 326, 329,
332, 333, 335, 336, 338, 340, 342, 344, 345, 346, 348, 349, 352, 354, 355, 356, 357, 360,
362, 363, 368, 369, 370, 371, 372, 374, 375, 376, 378, 380, 382, 384, 385, 388, 390, 391,
393, 396, 397, 398, 399, 400, 401, 403, 404, 405, 406, 407, 409, 411, 412, 413, 414, 415,
416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 429, 431, 435, 437, 438, 440,
441, 444, 447, 450, 451, 453, 456, 457, 458, 459, 464, 465, 466, 467, 468, 470, 471, 473,
474, 476, 477, 479, 480, 481, 482, 485, 486, 488, 490, 491, 492, 493, 496, 498, 501, 503,
506, 507, 508, 509, 510, 511, 512, 513, 515, 516, 517, 519, 520, 521, 522, 524, 526, 527,
528, 531, 532, 536, 537, 538, 540, 541, 542, 543, 544, 545, 549, 550, 551, 552, 553, 554,
556, 557, 560, 561, 563, 566, 568, 571, 572, 575, 576, 577, 579, 580, 581, 582, 584, 585,
588, 590, 594, 595, 597, 600, 602, 603, 604, 606, 611, 613, 614, 616, 618, 620, 622, 623,
624, 625, 626, 628, 629, 630, 631, 636, 637, 638, 640, 641, 642, 644, 645, 646, 650, 651,
652, 653, 654, 655, 657, 660, 661, 662, 663, 666, 667, 668, 670, 675, 676, 679, 681, 683,
684, 685, 687, 696, 698, 701, 702, 703, 704, 706, 710, 722, 723, 724, 725, 727, 728, 729,
730, 732, 733, 734, 735, 736, 737
]
history_minutes: 180
forecast_minutes: 0
time_resolution_minutes: 5
# A random value from the list below will be chosen as the delay when dropout is used.
# If set to null no dropout is applied. All PV systems are dropped together with this setting.
dropout_timedeltas_minutes: null
dropout_fraction: 0 # Fraction of samples with dropout
# A random value from the list below will be chosen as the delay when system dropout is used.
# If set to null no dropout is applied. All PV systems are indpendently with this setting.
system_dropout_timedeltas_minutes: null
# For ech sample a differnt dropout probability is used which is uniformly sampled from the min
# and max below
system_dropout_fraction_min: 0
system_dropout_fraction_max: 0

nwp:
ukv:
nwp_provider: ukv
nwp_zarr_path:
# Path(s) to UKV NWP data in zarr format
# e.g. gs://solar-pv-nowcasting-data/NWP/UK_Met_Office/UKV_intermediate_version_7.zarr
- PLACEHOLDER.zarr
history_minutes: 120
forecast_minutes: 480
time_resolution_minutes: 60
nwp_channels:
# These variables exist in the CEDA training set and in the live MetOffice live service
- t # 2-metre temperature
- dswrf # downwards short-wave radiation flux
- dlwrf # downwards long-wave radiation flux
- hcc # high cloud cover
- mcc # medium cloud cover
- lcc # low cloud cover
- sde # snow depth water equivalent
- r # relative humidty
- vis # visibility
- si10 # 10-metre wind speed
- wdir10 # 10-metre wind direction
- prate # precipitation rate
# These variables exist in CEDA training data but not in the live MetOffice live service
- hcct # height of convective cloud top, meters above surface. NaN if no clouds
- cdcb # height of lowest cloud base > 3 oktas
- dpt # dew point temperature
- prmsl # mean sea level pressure
- h # geometrical? (maybe geopotential?) height
nwp_image_size_pixels_height: 24
nwp_image_size_pixels_width: 24
# A random value from the list below will be chosen as the delay when dropout is used
# If set to null no dropout is applied. Values must be negative.
dropout_timedeltas_minutes: [-180]
# Dropout applied with this probability
dropout_fraction: 1.0
# How long after the NWP init-time are we still willing to use this forecast
# If null we use each init-time for all steps it covers
max_staleness_minutes: null

ecmwf:
nwp_provider: ecmwf
# Path to ECMWF NWP data in zarr format
# n.b. It is not necessary to use multiple or any NWP data. These entries can be removed
nwp_zarr_path: PLACEHOLDER.zarr
history_minutes: 120
forecast_minutes: 480
time_resolution_minutes: 60
nwp_channels:
- t2m # 2-metre temperature
- dswrf # downwards short-wave radiation flux
- dlwrf # downwards long-wave radiation flux
- hcc # high cloud cover
- mcc # medium cloud cover
- lcc # low cloud cover
- tcc # total cloud cover
- sde # snow depth water equivalent
- sr # direct solar radiation
- duvrs # downwards UV radiation at surface
- prate # precipitation rate
- u10 # 10-metre U component of wind speed
- u100 # 100-metre U component of wind speed
- u200 # 200-metre U component of wind speed
- v10 # 10-metre V component of wind speed
- v100 # 100-metre V component of wind speed
- v200 # 200-metre V component of wind speed
nwp_image_size_pixels_height: 12 # roughly equivalent to UKV 24 pixels
nwp_image_size_pixels_width: 12
dropout_timedeltas_minutes: [-180]
dropout_fraction: 1.0
max_staleness_minutes: null

satellite:
satellite_zarr_path:
# Path(s) to non-HRV satellite data in zarr format
# e.g. gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr
- PLACEHOLDER.zarr
history_minutes: 90
forecast_minutes: 0 # Deprecated for most use cases
live_delay_minutes: 60 # Only data up to time t0-60minutes is inluced in slice
time_resolution_minutes: 5
satellite_channels:
# Uses for each channel taken from https://resources.eumetrain.org/data/3/311/bsc_s4.pdf
- IR_016 # Surface, cloud phase
- IR_039 # Surface, clouds, wind fields
- IR_087 # Surface, clouds, atmospheric instability
- IR_097 # Ozone
- IR_108 # Surface, clouds, wind fields, atmospheric instability
- IR_120 # Surface, clouds, atmospheric instability
- IR_134 # Cirrus cloud height, atmospheric instability
- VIS006 # Surface, clouds, wind fields
- VIS008 # Surface, clouds, wind fields
- WV_062 # Water vapor, high level clouds, upper air analysis
- WV_073 # Water vapor, atmospheric instability, upper-level dynamics
satellite_image_size_pixels_height: 24
satellite_image_size_pixels_width: 24
# A random value from the list below will be chosen as the delay when dropout is used
# If set to null no dropout is applied. Values must be negative.
dropout_timedeltas_minutes: null
dropout_fraction: 0 # Fraction of samples with dropout
Loading

0 comments on commit ff801d6

Please sign in to comment.