diff --git a/.prettierignore b/.prettierignore new file mode 100644 index 00000000..b980d35f --- /dev/null +++ b/.prettierignore @@ -0,0 +1 @@ +configs.example diff --git a/.ruff.toml b/.ruff.toml index 6eb44608..4cdfdabd 100644 --- a/.ruff.toml +++ b/.ruff.toml @@ -8,6 +8,7 @@ unfixable = [] # Exclude a variety of commonly ignored directories. exclude = [ + "configs.example", ".bzr", ".direnv", ".eggs", diff --git a/configs.example/datamodule/configuration/example_configuration.yaml b/configs.example/datamodule/configuration/example_configuration.yaml new file mode 100644 index 00000000..827b0e9d --- /dev/null +++ b/configs.example/datamodule/configuration/example_configuration.yaml @@ -0,0 +1,172 @@ +general: + description: Example data config for creating PVNet batches + name: example_pvnet + +input_data: + default_history_minutes: 120 + default_forecast_minutes: 480 + + gsp: + # Path to the GSP data. This should be a zarr file + # e.g. gs://solar-pv-nowcasting-data/PV/GSP/v7/pv_gsp.zarr + gsp_zarr_path: PLACEHOLDER.zarr + history_minutes: 120 + forecast_minutes: 480 + time_resolution_minutes: 30 + # A random value from the list below will be chosen as the delay when dropout is used + # If set to null no dropout is applied. Only values before t0 are dropped out for GSP. + # Values after t0 are assumed as targets and cannot be dropped. + dropout_timedeltas_minutes: null + dropout_fraction: 0 # Fraction of samples with dropout + + pv: + pv_files_groups: + - label: solar_sheffield_passiv + # Path to the site-level PV data. This should be a netcdf + # e.g gs://solar-pv-nowcasting-data/PV/Passive/ocf_formatted/v0/passiv.netcdf + pv_filename: PLACEHOLDER.netcdf + # Path to the site-level PV metadata. This choudl be a csv + # e.g gs://solar-pv-nowcasting-data/PV/Passive/ocf_formatted/v0/system_metadata.csv + pv_metadata_filename: PLACEHOLDER.csv + # This is the list of pv_ml_ids to be sliced from the PV site level data + # The IDs below are 349 of the PV systems which have very little NaN data in the historic data + # and which are still reporting live (as of Oct 2023) + pv_ml_ids: + [ + 154, 155, 156, 158, 159, 160, 162, 164, 165, 166, 167, 168, 169, 171, 173, 177, 178, 179, + 181, 182, 185, 186, 187, 188, 189, 190, 191, 192, 193, 197, 198, 199, 200, 202, 204, 205, + 206, 208, 209, 211, 214, 215, 216, 217, 218, 219, 220, 221, 225, 229, 230, 232, 233, 234, + 236, 242, 243, 245, 252, 254, 255, 256, 257, 258, 260, 261, 262, 265, 267, 268, 272, 273, + 275, 276, 277, 280, 281, 282, 283, 287, 289, 291, 292, 293, 294, 295, 296, 297, 298, 301, + 302, 303, 304, 306, 307, 309, 310, 311, 317, 318, 319, 320, 321, 322, 323, 325, 326, 329, + 332, 333, 335, 336, 338, 340, 342, 344, 345, 346, 348, 349, 352, 354, 355, 356, 357, 360, + 362, 363, 368, 369, 370, 371, 372, 374, 375, 376, 378, 380, 382, 384, 385, 388, 390, 391, + 393, 396, 397, 398, 399, 400, 401, 403, 404, 405, 406, 407, 409, 411, 412, 413, 414, 415, + 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 429, 431, 435, 437, 438, 440, + 441, 444, 447, 450, 451, 453, 456, 457, 458, 459, 464, 465, 466, 467, 468, 470, 471, 473, + 474, 476, 477, 479, 480, 481, 482, 485, 486, 488, 490, 491, 492, 493, 496, 498, 501, 503, + 506, 507, 508, 509, 510, 511, 512, 513, 515, 516, 517, 519, 520, 521, 522, 524, 526, 527, + 528, 531, 532, 536, 537, 538, 540, 541, 542, 543, 544, 545, 549, 550, 551, 552, 553, 554, + 556, 557, 560, 561, 563, 566, 568, 571, 572, 575, 576, 577, 579, 580, 581, 582, 584, 585, + 588, 590, 594, 595, 597, 600, 602, 603, 604, 606, 611, 613, 614, 616, 618, 620, 622, 623, + 624, 625, 626, 628, 629, 630, 631, 636, 637, 638, 640, 641, 642, 644, 645, 646, 650, 651, + 652, 653, 654, 655, 657, 660, 661, 662, 663, 666, 667, 668, 670, 675, 676, 679, 681, 683, + 684, 685, 687, 696, 698, 701, 702, 703, 704, 706, 710, 722, 723, 724, 725, 727, 728, 729, + 730, 732, 733, 734, 735, 736, 737 + ] + history_minutes: 180 + forecast_minutes: 0 + time_resolution_minutes: 5 + # A random value from the list below will be chosen as the delay when dropout is used. + # If set to null no dropout is applied. All PV systems are dropped together with this setting. + dropout_timedeltas_minutes: null + dropout_fraction: 0 # Fraction of samples with dropout + # A random value from the list below will be chosen as the delay when system dropout is used. + # If set to null no dropout is applied. All PV systems are indpendently with this setting. + system_dropout_timedeltas_minutes: null + # For ech sample a differnt dropout probability is used which is uniformly sampled from the min + # and max below + system_dropout_fraction_min: 0 + system_dropout_fraction_max: 0 + + nwp: + ukv: + nwp_provider: ukv + nwp_zarr_path: + # Path(s) to UKV NWP data in zarr format + # e.g. gs://solar-pv-nowcasting-data/NWP/UK_Met_Office/UKV_intermediate_version_7.zarr + - PLACEHOLDER.zarr + history_minutes: 120 + forecast_minutes: 480 + time_resolution_minutes: 60 + nwp_channels: + # These variables exist in the CEDA training set and in the live MetOffice live service + - t # 2-metre temperature + - dswrf # downwards short-wave radiation flux + - dlwrf # downwards long-wave radiation flux + - hcc # high cloud cover + - mcc # medium cloud cover + - lcc # low cloud cover + - sde # snow depth water equivalent + - r # relative humidty + - vis # visibility + - si10 # 10-metre wind speed + - wdir10 # 10-metre wind direction + - prate # precipitation rate + # These variables exist in CEDA training data but not in the live MetOffice live service + - hcct # height of convective cloud top, meters above surface. NaN if no clouds + - cdcb # height of lowest cloud base > 3 oktas + - dpt # dew point temperature + - prmsl # mean sea level pressure + - h # geometrical? (maybe geopotential?) height + nwp_image_size_pixels_height: 24 + nwp_image_size_pixels_width: 24 + # A random value from the list below will be chosen as the delay when dropout is used + # If set to null no dropout is applied. Values must be negative. + dropout_timedeltas_minutes: [-180] + # Dropout applied with this probability + dropout_fraction: 1.0 + # How long after the NWP init-time are we still willing to use this forecast + # If null we use each init-time for all steps it covers + max_staleness_minutes: null + + ecmwf: + nwp_provider: ecmwf + # Path to ECMWF NWP data in zarr format + # n.b. It is not necessary to use multiple or any NWP data. These entries can be removed + nwp_zarr_path: PLACEHOLDER.zarr + history_minutes: 120 + forecast_minutes: 480 + time_resolution_minutes: 60 + nwp_channels: + - t2m # 2-metre temperature + - dswrf # downwards short-wave radiation flux + - dlwrf # downwards long-wave radiation flux + - hcc # high cloud cover + - mcc # medium cloud cover + - lcc # low cloud cover + - tcc # total cloud cover + - sde # snow depth water equivalent + - sr # direct solar radiation + - duvrs # downwards UV radiation at surface + - prate # precipitation rate + - u10 # 10-metre U component of wind speed + - u100 # 100-metre U component of wind speed + - u200 # 200-metre U component of wind speed + - v10 # 10-metre V component of wind speed + - v100 # 100-metre V component of wind speed + - v200 # 200-metre V component of wind speed + nwp_image_size_pixels_height: 12 # roughly equivalent to UKV 24 pixels + nwp_image_size_pixels_width: 12 + dropout_timedeltas_minutes: [-180] + dropout_fraction: 1.0 + max_staleness_minutes: null + + satellite: + satellite_zarr_path: + # Path(s) to non-HRV satellite data in zarr format + # e.g. gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr + - PLACEHOLDER.zarr + history_minutes: 90 + forecast_minutes: 0 # Deprecated for most use cases + live_delay_minutes: 60 # Only data up to time t0-60minutes is inluced in slice + time_resolution_minutes: 5 + satellite_channels: + # Uses for each channel taken from https://resources.eumetrain.org/data/3/311/bsc_s4.pdf + - IR_016 # Surface, cloud phase + - IR_039 # Surface, clouds, wind fields + - IR_087 # Surface, clouds, atmospheric instability + - IR_097 # Ozone + - IR_108 # Surface, clouds, wind fields, atmospheric instability + - IR_120 # Surface, clouds, atmospheric instability + - IR_134 # Cirrus cloud height, atmospheric instability + - VIS006 # Surface, clouds, wind fields + - VIS008 # Surface, clouds, wind fields + - WV_062 # Water vapor, high level clouds, upper air analysis + - WV_073 # Water vapor, atmospheric instability, upper-level dynamics + satellite_image_size_pixels_height: 24 + satellite_image_size_pixels_width: 24 + # A random value from the list below will be chosen as the delay when dropout is used + # If set to null no dropout is applied. Values must be negative. + dropout_timedeltas_minutes: null + dropout_fraction: 0 # Fraction of samples with dropout diff --git a/configs.example/datamodule/configuration/gcp_configuration.yaml b/configs.example/datamodule/configuration/gcp_configuration.yaml deleted file mode 100644 index f297df10..00000000 --- a/configs.example/datamodule/configuration/gcp_configuration.yaml +++ /dev/null @@ -1,443 +0,0 @@ -general: - description: Example data config for creating PVNet batches - name: example_pvnet - -input_data: - default_history_minutes: 120 - default_forecast_minutes: 480 - - gsp: - # Path to the GSP data. This should be a zarr file - # e.g. gs://solar-pv-nowcasting-data/PV/GSP/v7/pv_gsp.zarr - gsp_zarr_path: PLACEHOLDER.zarr - history_minutes: 120 - forecast_minutes: 480 - time_resolution_minutes: 30 - - pv: - pv_files_groups: - - label: solar_sheffield_passiv - # Path to the site-level PV data. This should be a netcdf - # e.g gs://solar-pv-nowcasting-data/PV/Passive/ocf_formatted/v0/passiv.netcdf - pv_filename: PLACEHOLDER.netcdf - # Path to the site-level PV metadata. This choudl be a csv - # e.g gs://solar-pv-nowcasting-data/PV/Passive/ocf_formatted/v0/system_metadata.csv - pv_metadata_filename: PLACEHOLDER.csv - # This is the list of pv_ml_ids to be sliced from the PV site level data - pv_ml_ids: - [ - 154, - 155, - 156, - 158, - 159, - 160, - 162, - 164, - 165, - 166, - 167, - 168, - 169, - 171, - 173, - 177, - 178, - 179, - 181, - 182, - 185, - 186, - 187, - 188, - 189, - 190, - 191, - 192, - 193, - 197, - 198, - 199, - 200, - 202, - 204, - 205, - 206, - 208, - 209, - 211, - 214, - 215, - 216, - 217, - 218, - 219, - 220, - 221, - 225, - 229, - 230, - 232, - 233, - 234, - 236, - 242, - 243, - 245, - 252, - 254, - 255, - 256, - 257, - 258, - 260, - 261, - 262, - 265, - 267, - 268, - 272, - 273, - 275, - 276, - 277, - 280, - 281, - 282, - 283, - 287, - 289, - 291, - 292, - 293, - 294, - 295, - 296, - 297, - 298, - 301, - 302, - 303, - 304, - 306, - 307, - 309, - 310, - 311, - 317, - 318, - 319, - 320, - 321, - 322, - 323, - 325, - 326, - 329, - 332, - 333, - 335, - 336, - 338, - 340, - 342, - 344, - 345, - 346, - 348, - 349, - 352, - 354, - 355, - 356, - 357, - 360, - 362, - 363, - 368, - 369, - 370, - 371, - 372, - 374, - 375, - 376, - 378, - 380, - 382, - 384, - 385, - 388, - 390, - 391, - 393, - 396, - 397, - 398, - 399, - 400, - 401, - 403, - 404, - 405, - 406, - 407, - 409, - 411, - 412, - 413, - 414, - 415, - 416, - 417, - 418, - 419, - 420, - 421, - 422, - 423, - 424, - 425, - 426, - 427, - 429, - 431, - 435, - 437, - 438, - 440, - 441, - 444, - 447, - 450, - 451, - 453, - 456, - 457, - 458, - 459, - 464, - 465, - 466, - 467, - 468, - 470, - 471, - 473, - 474, - 476, - 477, - 479, - 480, - 481, - 482, - 485, - 486, - 488, - 490, - 491, - 492, - 493, - 496, - 498, - 501, - 503, - 506, - 507, - 508, - 509, - 510, - 511, - 512, - 513, - 515, - 516, - 517, - 519, - 520, - 521, - 522, - 524, - 526, - 527, - 528, - 531, - 532, - 536, - 537, - 538, - 540, - 541, - 542, - 543, - 544, - 545, - 549, - 550, - 551, - 552, - 553, - 554, - 556, - 557, - 560, - 561, - 563, - 566, - 568, - 571, - 572, - 575, - 576, - 577, - 579, - 580, - 581, - 582, - 584, - 585, - 588, - 590, - 594, - 595, - 597, - 600, - 602, - 603, - 604, - 606, - 611, - 613, - 614, - 616, - 618, - 620, - 622, - 623, - 624, - 625, - 626, - 628, - 629, - 630, - 631, - 636, - 637, - 638, - 640, - 641, - 642, - 644, - 645, - 646, - 650, - 651, - 652, - 653, - 654, - 655, - 657, - 660, - 661, - 662, - 663, - 666, - 667, - 668, - 670, - 675, - 676, - 679, - 681, - 683, - 684, - 685, - 687, - 696, - 698, - 701, - 702, - 703, - 704, - 706, - 710, - 722, - 723, - 724, - 725, - 727, - 728, - 729, - 730, - 732, - 733, - 734, - 735, - 736, - 737, - ] - history_minutes: 180 - forecast_minutes: 0 - time_resolution_minutes: 5 - - nwp: - ukv: - nwp_zarr_path: - # Path(s) to UKV NWP data in zarr format - # e.g. gs://solar-pv-nowcasting-data/NWP/UK_Met_Office/UKV_intermediate_version_7.zarr - - PLACEHOLDER.zarr - history_minutes: 120 - forecast_minutes: 480 - time_resolution_minutes: 60 - nwp_channels: - - t # 2-metre temperature - - dswrf # downwards short-wave radiation flux - - dlwrf # downwards long-wave radiation flux - - hcc # high cloud cover - - mcc # medium cloud cover - - lcc # low cloud cover - nwp_image_size_pixels_height: 24 - nwp_image_size_pixels_width: 24 - nwp_provider: ukv - - ecmwf: - # Path to ECMWF NWP data in zarr format - # n.b. It is not necessary to use multiple or any NWP data. These entries can be removed - nwp_zarr_path: PLACEHOLDER.zarr - history_minutes: 120 - forecast_minutes: 480 - time_resolution_minutes: 60 - nwp_channels: - - t2m # 2-metre temperature - - dswrf # downwards short-wave radiation flux - - dlwrf # downwards long-wave radiation flux - - hcc # high cloud cover - - mcc # medium cloud cover - - lcc # low cloud cover - nwp_image_size_pixels_height: 12 # roughtly equivalent to ukv 24 pixels - nwp_image_size_pixels_width: 12 - nwp_provider: ecmwf - - satellite: - satellite_zarr_path: - # Path(s) to non-HRV satellite data in zarr format - # e.g. gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr - - PLACEHOLDER.zarr - history_minutes: 90 - forecast_minutes: 0 - live_delay_minutes: 30 - time_resolution_minutes: 5 - satellite_channels: - - IR_016 - - IR_039 - - IR_087 - - IR_097 - - IR_108 - - IR_120 - - IR_134 - - VIS006 - - VIS008 - - WV_062 - - WV_073 - satellite_image_size_pixels_height: 24 - satellite_image_size_pixels_width: 24 diff --git a/configs.example/datamodule/configuration/template_configuration.yaml b/configs.example/datamodule/configuration/template_configuration.yaml deleted file mode 100644 index d2354155..00000000 --- a/configs.example/datamodule/configuration/template_configuration.yaml +++ /dev/null @@ -1,79 +0,0 @@ -general: - description: Template including all possible settings - name: template - -input_data: - default_forecast_minutes: 120 - default_history_minutes: 60 - - gsp: - gsp_zarr_path: gs://solar-pv-nowcasting-data/PV/GSP/v7/pv_gsp.zarr - history_minutes: 60 - forecast_minutes: 120 - time_resolution_minutes: 30 - start_datetime: "2020-01-01T00:00:00" - end_datetime: "2021-09-01T00:00:00" - - nwp: - ukv: - nwp_zarr_path: gs://solar-pv-nowcasting-data/NWP/UK_Met_Office/UKV_intermediate_version_7.zarr - history_minutes: 60 - forecast_minutes: 120 - time_resolution_minutes: 60 - nwp_channels: # comment out channels as appropriate - - t # 2-metre temperature - - dswrf # downwards short-wave radiation flux - - dlwrf # downwards long-wave radiation flux - - hcc # high cloud cover - - mcc # medium cloud cover - - lcc # low cloud cover - - vis # visability - - r # relative humidity - - prate # precipitation rate - - si10 # 10-metre wind speed | live = unknown - nwp_image_size_pixels_height: 24 - nwp_image_size_pixels_width: 24 - nwp_provider: ukv - start_datetime: "2020-01-01T00:00:00" - end_datetime: "2021-09-01T00:00:00" - - pv: - pv_files_groups: - - label: solar_sheffield_passiv - pv_filename: gs://solar-pv-nowcasting-data/PV/Passive/ocf_formatted/v0/passiv.netcdf - pv_metadata_filename: gs://solar-pv-nowcasting-data/PV/Passive/ocf_formatted/v0/system_metadata_OCF_ONLY.csv - # This is the list of pv_ml_ids to be sliced from the PV site level data - pv_ml_ids: [156, 158, 159, 160, 162] - history_minutes: 60 - forecast_minutes: 120 - time_resolution_minutes: 5 - start_datetime: "2020-01-01T00:00:00" - end_datetime: "2021-09-01T00:00:00" - pv_image_size_meters_height: 24 - pv_image_size_meters_width: 24 - n_pv_systems_per_example: 128 - get_center: false - is_live: false - - satellite: - satellite_zarr_path: - - "gs://public-datasets-eumetsat-solar-forecasting/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr" - - "gs://public-datasets-eumetsat-solar-forecasting/satellite/EUMETSAT/SEVIRI_RSS/v4/2021_nonhrv.zarr" - history_minutes: 60 - forecast_minutes: 0 - live_delay_minutes: 30 - time_resolution_minutes: 5 - satellite_channels: - - IR_016 - - IR_039 - - IR_087 - - IR_097 - - IR_108 - - IR_120 - - IR_134 - - VIS006 - - VIS008 - - WV_062 - - WV_073 - satellite_image_size_pixels_height: 24 - satellite_image_size_pixels_width: 24 diff --git a/pyproject.toml b/pyproject.toml index 30bc919a..92600c34 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,4 +25,4 @@ testpaths = [ [tool.ruff] line-length = 100 -exclude = [".ipynb_checkpoints", "tests"] +exclude = [".ipynb_checkpoints", "tests", "configs.example"]