diff --git a/.gitignore b/.gitignore index 7d1f119c..0501c8aa 100644 --- a/.gitignore +++ b/.gitignore @@ -140,3 +140,4 @@ dmypy.json # Pyre type checker .pyre/ +.DS_Store diff --git a/configs.example/datamodule/configuration/example_configuration.yaml b/configs.example/datamodule/configuration/example_configuration.yaml new file mode 100644 index 00000000..a8ecc535 --- /dev/null +++ b/configs.example/datamodule/configuration/example_configuration.yaml @@ -0,0 +1,132 @@ +general: + description: Example config for producing PVNet samples + name: example_config + +input_data: + + # Either use Site OR GSP configuration + site: + # Path to Site data in NetCDF format + file_path: PLACEHOLDER.nc + # Path to metadata in CSV format + metadata_file_path: PLACEHOLDER.csv + time_resolution_minutes: 15 + interval_start_minutes: -60 + # Specified for intraday currently + interval_end_minutes: 480 + dropout_timedeltas_minutes: null + dropout_fraction: 0 # Fraction of samples with dropout + + gsp: + # Path to GSP data in zarr format + # e.g. gs://solar-pv-nowcasting-data/PV/GSP/v7/pv_gsp.zarr + zarr_path: PLACEHOLDER.zarr + interval_start_minutes: -60 + # Specified for intraday currently + interval_end_minutes: 480 + time_resolution_minutes: 30 + # Random value from the list below will be chosen as the delay when dropout is used + # If set to null no dropout is applied. Only values before t0 are dropped out for GSP. + # Values after t0 are assumed as targets and cannot be dropped. + dropout_timedeltas_minutes: null + dropout_fraction: 0 # Fraction of samples with dropout + + nwp: + + ecmwf: + provider: ecmwf + # Path to ECMWF NWP data in zarr format + # n.b. It is not necessary to use multiple or any NWP data. 
These entries can be removed + zarr_path: PLACEHOLDER.zarr + interval_start_minutes: -60 + # Specified for intraday currently + interval_end_minutes: 480 + time_resolution_minutes: 60 + channels: + - t2m # 2-metre temperature + - dswrf # downwards short-wave radiation flux + - dlwrf # downwards long-wave radiation flux + - hcc # high cloud cover + - mcc # medium cloud cover + - lcc # low cloud cover + - tcc # total cloud cover + - sde # snow depth water equivalent + - sr # direct solar radiation + - duvrs # downwards UV radiation at surface + - prate # precipitation rate + - u10 # 10-metre U component of wind speed + - u100 # 100-metre U component of wind speed + - u200 # 200-metre U component of wind speed + - v10 # 10-metre V component of wind speed + - v100 # 100-metre V component of wind speed + - v200 # 200-metre V component of wind speed + # The following channels are accumulated and need to be diffed + accum_channels: + - dswrf # downwards short-wave radiation flux + - dlwrf # downwards long-wave radiation flux + - sr # direct solar radiation + - duvrs # downwards UV radiation at surface + image_size_pixels_height: 24 + image_size_pixels_width: 24 + dropout_timedeltas_minutes: [-360] + dropout_fraction: 1.0 # Fraction of samples with dropout + max_staleness_minutes: null + + ukv: + provider: ukv + # Path to UKV NWP data in zarr format + # e.g. gs://solar-pv-nowcasting-data/NWP/UK_Met_Office/UKV_intermediate_version_7.zarr + # n.b. It is not necessary to use multiple or any NWP data. 
These entries can be removed + zarr_path: PLACEHOLDER.zarr + interval_start_minutes: -60 + # Specified for intraday currently + interval_end_minutes: 480 + time_resolution_minutes: 60 + channels: + - t # 2-metre temperature + - dswrf # downwards short-wave radiation flux + - dlwrf # downwards long-wave radiation flux + - hcc # high cloud cover + - mcc # medium cloud cover + - lcc # low cloud cover + - sde # snow depth water equivalent + - r # relative humidity + - vis # visibility + - si10 # 10-metre wind speed + - wdir10 # 10-metre wind direction + - prate # precipitation rate + # These variables exist in CEDA training data but not in the live MetOffice service + - hcct # height of convective cloud top, meters above surface. NaN if no clouds + - cdcb # height of lowest cloud base > 3 oktas + - dpt # dew point temperature + - prmsl # mean sea level pressure + - h # geometrical? (maybe geopotential?) height + image_size_pixels_height: 24 + image_size_pixels_width: 24 + dropout_timedeltas_minutes: [-360] + dropout_fraction: 1.0 # Fraction of samples with dropout + max_staleness_minutes: null + + satellite: + # Path to Satellite data (non-HRV) in zarr format + # e.g. 
gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr + zarr_path: PLACEHOLDER.zarr + interval_start_minutes: -30 + interval_end_minutes: 0 + time_resolution_minutes: 5 + channels: + - IR_016 # Surface, cloud phase + - IR_039 # Surface, clouds, wind fields + - IR_087 # Surface, clouds, atmospheric instability + - IR_097 # Ozone + - IR_108 # Surface, clouds, wind fields, atmospheric instability + - IR_120 # Surface, clouds, atmospheric instability + - IR_134 # Cirrus cloud height, atmospheric instability + - VIS006 # Surface, clouds, wind fields + - VIS008 # Surface, clouds, wind fields + - WV_062 # Water vapor, high level clouds, upper air analysis + - WV_073 # Water vapor, atmospheric instability, upper-level dynamics + image_size_pixels_height: 24 + image_size_pixels_width: 24 + dropout_timedeltas_minutes: null + dropout_fraction: 0 # Fraction of samples with dropout diff --git a/configs.example/datamodule/configuration/site_example_configuration.yaml b/configs.example/datamodule/configuration/site_example_configuration.yaml deleted file mode 100644 index a8cfadeb..00000000 --- a/configs.example/datamodule/configuration/site_example_configuration.yaml +++ /dev/null @@ -1,70 +0,0 @@ -general: - description: Example config for producing PVNet samples for a reneweble generation site - name: site_example_config - -input_data: - - site: - time_resolution_minutes: 15 - interval_start_minutes: -60 - interval_end_minutes: 480 - file_path: PLACEHOLDER.nc - metadata_file_path: PLACEHOLDER.csv - dropout_timedeltas_minutes: null - dropout_fraction: 0 # Fraction of samples with dropout - - nwp: - ecmwf: - provider: ecmwf - # Path to ECMWF NWP data in zarr format - # n.b. It is not necessary to use multiple or any NWP data. 
These entries can be removed - zarr_path: PLACEHOLDER - interval_start_minutes: -60 - interval_end_minutes: 480 - time_resolution_minutes: 60 - channels: - - t2m # 2-metre temperature - - dswrf # downwards short-wave radiation flux - - dlwrf # downwards long-wave radiation flux - - hcc # high cloud cover - - mcc # medium cloud cover - - lcc # low cloud cover - - tcc # total cloud cover - - sde # snow depth water equivalent - - sr # direct solar radiation - - duvrs # downwards UV radiation at surface - - prate # precipitation rate - - u10 # 10-metre U component of wind speed - - u100 # 100-metre U component of wind speed - - u200 # 200-metre U component of wind speed - - v10 # 10-metre V component of wind speed - - v100 # 100-metre V component of wind speed - - v200 # 200-metre V component of wind speed - image_size_pixels_height: 24 - image_size_pixels_width: 24 - dropout_timedeltas_minutes: [-360] - dropout_fraction: 1.0 - max_staleness_minutes: null - - satellite: - zarr_path: PLACEHOLDER.zarr - interval_start_minutes: -30 - interval_end_minutes: 0 - time_resolution_minutes: 5 - channels: - # Uses for each channel taken from https://resources.eumetrain.org/data/3/311/bsc_s4.pdf - - IR_016 # Surface, cloud phase - - IR_039 # Surface, clouds, wind fields - - IR_087 # Surface, clouds, atmospheric instability - - IR_097 # Ozone - - IR_108 # Surface, clouds, wind fields, atmospheric instability - - IR_120 # Surface, clouds, atmospheric instability - - IR_134 # Cirrus cloud height, atmospheric instability - - VIS006 # Surface, clouds, wind fields - - VIS008 # Surface, clouds, wind fields - - WV_062 # Water vapor, high level clouds, upper air analysis - - WV_073 # Water vapor, atmospheric instability, upper-level dynamics - image_size_pixels_height: 24 - image_size_pixels_width: 24 - dropout_timedeltas_minutes: null - dropout_fraction: 0. 
diff --git a/configs.example/datamodule/premade_batches.yaml b/configs.example/datamodule/premade_batches.yaml index 350e7573..2326edc0 100644 --- a/configs.example/datamodule/premade_batches.yaml +++ b/configs.example/datamodule/premade_batches.yaml @@ -1,7 +1,9 @@ _target_: pvnet.data.datamodule.DataModule configuration: null -# The batch_dir is the location batches were saved to using the save_batches.py script -# The batch_dir should contain train and val subdirectories with batches + +# The sample_dir is the location batches were saved to using the save_batches.py script +# The sample_dir should contain train and val subdirectories with batches + sample_dir: "PLACEHOLDER" num_workers: 10 prefetch_factor: 2 diff --git a/configs.example/datamodule/streamed_batches.yaml b/configs.example/datamodule/streamed_batches.yaml index 8c2ef3cb..e8573ad3 100644 --- a/configs.example/datamodule/streamed_batches.yaml +++ b/configs.example/datamodule/streamed_batches.yaml @@ -2,6 +2,7 @@ _target_: pvnet.data.datamodule.DataModule # Path to the data configuration yaml file. You can find examples in the configuration subdirectory # in configs.example/datamodule/configuration # Use the full local path such as: /FULL/PATH/PVNet/configs/datamodule/configuration/gcp_configuration.yaml" + configuration: "PLACEHOLDER.yaml" num_workers: 20 prefetch_factor: 2 @@ -16,3 +17,4 @@ train_period: val_period: - "2022-05-08" - "2023-05-08" +