From c9fed4b417231249566ecb6c3f40363eabe8423c Mon Sep 17 00:00:00 2001 From: Sukhil Patel Date: Mon, 1 Jul 2024 15:29:51 +0100 Subject: [PATCH 1/4] Update datamodule config parameters --- scripts/save_concurrent_batches.py | 36 ++++++++++++++++-------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/scripts/save_concurrent_batches.py b/scripts/save_concurrent_batches.py index a0252258..55bee9ee 100644 --- a/scripts/save_concurrent_batches.py +++ b/scripts/save_concurrent_batches.py @@ -2,14 +2,16 @@ Constructs batches where each batch includes all GSPs and only a single timestamp. Currently a slightly hacky implementation due to the way the configs are done. This script will use -the same config file currently set to train the model. +the same config file currently set to train the model. In the datamodule config file it is possible +to set the batch_output_dir and number of train/val batches, they can also be overriden in the command as +shown in the example below. 
use: ``` python save_concurrent_batches.py \ - +batch_output_dir="/mnt/disks/nwp_rechunk/concurrent_batches_v3.9" \ - +num_train_batches=20_000 \ - +num_val_batches=4_000 + datamodule.batch_output_dir="/mnt/disks/nwp_rechunk/concurrent_batches_v3.9" \ + datamodule.num_train_batches=20_000 \ + datamodule.num_val_batches=4_000 ``` """ @@ -157,12 +159,12 @@ def main(config: DictConfig): config_dm = config.datamodule # Set up directory - os.makedirs(config.batch_output_dir, exist_ok=False) + os.makedirs(config_dm.batch_output_dir, exist_ok=False) - with open(f"{config.batch_output_dir}/datamodule.yaml", "w") as f: + with open(f"{config_dm.batch_output_dir}/datamodule.yaml", "w") as f: f.write(OmegaConf.to_yaml(config.datamodule)) - shutil.copyfile(config_dm.configuration, f"{config.batch_output_dir}/data_configuration.yaml") + shutil.copyfile(config_dm.configuration, f"{config_dm.batch_output_dir}/data_configuration.yaml") dataloader_kwargs = dict( shuffle=False, @@ -179,39 +181,39 @@ def main(config: DictConfig): persistent_workers=False, ) - if config.num_val_batches > 0: + if config_dm.num_val_batches > 0: print("----- Saving val batches -----") - os.mkdir(f"{config.batch_output_dir}/val") + os.mkdir(f"{config_dm.batch_output_dir}/val") val_batch_pipe = _get_datapipe( config_dm.configuration, *config_dm.val_period, - config.num_val_batches, + config_dm.num_val_batches, ) _save_batches_with_dataloader( batch_pipe=val_batch_pipe, - batch_dir=f"{config.batch_output_dir}/val", - num_batches=config.num_val_batches, + batch_dir=f"{config_dm.batch_output_dir}/val", + num_batches=config_dm.num_val_batches, dataloader_kwargs=dataloader_kwargs, ) - if config.num_train_batches > 0: + if config_dm.num_train_batches > 0: print("----- Saving train batches -----") - os.mkdir(f"{config.batch_output_dir}/train") + os.mkdir(f"{config_dm.batch_output_dir}/train") train_batch_pipe = _get_datapipe( config_dm.configuration, *config_dm.train_period, - config.num_train_batches, + 
config_dm.num_train_batches, ) _save_batches_with_dataloader( batch_pipe=train_batch_pipe, - batch_dir=f"{config.batch_output_dir}/train", - num_batches=config.num_train_batches, + batch_dir=f"{config_dm.batch_output_dir}/train", + num_batches=config_dm.num_train_batches, dataloader_kwargs=dataloader_kwargs, ) From 37c07702f69362a3e12bb9d8be2ebb9f86eee507 Mon Sep 17 00:00:00 2001 From: Sukhil Patel Date: Mon, 1 Jul 2024 15:30:43 +0100 Subject: [PATCH 2/4] Update install command for ocf_datapipes --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c8b65cc7..128a1a9e 100644 --- a/README.md +++ b/README.md @@ -101,7 +101,7 @@ conda create -n ocf_datapipes python=3.10 Then go inside the ocf_datapipes repo to add packages ```bash -pip install ".[dev]" +pip install -r requirements.txt -r requirements-dev.txt ``` Then exit this environment, and enter back into the pvnet conda environment and install ocf_datapies in editable mode (-e). This means the package is directly linked to the source code in the ocf_datapies repo. From 2b59d1ca4c17cffb1d896e445a3e50e6c4b0073a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 1 Jul 2024 14:37:41 +0000 Subject: [PATCH 3/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- scripts/save_concurrent_batches.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/save_concurrent_batches.py b/scripts/save_concurrent_batches.py index 55bee9ee..7ee2e568 100644 --- a/scripts/save_concurrent_batches.py +++ b/scripts/save_concurrent_batches.py @@ -2,9 +2,9 @@ Constructs batches where each batch includes all GSPs and only a single timestamp. Currently a slightly hacky implementation due to the way the configs are done. This script will use -the same config file currently set to train the model. 
In the datamodule config file it is possible
-to set the batch_output_dir and number of train/val batches, they can also be overriden in the command as
-shown in the example below.
+the same config file currently set to train the model. In the datamodule config file it is possible
+to set the batch_output_dir and number of train/val batches, they can also be overriden in the command as
+shown in the example below.
 
 use:
 ```
@@ -164,7 +164,9 @@ def main(config: DictConfig):
 
     with open(f"{config_dm.batch_output_dir}/datamodule.yaml", "w") as f:
         f.write(OmegaConf.to_yaml(config.datamodule))
 
-    shutil.copyfile(config_dm.configuration, f"{config_dm.batch_output_dir}/data_configuration.yaml")
+    shutil.copyfile(
+        config_dm.configuration, f"{config_dm.batch_output_dir}/data_configuration.yaml"
+    )
 
     dataloader_kwargs = dict(
         shuffle=False,

From 6df425c9cd7e903f3148404d948fd24604c4f8c5 Mon Sep 17 00:00:00 2001
From: Sukhil Patel
Date: Mon, 1 Jul 2024 15:40:41 +0100
Subject: [PATCH 4/4] linting

---
 scripts/save_concurrent_batches.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/save_concurrent_batches.py b/scripts/save_concurrent_batches.py
index 55bee9ee..820596e9 100644
--- a/scripts/save_concurrent_batches.py
+++ b/scripts/save_concurrent_batches.py
@@ -3,8 +3,8 @@
 Currently a slightly hacky implementation due to the way the configs are done. This script will use
 the same config file currently set to train the model. In the datamodule config file it is possible
-to set the batch_output_dir and number of train/val batches, they can also be overriden in the command as
-shown in the example below.
+to set the batch_output_dir and number of train/val batches, they can also be overridden in the
+command as shown in the example below.
 
 use:
 ```