From b1d2772166a16fc8b1df736712995d59555f4728 Mon Sep 17 00:00:00 2001 From: Gregor von Laszewski Date: Wed, 4 Oct 2023 14:39:14 -0400 Subject: [PATCH] add missing file --- .../cloudmask/target/greene_v0.5/a.yaml | 121 +++++++++++++++++ .../target/greene_v0.5/config_simple.1.yaml | 121 +++++++++++++++++ .../greene_v0.5/config_simple_rivanna.yaml | 122 ++++++++++++++++++ .../target/greene_v0.5/experiement-greene.sh | 45 +++++++ .../target/greene_v0.5/slstr_uva_nyu_cloud.py | 2 +- 5 files changed, 410 insertions(+), 1 deletion(-) create mode 100644 benchmarks/cloudmask/target/greene_v0.5/a.yaml create mode 100644 benchmarks/cloudmask/target/greene_v0.5/config_simple.1.yaml create mode 100644 benchmarks/cloudmask/target/greene_v0.5/config_simple_rivanna.yaml create mode 100755 benchmarks/cloudmask/target/greene_v0.5/experiement-greene.sh diff --git a/benchmarks/cloudmask/target/greene_v0.5/a.yaml b/benchmarks/cloudmask/target/greene_v0.5/a.yaml new file mode 100644 index 00000000..e970126e --- /dev/null +++ b/benchmarks/cloudmask/target/greene_v0.5/a.yaml @@ -0,0 +1,121 @@ +# config.yaml + +# SciML-Bench +# Copyright © 2022 Scientific Machine Learning Research Group +# Scientific Computing Department, Rutherford Appleton Laboratory +# Science and Technology Facilities Council, UK. +# with modifications from Gregor von Laszewski, Juri Papay +# All rights reserved. + +# This is a configuration file for the CloudMask benchmark. 
+ +name: cloudmask-greene + + +sbatch: + mode: h + dir: localscratch + +run: + venvpath: "/scratch/{os.USER}/ENV3" + datadir: "/scratch/{os.USER}/data" + branch: 'main' + mode: "parallel" + # mode: "original" + # mode: "cloudmesh" + +# Submission Information +submission: + benchmark: cloudmask + submitter: Gregor von Laszewski + email: laszewski@gmail.com + org: University of Virginia + division: closed + version: mlcommons-cloudmask-v1.1 + github_commit_version: TBD + status: completed + platform: rivanna + accelerators_per_node: 1 + +experiment: +# card_name: v100 + card_name: v100 + gpu_count: 1 + cpu_num: 1 + mem: "64GB" + repeat: "1" + epoch: 2 + #epoch: 2 + seed: 1234 + learning_rate: 0.001 + batch_size: 32 + train_split: 0.8 + clip_offset: 15 + no_cache: False + nodes: 1 + gpu: 1 + early_stoppage_patience: "25" + early_stoppage: "False" + +identifier: "{experiment.card_name}-{experiment.early_stoppage}-{experiment.early_stoppage_patience}-{experiment.epoch}-{experiment.learning_rate}-{experiment.repeat}" + +#system: +# host: "rivanna" +# python: "3.10.8" +# num_cpus: 1 +# partition: "bii-gpu" +# # allocation: ds6011-sp22-002 +# # allocation: bii_dsc +# allocation: bii_dsc_community +# reservation: bi_fox_dgx +# constraint: "" + +system: + host: "greene" + python: "3.10.8" + num_cpus: 1 + platform: greene + +mask: "float" + +# Training data +train_dir: "/scratch/{os.USER}/data/one-day" + +# Inference data +inference_dir: "/scratch/{os.USER}/data/ssts" + +# Model file +model_file: "{os.TARGET}/outputs/slstr_cloud/cloudModel-{identifier}.h5" + +# training +training_loss: binary_crossentropy +training_metrics: accuracy + + +# Output directory +output_dir: "{os.TARGET}/outputs/slstr_cloud" + +# Log file for recording runtimes +log_file: ./cloudmask_final_1.log + +# Log file for MLCommons logging +mlperf_logfile: ./mlperf_cloudmask_final_1.log + +# Size of each patch to feed to the network +PATCH_SIZE: 256 + +# Original height of the image +IMAGE_H: 1200 + +# 
Original width of the image +IMAGE_W: 1500 + +# No. of channels +N_CHANNELS: 9 + +# Min allowable SST +MIN_SST: 273.15 + +# Amount to crop the edges of the images by +CROP_SIZE: 80 + diff --git a/benchmarks/cloudmask/target/greene_v0.5/config_simple.1.yaml b/benchmarks/cloudmask/target/greene_v0.5/config_simple.1.yaml new file mode 100644 index 00000000..5c5b3174 --- /dev/null +++ b/benchmarks/cloudmask/target/greene_v0.5/config_simple.1.yaml @@ -0,0 +1,121 @@ +# config.yaml + +# SciML-Bench +# Copyright © 2022 Scientific Machine Learning Research Group +# Scientific Computing Department, Rutherford Appleton Laboratory +# Science and Technology Facilities Council, UK. +# with modifications from Gregor von Laszewski, Juri Papay +# All rights reserved. + +# This is a configuration file for the CloudMask benchmark. + +name: cloudmask-greene + +hyperparameter: + early_stoppage: False + +sbatch: + mode: h + dir: localscratch + +run: + venvpath: "/scratch/{os.USER}/ENV3" + datadir: "/scratch/{os.USER}/data" + branch: 'main' + mode: "parallel" + # mode: "original" + # mode: "cloudmesh" + +# Submission Information +submission: + benchmark: cloudmask + submitter: Gregor von Laszewski + email: laszewski@gmail.com + org: University of Virginia + division: closed + version: mlcommons-cloudmask-v1.1 + github_commit_version: TBD + status: completed + platform: rivanna + accelerators_per_node: 1 + +experiment: +# card_name: a100 + card_name: v100 + gpu_count: 1 + cpu_num: 1 + mem: "64GB" + repeat: "1" + epoch: 200 + seed: 1234 + learning_rate: 0.001 + batch_size: 32 + train_split: 0.8 + clip_offset: 15 + no_cache: False + nodes: 1 + gpu: 1 + early_stoppage_patience: 25 + + + +#system: +# host: "rivanna" +# python: "3.10.8" +# num_cpus: 1 +# partition: "bii-gpu" +# # allocation: ds6011-sp22-002 +# # allocation: bii_dsc +# allocation: bii_dsc_community +# reservation: bi_fox_dgx +# constraint: "" + +system: + host: "greene" + python: "3.10.8" + num_cpus: 1 + platform: greene + 
+mask: "float" + +# Training data +train_dir: "/scratch/{os.USER}/data/one-day" + +# Inference data +inference_dir: "/scratch/{os.USER}/data/ssts" + +# Model file +model_file: "/scratch/{os.USER}/github/mlcommons/benchmarks/cloudmask/target/greene_v0.5/outputs/slstr_cloud/cloudModel.h5" + +# training +training_loss: binary_crossentropy +training_metrics: accuracy + + +# Output directory +output_dir: "/scratch/{os.USER}/github/mlcommons/benchmarks/cloudmask/target/greene_v0.5/outputs/slstr_cloud" + +# Log file for recording runtimes +log_file: ./cloudmask_final_1.log + +# Log file for MLCommons logging +mlperf_logfile: ./mlperf_cloudmask_final_1.log + +# Size of each patch to feed to the network +PATCH_SIZE: 256 + +# Original height of the image +IMAGE_H: 1200 + +# Original width of the image +IMAGE_W: 1500 + +# No. of channels +N_CHANNELS: 9 + +# Min allowable SST +MIN_SST: 273.15 + +# Amount to crop the edges of the images by +CROP_SIZE: 80 + diff --git a/benchmarks/cloudmask/target/greene_v0.5/config_simple_rivanna.yaml b/benchmarks/cloudmask/target/greene_v0.5/config_simple_rivanna.yaml new file mode 100644 index 00000000..16e3c20a --- /dev/null +++ b/benchmarks/cloudmask/target/greene_v0.5/config_simple_rivanna.yaml @@ -0,0 +1,122 @@ +# config.yaml + +# SciML-Bench +# Copyright © 2022 Scientific Machine Learning Research Group +# Scientific Computing Department, Rutherford Appleton Laboratory +# Science and Technology Facilities Council, UK. +# with modifications from Gregor von Laszewski, Juri Papay +# All rights reserved. + +# This is a configuration file for the CloudMask benchmark. 
+ +name: cloudmask-greene + + +sbatch: + mode: h + dir: localscratch + +run: + venvpath: "/scratch/{os.USER}/ENV3" + datadir: "/scratch/{os.USER}/data" + branch: 'main' + mode: "parallel" + # mode: "original" + # mode: "cloudmesh" + +# Submission Information +submission: + benchmark: cloudmask + submitter: Gregor von Laszewski + email: laszewski@gmail.com + org: University of Virginia + division: closed + version: mlcommons-cloudmask-v1.1 + github_commit_version: TBD + status: completed + platform: rivanna + accelerators_per_node: 1 + +experiment: +# card_name: v100 + card_name: v100 + gpu_count: 1 + cpu_num: 1 + mem: "64GB" + repeat: "1" + epoch: 2 + #epoch: 2 + seed: 1234 + learning_rate: 0.001 + batch_size: 32 + train_split: 0.8 + clip_offset: 15 + no_cache: False + nodes: 1 + gpu: 1 + early_stoppage_patience: "25" + early_stoppage: "False" + +identifier: "{experiment.card_name}-{experiment.early_stoppage}-{experiment.early_stoppage_patience}-{experiment.epoch}-{experiment.learning_rate}-{experiment.repeat}" + +#system: +# host: "rivanna" +# python: "3.10.8" +# num_cpus: 1 +# partition: "bii-gpu" +# # allocation: ds6011-sp22-002 +# # allocation: bii_dsc +# allocation: bii_dsc_community +# reservation: bi_fox_dgx +# constraint: "" + +system: + host: "greene" + python: "3.10.8" + num_cpus: 1 + platform: greene + +mask: "float" + +# Training data +train_dir: "/scratch/{os.USER}/data/one-day" + +# Inference data +inference_dir: "/scratch/{os.USER}/data/ssts" + + +# training +training_loss: binary_crossentropy +training_metrics: accuracy + + +# Output directory +output_dir: "{os.TARGET}/outputs/slstr_cloud" + +# Model file +model_file: "{output_dir}/cloudModel-{identifier}.h5" + +# Log file for recording runtimes +log_file: "{output_dir}/cloudmask_final_{identifier}.log" + +# Log file for MLCommons logging +mlperf_logfile: "{output_dir}/mlperf_cloudmask_final_{identifier}.log" + +# Size of each patch to feed to the network +PATCH_SIZE: 256 + +# Original height of the 
image +IMAGE_H: 1200 + +# Original width of the image +IMAGE_W: 1500 + +# No. of channels +N_CHANNELS: 9 + +# Min allowable SST +MIN_SST: 273.15 + +# Amount to crop the edges of the images by +CROP_SIZE: 80 + diff --git a/benchmarks/cloudmask/target/greene_v0.5/experiement-greene.sh b/benchmarks/cloudmask/target/greene_v0.5/experiement-greene.sh new file mode 100755 index 00000000..98b289bf --- /dev/null +++ b/benchmarks/cloudmask/target/greene_v0.5/experiement-greene.sh @@ -0,0 +1,45 @@ +sbatch experiement_simple.slurm_1_epochs_1_5.slurm +sbatch experiement_simple.slurm_5_epochs_1_5.slurm +sbatch experiement_simple.slurm_10_epochs_1_5.slurm +sbatch experiement_simple.slurm_20_epochs_1_5.slurm +sbatch experiement_simple.slurm_30_epochs_1_5.slurm +sbatch experiement_simple.slurm_50_epochs_1_5.slurm +sbatch experiement_simple.slurm_80_epochs_1_5.slurm +sbatch experiement_simple.slurm_100_epochs_1_5.slurm +sbatch experiement_simple.slurm_200_epochs_1_5.slurm +sbatch experiement_simple.slurm_1_epochs_2_5.slurm +sbatch experiement_simple.slurm_5_epochs_2_5.slurm +sbatch experiement_simple.slurm_10_epochs_2_5.slurm +sbatch experiement_simple.slurm_20_epochs_2_5.slurm +sbatch experiement_simple.slurm_30_epochs_2_5.slurm +sbatch experiement_simple.slurm_50_epochs_2_5.slurm +sbatch experiement_simple.slurm_80_epochs_2_5.slurm +sbatch experiement_simple.slurm_100_epochs_2_5.slurm +sbatch experiement_simple.slurm_200_epochs_2_5.slurm +sbatch experiement_simple.slurm_1_epochs_3_5.slurm +sbatch experiement_simple.slurm_5_epochs_3_5.slurm +sbatch experiement_simple.slurm_10_epochs_3_5.slurm +sbatch experiement_simple.slurm_20_epochs_3_5.slurm +sbatch experiement_simple.slurm_30_epochs_3_5.slurm +sbatch experiement_simple.slurm_50_epochs_3_5.slurm +sbatch experiement_simple.slurm_80_epochs_3_5.slurm +sbatch experiement_simple.slurm_100_epochs_3_5.slurm +sbatch experiement_simple.slurm_200_epochs_3_5.slurm +sbatch experiement_simple.slurm_1_epochs_4_5.slurm +sbatch 
experiement_simple.slurm_5_epochs_4_5.slurm +sbatch experiement_simple.slurm_10_epochs_4_5.slurm +sbatch experiement_simple.slurm_20_epochs_4_5.slurm +sbatch experiement_simple.slurm_30_epochs_4_5.slurm +sbatch experiement_simple.slurm_50_epochs_4_5.slurm +sbatch experiement_simple.slurm_80_epochs_4_5.slurm +sbatch experiement_simple.slurm_100_epochs_4_5.slurm +sbatch experiement_simple.slurm_200_epochs_4_5.slurm +sbatch experiement_simple.slurm_1_epochs_5_5.slurm +sbatch experiement_simple.slurm_5_epochs_5_5.slurm +sbatch experiement_simple.slurm_10_epochs_5_5.slurm +sbatch experiement_simple.slurm_20_epochs_5_5.slurm +sbatch experiement_simple.slurm_30_epochs_5_5.slurm +sbatch experiement_simple.slurm_50_epochs_5_5.slurm +sbatch experiement_simple.slurm_80_epochs_5_5.slurm +sbatch experiement_simple.slurm_100_epochs_5_5.slurm +sbatch experiement_simple.slurm_200_epochs_5_5.slurm diff --git a/benchmarks/cloudmask/target/greene_v0.5/slstr_uva_nyu_cloud.py b/benchmarks/cloudmask/target/greene_v0.5/slstr_uva_nyu_cloud.py index 3168cc09..3209f85c 100755 --- a/benchmarks/cloudmask/target/greene_v0.5/slstr_uva_nyu_cloud.py +++ b/benchmarks/cloudmask/target/greene_v0.5/slstr_uva_nyu_cloud.py @@ -360,7 +360,7 @@ def main(): print("Config file:", configYamlFile) config = FlatDict() - config.load(content=configYamlFile) + config.loadf(filename=configYamlFile) print (config)