From 1c2925e25682d5474ae51bd8941c0f6bf3ece6bd Mon Sep 17 00:00:00 2001
From: Jeff Zarnett
Date: Wed, 20 Sep 2023 13:48:00 -0400
Subject: [PATCH] Opt L24 python

---
 .../L10/producer-consumer-opt/Cargo.toml      |  5 +-
 .../live-coding/L13/nbody-parallel/Cargo.toml |  4 +-
 lectures/live-coding/L24/dummy_data_opt.py    | 51 +++++++++++++++++++
 3 files changed, 58 insertions(+), 2 deletions(-)
 create mode 100644 lectures/live-coding/L24/dummy_data_opt.py

diff --git a/lectures/live-coding/L10/producer-consumer-opt/Cargo.toml b/lectures/live-coding/L10/producer-consumer-opt/Cargo.toml
index 578ae718..36a2b47a 100644
--- a/lectures/live-coding/L10/producer-consumer-opt/Cargo.toml
+++ b/lectures/live-coding/L10/producer-consumer-opt/Cargo.toml
@@ -6,7 +6,10 @@ edition = "2018"
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
+[profile.release]
+debug=true
+
 [dependencies]
 rand = "0.7.3"
 tokio = { version = "1.8.4", features = ["sync"] }
-futures = "0.3.6"
\ No newline at end of file
+futures = "0.3.6"
diff --git a/lectures/live-coding/L13/nbody-parallel/Cargo.toml b/lectures/live-coding/L13/nbody-parallel/Cargo.toml
index 00875b5c..94402b8f 100644
--- a/lectures/live-coding/L13/nbody-parallel/Cargo.toml
+++ b/lectures/live-coding/L13/nbody-parallel/Cargo.toml
@@ -5,7 +5,9 @@ authors = ["Jeff Zarnett "]
 edition = "2018"
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
+[profile.release]
+debug = true
 [dependencies]
 rand = "0.7.3"
-rayon = "1.1"
\ No newline at end of file
+rayon = "1.1"
diff --git a/lectures/live-coding/L24/dummy_data_opt.py b/lectures/live-coding/L24/dummy_data_opt.py
new file mode 100644
index 00000000..b6a1a10c
--- /dev/null
+++ b/lectures/live-coding/L24/dummy_data_opt.py
@@ -0,0 +1,51 @@
+import numpy as np
+from datasets import Dataset
+from pynvml import *
+import torch
+from transformers import AutoModelForSequenceClassification
+from transformers import TrainingArguments, Trainer, logging
+
+default_args = {
+    "output_dir": "tmp",
+    "evaluation_strategy": "no",
+    "num_train_epochs": 1,
+    "log_level": "error",
+    "report_to": "none",
+}
+
+
+def print_gpu_utilization():
+    nvmlInit()
+    handle = nvmlDeviceGetHandleByIndex(0)
+    info = nvmlDeviceGetMemoryInfo(handle)
+    print(f"GPU memory occupied: {info.used//1024**2} MB.")
+
+
+def print_summary(result):
+    print(f"Time: {result.metrics['train_runtime']:.2f}")
+    print(f"Samples/second: {result.metrics['train_samples_per_second']:.2f}")
+    print_gpu_utilization()
+
+print("Starting up. Initial GPU utilization:")
+print_gpu_utilization()
+torch.ones((1, 1)).to("cuda")
+print("Initialized Torch; current GPU utilization:")
+print_gpu_utilization()
+
+model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased").to("cuda")
+print_gpu_utilization()
+
+logging.set_verbosity_error()
+
+seq_len, dataset_size = 512, 512
+dummy_data = {
+    "input_ids": np.random.randint(100, 30000, (dataset_size, seq_len)),
+    "labels": np.random.randint(0, 1, (dataset_size)),
+}
+ds = Dataset.from_dict(dummy_data)
+ds.set_format("pt")
+
+training_args = TrainingArguments(per_device_train_batch_size=1, gradient_checkpointing=True, **default_args)
+trainer = Trainer(model=model, args=training_args, train_dataset=ds)
+result = trainer.train()
+print_summary(result)