Opt L24 python
jzarnett committed Sep 20, 2023
1 parent 507b919 commit 1c2925e
Showing 3 changed files with 58 additions and 2 deletions.
5 changes: 4 additions & 1 deletion lectures/live-coding/L10/producer-consumer-opt/Cargo.toml
@@ -6,7 +6,10 @@ edition = "2018"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[profile.release]
# Keep debug symbols in optimized release builds so profiling tools can resolve function names.
debug=true

[dependencies]
rand = "0.7.3"
tokio = { version = "1.8.4", features = ["sync"] }
futures = "0.3.6"
4 changes: 3 additions & 1 deletion lectures/live-coding/L13/nbody-parallel/Cargo.toml
@@ -5,7 +5,9 @@ authors = ["Jeff Zarnett <[email protected]>"]
edition = "2018"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[profile.release]
debug = true

[dependencies]
rand = "0.7.3"
rayon = "1.1"
51 changes: 51 additions & 0 deletions lectures/live-coding/L24/dummy_data_opt.py
@@ -0,0 +1,51 @@
import numpy as np
from datasets import Dataset
from pynvml import *
import torch
from transformers import AutoModelForSequenceClassification
from transformers import TrainingArguments, Trainer, logging

# Shared Trainer settings: write to a scratch directory, skip evaluation,
# train for one epoch, and keep logging quiet.
default_args = {
    "output_dir": "tmp",
    "evaluation_strategy": "no",
    "num_train_epochs": 1,
    "log_level": "error",
    "report_to": "none",
}

def print_gpu_utilization():
    # Ask NVML how much memory is currently allocated on GPU 0.
    nvmlInit()
    handle = nvmlDeviceGetHandleByIndex(0)
    info = nvmlDeviceGetMemoryInfo(handle)
    print(f"GPU memory occupied: {info.used//1024**2} MB.")


def print_summary(result):
    # Report wall-clock training time, throughput, and GPU memory use.
    print(f"Time: {result.metrics['train_runtime']:.2f}")
    print(f"Samples/second: {result.metrics['train_samples_per_second']:.2f}")
    print_gpu_utilization()

print("Starting up. Initial GPU utilization:")
print_gpu_utilization()
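# Allocating one tiny tensor forces PyTorch to set up its CUDA context, so the
# next reading shows the fixed CUDA overhead rather than any model weights.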
torch.ones((1, 1)).to("cuda")
print("Initialized Torch; current GPU utilization:")
print_gpu_utilization()

# Load BERT-base for sequence classification and move it to the GPU; this
# reading now also includes the model weights.
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased").to("cuda")
print_gpu_utilization()

logging.set_verbosity_error()

# Build a dummy classification dataset: 512 random token-id sequences of
# length 512. The labels come out all zero (np.random.randint's upper bound is
# exclusive), which is fine when only memory use and throughput are measured.
seq_len, dataset_size = 512, 512
dummy_data = {
    "input_ids": np.random.randint(100, 30000, (dataset_size, seq_len)),
    "labels": np.random.randint(0, 1, (dataset_size)),
}
ds = Dataset.from_dict(dummy_data)
ds.set_format("pt")

# A per-device batch size of 1 keeps activation memory small, and gradient
# checkpointing recomputes activations during the backward pass instead of
# storing them all, trading extra compute for lower peak GPU memory.
training_args = TrainingArguments(per_device_train_batch_size=1, gradient_checkpointing=True, **default_args)
trainer = Trainer(model=model, args=training_args, train_dataset=ds)
result = trainer.train()
print_summary(result)
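For contrast, here is a minimal sketch, not part of this commit, of rerunning the same measurement with gradient checkpointing turned off. It reuses the model, dummy dataset, and helper functions defined above; the baseline_* names are introduced only for this example.

# Hypothetical comparison run (not in the original file): disable gradient
# checkpointing on the same model and train again, so the memory cost of
# storing all activations shows up in the GPU utilization numbers.
model.gradient_checkpointing_disable()
baseline_args = TrainingArguments(per_device_train_batch_size=1, **default_args)
baseline_trainer = Trainer(model=model, args=baseline_args, train_dataset=ds)
baseline_result = baseline_trainer.train()
print_summary(baseline_result)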
