Opt L24 python
jzarnett committed Sep 20, 2023
1 parent 507b919 commit 1c2925e
Showing 3 changed files with 58 additions and 2 deletions.
5 changes: 4 additions & 1 deletion lectures/live-coding/L10/producer-consumer-opt/Cargo.toml
@@ -6,7 +6,10 @@ edition = "2018"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[profile.release]
# Keep debug symbols in optimized release builds so profiling tools can resolve function names.
debug=true

[dependencies]
rand = "0.7.3"
tokio = { version = "1.8.4", features = ["sync"] }
futures = "0.3.6"
4 changes: 3 additions & 1 deletion lectures/live-coding/L13/nbody-parallel/Cargo.toml
@@ -5,7 +5,9 @@ authors = ["Jeff Zarnett <[email protected]>"]
edition = "2018"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[profile.release]
debug = true

[dependencies]
rand = "0.7.3"
rayon = "1.1"
51 changes: 51 additions & 0 deletions lectures/live-coding/L24/dummy_data_opt.py
@@ -0,0 +1,51 @@
import numpy as np
from datasets import Dataset
from pynvml import *
import torch
from transformers import AutoModelForSequenceClassification
from transformers import TrainingArguments, Trainer, logging

# Shared Trainer settings: write to a scratch directory, skip evaluation,
# train for one epoch, and keep logging quiet.
default_args = {
    "output_dir": "tmp",
    "evaluation_strategy": "no",
    "num_train_epochs": 1,
    "log_level": "error",
    "report_to": "none",
}

def print_gpu_utilization():
    # Ask NVML how much memory is currently allocated on GPU 0.
    nvmlInit()
    handle = nvmlDeviceGetHandleByIndex(0)
    info = nvmlDeviceGetMemoryInfo(handle)
    print(f"GPU memory occupied: {info.used//1024**2} MB.")


def print_summary(result):
    # Report wall-clock training time, throughput, and GPU memory use.
    print(f"Time: {result.metrics['train_runtime']:.2f}")
    print(f"Samples/second: {result.metrics['train_samples_per_second']:.2f}")
    print_gpu_utilization()

print("Starting up. Initial GPU utilization:")
print_gpu_utilization()
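# Allocating one tiny tensor forces PyTorch to set up its CUDA context, so the
# next reading shows the fixed CUDA overhead rather than any model weights.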
torch.ones((1, 1)).to("cuda")
print("Initialized Torch; current GPU utilization:")
print_gpu_utilization()

# Load BERT-base for sequence classification and move it to the GPU; this
# reading now also includes the model weights.
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased").to("cuda")
print_gpu_utilization()

logging.set_verbosity_error()

# Build a dummy classification dataset: 512 random token-id sequences of
# length 512. The labels come out all zero (np.random.randint's upper bound is
# exclusive), which is fine when only memory use and throughput are measured.
seq_len, dataset_size = 512, 512
dummy_data = {
    "input_ids": np.random.randint(100, 30000, (dataset_size, seq_len)),
    "labels": np.random.randint(0, 1, (dataset_size)),
}
ds = Dataset.from_dict(dummy_data)
ds.set_format("pt")

# A per-device batch size of 1 keeps activation memory small, and gradient
# checkpointing recomputes activations during the backward pass instead of
# storing them all, trading extra compute for lower peak GPU memory.
training_args = TrainingArguments(per_device_train_batch_size=1, gradient_checkpointing=True, **default_args)
trainer = Trainer(model=model, args=training_args, train_dataset=ds)
result = trainer.train()
print_summary(result)
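For contrast, here is a minimal sketch, not part of this commit, of rerunning the same measurement with gradient checkpointing turned off. It reuses the model, dummy dataset, and helper functions defined above; the baseline_* names are introduced only for this example.

# Hypothetical comparison run (not in the original file): disable gradient
# checkpointing on the same model and train again, so the memory cost of
# storing all activations shows up in the GPU utilization numbers.
model.gradient_checkpointing_disable()
baseline_args = TrainingArguments(per_device_train_batch_size=1, **default_args)
baseline_trainer = Trainer(model=model, args=baseline_args, train_dataset=ds)
baseline_result = baseline_trainer.train()
print_summary(baseline_result)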
