forked from yqhu/profiler-workshop
-
Notifications
You must be signed in to change notification settings - Fork 0
/
hf_clas_prof_eager.py
60 lines (50 loc) · 2.15 KB
/
hf_clas_prof_eager.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
"""Benchmark and profile a HuggingFace sequence-classification model in eager
mode, comparing ``torch.no_grad()`` against ``torch.inference_mode()``.

Emits per-iteration latency for both contexts, then writes TensorBoard
profiler traces for one forward pass under each.
"""
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
import torch
import time

checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
raw_inputs = ["I've been waiting for a HuggingFace course my whole life.",]
inputs = tokenizer(raw_inputs, padding=True, truncation=True, return_tensors="pt")
# torchscript=True makes the model return plain tuples instead of ModelOutput
# objects (traceable form); only the forward pass is exercised here.
model = AutoModelForSequenceClassification.from_pretrained(checkpoint, torchscript=True)

# Fall back to CPU when CUDA is unavailable instead of crashing at .to().
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
inputs = inputs.to(device)

N_ITERS = 100  # iterations per warm-up loop and per timed loop


def _benchmark(ctx_factory, label):
    """Warm up, then report mean forward-pass latency under a grad-disabling context.

    ctx_factory: zero-arg callable returning a fresh context manager
        (e.g. ``torch.no_grad`` or ``torch.inference_mode``).
    label: prefix printed with the per-iteration latency in ms.
    """
    # warm up (caches, cudnn autotune, allocator) outside the timed region
    with ctx_factory():
        for _ in range(N_ITERS):
            model(**inputs)
    # CUDA kernels launch asynchronously; synchronize so the timer brackets
    # actual device work rather than just kernel launches.
    if device.type == 'cuda':
        torch.cuda.synchronize()
    start = time.perf_counter()
    with ctx_factory():
        for _ in range(N_ITERS):
            model(**inputs)
    if device.type == 'cuda':
        torch.cuda.synchronize()
    print(f'{label}:, {1000 * (time.perf_counter() - start) / N_ITERS:.1f} ms')


def _profile(ctx_factory, trace_dir):
    """Record one forward pass with the PyTorch profiler.

    Writes a TensorBoard trace (with memory, stacks, and shapes) to trace_dir.
    """
    with torch.profiler.profile(
            activities=[torch.profiler.ProfilerActivity.CPU,
                        torch.profiler.ProfilerActivity.CUDA],
            on_trace_ready=torch.profiler.tensorboard_trace_handler(trace_dir),
            profile_memory=True,
            with_stack=True,
            record_shapes=True):
        with ctx_factory():
            model(**inputs)


_benchmark(torch.no_grad, 'no_grad()')
_benchmark(torch.inference_mode, 'inference_mode()')
_profile(torch.no_grad, 'hf-eager-no-grad')
_profile(torch.inference_mode, 'hf-eager-inference-mode')