torcheck.py
"""Quick sanity check of a PyTorch installation: runs a small CPU and CUDA
tensor test and prints environment and GPU information."""
import traceback
import sys

import torch
import psutil


def print_header(title):
    """Print a section title centered in a roughly 80-column line of '='."""
    width = (80 - len(title) - 2) // 2
    print("=" * width, title, "=" * width)


def get_size(sz_bytes, suffix="B"):
    """
    Scale bytes to a human-readable format, e.g.:
        1253656    => '1.20MB'
        1253656678 => '1.17GB'
    """
    factor = 1024
    for unit in ["", "K", "M", "G", "T", "P"]:
        if sz_bytes < factor:
            return f"{sz_bytes:.2f}{unit}{suffix}"
        sz_bytes /= factor
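

# Added illustration (not part of the original script): a small, optional
# self-check mirroring the examples in get_size's docstring. It is never
# called automatically; invoke _check_get_size() manually if desired.
def _check_get_size():
    assert get_size(1253656) == "1.20MB"
    assert get_size(1253656678) == "1.17GB"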


def print_device_info(device):
    """
    Prints information about the properties and state of the given device.

    :param device: [torch.device or int] device for which to print information.
    """
    print(f"id: {device} - name: {torch.cuda.get_device_name(device)}")
    print(f" properties: {torch.cuda.get_device_properties(device)}")
    # Extra debug info, if desired.
    # print(f" processes: {torch.cuda.list_gpu_processes(device)}")
    # print(f" memory summary:\n{torch.cuda.memory_summary(device)}")


def show_gpu_info():
    has_cuda = torch.cuda.is_available()
    if has_cuda:
        curr_device = torch.cuda.current_device()
    else:
        curr_device = "N/A"
    print(f"Has cuda? {has_cuda}; Current device: {curr_device}")

    print_header("GPU Devices")
    if torch.cuda.device_count() > 0:
        for device in range(torch.cuda.device_count()):
            print_device_info(device)
    else:
        print("(No devices)")


if __name__ == "__main__":
    print("Flags: ", sys.argv[1:])

    # Basic CPU tensor test.
    try:
        a = torch.ones(10)
        a.normal_(0, 1)
    except Exception:
        print("ERROR: TORCH test failed:")
        traceback.print_exc(file=sys.stdout)
    else:
        print("OK: TORCH test passed!")

    # Basic CUDA tensor test.
    try:
        b = torch.ones(10, device="cuda")
        b.normal_(0, 1)
    except Exception:
        print("ERROR: CUDA test failed:")
        traceback.print_exc(file=sys.stdout)
    else:
        print("OK: CUDA test passed!")

    print_header("INFO")
    # Number of cores available to this process; cpu_affinity() is not
    # supported on every platform, hence the try/except.
    p = psutil.Process()
    try:
        print("Cores:", len(p.cpu_affinity()))
    except Exception:
        pass
    # Memory report seems wrong.
    # print("Memory:", {key: get_size(val) for key, val in p.memory_info()._asdict().items()})
    print("Torch version:", torch.__version__)
    print("CUDA version:", torch.version.cuda)
    # Extra debug info, if desired.
    # print("CUDA compiled for:", torch.cuda.get_arch_list())

    show_gpu_info()
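
# Usage sketch (added note, not part of the original script):
#
#   python torcheck.py
#   python torcheck.py --whatever     # extra args are only echoed by "Flags:"
#
# The output reports whether the CPU and CUDA tensor tests passed, the Torch
# and CUDA versions, the number of cores available to the process, and the
# name/properties of each visible GPU.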