-
Notifications
You must be signed in to change notification settings - Fork 29
/
smon
66 lines (56 loc) · 3.05 KB
/
smon
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/usr/bin/env python3
# -*- mode: python -*-
'''
$ smon
-------pending------- -------running------- ------completing-----
mem(GB) #cpus #jobs mem(GB) #cpus #jobs mem(GB) #cpus #jobs
3 48 2 [user's full name] 158 1944 81 [username]
[user's full name] 1820 364 91 [username]
23028 3624 3624 [user's full name] 1440 180 180 [username]
[user's full name] 1367 140 140 [username]
[user's full name] 488 100 100 [username]
[user's full name] 61 18 18 [username]
CPUS ON CLUSTER:
allocated: 2746
idle: 4326
other: 512
total: 7584
'''
from subprocess import check_output
from collections import Counter
from math import floor, ceil
# squeue compact state codes, in the order their column groups are printed.
states = 'PD R CG'.split()

# Factor converting a memory value carrying the given suffix into gigabytes.
MEM_UNIT_TO_GB = {'K': 1 / 1024 ** 2, 'M': 1 / 1024, 'G': 1.0, 'T': 1024.0}

# Metrics tallied per user and per state, in printed column order.
METRICS = ['mem', 'cpus', 'jobs']

# One table row: pending triple, full name, running triple, completing triple, label.
ROW_FORMAT = '{:6}{:7}{:7} {:13} {:7}{:7}{:7} {:7}{:7}{:7} {}'


def parse_mem_gb(mem):
    """Convert a squeue MIN_MEMORY field to gigabytes.

    Accepts a number with an optional K/M/G/T suffix (case-insensitive).
    A bare number such as '0' is taken as megabytes, the unit the squeue
    man page gives for this field.

    Raises:
        ValueError: if the numeric part cannot be parsed (including '').
    """
    text = mem.strip()
    unit = text[-1:].upper()
    if unit in MEM_UNIT_TO_GB:
        return float(text[:-1]) * MEM_UNIT_TO_GB[unit]
    return float(text) / 1024


def parse_passwd(text):
    """Map login name -> fifth passwd field from `getent passwd` output.

    Lines with fewer than five ':'-separated fields (blank or malformed)
    are skipped instead of aborting the whole report.
    """
    users = {}
    for entry in text.splitlines():
        fields = entry.split(':')
        if len(fields) > 4:
            users[fields[0]] = fields[4]
    return users


def tally_jobs(lines):
    """Aggregate fixed-width squeue rows into per-user, per-state totals.

    Each row is laid out as username:15, state:3, cpus:5, memory:10, which
    matches the --Format string passed to squeue in main().

    Returns:
        {username: {'jobs': Counter, 'cpus': Counter, 'mem': Counter}},
        each Counter keyed by state code; 'mem' is in gigabytes.

    Raises:
        Exception: 'BAD LINE: <repr>' chained to the underlying error when
            a row cannot be parsed or carries a state not in `states`.
    """
    ppl = {}
    for line in lines:
        try:
            username = line[0:15].strip()
            state = line[15:18].strip()
            ncpus = int(line[18:23])
            mem = parse_mem_gb(line[23:33])
            if state not in states:
                raise ValueError('unexpected job state: ' + repr(state))
            per_user = ppl.setdefault(username, {})
            per_user.setdefault('jobs', Counter())[state] += 1
            per_user.setdefault('cpus', Counter())[state] += ncpus
            per_user.setdefault('mem', Counter())[state] += mem
        except Exception as exc:
            raise Exception('BAD LINE: ' + repr(line)) from exc
    return ppl


def format_user_row(username, counters, users):
    """Render one table row for `username`.

    Zero totals are shown as blanks and memory is truncated to whole GB.
    A user missing from `users` gets an empty name column and a bare
    username label rather than raising KeyError.
    """
    def cell(metric, state):
        value = counters[metric][state]
        if metric == 'mem':
            value = int(value)
        return value or ''

    full_name = users.get(username)
    if full_name is None:
        short_name, label = '', username
    else:
        short_name, label = f'{full_name}'[:13], f'{username} ({full_name})'
    return ROW_FORMAT.format(
        *[cell(metric, state) for state in states[:1] for metric in METRICS],
        short_name,
        *[cell(metric, state) for state in states[1:] for metric in METRICS],
        label,
    )


def parse_cpu_summary(lines):
    """Parse `sinfo --format=%C` output into (label, count) pairs.

    Raises:
        ValueError: if the header line is not 'CPUS(A/I/O/T)' or the data
            line is missing.
    """
    if len(lines) < 2 or lines[0] != 'CPUS(A/I/O/T)':
        raise ValueError('unexpected sinfo output: ' + repr(lines[:2]))
    counts = [int(x) for x in lines[1].split('/')]
    return list(zip('allocated idle other total'.split(), counts))


def run(cmd):
    """Run `cmd` (an argv list) and return its stdout as text.

    Decoded as UTF-8 with replacement so that a non-ASCII full name in the
    passwd database cannot abort the report.
    """
    return check_output(cmd).decode('utf-8', errors='replace')


def main():
    """Print the per-user job table followed by the cluster CPU summary."""
    users = parse_passwd(run(['getent', 'passwd']))

    o = run('squeue --array --Format=username:15,statecompact:3,numcpus:5,minmemory:10'.split()).rstrip('\n').split('\n')
    expected_header = '{:15}{:3}{:5}{:10}'.format('USER', 'ST', 'CPUS', 'MIN_MEMORY')
    if o[0] != expected_header:
        raise ValueError('unexpected squeue header: ' + repr(o[0]))
    ppl = tally_jobs(o[1:])

    print('')
    print('-------pending-------'+' '*18+'-------running------- ------completing-----')
    print('mem(GB) #cpus #jobs'+' '*18+'mem(GB) #cpus #jobs mem(GB) #cpus #jobs')
    # Users with the most running CPUs come first.
    for username in sorted(ppl, key=lambda u: -ppl[u]['cpus']['R']):
        print(format_user_row(username, ppl[username], users))
    print('\n')

    summary = parse_cpu_summary(run('sinfo --format=%C'.split()).split('\n'))
    print('CPUS ON CLUSTER:')
    for label, ncpus in summary:
        print('{:>10}:{:5}'.format(label, ncpus))


if __name__ == '__main__':
    main()