-
Notifications
You must be signed in to change notification settings - Fork 2
/
main.py
217 lines (200 loc) · 7.87 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
from decimal import Decimal
from json import encoder
from time import sleep
import psutil
import simplejson as json
# Process attributes requested from psutil for every sampled process.
attrs = [
    'pid',
    'ppid',
    'name',
    'cpu_times',
    'num_threads',
    'threads',
    'cmdline',
    'create_time',
    'exe',
]
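# EXAMPLE of the raw per-process record collected for the attrs above (shown as JSON).
# The lines printed by main() carry delta/percentage data in "cpu_times" and "threads" instead.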
# {
# "pid": 816,
# "threads": [
# {
# "id": 816,
# "user_time": 0.02,
# "system_time": 0.01
# },
# {
# "id": 823,
# "user_time": 2.54,
# "system_time": 3.04
# },
# {
# "id": 864,
# "user_time": 0.02,
# "system_time": 0.0
# }
# ],
# "cpu_times": {
# "user": 2.59,
# "system": 3.05,
# "children_user": 0.0,
# "children_system": 0.0,
# "iowait": 0.0
# },
# "cmdline": [
# "/usr/lib/accountsservice/accounts-daemon"
# ],
# "num_threads": 3,
# "ppid": 1,
# "exe": "/usr/lib/accountsservice/accounts-daemon",
# "name": "accounts-daemon",
# "create_time": 1581216030.43
# }
def floatfmt(f, precision=3):
    """Create a normalized Decimal with the given precision from a number.

    Useful for serializing floating point numbers with simplejson with a given precision.

    The value is converted with ``Decimal(f)``, quantized to ``10 ** -precision`` and
    normalized so that trailing zeros are dropped (``1.500`` -> ``1.5``). Whole numbers
    are kept in plain integer notation (``100``) rather than the exponent form
    (``1E+2``) that ``normalize()`` alone would produce.

    Args:
        f: The number to format (any value accepted by ``Decimal()``).
        precision: Number of digits to keep after the decimal point.

    Returns:
        A ``Decimal`` holding ``f`` rounded to ``precision`` decimal places.
    """
    q = Decimal(10) ** -precision
    d = Decimal(f).quantize(q).normalize()
    # normalize() strips trailing zeros of whole numbers too, leaving a positive
    # exponent (100 -> 1E+2). Rebuild such values from their integer form so they
    # serialize as plain digits. Non-finite values are passed through untouched.
    if d.is_finite() and d.as_tuple().exponent > 0:
        d = Decimal(int(d))
    return d
def collect_processes(pid=None):
    """Build a mapping of PID -> psutil process object.

    With a truthy ``pid`` the mapping holds that process followed by all of its
    descendants (children are looked up recursively). When ``pid`` is None or 0
    every process yielded by ``psutil.process_iter`` is returned instead.
    """
    if not pid:
        # No target given: take everything psutil can enumerate
        return {proc.pid: proc for proc in psutil.process_iter(attrs=attrs)}

    root = psutil.Process(pid)
    family = [root]
    family.extend(root.children(recursive=True))
    return {proc.pid: proc for proc in family}
def collect_data(process_list, interval=1):
    """Collect two timing measurements for the CPU and for each process.

    Each measurement contains how long the CPU/process has been active. The two
    measurements are taken ``interval`` seconds apart.

    Args:
        process_list: Mapping of PID -> psutil process object
            (the shape returned by ``collect_processes``).
        interval: Seconds to sleep between the first and second measurement.

    Returns:
        A dictionary keyed by PID. Each value is the ``as_dict(attrs=attrs)``
        snapshot taken at the first measurement, extended with:

        * ``cpu_times2`` / ``threads2``: the process timings at the second
          measurement. When the process can no longer be read they are copies
          of the first measurement, which yields zero deltas later on.
        * ``system_cpu_times`` / ``system_cpu_times2``: the system-wide
          ``psutil.cpu_times()`` of the first and second measurement. They are
          consumed (and removed) by ``calculate_percentages``.

        Processes that were already gone at the first measurement are left out.
    """
    # START OF CRITICAL SECTION
    # This begins the critical section until the second set of timing data is collected
    # Too much processing inside the critical section will skew CPU usage percentage results

    # get cpu times
    cpu_time1 = psutil.cpu_times(percpu=False)

    # track data in dictionaries keyed by PID
    data = {}
    for pid, child in process_list.items():
        try:
            data[pid] = child.as_dict(attrs=attrs)
        except psutil.NoSuchProcess:
            # The process exited before it could be sampled. It never gets an
            # entry, so there is nothing to clean up afterwards.
            continue

    # sleep for a bit so the CPU actually does things
    sleep(interval)

    # second timing data is used to calculate deltas/percentages
    cpu_time2 = psutil.cpu_times(percpu=False)
    # Only revisit processes that produced a first sample
    for pid, value in data.items():
        child = process_list[pid]
        try:
            value.update({
                'cpu_times2': child.cpu_times(),
                'threads2': child.threads(),
            })
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            # Just give terminated/unreadable processes duplicate timings.
            # They can be filtered out by sorting later
            value.update({
                'cpu_times2': value['cpu_times'],
                'threads2': value['threads'],
            })
    # END OF CRITICAL SECTION

    # Hand the system-wide samples to calculate_percentages through the records
    # themselves, so the return value stays a plain dictionary keyed by PID.
    for value in data.values():
        value['system_cpu_times'] = cpu_time1
        value['system_cpu_times2'] = cpu_time2
    return data


def _timing_delta(before, after, field):
    """Return ``after.field - before.field``; 0.0 when a sample or the field is missing."""
    if before is None or after is None:
        # psutil reports None for timings it was not allowed to read
        return 0.0
    return getattr(after, field, 0.0) - getattr(before, field, 0.0)


def _percent(part, whole):
    """Return ``part`` as a percentage of ``whole``; 0.0 when ``whole`` is not positive."""
    if whole <= 0:
        return 0.0
    return 100.0 * part / whole


def calculate_percentages(data):
    """Turn the two measurements of every process into deltas and CPU percentages.

    At this point we have the running CPU time from two measurements. Subtracting the
    two gives us the difference in total CPU time between those measurements.
    We also have the running process times for points that should be close to the
    total CPU measurement times. Subtract the two to get how much time that process
    was active in our interval (delta times). Divide that delta by the total CPU time
    that passed to get an approximate CPU usage for the process in the given interval.

    The total CPU time is the sum over all fields of the system-wide
    ``psutil.cpu_times()`` samples stored by ``collect_data``.

    Args:
        data: Dictionary keyed by PID as returned by ``collect_data``. The records
            are modified in place.

    Returns:
        The same dictionary keyed by PID. In every record the raw samples
        (``cpu_times2``, ``threads2``, ``system_cpu_times``, ``system_cpu_times2``)
        are removed, ``cpu_times`` is replaced by a dictionary of deltas and
        percentages, and ``threads`` by a list with one delta dictionary for each
        thread present in both measurements.

    Raises:
        KeyError: If a record lacks the samples stored by ``collect_data``.
    """
    # post processing to calculate cpu percentages
    for item in data.values():
        cpu_delta = sum(item.pop('system_cpu_times2')) - sum(item.pop('system_cpu_times'))

        # calculate cpu times of process and children
        first = item.pop('cpu_times')
        second = item.pop('cpu_times2')
        p_user_delta = _timing_delta(first, second, 'user')
        p_system_delta = _timing_delta(first, second, 'system')
        c_user_delta = _timing_delta(first, second, 'children_user')
        c_system_delta = _timing_delta(first, second, 'children_system')
        # iowait is not reported on every platform; a missing field counts as 0
        io_delta = _timing_delta(first, second, 'iowait')
        cpu_times = {
            "user_delta": floatfmt(p_user_delta),
            "system_delta": floatfmt(p_system_delta),
            "process_delta": floatfmt(p_user_delta + p_system_delta),
            "children_user_delta": floatfmt(c_user_delta),
            "children_system_delta": floatfmt(c_system_delta),
            "children_delta": floatfmt(c_user_delta + c_system_delta),
            "process_cpu_pct": floatfmt(_percent(p_user_delta + p_system_delta, cpu_delta)),
            "children_cpu_pct": floatfmt(_percent(c_user_delta + c_system_delta, cpu_delta)),
            "total_cpu_delta": floatfmt(p_user_delta + p_system_delta + c_user_delta + c_system_delta),
            "total_cpu_pct": floatfmt(
                _percent(p_user_delta + p_system_delta + c_user_delta + c_system_delta, cpu_delta)
            ),
            "io_delta": floatfmt(io_delta),
        }

        # calculate thread cpu percentages
        threads1 = {t.id: t for t in (item.pop('threads') or ())}
        threads2 = {t.id: t for t in (item.pop('threads2') or ())}
        thread_deltas = []
        for tid, thread1 in threads1.items():
            thread2 = threads2.get(tid)
            if thread2 is None:
                # thread is missing from the second measurement: no delta to report
                continue
            user_delta = thread2.user_time - thread1.user_time
            system_delta = thread2.system_time - thread1.system_time
            thread_deltas.append({
                'tid': tid,
                'user_time_delta': floatfmt(user_delta),
                'system_time_delta': floatfmt(system_delta),
                'total_delta': floatfmt(user_delta + system_delta),
                'total_pct': floatfmt(_percent(user_delta + system_delta, cpu_delta)),
            })

        item.update({
            'cpu_times': cpu_times,
            'threads': thread_deltas,
        })
    return data
def main(pid=None, interval=1, lines=None, sort_key='total_cpu_pct'):
    """Sample processes, compute CPU percentages and print one JSON line per process.

    Args:
        pid: PID of the target process; its children are included. None or 0
            selects all processes.
        interval: Time between the two timing measurements in seconds.
        lines: Maximum number of lines to emit; a falsy value emits everything.
        sort_key: Key inside each record's "cpu_times" field to sort by
            (descending).
    """
    samples = collect_data(collect_processes(pid), interval=interval)
    stats = calculate_percentages(samples)

    # Everything after this is specific to how you want the data formatted for output
    ranked = sorted(
        stats.values(),
        key=lambda entry: entry['cpu_times'][sort_key],
        reverse=True,
    )
    if lines:
        ranked = ranked[:lines]

    try:
        for entry in ranked:
            print(json.dumps(entry, sort_keys=True))
    except BrokenPipeError:
        # ignore error caused by piping to head
        pass
def get_default_args(func):
    """Map each parameter of ``func`` that declares a default to that default value.

    Helper for argparse so the command line defaults can be taken from main.
    Parameters without a default are left out of the result.
    """
    import inspect

    no_default = inspect.Parameter.empty
    defaults = {}
    for name, param in inspect.signature(func).parameters.items():
        if param.default is not no_default:
            defaults[name] = param.default
    return defaults
if __name__ == '__main__':
    import argparse

    # Take the command line defaults from main()'s own keyword defaults
    defaults = get_default_args(main)

    cli = argparse.ArgumentParser(
        description='Get CPU percentages for a process and children. Emits a JSON line for each process. `jq` will be helpful for post processing.',
    )
    # The destination names (interval, lines, pid, sort_key) are derived by
    # argparse from the long option names and match main()'s parameters.
    cli.add_argument(
        '--interval',
        type=float,
        default=defaults.get('interval'),
        help='Time between timing measurements in seconds',
    )
    cli.add_argument(
        '-n', '--lines',
        type=int,
        default=defaults.get('lines'),
        help='Number of lines to emit',
    )
    cli.add_argument(
        '-p', '--pid',
        type=int,
        default=defaults.get('pid'),
        help='PID of target process',
    )
    cli.add_argument(
        '--sort-key',
        type=str,
        default=defaults.get('sort_key'),
        help='Sort by this key inside the "cpu_times" field',
    )

    options = cli.parse_args()
    main(**vars(options))