-
Notifications
You must be signed in to change notification settings - Fork 7
/
generic_helpers.py
390 lines (317 loc) · 11.7 KB
/
generic_helpers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
import os
import argparse
from collections.abc import Iterable
import numpy as np
import vector
def negative_part(array):
array = np.array(array)
return np.minimum(array, 0)
def positive_part(array):
array = np.array(array)
return np.maximum(array, 0)
def check_all_zero(array):
array = np.array(array)
return not np.any(array)
def check_one_nonzero(array):
array = np.array(array)
return np.any(array)
def check_nan_inf(array):
array = np.array(array)
return np.any( ~np.isfinite(array) )
def nan_to_value(array, value=0):
array = np.array(array)
array[~np.isfinite(array)] = value
return array
def argparse_filepath_type(path):
if not os.path.exists(path):
raise argparse.ArgumentTypeError(f"The path {path} is invalid!")
else:
return path
def get_spatial(four_vector):
"""Takes in a 4 vector and returns its spatial component. Identical to TLorentzVector.Vect()
Parameters
----------
four_vector : vector.array
A 4 vector from the vector class
Returns
-------
vector.array
A 3 vector from the vector class
"""
return vector.array(
{
"x":four_vector.x,
"y":four_vector.y,
"z":four_vector.z
}
)
def MELA_simpleParticle_toVector(simpleParticle):
id = simpleParticle.id
vec = simpleParticle.PxPyPzE_vector
return id, vector.obj(px=vec[0], py=vec[1], pz=vec[2], E=vec[3])
def print_msg_box(msg, indent=1, width=0, title=""):
"""returns message-box with optional title.
Ripped from https://stackoverflow.com/questions/39969064/how-to-print-a-message-box-in-python
Parameters
----------
msg : str
The message to use
indent : int, optional
indent size, by default 1
width : int, optional
box width, by default 0
title : str, optional
box title, by default ""
"""
lines = msg.split('\n')
space = " " * indent
if not width:
width = max(map(len, lines))
box = f'╔{"═" * (width + indent * 2)}╗\n' # upper_border
if title:
box += f'║{space}{title:<{width}}{space}║\n' # title
box += f'║{space}{"-" * len(title):<{width}}{space}║\n' # underscore
box += ''.join([f'║{space}{line:<{width}}{space}║\n' for line in lines])
box += f'╚{"═" * (width + indent * 2)}╝' # lower_border
return box
def recurse_through_folder(folder_path, extension="", verbose=False):
"""_summary_
Parameters
----------
folder_path : _type_
_description_
extension : str, optional
_description_, by default ""
verbose : bool, optional
_description_, by default False
Returns
-------
_type_
_description_
Raises
------
FileNotFoundError
_description_
"""
folder_path = folder_path.strip()
if not os.path.exists(folder_path):
msg = print_msg_box(folder_path + "\nDoes Not Exist!", title="ERROR")
raise FileNotFoundError("\n"+msg)
if folder_path[-1] != '/':
folder_path += '/'
if extension != "" and extension[0] == ".":
extension = extension[1:]
paths_in_folder = os.listdir(folder_path)
folders_wanted = []
for path in paths_in_folder:
path = folder_path + path
if not os.path.isdir(path):
if (path.split('/')[-1].split(".")[-1] == extension) or (extension == ""):
folders_wanted.append(path)
elif verbose:
print("Looking for", extension.split('/')[-1].split(".")[-1])
print("ignoring", path, "due to incorrect file extension")
else:
folders_wanted += recurse_through_folder(path, extension, verbose)
return folders_wanted
def make_legend_label_hist(counts, bins, name, extra_space=True):
x = (bins[1:]+bins[:-1])/2
if x.shape != counts.shape:
errortext = "Shapes do not match!"
errortext += "\nx.shape != counts.shape"
errortext += "\n"+str(x.shape)+"!="+str(counts.shape)
raise ValueError("\n" + print_msg_box(errortext, title="ERROR"))
avg = np.average(x, weights=counts)
labelstr = name+"\n"
labelstr += "Area={:.3f}\n".format(np.sum(counts))
labelstr += r'$\mu=$' + "{:.2f}\n".format(avg)
labelstr += r'$\sigma=$' + "{:.2f}".format(np.sqrt(np.average( (x - avg)**2, weights=counts )))
if extra_space:
labelstr += "\n"
return labelstr
def make_legend_label(data, name, extra_space=True):
avg = np.average(data)
labelstr = name+"\n"
labelstr += "N={:.0f}\n".format(len(data))
labelstr += r'$\mu=$' + "{:.2f}\n".format(avg)
labelstr += r'$\sigma=$' + "{:.2f}".format(np.std(data))
if extra_space:
labelstr += "\n"
return labelstr
def scale(scaleto, counts, bins=None, return_scale_factor=False):
"""This function scales histograms according to their absolute area under the curve
Parameters
----------
scaleto : float
The absolute area to scale to
counts : list[Union[int,float]]
A list of bin counts
bins : list[float]
The bins you want to use; use this option if you are passing a numpy histogram in (i.e. scale(1, *<numpy histogram>)), by default None
return_scale_factor : bool
Whether the scale factor being used will be returned
Returns
-------
list[float] OR Tuple(list[float], list[float]) OR Tuple(list[float], float) OR Tuple(list[float], list[float], float)
The scaled histogram bin counts or the scaled histogram, depending on whether you passed the bins in as well
"""
counts = np.array(counts)
counts = counts.astype(float)
signs = np.sign(counts) #makes sure to preserve sign
counts = np.abs(counts)
if (scaleto == 0) or (np.sum(counts) == 0):
return np.zeros(counts.shape)
new_counts = signs*counts*scaleto/np.sum(counts)
new_counts[~np.isfinite(new_counts)] = 0
if bins is not None:
if return_scale_factor:
return new_counts, bins, scaleto/np.sum(counts)
return new_counts, bins
elif return_scale_factor:
return new_counts, scaleto/np.sum(counts)
return new_counts
def unroll_ND_histogram(N_dimension_counts, isbkg=False):
unrolled_arr = N_dimension_counts.ravel()
if isbkg:
hist_integral = unrolled_arr.sum()
fill_val = hist_integral*0.1/len(unrolled_arr)
unrolled_arr[unrolled_arr <= 0] = fill_val
pos_arr = np.where(unrolled_arr > 0, unrolled_arr, 0)
neg_arr = -1*np.where(unrolled_arr < 0, unrolled_arr, 0)
bins = np.arange(len(unrolled_arr) + 1)
return (pos_arr, neg_arr), bins
def binary_search_on_array(
array,
upper_index,
lower_index,
target,
search_function=None):
"""This is a simple function that conducts a binary search on an array
You can give it a custom search function if you'd like
Parameters
----------
array : numpy.ndarray
This is the array you are searching on. It should be sorted in descending order
upper_index : int
The upper index you are starting from
lower_index : int
The lower index you are starting from
target : any
This is what you are looking for
NOTE: Some binary searches don't utilize targets (if you are looking for c constants for discriminants, for instance)
search_function : function, optional
This is a custom search function. If not provided, use the default, by default None
Returns
-------
int
The index that is found
Raises
------
ValueError
The search function must return -1, 0, or 1
"""
index = (upper_index + lower_index)//2
if search_function == None:
def search(index, array, target): #generate a default search function that works for arrays
val = array[index]
if val == target:
return 0
elif val < target:
return 1
elif val > target:
return -1
search_function = search
was_it_found = search_function(index, array, target)
if upper_index == lower_index or was_it_found== 0 or upper_index == lower_index + 1:
return index
if was_it_found == -1: #This should mean that the target is somewhere below the current index
upper_index = index
elif was_it_found == 1: #This should mean that the target is somewhere above the current index
lower_index = index
else:
errortext = print_msg_box("Search function should return 0, 1, or -1!", title="ERROR")
raise ValueError("\n"+errortext)
return binary_search_on_array(
array,
upper_index,
lower_index,
target,
search_function)
def find_discr_c_constant(
w1_1,
w1_2,
w2_1,
w2_2,
guess=None,
statistics_limit=0,
debug=False):
"""This function looks for a calibrated c constant for a given discriminant
Parameters
----------
w1_1 : numpy.ndarray
The probabilities for hypothesis 1 from a sample generated with hypothesis 1
w1_2 : numpy.ndarray
The probabilities for hypothesis 2 from a sample generated with hypothesis 1
w2_1 : numpy.ndarray
The probabilities for hypothesis 1 from a sample generated with hypothesis 2
w2_2 : numpy.ndarray
The probabilities for hypothesis 2 from a sample generated with hypothesis 2
guess : float, optional
This is a guess for an initial c constant to make sure there is some crossing point.
If None, one will be made for you, by default None
statistics_limit : int, optional
This is a limiter for the minimum number of events you want in a calculation, by default 0
"""
def search_function(index, array, target):
fraction1 = np.sum(array[0][:index])/np.sum(array[0])
fraction2 = np.sum(array[1][index:])/np.sum(array[1])
fraction1 = np.round(fraction1, 3)
fraction2 = np.round(fraction2, 3)
if debug:
print(fraction1, fraction2)
if fraction1 == fraction2:
if debug:
print("found!")
print()
return 0
elif fraction1 < fraction2:
if debug:
print("moving up!")
print()
return 1
elif fraction1 > fraction2:
if debug:
print("moving down!")
print()
return -1
else:
print(fraction1, array[0])
print(fraction2, array[1])
raise ValueError("What the hell is going on!?")
w1_1 = np.array(w1_1, float)
w1_2 = np.array(w1_2, float)
w2_1 = np.array(w2_1, float)
w2_2 = np.array(w2_2, float)
if (not np.any(w1_1)) or (not np.any(w2_1)): #If all the elements are 0 then the fit has failed
return "FAILED"
if not guess:
guess = np.mean(w1_1)/np.mean(w1_2)
_, bins = np.histogram([], bins=100000, range=[0,1])
values_1, _ = np.histogram(1/(1 + guess*(w1_2/w1_1)), bins=bins)
values_2, _ = np.histogram(1/(1 + guess*(w2_2/w2_1)), bins=bins)
if (np.sum(values_1) <= statistics_limit) or (np.sum(values_2) <= statistics_limit):
return "FAILED"
values_1 = scale(1, values_1)
values_2 = scale(1, values_2)
searchable = np.vstack((values_1, values_2))
crossing = binary_search_on_array(
searchable,
len(bins) - 1,
0,
0.5,
search_function
)
centers = (bins[1:] + bins[:-1])/2
R = centers[crossing]
c_constant = guess/( (1/R) - 1)
return c_constant