-
Notifications
You must be signed in to change notification settings - Fork 0
/
plotRejection.py
233 lines (200 loc) · 11.6 KB
/
plotRejection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
import warnings
import numpy as np
import argparse
SMALL_SIZE = 18#15
MEDIUM_SIZE = 21#18
BIGGER_SIZE = 24#20
def plotOutlier(ax, fig, runs, values, uncert,
runsRejected, edgeRuns, highlight,
means, ranges, ytitle, showAllRunID,
plotRange, rejectionRange, showPseudoID,
showEdgeRuns, devName='RMS', counts=None):
# edges cover the first and last run
idEdges = [0] + np.searchsorted(runs, edgeRuns).tolist() + [runs.shape[0]]
x = np.arange(values.shape[0])
# rejected data
# convert run numbers to array id
idRejected = np.searchsorted(runs, runsRejected)
# plot ranges
onlyGoodRuns = np.delete(values, idRejected)
onlyGoodMean = onlyGoodRuns.mean()
onlyGoodStd = onlyGoodRuns.std()
upperBound = onlyGoodMean + plotRange*onlyGoodStd
lowerBound = onlyGoodMean - plotRange*onlyGoodStd
ax.fill_between(idEdges, (means-ranges).tolist() + [0], (means+ranges).tolist() + [0], step='post', color='green', alpha=0.5,
label='%g-%s ' % (rejectionRange, devName))
# 10 SD ranges
ax.fill_between(idEdges, (means-2*ranges).tolist() + [0], (means+2*ranges).tolist() + [0], step='post', color='yellow', alpha=0.5,
label='%s-%s' % (2*rejectionRange, devName))
# mean of each segment
ax.step(idEdges, means.tolist() + [0], where='post', linestyle='--')
if showEdgeRuns:
xpad = 0.005*x.shape[0]
ypad = 0.03*(upperBound - lowerBound)
for edge in idEdges[:-1]:
ax.annotate(str(runs[edge]) + ' ', xy=(edge, upperBound - ypad), xytext=(edge + xpad, upperBound - ypad),
bbox=dict(boxstyle='square', alpha=0, pad=0),
rotation=90, horizontalalignment='left', verticalalignment='top',
size=int(0.8*SMALL_SIZE), arrowprops=dict(arrowstyle='->', ec='black', relpos=(1.01, 1)), zorder=100)
# label the last edge from the other side
ax.annotate(str(runs[-1]) + ' ', xy=(idEdges[-1] - 1, upperBound - ypad), xytext=(idEdges[-1] - 1 - xpad, upperBound - ypad),
bbox=dict(boxstyle='square', alpha=0, pad=0),
rotation=90, horizontalalignment='right', verticalalignment='top',
size=int(0.8*SMALL_SIZE), arrowprops=dict(arrowstyle='->', ec='black', relpos=(-0.8, 1)), zorder=100)
# data itself
ax.errorbar(x, values, yerr=uncert, color='blue', fmt='o', zorder=5)
if len(runsRejected) > 0:
ax.errorbar(x[idRejected], values[idRejected], yerr=uncert[idRejected], color='red', markerfacecolor='blue', zorder=6, fmt='o', label='%d badruns' % idRejected.shape[0])
# highlight rejected data due to THIS condition
ax.scatter(x[idRejected][highlight], values[idRejected][highlight], color='red', zorder=7, label='%d %s bad' % (np.sum(highlight), ytitle))
# segment boundaries
for id in idEdges:
ax.axvline(id, linestyle='--', color='b')
# out of bound runs
idBelow = x[values < lowerBound]
runRejected = set(x[idRejected])
meanBelow = means[np.searchsorted(edgeRuns, runs[idBelow])]
for xarr, m in zip(idBelow, meanBelow):
if lowerBound <= m and m <= upperBound:
ax.annotate('', xytext=(xarr, m), xycoords='data',
xy=(xarr, lowerBound), textcoords='data', arrowprops=dict(arrowstyle='->', ec='r' if xarr in runRejected else 'b'), zorder=10)
else:
warnings.warn('Mean of a segment lies beyond the plotting region. This can happen if the weighting factor strongly skew the mean/median. You should widen the plot range with -pr <NO STD>')
idAbove = x[values > upperBound]
meanAbove = means[np.searchsorted(edgeRuns, runs[idAbove])]
for xarr, m in zip(idAbove, meanAbove):
if lowerBound <= m and m <= upperBound:
ax.annotate('', xytext=(xarr, m), xycoords='data',
xy=(xarr, upperBound), textcoords='data', arrowprops=dict(arrowstyle='->', ec='r' if xarr in runRejected else 'b'), zorder=10)
else:
warnings.warn('Mean of a segment lies beyond the plotting region. This can happen if the weighting factor strongly skew the mean/median. You should widen the plot range with -pr <NO STD>')
# convert x-axis into run id
if np.isfinite(lowerBound) and np.isfinite(upperBound):
ax.set_ylim(lowerBound, upperBound)
else:
warnings.warn('lower bound or upper bound is not finite. Will use auto-axis range instead.')
ax.set_xlim(0, x.shape[0]-1)
ax.set_ylabel(ytitle)
ax.set_xlabel('Run ID')
stat = '%d total runs' % values.shape[0]
if counts is not None:
totStats = np.sum(counts)
rejectedStats = np.sum(counts[idRejected])
stat = stat + '\nTot events = %d' % totStats
stat = stat + '\nRejected events = %d' % rejectedStats
stat = stat + '\nRejection rate = %.2f%%' % (rejectedStats*100/float(totStats))
ax.text(0.7, 0.01, stat, transform=ax.transAxes)
if showAllRunID:
plt.xticks(x)
ax.set_xticklabels(runs, rotation=90)
elif not showPseudoID:
import matplotlib.ticker as ticker
fig.canvas.draw()
xLabelID = [int(float(item.get_text())+0.5) for item in ax.get_xticklabels()]
ax.xaxis.set_major_locator(ticker.FixedLocator(xLabelID)) # can't zoom in due to limitations of matplotlib
xLabel = [str(runs[id]) if id < runs.shape[0] else id for id in xLabelID]
ax.set_xticklabels(xLabel, rotation=30, ha='right')
return stat
def appendRunInfo(ax, fig, ele, energy):
ax.text(0.1, 0.9, 'STAR', weight='bold', transform=fig.transFigure)
ax.text(0.15, 0.9, '%s $\sqrt{s_{NN}}$ = %s GeV' % (ele, energy), transform=fig.transFigure)
ax.legend(bbox_to_anchor=(0.35, 1.0), loc='lower left', ncol=4, frameon=False, columnspacing=0.01, borderpad=0, handletextpad=0.1)
def main(runs, secMean, secStd,
x, xerr, runsRejected, reasonsRejected,
varNames, counts, edgeRuns,
allRunID, plotRange, rejectionRange, pseudoID,
showEdgeRuns, spreadName, element, sNN,
genPDF, batch, plotGood, pdfSuffix=''):
print('Plot QA result.')
import matplotlib.pyplot as plt
plt.rc('font', size=SMALL_SIZE) # controls default text sizes
plt.rc('axes', titlesize=SMALL_SIZE) # fontsize of the axes title
plt.rc('axes', labelsize=MEDIUM_SIZE) # fontsize of the x and y labels
plt.rc('xtick', labelsize=SMALL_SIZE) # fontsize of the tick labels
plt.rc('ytick', labelsize=SMALL_SIZE) # fontsize of the tick labels
plt.rc('legend', fontsize=SMALL_SIZE) # legend fontsize
plt.rc('figure', titlesize=BIGGER_SIZE) # fontsize of the figure title
if counts is None:
counts = np.array([None]*x.shape[1])
for xcol, errcol, highlight, mcol, stdcol, ytitle, coun in zip(x.T, xerr.T, reasonsRejected.T, secMean.T, secStd.T, varNames, counts.T):
fig, ax = plt.subplots(figsize=(15, 5))
statSummary = plotOutlier(ax, fig, runs, xcol, #convert normalized values to real values
errcol, runsRejected, edgeRuns, highlight,
mcol, stdcol, ytitle, allRunID,
plotRange, rejectionRange, pseudoID,
showEdgeRuns, spreadName, coun)
appendRunInfo(ax, fig, element, sNN)
plt.tight_layout()
if genPDF:
plt.savefig(ytitle + pdfSuffix + '.pdf')
if not batch:
print('Close all the plots to continue')
plt.show()
if plotGood:
print('Plot QA result wight Just good runs.')
idRejected = np.searchsorted(runs, runsRejected)
main(np.delete(runs, idRejected), secMean, secStd,
np.delete(x, idRejected, axis=0), np.delete(xerr, idRejected, axis=0),
[], reasonsRejected,
varNames, None, edgeRuns,
allRunID, plotRange, rejectionRange, pseudoID,
showEdgeRuns, spreadName, element, sNN,
genPDF, batch, False, '.good')
return statSummary
if __name__ == '__main__':
from readFromROOT import getVarNames, readFromROOT, getNamesAllTProfile
parser = argparse.ArgumentParser(description='Plot runs')
parser.add_argument('-i', '--input', required=True, help='ROOT files that contains all the QA TProfile')
parser.add_argument('-br', '--badRuns', help='List of bad runs in txt format.')
parser.add_argument('-v', '--varNames', help='Txt files with all the variable names for QA. If it is not set, it will read ALL TProfiles in the ROOT file.')
parser.add_argument('-e', '--element', default='??+??', help='Element of your reaction')
parser.add_argument('-s', '--sNN', default='??', help='Beam energy')
parser.add_argument('--allRunID', action='store_true', help='When used, Run ID of EVERY SINGLE RUN is shown on QA plots. May not be suitable if you have tones of runs.')
parser.add_argument('--pseudoID', action='store_true', help='Show run ID in ascending order of apparence from 0 instead of the STAR formated run ID')
parser.add_argument('-pr', '--plotRange', type=float, default=10, help='The factor of SD of all good runs in the QA plot (default: %(default)s)')
parser.add_argument('--genPDF', action='store_true', help='When used, QA plots will be stored with name <varName>.pdf')
parser.add_argument('--batch', action='store_true', help='Batch mode. Plots won\'t appear throught x-11 terminal')
parser.add_argument('-pg', '--plotGood', action='store_true', help='Plot QA plots again, but only with good runs')
parser.add_argument('-m', '--mapping', help='If x-axis of TProfile does not corresponds to STAR run ID, you can supply a file that translate bin low edge to STAR ID')
args = parser.parse_args()
if args.batch:
import matplotlib
matplotlib.use('Agg')
if args.varNames is None:
varNames = getNamesAllTProfile(args.input)
else:
varNames = getVarNames(args.varNames)
print('*'*100)
print('Name of the TProfile being read:')
print('\n'.join(varNames))
if args.varNames is None:
print('If you want to exclude some TProfiles, copy the names of the required TProfiles, put them in a text file and put the name of the text file in the argument of this script as -v <filename>')
else:
print('Those are the names of TProfiles in %s' % args.varNames)
print('*'*100)
runs, x, xerr, counts = readFromROOT(args.input, varNames, args.mapping)
runsRejected = []
reasonsRejected = []
var2ID = dict(((var, i) for i, var in enumerate(varNames)))
if args.badRuns is not None:
with open(args.badRuns) as f:
for line in f:
if not line.isspace():
runAndReasons = line.strip().split(' ')
# if there are no reasons, the good is assumed to be good
if len(runAndReasons) == 1:
continue
runsRejected.append(int(runAndReasons[0]))
reasons = [False]*len(varNames)
for reason in runAndReasons[1:]:
if reason in var2ID:
reasons[var2ID[reason.strip()]] =True
reasonsRejected.append(reasons)
reasonsRejected = np.array(reasonsRejected)
print('Plotting result')
main(runs, np.atleast_2d(np.average(x, axis=0)), 5*np.atleast_2d(np.std(x, axis=0)),
x, xerr, runsRejected, reasonsRejected,
varNames, counts, [],
args.allRunID, args.plotRange, 5, args.pseudoID,
False, 'RMS', args.element, args.sNN,
args.genPDF, args.batch, args.plotGood)