-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgraph_data.py
387 lines (272 loc) · 11.6 KB
/
graph_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
import matplotlib.pyplot as plt
from astroquery.nist import Nist
import astropy.units as u
import math
from hapi import *
# MATPLOTLIB SUPPORTED COLORS
COLORS = ['r', 'forestgreen', 'darkorange', 'dodgerblue']
# in micrometers
WAVELENGTH_GROUPING = 0.005
# for all 1600 datapoints
DATA_FILE = "./clean_csv/all_data.csv"
# for only one file
DATA_FILE = "./clean_csv/one_file.csv"
FILE_NUM = 4988.1
# special molecules
SPECIAL_MOLECULES = ["H2O", "CO2", "CO", "CH4"]
# abundances from hitran's isotopogue
ABUNDANCES = {
"H2O": 0.9973,
"CO2": 0.9842,
"CO": 0.9865,
"CH4": 0.98827
}
def sortData(min_wl=0, max_wl=10, remove_odd_points=True):
"""
Inputs min-max wavelength range, in microns, to visualize (if any boundaries) and return x-y pofloats, 1D x-error bar values and 2D y-error bar values
"""
x = list()
y = list()
x_errorbars = list()
# first list is the minimum and second list is maximum
y_errorbars = [[], []]
with open(DATA_FILE, "r") as f:
lines = f.readlines()
for l in lines[1:]:
data = l.split(",")
# for removing weird points
if abs(float(data[3])) > 0.2 or abs(float(data[4])) > 0.2:
print(f"ODD DATA POINTS FOUND: wavelength of {data[0]} has very high transit depth errors of {data[3]} and {data[4]}")
# skip this outlier
if remove_odd_points:
continue
if data[1] != 'null':
if abs(float(data[1])) > 0.35:
print(f"ODD DATA POINTS FOUND: wavelength of {data[0]} has very high wavelength absolute uncertainty of {data[1]}")
# skip this outlier
if remove_odd_points:
continue
# if data is within plottable range
if min_wl <= float(data[0]) <= max_wl:
x.append(float(data[0]))
x_errorbars.append(float(data[1]) if data[1] != "null" else 0)
y.append(float(data[2]))
# makes sure the minimum bounds of y-error isn't negative
y_errorbars[0].append(abs(float(data[3])))
y_errorbars[1].append(abs(float(data[4])))
return x, y, x_errorbars, y_errorbars
def sortPlotCleanData(x, y, option):
"""
Takes the sorted data from sortData()
option determins whether the y value is an accumulation (s) or an average (a)
Returns two lists (list of x values) (list of corresponding y values) that are grouped by wavelength range specified in global WAVELENGTH_GROUPING
"""
print("Sorting through the clean data...")
ret_list = list()
for i in range(len(x)):
ret_list.append((x[i], y[i]))
ret_list.sort()
# further groups the data into blocks
x_plot, y_plot = list(), list()
avrg_x = list()
avrg_y = list()
counter = 0
while len(ret_list) != 0:
# if a new group needs to be started
while not counter <= ret_list[0][0] <= counter + WAVELENGTH_GROUPING:
# block is ready to be officially plotted
if len(avrg_x) != 0:
x_plot.append(sum(avrg_x)/len(avrg_x))
if option == 'a':
y_plot.append(sum(avrg_y)/len(avrg_y))
else:
y_plot.append(sum(avrg_y))
# reset
avrg_x, avrg_y = list(), list()
# update the next block
counter += WAVELENGTH_GROUPING
# update the average of the block
avrg_x.append(ret_list[0][0])
avrg_y.append(ret_list[0][1])
# remove the point once it has been processed
ret_list = ret_list[1:]
return x_plot, y_plot
def fetchElementData(lowerbound, upperbound, molec, field):
"""
Enter the lowerbound (in nm) and upperbound in nm
Returns a list of x values in micrometers; y=[1,1,1,...]'
"""
print("\nFETCHING MOLECULAR LINES DATA...")
print(f" currently fetching data for {molec} between {lowerbound} and {upperbound}")
x = list()
y = list()
if not molec in SPECIAL_MOLECULES:
# bounds
table = Nist.query(lowerbound *u.nm, upperbound * u.nm, linename=molec, wavelength_type="vacuum")
# print(table)
# USING ONLY OBSERVED VALUES
for i in range(len(table)):
# prevent if there are no values
if table[i][field] != '--' and not '+' in str(table[i][field]):
x.append(round(float(table[i][field])/1000, 3))
y.append(1)
return x, y
return x, y
def fetchMolecularData(file_name, molec):
"""
given a txt file that per line follows the following format:
wavenumber(cm^-1) intensities cm^-1/(molecule cm^-2)
read from the file and returns a list of x values (wavelength in micrometers) and list of y values (intensity, adjusted for abundance)
"""
wavenumbers = list()
wavelength = list()
relative_intensities = list()
with open(file_name, 'r') as f:
lines = f.readlines()
for l in lines:
# try:
# w, i = l.split(" ")[1:]
# except:
# print(molec)
# print(l.split(" ")[1:])
# break
w, i = l.split(" ")[1:]
# removing points that have very minor intensities (TODO: ADJUST BASED ON MEASUREMENT INACCURACY IN HITRAN)
if float(i) > 1e-24:
# wavenumbers in cm^-1
wavenumbers.append(float(w))
# wavelength in micrometers
wavelength.append(1/(float(w))*1e4)
# relative_intensities.append(float(i)/ABUNDANCES[molec])
relative_intensities.append(float(i))
# normalizing everything to 1
# max_val = max(relative_intensities)
# relative_intensities = list(map(lambda x: x/max_val, relative_intensities))
return wavelength, relative_intensities
def fetchHitran(molecule, global_id, iso_id, min_lambda, max_lambda):
"""given molecule name, global id, isotope id, min wavelength and max wavelength, fetches the data using fetch"""
fetch(molecule, global_id, iso_id, 1e4/max_lambda, 1e4/min_lambda)
def plotRaw(x_vals, y_vals, x_err, y_err, target_molecules=[]):
LOWERBOUND = math.floor(min(x_vals))
UPPERBOUND = math.ceil(max(x_vals))
# Initialize Graph
fig_raw = plt.figure()
ax = fig_raw.add_subplot()
if len(target_molecules):
ax2 = ax.twinx()
# Plots Normal Data
ax.errorbar(x_vals, y_vals, xerr=x_err, yerr=y_err, fmt='.', ls='none')
# MOLECULAR ANALYSIS -------------------------
# SETS THE TARGET MOLECULES in list for query
# Fetch atomic data + graph
for i, molec in enumerate(target_molecules):
if molec in SPECIAL_MOLECULES:
x, y = fetchMolecularData(f"./hitran_data/{molec}.txt", molec)
ax2.errorbar(x, y, marker='.', color=COLORS[SPECIAL_MOLECULES.index(molec)], label=molec, ls='none')
else:
print(f"\nCONDUCTING MOLECULAR ANALYSIS for {molec}")
x_vals, y = fetchElementData(LOWERBOUND*1000, UPPERBOUND*1000, molec, "Ritz")
print(f" data fetched...")
for j in range(len(x_vals)):
if x_vals[j] >= LOWERBOUND:
ax.axvspan(x_vals[j], x_vals[j]+0.001, alpha=0.25, color=COLORS[i])
# Paper-proof the graph
ax.set_xlabel("Wavelength (micrometers)", loc='center')
ax.set_ylabel("Transit Depth (%)", loc='center')
ax.set_title(f"Transmission Spectrum of WASP-39b from {LOWERBOUND} to {UPPERBOUND} micrometers")
if len(target_molecules):
ax2.set_ylabel("relative intensity of molecular spectra")
ax2.legend()
fig_raw.tight_layout()
plt.plot()
def plotClean(x, y, target_molecules=[]):
"""
Takes in raw x and y values, cleans up the values and plots it
Afterwards plots the target molecules on the same axes, using the RITZ WAVELENGTH
"""
# initialize graph
print("\nInitializing clean plotting graph...")
fig_clean = plt.figure()
ax1 = fig_clean.add_subplot()
ax2 = ax1.twinx()
# DATA CLEANING -----------------------------------
x, y = sortPlotCleanData(x, y, 'a')
min_wl = math.floor(min(x))
max_wl = math.ceil(max(x))
ax1.errorbar(x, y, marker='.', ls='none')
# ax1.errorbar(x, y, marker='.')
# MOLECULAR ANALYSIS -----------------------------------
# queries for database info
for i, molec in enumerate(target_molecules):
if molec in SPECIAL_MOLECULES:
x, y = fetchMolecularData(f"./hitran_data/{molec}.txt", molec)
else:
x, y = fetchElementData(min_wl*1000, max_wl*1000, molec, "Ritz")
x, y = sortPlotCleanData(x, y, 's')
# plot molecular data
# ax2.plot(x, y, marker='.', ls='none', color=COLORS[i], label=molec)
ax2.errorbar(x, y, marker='.', color=COLORS[i], label=molec, ls='none')
# Paper-proof the graph
ax1.set_title(f"Processed Transmission Spectrum of WASP-39b from {min_wl} to {max_wl} micrometers, file name {FILE_NUM}")
ax1.set_xlabel("Wavelength (micrometers)", loc='center')
ax1.set_ylabel("Transit Depth (%) (transmission spectra)", loc='center')
ax1.legend()
ax2.set_ylabel("relative frequency (atomic spectra)")
ax2.legend()
# show graph
fig_clean.tight_layout()
plt.plot()
def plotModel(x_vals, y_vals, x_err, y_err, target_molecules=[]):
"""
plots raw data: x, y and error
target molcules two dimentional list:
[molecule name, global id, isotope id]"""
LOWERBOUND = math.floor(min(x_vals))
UPPERBOUND = math.ceil(max(x_vals))
# Initialize Graph
fig_raw = plt.figure()
ax = fig_raw.add_subplot()
# Plots Normal Data
ax.errorbar(x_vals, y_vals, xerr=x_err, yerr=y_err, fmt='.', ls='none')
# source_tables = list()
# fetching hitran for each of the target molecules
for molec in target_molecules:
ax2 = ax.twinx()
# source_tables.append(molec[0])
# fetches data from hitran
fetchHitran(molec[0], molec[1], molec[2], LOWERBOUND, UPPERBOUND)
# models line broadening
nu, coef = absorptionCoefficient_Lorentz(SourceTables=molec[0])
# converts wavenumbers to wavelengths
wv = list(map(lambda x: 1e4/x, nu))
ax2.errorbar(wv, coef, color=COLORS[target_molecules.index(molec)])
fig_raw.tight_layout()
plt.plot()
def plotRockPlanet():
""" plots the transmission spectrum of a planet with no atmosphere """
x_vals = [x/100 for x in range(500)]
y_vals = [1.8 for _ in range(500)]
plt.plot(x_vals, y_vals)
plt.ylabel("Transit Depth (%)")
plt.xlabel("Wavelength (micrometers)")
def main():
# TRANSMISSION SPECTRA DATA ANALYSIS -----------------------
# SETS THE GRAPHING BOUNDARY in micrometers
min_val, max_val = 0, 5
# sort data
x_vals, y_vals, xerr_bars, yerr_bars = sortData(min_val, max_val)
# plotting
# to_plot = ["H2O", "CO2", "CO", "CH4"]
to_plot = ["H2O", "CO2"]
# for a in to_plot:
# plotRaw(x_vals, y_vals, xerr_bars, yerr_bars, [a])
# plotRaw(x_vals, y_vals, xerr_bars, yerr_bars, to_plot)
# plotRaw(x_vals, y_vals, xerr_bars, yerr_bars)
# plotClean(x_vals, y_vals, elements)
# plotClean(x_vals, y_vals, ["Na"])
# plotModel(x_vals, y_vals, xerr_bars, yerr_bars, [['CO2', 2, 1], ['H2O', 1, 1]])
# plotClean(x_vals, y_vals, ["H2O"])
plotRockPlanet()
plt.show()
if __name__ == "__main__":
main()