-
Notifications
You must be signed in to change notification settings - Fork 4
/
find_rv_outliers.py
447 lines (366 loc) · 19.6 KB
/
find_rv_outliers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
#!/usr/bin/env python
# encoding: utf-8
"""
find_rv.py
Created by Vivienne Baldassare on 2012-03-30.
Copyright (c) 2012 __MyCompanyName__. All rights reserved.
Description:
This code finds the radial velocity of a target when supplied with data for the target and data for a standard object
whose radial velocity is known.
Usage:
Note: Data is not corrected for heliocentric velocities.
Inputs:
wv_obj, fx_obj, and sig_obj are arrays containing data for the wavelength, flux, and flux uncertainty of the target.
wv_std, fx_std, and sig_std are arrays containing data for the wavelength, flux, and flux uncertainty of the standard.
Example:
>>> import find_rv
>>> find_rv.radial_velocity(wv_obj,fx_obj,sig_obj,wv_std,fx_std,sig_std,rv_std,rv_std_err,obj_name,std_name)
"""
from array import array
import math
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import numpy as np
import scipy
import scipy.optimize as op
import scipy.ndimage
def quadratic(x,a,b,c):
    """Evaluate the quadratic polynomial a + b*x + c*x**2 at x."""
    total = a
    total = total + b * x
    total = total + c * x ** 2
    return total
def cubic(x,a,b,c,d):
    """Evaluate the cubic polynomial a + b*x + c*x**2 + d*x**3 at x."""
    total = a
    total = total + b * x
    total = total + c * x ** 2
    total = total + d * x ** 3
    return total
def quartic(x,a,b,c,d,e):
    """Evaluate the quartic polynomial a + b*x + c*x**2 + d*x**3 + e*x**4 at x."""
    total = a
    total = total + b * x
    total = total + c * x ** 2
    total = total + d * x ** 3
    total = total + e * x ** 4
    return total
def lsf_rotate(deltav,vsini,epsilon=None,velgrid=None):
    """Build a rotational-broadening line-spread-function kernel.

    Based on the IDL routine LSF_ROTATE.PRO: an analytic rotation profile
    with linear limb darkening (Gray, "Observation and Analysis of Stellar
    Photospheres").

    Parameters:
      deltav  -- velocity step per pixel (km/s) of the spectrum the kernel
                 will be convolved with.
      vsini   -- projected rotational velocity (km/s); sets kernel half-width.
      epsilon -- linear limb-darkening coefficient; defaults to 0.6 (a
                 common solar-type value) when None.
      velgrid -- if not None, also return the velocity offset of each
                 kernel pixel (km/s).

    Returns:
      kernel, or (kernel, velocity_grid) when velgrid is not None.
      The kernel is zero at both endpoints (|v| = vsini) and peaks at 0.
    """
    # Fix: compare with "is None", not "== None" -- equality comparison
    # breaks if an array is ever passed, and identity is the Python idiom.
    if epsilon is None:
        epsilon = 0.6
    # Limb-darkening terms of the analytic rotation profile.
    e1 = 2.0*(1.0 - epsilon)
    e2 = np.pi*epsilon/2.0
    e3 = np.pi*(1.0 - epsilon/3.0)
    # Fix: np.ceil returns a float; modern numpy rejects float sizes in
    # np.arange and integer ops, so cast once here.  Force an odd number
    # of points so the kernel has a central (zero-velocity) pixel.
    npts = int(np.ceil(2*vsini/deltav))
    if npts % 2 == 0:
        npts += 1
    nwid = npts // 2
    # Pixel offsets scaled to units of vsini: x runs from -1 to +1.
    x = np.arange(npts) - nwid
    x = x*deltav/vsini
    # abs() guards against tiny negative values from float roundoff at |x|=1.
    x1 = np.abs(1.0 - x**2)
    kernel = (e1*np.sqrt(x1) + e2*x1)/e3
    if velgrid is not None:
        velgrid = x*vsini
        return kernel,velgrid
    return kernel
def radial_velocity(wv_obj,fx_obj,sig_obj,wv_std,fx_std,sig_std,obj_name,std_name,rv_std,rv_std_err,order,xcorr_width,cut,cutstart,cutend):
    """Measure the radial velocity of a target relative to a standard star.

    Monte-Carlo cross-correlation: both spectra are interpolated onto a
    shared log-wavelength grid (so a constant pixel shift corresponds to a
    constant velocity shift), per-pixel gaussian noise scaled by the flux
    uncertainties is added, the spectra are cross-correlated, and a
    gaussian (plus linear baseline) is fit to the correlation peak.  This
    repeats n_iter times; the mean and stddev of the fitted pixel shifts
    give the RV and its uncertainty.

    Parameters:
      wv_obj, fx_obj, sig_obj -- 1-D arrays: wavelength, flux, flux error of the target
      wv_std, fx_std, sig_std -- 1-D arrays: wavelength, flux, flux error of the standard
      obj_name, std_name      -- names; used only in plot labels and the output filename
      rv_std, rv_std_err      -- known RV of the standard; used only for the plot annotation
      order                   -- spectral order label; used only in the output filename
      xcorr_width             -- half-width (pixels) of the window fit around the peak
      cut, cutstart, cutend   -- if cut is nonzero, only shifts inside
                                 (cutstart, cutend) are kept (outlier rejection)

    Returns:
      (rv_meas, rv_meas_err) in km/s.  NOT heliocentric-corrected.

    Side effects: saves a diagnostic PNG and prints progress (Python 2
    print statements -- this module requires Python 2).
    """
    # The more random iterations, the better... but it takes longer
    n_iter = 1000
    # Step 1: Fix the spectra:
    # * Select only the region in which they overlap
    # * Make a new stretched wavelength array (for sub-pixel precision work)
    # * Interpolate the data onto the new wavelength array
    # * Remove large scale slopes so we only compare line and band features
    # Find where standard and object overlap ---------------
    wv_min = max([min(wv_std),min(wv_obj)])
    wv_max = min([max(wv_std),max(wv_obj)])
    n_pix_std = len(wv_std)
    # Creates ln standard wavelength array ---------------------------------
    # AR 2013.0423 The wavelength array only covers the overlap region. Also, I'm folding the rebinning by 10 into this statement.
    # Solve pixel = a*ln(wv) + b so that pixel 1 -> wv_min and pixel
    # n_pix_std*10 -> wv_max; the factor of 10 oversamples for sub-pixel work.
    acoef_std = (n_pix_std*10 -1)/(math.log(wv_max) - math.log(wv_min))
    bcoef_std = (n_pix_std*10) - (acoef_std * math.log(wv_max))
    arr = np.arange(n_pix_std*10)+1
    wv_ln_std = np.exp((arr - bcoef_std)/acoef_std)
    # AR 2012.1018: Find the conversion between pixels and velocity. This will vary from instrument
    # to instrument and spectral order to spectral order, so we should preferentially calculate this
    # based on the actual input spectrum.
    # AR 2013.0422: Change the calculation to happen AFTER the corrected wavelength scale has been made
    # Find the average pixel/spectrum offset
    # Note: even though it's called micron_per_pix, it will still work if the wavelengths are
    # angstroms instead (it really converts <wavelength unit> to km/s)
    # Interpolate data onto same ln wavelength scale -------------------------------
    fx_interp_std = np.interp(wv_ln_std, wv_std, fx_std)
    fx_interp_obj = np.interp(wv_ln_std, wv_obj, fx_obj)
    sig_interp_std = np.interp(wv_ln_std, wv_std, sig_std) # AR 2012.1018 Also need to rebin sig
    sig_interp_obj = np.interp(wv_ln_std, wv_obj, sig_obj) # AR 2012.1018 Also need to rebin sig
    # Rebin Data ----------------------------
    wv_arr_std=np.asarray(wv_ln_std,dtype=float)
    fx_arr_obj=np.asarray(fx_interp_obj,dtype=float)
    fx_arr_std=np.asarray(fx_interp_std,dtype=float)
    sig_arr_obj=np.asarray(sig_interp_obj,dtype=float)
    sig_arr_std=np.asarray(sig_interp_std,dtype=float)
    datalen = len(fx_arr_obj)
    # Step 2: Measure vsini:
    # Note that as of 2015.0605, this doesn't actually work.
    # AR 2014.0922: For vsini:
    #  In a loop:
    #   Take the standard spectrum
    #   broaden it to width X
    #   autocorrelate,
    #   measure width of gaussian Y (this is supposed to give you a means of translating between width-of-cross-correlation and vsini)
    #  Fit function solving Y for X.
    #  For each cross correlation of object and standard:
    #   Determine vsini
    # km/s per (oversampled log-grid) pixel; 2.99792458e5 is c in km/s.
    pix_scale = (2.99792458*10**5)/acoef_std
    #vsinirange = [1,2,5,10,20,30,40,50,60,80,100,100]
    #widthrange = []
    #for v in vsinirange:
    #    # Make convolution kernel for v km/s
    #    kernel = lsf_rotate(pix_scale,v)
    #    # Broaden the standard spectrum
    #    fx_obj_wide = np.correlate(fx_arr_obj, kernel, mode='same')
    #    # Rectify the spectrum
    #    fx_obj_orig = (fx_arr_obj - np.mean(fx_arr_obj))/np.std(fx_arr_obj,ddof=1)
    #    fx_obj_wide = (fx_obj_wide - np.mean(fx_obj_wide))/np.std(fx_obj_wide,ddof=1)
    #
    #    # Remove a cubic (flatten the spectrum)
    #    coeff,pcov = op.curve_fit(cubic,wv_arr_std,fx_obj_wide)
    #    fx_obj_wide = fx_obj_wide - (coeff[0] + coeff[1]*wv_arr_std + coeff[2]*wv_arr_std**2 + coeff[3]*wv_arr_std**3)
    #    coeff,pcov = op.curve_fit(cubic,wv_arr_std,fx_obj_orig)
    #    fx_obj_orig = fx_obj_orig - (coeff[0] + coeff[1]*wv_arr_std + coeff[2]*wv_arr_std**2 + coeff[3]*wv_arr_std**3)
    #
    #    # Cross-correlate the spectrum with its broadened self
    #    ycorr = np.correlate(fx_obj_orig, fx_obj_wide, mode='full')
    #    # Now determine where the peak is (should be near 0)
    #    length = len(ycorr)
    #    xcorr = np.arange(length) - length//2
    #    xmid = np.argmax(ycorr)
    #    ymax = np.max(ycorr)
    #    # Chop out just the portion of the array near the peak
    #    xcorr_min=xmid-xcorr_width
    #    xcorr_max=xmid+xcorr_width
    #    ycorr1=ycorr[xcorr_min:xcorr_max] #isolate section of array with gaussian
    #    xcorr1=xcorr[xcorr_min:xcorr_max] #isolate the same section of the pixel range
    #
    #    # set up initial values for gaussian fitting via chi2
    #    sig = 10
    #    sky = np.min(ycorr1)/1.2
    #    # print ycorr1[-1],ycorr1[0],xcorr1[-1],xcorr1[0]
    #    sky2 = (ycorr1[-1]-ycorr1[0])/(xcorr1[-1]-xcorr1[0])
    #    lnamp = np.log(ymax/1.2-sky) # guess some values
    #    mean = xcorr[xmid]
    #
    #    amp = np.exp(lnamp)
    #    sig2 = sig**2
    #    # suggestion from D. Hogg 12/15/12: Add extra linear feature to fit.
    #    # suggestion from D. Hogg 12/15/12: operate on ln(amp) so that the amplitude CANNOT be negative.
    #    def chi2(p): #define gaussian function for fitting
    #        sig2=p[2] ** 2
    #        m = (np.exp(p[0]) * np.exp(-0.5 * (xcorr1 - p[1]) ** 2 / sig2)) + p[3] + p[4]*xcorr1
    #        return (ycorr1 - m)
    #
    #    # Fit the gaussian.
    #    popt, ier = op.leastsq(chi2, [lnamp, mean, sig, sky, sky2])
    #    lnamp, mean, sig, sky, sky2 = popt
    #
    #    amp = np.exp(lnamp)
    #    # record the width
    #    widthrange.append(sig)
    #
    ## Plot all the widths to get a width-vsini curve
    #vsinicoeff,popt = op.curve_fit(quartic,np.asarray(widthrange),np.asarray(vsinirange))
    #
    #relationx = np.arange(50,200,1)
    #relationy = vsinicoeff[0]+vsinicoeff[1]*relationx+vsinicoeff[2]*relationx**2+vsinicoeff[3]*relationx**3+vsinicoeff[4]*relationx**4
    #figv = plt.figure(1)
    #axv = figv.add_subplot(211)
    #axv.scatter(widthrange,vsinirange)
    #axv.plot(relationx,relationy)
    ##ax.text(70,100,"{0:} {1:} {2:} {3:} {4:}".format(vsinicoeff))
    # 3. Cross-correlate the data, using n_iter trials:
    # * Generate two random gaussian noises scaled to the uncertainty on the fluxes
    # * Apply those gaussian noises to the standard and target stars
    # * Cross-correlate the standard and target stars
    # * Find and then cut out just the part of the cross-correlation curve near the maximum
    # * Set up gaussian
    # * Fit gaussian to that center part
    # * Save fitted parameters (pixel shift aka mean of gaussian, width aka stddev of gaussian)
    # * Repeat n_iter times
    # Cross correlation loop --------------------------------
    pix_shift=np.array([]) #initialize array for pixel shift values
    pix_width=np.zeros(n_iter) #initialize array for pixel width values
    # l counts total fit attempts (including rejected ones); len(pix_shift)
    # counts accepted fits, so the loop runs until n_iter fits are ACCEPTED.
    l = 0
    # using the xrange generator rather than making a full list saves memory
    while len(pix_shift) < n_iter:
        # prepare the randomized data
        # GETTING ARRAYS READY FOR CROSS CORRELATION
        # Randomize noise:
        # create gaussian distribution of random numbers b/t 1 and -1, multiply err by numbers, add numbers to flux
        # I have drastically simplified the arrays here AR 2013.0319
        # AR 2013.0318: There was a problem, previously: noise was a fixed value, not linked to the known error values
        # AR 2013.0321: Speed fix. Rather than step through the array and generate one
        # normally-distributed error value scaled to the SNR at that point, I will generate an
        # array of normally-distributed error values scaled to 1, and then multiply by the SNR:
        # One array generation, one array multiplication.
        rand_dist = np.random.normal(loc=0.0,scale=1.0,size=datalen)
        rand_dist2 = np.random.normal(loc=0.0,scale=1.0,size=datalen)
        fx_temp_obj = np.asarray(fx_arr_obj + rand_dist * sig_arr_obj)
        fx_temp_std = np.asarray(fx_arr_std + rand_dist2 * sig_arr_std)
        mean_obj=np.mean(fx_temp_obj)
        mean_std=np.mean(fx_temp_std)
        stddev_obj=np.std(fx_temp_obj,ddof=1)
        stddev_std=np.std(fx_temp_std,ddof=1)
        # Regularize data (subtract mean, divide by std dev) (Should definitely be done AFTER noise was added)
        fx_reg_temp_obj = fx_temp_obj-mean_obj
        fx_reg_temp_obj = fx_reg_temp_obj/stddev_obj
        fx_reg_temp_std = fx_temp_std-mean_std
        fx_reg_temp_std = fx_reg_temp_std/stddev_std
        # curve fit - remove a cubic AR 2012.1113
        coeff,pcov = op.curve_fit(cubic,wv_arr_std,fx_reg_temp_obj)
        fx_reg_temp_obj = fx_reg_temp_obj - (coeff[0] + coeff[1]*wv_arr_std + coeff[2]*wv_arr_std**2 + coeff[3]*wv_arr_std**3)
        coeff,pcov = op.curve_fit(cubic,wv_arr_std,fx_reg_temp_std)
        fx_reg_temp_std = fx_reg_temp_std - (coeff[0] + coeff[1]*wv_arr_std + coeff[2]*wv_arr_std**2 + coeff[3]*wv_arr_std**3)
        # CROSS CORRELATION
        # compute the cross-correlation between the two spectra
        ycorr = np.correlate(fx_reg_temp_obj, fx_reg_temp_std, mode='full')
        # time required: 0.045 seconds average
        #http://stackoverflow.com/questions/12323959/fast-cross-correlation-method-in-python
        #conv1 = np.zeros(datalen * 2)
        #conv1[datalen/2:datalen/2+datalen] = fx_reg_temp_obj
        #conv2 = fx_reg_temp_std[::-1]
        #ycorr = signal.fftconvolve(conv1,conv2, mode='valid')
        # time required: 0.006 seconds average, but it segfaults by the third try.
        ## slight smoothing AR 2013.0315
        #ycorr = scipy.ndimage.filters.gaussian_filter1d(ycorr,11)
        # create the x offset axis (same length as ycorr, with 0 in the MIDDLE)
        length = len(ycorr)
        xcorr = np.arange(length) - length//2
        # AR 2012.1126 Select a tiny piece around the maximum to fit with a gaussian.
        xmid = np.argmax(ycorr)
        ymax = np.max(ycorr)
        # now take just the portion of the array that matters
        xcorr_min=int(xmid-xcorr_width)
        xcorr_max=int(xmid+xcorr_width)
        ycorr1=ycorr[xcorr_min:xcorr_max] #isolate section of array with gaussian
        xcorr1=xcorr[xcorr_min:xcorr_max] #isolate the same section of the pixel range
        # Wider slices (for plotting context only, +/- 50 extra pixels).
        ycorr2=ycorr[xcorr_min-50:xcorr_max+50]
        xcorr2=xcorr[xcorr_min-50:xcorr_max+50]
        # suggestion from D. Hogg 12/15/12: Add extra linear feature to fit.
        # suggestion from D. Hogg 12/15/12: operate on ln(amp) so that the amplitude CANNOT be negative.
        # Residuals of a gaussian-plus-line model, for op.leastsq:
        # p = [ln(amplitude), mean, sigma, constant sky, linear sky slope].
        def chi2(p): #define gaussian function for fitting
            sig2=p[2] ** 2
            m = (np.exp(p[0]) * np.exp(-0.5 * (xcorr1 - p[1]) ** 2 / sig2)) + p[3] + p[4]*xcorr1
            return (ycorr1 - m)
        # set up initial values for chi2
        sig = 10
        sky = np.min(ycorr1)/1.2
        # print ycorr1[-1],ycorr1[0],xcorr1[-1],xcorr1[0]
        sky2 = (ycorr1[-1]-ycorr1[0])/(xcorr1[-1]-xcorr1[0])
        lnamp = np.log(ymax/1.2-sky) # guess some values
        mean = xcorr[xmid]
        amp = np.exp(lnamp)
        sig2 = sig**2
        popt, ier = op.leastsq(chi2, [lnamp, mean, sig, sky, sky2])
        lnamp, mean, sig, sky, sky2 = popt
        amp = np.exp(lnamp)
        #print_num=len(pix_shift)%100
        # Print a status line every 100 attempts.
        print_num=l%100
        if print_num == 0:
            ## Uncomment the following to make a plot every 500 fits.
            #fig = plt.figure(l)
            #ax = fig.add_subplot(111)
            #my_gauss = (amp * (np.exp(-0.5 * ((xcorr1 - mean) ** 2) / sig**2))) + sky + sky2 * xcorr1
            #ax.plot(xcorr1,my_gauss,'r--')
            #ax.plot(xcorr2,ycorr2,'#000000')
            #ax.plot(xcorr1,ycorr1-my_gauss,'#00CC00')
            ##if abs(mean - xcorr[xmid]) > 5:
            ##    print "Mean is off",mean,xcorr[xmid]
            #figname='rv_{0:}_{1:}_{2:}_{3:}.png'.format(std_name,obj_name,order,l)
            #ax.set_xlim(xcorr[xcorr_min-50],xcorr[xcorr_max+50])
            #fig.savefig(figname)
            #fig.clf()
            #plt.close()
            print "amp={0: 12.4f} mu={1: 10.4f} sig={2: 9.4f} sky={3: 11.4f} sky2={4: 8.4f} n_entries={5:}".format(amp,mean,sig,sky,sky2,len(pix_shift))
        l += 1
        # Accept the fit if cutting is disabled, or if the fitted shift lies
        # inside (cutstart, cutend).  Note the bitwise | and &: this works
        # only because & binds tighter than |, giving
        # (cut==0) or (in-range).  NOTE(review): np.float was removed in
        # numpy 1.24; use float() instead when modernizing.
        if (cut==0) | (mean > np.float(cutstart)) & (mean < np.float(cutend)):
            pix_shift = np.append(pix_shift,mean)
        #        if ier < 5:
        # I'm calculating the vsini now because I need errors, and the vsini calculation is not linear.
        #pix_width[l] = vsinicoeff[0] + vsinicoeff[1] * sig + vsinicoeff[2] * sig**2 + vsinicoeff[3] * sig**3 + vsinicoeff[4] * sig**4
    # End cross correlation loop ---------------------------------
    # 4. Find the RV
    #  All 5000 rv fits have been calculated and stored in arrays
    # 4a. Cut out outlier RVs. Useful if the cross-correlation produces occasional bad results. Use cutstart and cutend to force the code to only fit a gaussian to a certain region. Don't over-use this to force the result you want, though.
    # 4b. Compute the mean pixel shift and pixel shift uncertainty.
    # 4c. Convert pixel shift into RV
    # 4d. Shift the wavelength array appropriately - all lines should now line up.
    ## Uncomment this to print out an example cross-correlation diagram
    #fig = plt.figure(2)
    #ax = fig.add_subplot(111)
    #ax.plot(xcorr,ycorr,'k')
    #figname='rv_{0:}_{1:}_{2:}_xcorr.png'.format(std_name,obj_name,order)
    #fig.savefig(figname)
    #fig.clf()
    #plt.close()
    # Turn the list of pixel shifts into a numpy array
    pix_shift = np.asarray(pix_shift)
    # 4a. Cut out outliers from the pixel shift
    # NOTE(review): redundant with the in-loop filter above when cut==1,
    # but harmless -- it re-applies the same bounds.
    if cut == 1:
        pix_shift = pix_shift[np.where((pix_shift > np.float(cutstart)) & (pix_shift < np.float(cutend)))]
    # 4b. Compute the mean pixel shift (rv value) and pixel shift uncertainty (RV uncertainty).
    # Prints: attempts, accepted fits, acceptance percentage.
    print l,len(pix_shift),np.float(len(pix_shift))/np.float(n_iter)*100.0
    mu = np.mean(pix_shift)
    sigma = np.std(pix_shift,ddof=1)
    #vsini = np.mean(pix_width)
    #vsini_err = np.std(pix_width,ddof=1)
    #axh = figv.add_subplot(212)
    #n, bins, patches=axh.hist(pix_width,bins=30,normed=1.0,facecolor='green',align='mid')
    #figv.savefig('vsiniplot.png')
    #plt.clf()
    #plt.close()
    # 4c. Transform pixel shift to shift in radial velocity
    # AR 2013.0423: The actually appropriate method requires a speed-of-light correction. This works for both angstroms and microns.
    rv_meas = (2.99792458*10**5 * mu)/acoef_std
    rv_meas_err = (2.99792458*10**5 * sigma)/acoef_std
    # 4d. Apply shift to arrays
    wv_rvcorr_obj=wv_arr_std * (1 - rv_meas/(2.99792458*10**5))
    ## 5. Create plots ---------------------------------
    # The plots are the only reason find_rv.py needs to know the names of either star, or the RV of the standard.
    # Plot object and standard so you can clearly see that shift exists --------------------------------
    fig = plt.figure(1)
    # AR 2013.0703 Regularize the spectra for display purposes in the final graph
    # I'm using the mean and stddev of the last random-added attempt so it won't be perfect...
    fx_reg_obj = fx_arr_obj-mean_obj
    fx_reg_obj = fx_reg_obj/stddev_obj
    fx_reg_std = fx_arr_std-mean_std
    # NOTE(review): this looks like a bug -- the line below divides the
    # UN-centered fx_arr_std instead of fx_reg_std, discarding the mean
    # subtraction on the previous line.  Display-only, but probably should
    # be: fx_reg_std = fx_reg_std/stddev_std
    fx_reg_std = fx_arr_std/stddev_std
    #Plots target and standard with shift applied
    ax1 = fig.add_subplot(311)
    ax1.plot(wv_rvcorr_obj, fx_reg_obj, 'red')
    ax1.plot(wv_arr_std, fx_reg_std, 'blue')
    ax1.set_xlabel('wavelength (microns)')
    ax1.set_ylabel('normalized flux')
    target = 'Target: %s' %(obj_name)
    standard = 'Standard: %s' %(std_name)
    ax1.annotate(target,xy=(.7,.9),xycoords='axes fraction',xytext=(.6,.9),textcoords='axes fraction',color='red')
    ax1.annotate(standard,xy=(.7,.8),xycoords='axes fraction',xytext=(.6,.8),textcoords='axes fraction',color='blue')
    # Reconstruct the gaussian from the LAST loop iteration's fit, but
    # centered on the overall mean shift mu, as an example of the fits.
    sig2=sig ** 2
    my_gauss = (amp * (np.exp(-0.5 * ((xcorr1 - mu) ** 2) / sig2))) + sky + sky2 * xcorr1
    #Plots example of gaussian fit to cross correlation function
    ax2 = fig.add_subplot(312)
    ax2.plot(xcorr1, ycorr1, 'k.')
    ax2.plot(xcorr1, my_gauss, 'r--', linewidth=2)
    ax2.plot(xcorr1,ycorr1-my_gauss,'#00CC00')
    ax2.set_xlabel('example of fit to cross correlation function')
    ax2.set_xlim(xcorr[xcorr_min-50],xcorr[xcorr_max+50])
    #print pix_shift
    ## Plot histogram of pixel shift values --------------------------------
    ax3 = fig.add_subplot(313)
    # NOTE(review): hist(normed=...) and mlab.normpdf were removed in
    # matplotlib 3.x (use density=True and scipy.stats.norm.pdf when
    # modernizing); as written this requires an old matplotlib.
    n, bins, patches=plt.hist(pix_shift,bins=30,normed=1.0,facecolor='green',align='mid')
    #Plot best fit gaussian over histogram
    y=mlab.normpdf(bins,mu,sigma)
    ax3.plot(bins,y,'r--',linewidth=2)
    ax3.set_xlabel('radial velocity of target (pixels)')
    ax3.set_ylabel('frequency (normalized)')
    rad='RV = %.3f +/- %.3f' %(rv_meas,rv_meas_err)
    corr = 'RV (corr) = %.3f +/- %.3f' %(rv_std + rv_meas, (rv_std_err**2 + rv_meas_err**2)**(0.5))
    #vsinistr = 'VsinI = %.3f +/- %.3f' % (vsini,vsini_err)
    ax3.annotate(rad,xy=(.66,.9),xycoords='axes fraction',xytext=(.66,.9),textcoords='axes fraction',color='black')
    ax3.annotate(corr,xy=(.6,.8),xycoords='axes fraction',xytext=(.60,.8),textcoords='axes fraction',color='black')
    #ax3.annotate(vsinistr,xy=(.6,.6),xycoords='axes fraction',xytext=(.60,.6),textcoords='axes fraction',color='black')
    ax3.annotate('{0:+5.2f} {1: 5.2f}'.format(mu,sigma),xy=(.05,.9),xycoords='axes fraction',xytext=(.05,.9),textcoords='axes fraction',color='black')
    ax3.annotate('{0:5.3f} km/s/pix'.format((2.99792458*10**5)/acoef_std),xy=(.05,.8),xycoords='axes fraction',xytext=(.05,.8),textcoords='axes fraction',color='black')
    fig.subplots_adjust(hspace=.3)
    figname='rv_%s_%s_%d.png' %(std_name,obj_name,order)
    fig.savefig(figname)
    fig.clf()
    plt.close()
    #plt.figure(l+1)
    #plt.hist(pix_shift)
    #END RADIAL VELOCITY FUNCTION -----------------------------------------
    return rv_meas,rv_meas_err