-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcatalog_table.py
109 lines (85 loc) · 3.33 KB
/
catalog_table.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# Matt Bonnyman 2018-07-12
import re
import numpy as np
from astropy.table import Table
def catalog_table(otfile, verbose=False):
    """
    Read an OT browser catalog file and return its columns in an
    '~astropy.table.Table' object.

    The file is tab-separated. Column names are taken from the 9th line of
    the file (index 8 when counting from zero) and observation data begins
    on the 11th line (index 10). Column names are normalised to lower-case
    identifiers; numbers are appended to the end of repeated column names
    ('name', 'name_2', 'name_3', ...).

    Data lines are dropped when field 5 is the string 'null' or when
    field 15 contains 'Acquisition'. An empty field 14 is replaced with
    '00:00:00'.

    Parameters
    ----------
    otfile : str
        OT catalog file name
    verbose : bool, optional
        If True, print the raw and normalised column names, every column,
        and the resulting table.

    Returns
    -------
    '~astropy.table.Table'
        One string column per catalog column, one row per retained
        observation (possibly zero rows).

    Raises
    ------
    ValueError
        If the file has fewer than 9 lines, i.e. no column-name line.
    """
    header = None
    records = []
    with open(otfile, 'r', encoding="utf-8") as readcattext:
        for nline, line in enumerate(readcattext, start=1):
            values = line.rstrip("\n").split('\t')
            if nline == 9:
                header = values
            elif nline > 10:
                # A completely blank line (e.g. at the end of the file)
                # carries no observation; skip it instead of failing on the
                # field lookups below.
                if values == ['']:
                    continue
                # Don't include invalid lines, e.g. obs with no targets have
                # ra=null, no Acq (field meanings per the original author).
                if values[5] == 'null' or 'Acquisition' in values[15]:
                    continue
                # In case no charged time
                if values[14] == '':
                    values[14] = '00:00:00'
                records.append(values)

    if header is None:
        raise ValueError('No column names found in ' + str(otfile)
                         + ': expected them on line 9 of the file')

    colnames = np.array(header)

    # Build the 2-D array once. Growing it row by row with vstack is
    # quadratic, and leaves a 1-D array (which cannot be sliced by column)
    # when there are fewer than two rows.
    if records:
        rows = np.array(records)
    else:
        rows = np.empty((0, len(colnames)), dtype=str)

    if verbose:
        print('\notcat attribute names...', colnames)

    existing_names = _unique_column_names(colnames)

    if verbose:
        for name in existing_names:
            print(name)

    cattable = Table()
    for i, name in enumerate(existing_names):
        column = rows[:, i]
        if verbose:
            print(name, column)
        cattable[name] = column  # add column to table with column name.

    if verbose:
        print('\nFound ' + str(len(rows)) + ' observations in ' + str(otfile))
        print(cattable)

    return cattable


def _unique_column_names(colnames):
    """Turn raw catalog headings into unique, attribute-friendly names."""
    names = []
    taken = set()
    for raw in colnames:
        # remove special characters, trim ends of string, replace whitespace
        # with underscore
        name = raw.replace('.', '')
        name = re.sub(r'\W', ' ', name).strip()
        name = re.sub(r' +', '_', name).lower()

        # python doesn't allow an attribute named 'class'
        if name == 'class':
            name = 'obs_class'

        # add number to end of repeated name, starting at 2
        if name in taken:
            suffix = 2
            while name + '_' + str(suffix) in taken:
                suffix += 1
            name = name + '_' + str(suffix)

        taken.add(name)
        names.append(name)
    return names