-
Notifications
You must be signed in to change notification settings - Fork 1
/
master_processing.py
93 lines (90 loc) · 3.96 KB
/
master_processing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
"""
Processes data
arguments: (infile, outfile, filt=true, filterval=250.0, average=true, split=false, splitval=20.0)
infile: string - name of file to process
outfile: string - name of file to output. If split=true, this name will have
less and more prefixes for the two files
filt: boolean - whether to remove rows based on final value
filterval: float - what value to remove above
average: boolean - whether to average all hour measurements into one
split: boolean - whether to split into two files
splitval: float - what reflectivity value to split at if split = true
"""
import csv
import numpy
# Large CSV files can hold fields longer than the csv module's default limit,
# so raise the maximum allowed field size before any file is parsed. See
# http://lethain.com/handling-very-large-csv-and-xml-files-in-python/
csv.field_size_limit(10 ** 9)
def _average_group(rows):
    """Return one row holding the per-column mean of *rows*.

    Empty cells are left out of the mean. A column with no non-empty cell
    yields NaN (numpy.mean of an empty list). Raises ValueError when a
    non-empty cell cannot be converted to float.
    """
    table = numpy.array(rows)
    averaged = []
    for index in range(len(table[0])):
        values = [float(cell) for cell in table[:, index] if len(cell) > 0]
        averaged.append(numpy.mean(values))
    return averaged


def _prepare_group(rows, filt, filterval, average, split, splitval):
    """Filter and optionally average the rows of one ID.

    Returns ``(output_rows, goes_to_more)``, or ``None`` when the group must
    be dropped: either it was filtered out, or a value that had to be read
    as a number was not numeric (e.g. a header row).
    """
    try:
        # Written as "not (x <= limit)" so a NaN final value is dropped too.
        if filt and not (float(rows[-1][-1]) <= filterval):
            return None
        if average:
            output_rows = [_average_group(rows)]
            # Column 3 is only inspected when splitting, so narrower rows
            # remain acceptable in the single-file mode.
            split_key = output_rows[0][3] if split else None
        else:
            output_rows = rows
            split_key = float(rows[-1][3]) if split else None
    except ValueError:
        # wasn't a number, just ignore this group
        return None
    return output_rows, bool(split and split_key >= splitval)


def processor(infile, outfile, filt=True, filterval=250.0, average=True, split=False, splitval=20.0):
    """Group consecutive CSV rows by ID, then filter, average and/or split them.

    Rows that follow each other and share the same first column form one
    group. Each group is handled as a unit and written to the output.

    Args:
        infile: name of the CSV file to read.
        outfile: name of the CSV file to write. When ``split`` is true, two
            files are written instead, with "less" and "more" prefixed to
            the file name (the directory part of the path is kept).
        filt: when true, drop a group unless the last column of its last
            row is <= ``filterval``.
        filterval: threshold used by ``filt``.
        average: when true, write a single row of column means per group
            (empty cells ignored); otherwise write the group's rows as-is.
        split: when true, send each group to the "more" file if column 3
            (of the averaged row, or of the group's last row when not
            averaging) is >= ``splitval``, else to the "less" file.
        splitval: threshold used by ``split``.

    Groups whose required values are not numeric are skipped silently.
    Blank lines in the input are ignored, and an empty input produces empty
    output files. All files are closed before the function returns.
    """
    # Local imports keep this block self-contained without touching the
    # file's top-level import list.
    import contextlib
    import itertools
    import os

    with contextlib.ExitStack() as stack:

        def open_writer(path):
            # newline="" is what the csv module expects for files it writes.
            handle = stack.enter_context(open(path, "w", newline=""))
            return csv.writer(handle, delimiter=",")

        source = stack.enter_context(open(infile, "r", newline=""))
        if split:
            # Prefix only the file name so an outfile with a directory part
            # still lands in that directory.
            directory, filename = os.path.split(outfile)
            lesswriter = open_writer(os.path.join(directory, "less" + filename))
            morewriter = open_writer(os.path.join(directory, "more" + filename))
        else:
            lesswriter = morewriter = open_writer(outfile)

        # csv.reader yields [] for blank lines; they carry no ID, so skip them.
        records = (row for row in csv.reader(source, delimiter=",") if row)
        # groupby clumps *consecutive* rows with the same ID, and handles the
        # final group exactly like every other one.
        for _, group in itertools.groupby(records, key=lambda row: row[0]):
            prepared = _prepare_group(list(group), filt, filterval, average, split, splitval)
            if prepared is None:
                continue
            output_rows, goes_to_more = prepared
            writer = morewriter if goes_to_more else lesswriter
            writer.writerows(output_rows)