forked from erdc/getdatatestbed
-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathgetOutsideData.py
226 lines (200 loc) · 10.9 KB
/
getOutsideData.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
import datetime as DT
import netCDF4 as nc
import os
import numpy as np
import sys
class forecastData:
def __init__(self, d1):
    """Initialize a forecastData instance anchored at forecast start time d1.

    Data returned by the getter methods are inclusive at start and end.

    Args:
        d1 (datetime.datetime): start time of the forecast data grab

    Raises:
        AssertionError: if d1 is not a datetime.datetime instance
    """
    # Validate the input BEFORE it is used (the original asserted only after
    # nc.date2num had already consumed d1, and its message referred to "end"
    # even though d1 is the start date).
    assert isinstance(d1, DT.datetime), 'd1 (start time) must be a python "datetime.datetime" object'
    self.rawdataloc_wave = []
    self.outputdir = []  # location for output files
    self.d1 = d1  # start date for data grab
    self.timeunits = 'seconds since 1970-01-01 00:00:00'
    self.epochd1 = nc.date2num(self.d1, self.timeunits)  # start date as epoch seconds
    # THREDDS / FTP endpoints used by the getter methods
    self.dataLocFRF = 'http://134.164.129.55/thredds/dodsC/FRF/'
    self.dataLocTB = 'http://134.164.129.62:8080/thredds/dodsC/CMTB'
    self.dataLocCHL = 'https://chlthredds.erdc.dren.mil/thredds/dodsC/frf/' #'http://10.200.23.50/thredds/dodsC/frf/'
    self.dataLocNCEP = 'http://nomads.ncep.noaa.gov/pub/data/nccf/com/wave/prod/'#ftpprd.ncep.noaa.gov/pub/data/nccf/com/wave/prod/multi_1.
    self.dataLocECWMF = 'ftp://data-portal.ecmwf.int/20170808120000/' # ECMWF forecasts
def getWW3(self, forecastHour, buoyNumber=44100):
    """Fetch a WAVEWATCH III spectral forecast from the NCEP nomads server.

    The spectra are transformed from oceanographic to meteorological
    coordinates and from m^2 s rad^-1 to m^2 s deg^-1 to match FRF gauge
    data conventions, then sorted ascending by frequency and direction.
    (The transformation logic might be more appropriately located in
    cmtb/PrepData.)

    Args:
        forecastHour (str): forecast cycle hour, e.g. '06' (zero-padded here)
        buoyNumber (int): WW3 output point / buoy number (Default value = 44100)

    Returns:
        dict with keys:
            'wavedirbin': sorted wave directions [deg, meteorological]
            'wavefreqbin': sorted wave frequencies [Hz]
            'buoyNum': buoy number parsed from the file
            'dWED': directional wave energy density [t, freq, dir]
            'lat': latitude
            'lon': longitude
            'Depth': water depth at the point
            'time': np.array of datetimes, one per spectrum
    """
    import urllib.request, urllib.parse, urllib.error
    assert type(forecastHour) is str, 'Forecast hour variable must be a string'
    forecastHour = forecastHour.zfill(2)
    urlBack = '/bulls.t%sz/' %forecastHour +'multi_1.%d.spec' %buoyNumber
    ftpURL = self.dataLocNCEP + 'multi_1.' + self.d1.strftime('%Y%m%d') + urlBack
    # BUG FIX: urlopen yields bytes in python 3; decode so the string
    # comparisons/strptime below work instead of raising TypeError.
    with urllib.request.urlopen(ftpURL) as ftpstream:
        lines = [line.decode() for line in ftpstream.readlines()]
    # # # # # # # # # # # # now the forecast spectra are in lines # # # # # # # # # # # #
    frequencies, directions, forcastDates, forcastDateLines = [], [], [], []
    for ii, line in enumerate(lines):  # read through each line
        split = line.split()
        if split[0].strip("'") == 'WAVEWATCH':  # header of the file
            nFreq = int(split[3])  # number of frequencies
            nDir = int(split[4])   # number of directions
        # BUG FIX: parenthesized to match the direction-line test below; the
        # original `A or B and C` let any 8-token line extend frequencies even
        # after nFreq values had been collected.
        elif (len(split) == 8 or nFreq - len(frequencies) == len(split)) and len(frequencies) != nFreq:
            frequencies.extend(split)
        elif (len(split) == 7 or nDir - len(directions) == len(split)) and len(directions) != nDir:
            directions.extend(split)
        elif len(split[0]) == 8 and len(split) == 2:  # date line starting a spectrum
            # include time component (second entry in date line)
            timestampstr = split[0] + split[1]
            timestamp = DT.datetime.strptime(timestampstr, '%Y%m%d%H%M%S')
            forcastDates.append(timestamp)
            forcastDateLines.append(ii)
    # convert directions and frequencies from list(string) to np.array(float)
    directions = np.array(directions).astype('float')
    frequencies = np.array(frequencies).astype('float')
    # convert directions from radians to degrees
    directions = np.rad2deg(directions)
    # convert directions from oceanographic to meteorological convention
    # to be consistent w/ FRF wave gauge data
    small_angle = directions < 180.0
    directions[small_angle] = 180.0 + directions[small_angle]
    directions[~small_angle] = directions[~small_angle] - 180.0
    # sort directions and frequencies ascending
    didx = directions.argsort()
    fidx = frequencies.argsort()
    directions = directions[didx]
    frequencies = frequencies[fidx]
    ## now go back through 'lines' and parse spectra
    spectra = np.ones((len(forcastDateLines), nFreq, nDir), dtype=float) * 1e-8
    buoyNum, lon, lat, Depth = [], [], [], []
    # BUG FIX: tt is simply the position of this date line in the record list;
    # the original derived it from the absolute file line number with a wrong
    # divisor, scattering spectra into wrong (or out-of-range) time slots.
    for tt, ll in enumerate(forcastDateLines):
        numLinesPerSpec = np.ceil(nFreq*float(nDir)/len(lines[ll+2].split())).astype(int)
        buoyStats = lines[ll+1].split()
        if ll == forcastDateLines[0]:  # point metadata doesn't change; grab it once
            buoyNum = int(buoyStats[0].strip("'"))
            lon = float(buoyStats[2])
            lat = float(buoyStats[3])
            Depth = float(buoyStats[4])
        linear = []
        for ss in range(numLinesPerSpec):
            linear.extend(lines[ss + ll + 2].split())
        spectra[tt] = np.array(linear, dtype=float).reshape(nDir, nFreq).T
        spectra[tt] = spectra[tt][fidx][:,didx]
    # convert dWED from rad^-1 to deg^-1 to be consistent w/ FRF wave gauge data
    # NOTE(review): a pure rad->deg density conversion would be *pi/180; the
    # extra factor of 2 here is retained from the original — confirm intent.
    spectra = spectra*2*np.pi / 180.0
    out = {'wavedirbin': directions,
           'wavefreqbin': frequencies,
           'buoyNum': buoyNum,
           'dWED': spectra,
           'lat': lat,
           'lon': lon,
           'Depth': Depth,
           'time': np.array(forcastDates)}
    return out
def get_CbathyFromFTP(self, dlist, path, timex=True):
    """Download argus cBathy (or timex merge) files from the OSU argus FTP server.

    Times must be on the hour or half hour; dates are taken from dlist,
    which may be a single datetime or a list of datetimes.
    # written by Ty Hesser
    # modified by Spicer Bak

    Args:
        dlist (datetime or list/np.array of datetime): times for which to
            collect cbathy data
        path (str): directory to put the downloaded file(s)
        timex (bool): if True, grab the merged timex image for the time
            instead of the cBathy .mat file (Default value = True)

    Returns:
        oflist (list): file names downloaded by this call; files already
            present on disk are skipped and not re-listed
    """
    curdir = os.getcwd()  # remember the cwd so it can be restored on exit
    if not os.path.exists(path):
        os.mkdir(path)
    os.chdir(path)  # wget writes into the cwd
    # month number -> month string, as used in the server's folder/file names
    mon = {1: 'Jan', 2: 'Feb', 3: 'Mar', 4: 'Apr', 5: 'May', 6: 'Jun', 7: 'Jul',
           8: 'Aug', 9: 'Sep', 10: 'Oct', 11: 'Nov', 12: 'Dec'}
    # weekday number -> day-of-week string, as used in the file names
    dow = {0: 'Mon', 1: 'Tue', 2: 'Wed', 3: 'Thu', 4: 'Fri', 5: 'Sat', 6: 'Sun'}
    # quick data check
    if type(dlist) == DT.datetime:
        dlist = [dlist]  # making it into a list if its a single value
    assert type(dlist[0]) == DT.datetime, 'This function requires datetime dataList'
    oflist = []
    try:
        for ii in range(0, len(dlist)):
            if timex == True:
                # timex products are processed on the minute/31 past the hour
                din_m = DT.timedelta(0, seconds=1) + dlist[ii]
            else:
                din_m = dlist[ii] - DT.timedelta(0, 60)
            # build the timestamp strings the server's naming scheme expects
            yearc = din_m.strftime('%Y')   # string year
            monthc = mon[din_m.month]      # month as 3-letter string
            tt = din_m.timetuple()         # time tuple for yday/wday/min/sec
            dayc = din_m.strftime('%d')    # day string
            hourc = din_m.strftime('%H')   # hour string
            mmc = str(tt.tm_min).zfill(2)  # zero-padded minute string
            ssc = str(tt.tm_sec).zfill(2)  # zero-padded second string
            # epoch time leads every file name
            eptm = str(int(nc.date2num(din_m, 'seconds since 1970-01-01')))
            # creating the url to download the cbathy data from
            # frfserver = "'\\134.164.129.42\cil\argus02b\'" # at the frf
            OSUserver = "ftp://cil-ftp.coas.oregonstate.edu/pub/argus02b/"  # the oregon state server
            svr = OSUserver + yearc + "/cx/"  # server base
            daynum = str(tt.tm_yday)  # day number in a year
            fldr = daynum + "_" + monthc + "." + dayc  # the folder (date) structure used on the server
            fname = "/" + eptm + '.' + dow[tt.tm_wday] + '.' + monthc + '.' + dayc + \
                    "_" + hourc + '_' + mmc + \
                    '_' + ssc + '.GMT.' + yearc + ".argus02b.cx.cBathy.mat"
            if timex == True:
                fname = '/*timex.merge.mat'  # FTP glob; wget expands it server-side
            addr = svr + fldr + fname
            print("checking " + fldr + fname)
            if os.path.isfile(fname[1:]):
                print("already downloaded: %s" % fname)
            else:
                # NOTE(review): addr is built from constants and dates so shell
                # injection risk is low, but subprocess.run(['wget', addr])
                # would still be the safer call than os.system.
                # BUG FIX: record the file on success — previously nothing was
                # ever appended and the documented return was always [].
                if os.system('wget %s' % addr) == 0:
                    oflist.append(fname[1:])
    finally:
        # always restore the caller's working directory, even on error
        os.chdir(curdir)
    return oflist