-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathgetDataFRF.py
3163 lines (2624 loc) · 150 KB
/
getDataFRF.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# -*- coding: utf-8 -*-
"""This is a class definition designed to get data from the FRF thredds server.
@author: Spicer Bak, PhD
@contact: [email protected]
@organization: USACE CHL FRF
"""
import collections
import datetime as DT
import os
import pickle as pickle
import time
import warnings
from posixpath import join as urljoin
import socket
import netCDF4 as nc
import numpy as np
import pandas as pd
from testbedutils import geoprocess as gp, sblib as sb
def gettime(allEpoch, epochStart, epochEnd):
    """Return the indices of ``allEpoch`` that fall in ``[epochStart, epochEnd)``.

    The window is inclusive at the start and exclusive at the end. Duplicate
    time stamps are NOT filtered here; every matching index is returned.

    Args:
        allEpoch (array-like of float, or None): epoch times to search. Lists and
            tuples are converted to arrays so they compare element-wise.
        epochStart (float): start time in epoch (inclusive)
        epochEnd (float): end time in epoch (exclusive)

    Returns:
        numpy integer array of matching indices, squeezed (0-d when exactly one
        sample matches), or None when nothing matches or when the inputs cannot
        be compared (e.g. ``allEpoch`` is None).

    """
    if allEpoch is None:
        return None
    try:
        # asanyarray keeps ndarray subclasses intact while letting plain
        # sequences take part in the element-wise comparison below
        epochs = np.asanyarray(allEpoch)
        mask = (epochs >= epochStart) & (epochs < epochEnd)
    except TypeError:  # un-comparable input, e.g. None handed as a bound
        return None
    idx = np.argwhere(mask).squeeze()
    if np.size(idx) == 0:
        return None
    return idx
def getnc(dataLoc, callingClass, dtRound=60, **kwargs):
    """Open the THREDDS dataset for ``dataLoc`` and return it with its rounded time axis.

    When ``start`` and ``end`` are datetimes within the same calendar month the
    URL is narrowed to that month's file instead of the aggregated dataset,
    unless one of the path components is listed in ``doNotDrillList``.

    Args:
        dataLoc (str): dataset path that is appended to the server/project URL
        callingClass (str): which class calls this; 'getDataTestBed' switches the
            project folder to 'cmtb'
        dtRound (int): rounding applied to the times returned from the server
            (Default=60 (s))

    Keyword Args:
        start (datetime): if given with ``end``, may select the monthly file
        end (datetime): if given with ``start``, may select the monthly file

    Returns:
        tuple: (ncFile, allEpoch) -- the opened ``netCDF4.Dataset`` (None if it
        could not be opened after 3 tries) and the result of
        ``sb.baseRound(allEpoch, base=dtRound)``.

    Raises:
        NotImplementedError: when ``start`` and ``end`` are both floats (epoch).

    TODO: could use thredds crawler to more efficiently pick files to pull from.
        This would save query time
    """
    start = kwargs.get('start', None)
    end = kwargs.get('end', None)
    FRFdataloc = u'http://134.164.129.55/thredds/dodsC/'
    chlDataLoc = u'https://chldata.erdc.dren.mil/thredds/dodsC/'
    # a list of data sets (just the ncml) that shouldn't drill down to monthly file
    doNotDrillList = ['survey']

    # chose which server to select based on IP
    try:
        ipAddress = socket.gethostbyname(socket.gethostname())
    except OSError:
        # the local host name does not resolve (common on laptops/containers);
        # fall through to the public server rather than aborting the query
        ipAddress = ''
    if ipAddress.startswith('134.164.129'):  # FRF subdomain
        THREDDSloc = FRFdataloc
        pName = u'FRF'
    else:
        THREDDSloc = chlDataLoc
        pName = u'frf'
    if callingClass == 'getDataTestBed':  # overwrite pName if calling for model data
        pName = u'cmtb'

    # now set URL for netCDF file call
    if start is None and end is None:
        ncfileURL = urljoin(THREDDSloc, pName, dataLoc)
    elif isinstance(start, float) and isinstance(end, float):  # then we assume epoch
        raise NotImplementedError(
            'check conversion for floats (epoch time), currently needs to be datetime object')
    elif isinstance(start, DT.datetime) and isinstance(end, DT.datetime) \
            and (start.year == end.year and start.month == end.month) \
            and not any(part in doNotDrillList for part in dataLoc.split('/')):
        # this section dives to the specific month's datafile if it's within the same month
        dataLocSplit = os.path.split(dataLoc)
        fileparts = dataLocSplit[0].split('/')
        if fileparts[0] == 'oceanography':
            field = 'ocean'
        else:
            field = fileparts[0]
        try:  # this will work for get Obs
            fname = "{}-{}_{}_{}_{}{:02d}.nc".format(pName.upper(), field, fileparts[1],
                                                     fileparts[2], start.year,
                                                     start.month)
        except IndexError:  # works for getDataTestBed class
            fname = u"{}-{}_{}_{}{:02d}.nc".format(pName.upper(), field, fileparts[1],
                                                   start.year, start.month)
        ncfileURL = urljoin(THREDDSloc, pName, dataLocSplit[0], str(start.year), fname)
    else:  # function couldn't be more efficient, default to old way
        ncfileURL = urljoin(THREDDSloc, pName, dataLoc)

    # ___________________ go now to open file ___________________________________________
    ncFile, allEpoch = None, None  # stay None when the URL can't be read
    maxTries = 3
    for attempt in range(1, maxTries + 1):
        try:
            ncFile = nc.Dataset(ncfileURL)  # get the netCDF file
            allEpoch = ncFile['time'][:]
            break
        except IOError:
            print('Error reading {}, trying again {}/{}'.format(ncfileURL, attempt, maxTries))
            if attempt < maxTries:
                time.sleep(5)  # seconds to wait before the next try

    return ncFile, sb.baseRound(allEpoch, base=dtRound)
def removeDuplicatesFromDictionary(inputDict):
    """Remove records with repeated time stamps from a data dictionary.

    The time axis is taken from key 'epochtime' when present, otherwise from
    'time' (with a warning, since numeric time is faster to compare). When
    duplicates exist the dictionary is passed through ``sb.reduceDict`` with the
    first-occurrence indices returned by ``np.unique``; those indices are
    ordered by time value. Dictionaries without duplicates, with a scalar time,
    or with fewer than two samples are returned untouched.

    Args:
        inputDict (dict or None): data dictionary to check

    Returns:
        inputDict (dict or None): same dictionary without duplicates in time

    Raises:
        NotImplementedError: if neither 'epochtime' nor 'time' is a key

    References:
        https://www.peterbe.com/plog/fastest-way-to-uniquify-a-list-in-python-3.6
        https://stackoverflow.com/questions/480214/how-do-you-remove-duplicates-from-a-list
        -whilst-preserving-order
    """
    if inputDict is None:
        return inputDict

    if 'epochtime' in inputDict:
        key = 'epochtime'
    elif 'time' in inputDict:
        key = 'time'
        warnings.warn(
            'Removing duplicates is faster using numeric time, failed looking for "epochtime" '
            'key')
    else:
        raise NotImplementedError('Requires keys "time" or "epochtime"')

    times = inputDict[key]
    # a scalar / 0-d time or a single sample cannot contain duplicates
    if np.ndim(times) == 0 or np.size(times) < 2:
        return inputDict

    _, idxUnique = np.unique(times, return_index=True)
    # np.size(set(...)) is always 1, so compare the unique count directly
    if np.size(idxUnique) != np.size(times):
        print(' Removing Duplicates from {}'.format(inputDict.get('name', 'unnamed data')))
        inputDict = sb.reduceDict(inputDict, idxUnique)
    return inputDict
class getObs:
"""Class focused on retrieving observational data."""
def __init__(self, d1, d2):
"""Data are returned in self.dataindex are inclusive at start, exclusive at end."""
# this is active wave gauge list for looping through as needed
self.waveGaugeList = ['waverider-26m', 'waverider-17m', 'awac-11m', '8m-array',
'awac-6m', 'awac-4.5m', 'adop-3.5m', 'xp200m', 'xp150m', 'xp125m']
self.directionalWaveGaugeList = ['waverider-26m', 'waverider-17m', 'awac-11m', '8m-array',
'awac-6m', 'awac-4.5m', 'adop-3.5m']
self.currentsGaugeList = ['awac-11m', 'awac-6m', 'awac-4.5m', 'adop-3.5m']
#self.rawdataloc_wave = []
#self.outputdir = [] # location for outputfiles
self.d1 = d1 # start date for data grab
self.d2 = d2 # end data for data grab
self.timeunits = 'seconds since 1970-01-01 00:00:00'
self.epochd1 = nc.date2num(self.d1, self.timeunits)
self.epochd2 = nc.date2num(self.d2, self.timeunits)
self.callingClass = 'getObs'
self.FRFdataloc = 'http://134.164.129.55/thredds/dodsC/FRF/'
self.crunchDataLoc = 'http://134.164.129.55/thredds/dodsC/cmtb/'
self.chlDataLoc = 'https://chlthredds.erdc.dren.mil/thredds/dodsC/frf/' #
self._comp_time()
assert type(self.d2) == DT.datetime, 'd1 need to be in python "Datetime" data types'
assert type(self.d1) == DT.datetime, 'd2 need to be in python "Datetime" data types'
def _comp_time(self):
"""Test if times are backwards."""
assert self.d2 >= self.d1, 'finish time: end needs to be after start time: start'
def _roundtime(self, dt=None, roundto=60):
"""Round a datetime object to any time laps in seconds.
Author: Thierry Husson 2012 - Use it as you want but don't blame me.
Args:
dt: datetime.datetime object, default now.
roundto: Closest number of SECONDS to round to, default 1 minute
Returns:
datetime object that is rounded
"""
if dt is None:
dt = DT.datetime.now()
seconds = (dt - dt.min).seconds
# // is a floor division, not a comment on following line:
rounding = (seconds + roundto / 2) // roundto * roundto
return dt + DT.timedelta(0, rounding - seconds, -dt.microsecond)
def getWaveSpec(self, gaugenumber=0, roundto=30, removeBadDataFlag=4, **kwargs):
    """Pull wave spectra (and bulk statistics) from the thredds server into a dictionary.

    Directional gauges return measured 2D spectra. When a directional variable
    is missing, the frequency spectrum is spread evenly over four 90 degree
    bins so that 'dWED' is still populated.

    TODO: Set optional date input from function arguments to change self.start self.end

    Args:
        gaugenumber: wave gauge numbers pulled from self.waveGaugeURLlookup
            see help on self.waveGaugeURLlookup for possible gauge names (Default value = 0)
        roundto: this is duration in minutes which data are expected. times are rounded to
            nearest 30 minute increment (data on server are not even times)
            (Default value = 30)
        removeBadDataFlag (int): this will remove data with a directional flag of 3/4
            signaling questionable or failed directional spectra (default = 4, remove
            failed (directional) spectral data time periods)
            valid values: [3, 4, False] False will not remove any data.
            NOTE: when no record is below the threshold nothing is removed.

    Keyword Args:
        "returnAB" (bool): if this is True function will return a's and b's for time
            period (default=False)
        "specOnly" (bool): if this is True function will not return bulk statistics and
            skips the QC-flag filtering (default=False)

    Returns:
        dictionary with following keys for all gauges
            'time' (array): time in datetime objects
            'epochtime' (array): time in epoch time
            'name' (str): gauge name
            'wavefreqbin' (array): wave frequencys associated with 2D spectra
            'wavedirbin' (array): wave direction bin associated with 2D spectra
            'xFRF' (float): x location in FRF coordinates
            'yFRF' (float): y location in FRF coordinates
            'lat' (float): latitude
            'lon' (float): longitude
            'depth' (float): nominal water dept
            'Hs' (array): wave height
            'peakf' (array): wave peak frequency
        When no data are found (or a RuntimeError occurs while reading) a reduced
        dictionary with only 'lat', 'lon' and 'name' is returned, or None when the
        dataset itself could not be opened.

    """
    returnAB = kwargs.get('returnAB', False)
    specOnly = kwargs.get('specOnly', False)
    # Making gauges flexible
    self._waveGaugeURLlookup(gaugenumber)
    # parsing out data of interest in time
    self.ncfile, self.allEpoch = getnc(dataLoc=self.dataloc, callingClass=self.callingClass,
                                       dtRound=roundto * 60, start=self.d1, end=self.d2)
    try:
        self.wavedataindex = gettime(allEpoch=self.allEpoch, epochStart=self.epochd1,
                                     epochEnd=self.epochd2)
        # explicit None test: the previous `np.array(idx).all() is not None`
        # form depends on how NumPy reduces object arrays, which changed in 2.0
        if self.wavedataindex is None:
            raise AssertionError("there's no data in your time period")

        # consistant for all wave gauges: a single hit is 0-d, make it 1-d
        if np.size(self.wavedataindex) == 1:
            self.wavedataindex = np.expand_dims(self.wavedataindex, axis=0)
        self.snaptime = nc.num2date(self.allEpoch[self.wavedataindex],
                                    self.ncfile['time'].units,
                                    only_use_cftime_datetimes=False)
        try:
            depth = self.ncfile['nominalDepth'][:]  # this should always go
        except IndexError:
            try:
                depth = self.ncfile['gaugeDepth'][:]  # non directionalWaveGaugeList gauges
            except IndexError:
                depth = -999  # fill value
        try:
            wave_coords = gp.FRFcoord(self.ncfile['longitude'][:],
                                      self.ncfile['latitude'][:])
        except IndexError:
            wave_coords = gp.FRFcoord(self.ncfile['lon'][:], self.ncfile['lat'][:])

        ###################################################################################
        # now that wave data index is resolved, go get data
        wavespec = {'time': self.snaptime,  # note this is new variable names??
                    'epochtime': self.allEpoch[self.wavedataindex],
                    'name': str(self.ncfile.title),
                    'wavefreqbin': self.ncfile['waveFrequency'][:],
                    'xFRF': wave_coords['xFRF'],
                    'yFRF': wave_coords['yFRF'],
                    'lat': self.ncfile['latitude'][:],
                    'lon': self.ncfile['longitude'][:],
                    'depth': depth,
                    'Hs': self.ncfile['waveHs'][self.wavedataindex],
                    'peakf': 1 / self.ncfile['waveTp'][self.wavedataindex],
                    }
        # now do directionalWaveGaugeList gauge try
        try:  # pull time specific data based on self.wavedataindex
            wavespec['depth'] = self.ncfile['nominalDepth'][:]  # always with directional gauges
            wavespec['wavedirbin'] = self.ncfile['waveDirectionBins'][:]
            wavespec['fspec'] = self.ncfile['waveEnergyDensity'][self.wavedataindex, :]
            wavespec['qcFlagE'] = self.ncfile['qcFlagE'][self.wavedataindex]
            wavespec['qcFlagD'] = self.ncfile['qcFlagD'][self.wavedataindex]
            wavespec['dWED'] = self.ncfile['directionalWaveEnergyDensity'][self.wavedataindex, :, :]
            if wavespec['dWED'].ndim < 3:
                wavespec['dWED'] = np.expand_dims(wavespec['dWED'], axis=0)
                wavespec['fspec'] = np.expand_dims(wavespec['fspec'], axis=0)
            if specOnly is True:
                # pull out here if specOnly is true (saves time)
                return removeDuplicatesFromDictionary(wavespec)
            wavespec['waveDp'] = self.ncfile['wavePeakDirectionPeakFrequency'][self.wavedataindex]
            wavespec['waveDm'] = self.ncfile['waveMeanDirection'][self.wavedataindex]
            wavespec['Tm'] = self.ncfile['waveTm'][self.wavedataindex]
            if returnAB is True:
                wavespec['a1'] = self.ncfile['waveA1Value'][self.wavedataindex, :]
                wavespec['a2'] = self.ncfile['waveA2Value'][self.wavedataindex, :]
                wavespec['b1'] = self.ncfile['waveB1Value'][self.wavedataindex, :]
                wavespec['b2'] = self.ncfile['waveB2Value'][self.wavedataindex, :]
        # this should throw when gauge is non directionalWaveGaugeList
        except IndexError:  # if error its non-directional gauge
            # lidar guages don't have this variable.
            if 'nominalDepth' in self.ncfile.variables.keys():
                wavespec['depth'] = self.ncfile['nominalDepth'][:]  # non directional gauges
            else:
                # leave it blank if lidar wave gauge.
                wavespec['depth'] = np.nan
            wavespec['wavedirbin'] = np.arange(0, 360, 90)  # 90 degree bins
            wavespec['waveDp'] = np.ones_like(self.wavedataindex) * -999
            try:
                wavespec['fspec'] = self.ncfile['waveEnergyDensity'][self.wavedataindex, :]
            except(RuntimeError):  # handle n-1 index error with Thredds
                wavespec['fspec'] = self.ncfile['waveEnergyDensity'][self.wavedataindex[:-1], :]
                wavespec['fspec'] = np.append(wavespec['fspec'],
                                              self.ncfile['waveEnergyDensity'][
                                              self.wavedataindex[-1], :][
                                              np.newaxis, :], axis=0)
            if wavespec['fspec'].ndim < 2:
                wavespec['fspec'] = np.expand_dims(wavespec['fspec'], axis=0)
            # multiply the freq spectra for all directions
            wavespec['dWED'] = np.ones([np.size(self.wavedataindex), np.size(wavespec['wavefreqbin']),
                                        np.size(wavespec['wavedirbin'])])
            wavespec['dWED'] = wavespec['dWED']*wavespec['fspec'][:, :, np.newaxis]/len(wavespec['wavedirbin'])
            if 'qcFlagE' in self.ncfile.variables.keys():
                # lidar wave gauges don't have this variable.
                wavespec['qcFlagE'] = self.ncfile['qcFlagE'][self.wavedataindex]
            else:
                # lidar wave gauges have waterLevelQCFlag and spectralQCFlag
                wavespec['qcFlagE'] = self.ncfile['waterLevelQCFlag'][self.wavedataindex]

        if removeBadDataFlag is not False:
            # Energy should not be needed
            try:
                # find data that are below bad data threshold
                idx = np.argwhere(wavespec['qcFlagD'] < removeBadDataFlag).squeeze()
                if np.size(idx) > 0:
                    wavespec = sb.reduceDict(wavespec, idx)  # if there are values, keep good ones
                idx = np.argwhere(wavespec['qcFlagE'] < removeBadDataFlag).squeeze()
                if np.size(idx) > 0:
                    wavespec = sb.reduceDict(wavespec, idx)
            except(KeyError):
                pass  # non -directional gauge (no 'qcFlagD' key)
        wavespec = removeDuplicatesFromDictionary(wavespec)

    except (RuntimeError, AssertionError):
        print(' ---- Problem Retrieving wave data from %s\n - in this time period start: %s End: %s' % (
            gaugenumber, self.d1, self.d2))
        try:
            wavespec = {'lat': self.ncfile['latitude'][:],
                        'lon': self.ncfile['longitude'][:],
                        'name': str(self.ncfile.title), }
        except TypeError:  # when self.ncfile is None
            wavespec = None
        except (KeyError, IndexError):
            # IndexError added: the lookups above in this method already treat a
            # missing dataset variable as IndexError
            wavespec = {'lat': self.ncfile['lat'][:],
                        'lon': self.ncfile['lon'][:],
                        'name': str(self.ncfile.title), }
    return wavespec
def getCurrents(self, gaugenumber=5, roundto=1):
"""This function pulls down the currents data from the Thredds Server.
Args:
gaugenumber: a string or number to get ocean currents from look up table
gaugenumber = [2, 'awac-11m']
gaugenumber = [3, 'awac-8m']
gaugenumber = [4, 'awac-6m']
gaugenumber = [5, 'awac-4.5m']
gaugenumber = [6, 'adop-3.5m'] (Default value = 5)
roundto: the time over which the wind record exists, ie data is collected in 10 minute
increments
data is rounded to the nearst [roundto] (default 1 min)
Returns:
dict, None if error is encoutered
'name' (str): gauge name
'time' (obj): date time objects time stamp
'epochtime' (float): unix epoch time
'aveU' (array): average cross-shore current from collection
'aveV' (array): average along-shore current from collection
'speed' (array): average speed [m/s]
'dir' (array): current direction (TN)
'lat' (float): latitude of gauge
'lon' (float): longitude of gauge
'xFRF' (float): cross-shore coordinate of gauge
'yFRF' (float): along-shore coordinate of gauge
'depth' (float): gauge nominal depth Depth is calculated by:
depth = -xducerD + blank + (binSize/2) + (numBins * binSize)
'meanP' (array): mean pressure
"""
assert gaugenumber.lower() in [2, 3, 4, 5, 6, 'awac-11m', 'awac-8m', 'awac-6m', 'awac-4.5m',
'adop-3.5m'], 'Input string/number is not a valid gage ' \
'name/number'
if gaugenumber in [2, 'awac-11m']:
# gname = 'AWAC04 - 11m'
self.dataloc = 'oceanography/currents/awac-11m/awac-11m.ncml'
elif gaugenumber in [3, 'awac-8m']:
# gname = 'AWAC 8m'
self.dataloc = 'oceanography/currents/awac-8m/awac-8m.ncml'
elif gaugenumber in [4, 'awac-6m']:
# gname = 'AWAC 6m'
self.dataloc = 'oceanography/currents/awac-6m/awac-6m.ncml'
elif gaugenumber in [5, 'awac-4.5m']:
# gname = 'AWAC 4.5m'
self.dataloc = 'oceanography/currents/awac-4.5m/awac-4.5m.ncml'
elif gaugenumber in [6, 'adop-3.5m']:
# gname = 'Aquadopp 3.5m'
self.dataloc = 'oceanography/currents/adop-3.5m/adop-3.5m.ncml'
else:
raise NameError('Check gauge name')
self.ncfile, self.allEpoch = getnc(dataLoc=self.dataloc, callingClass=self.callingClass,
dtRound=roundto * 60) # start=self.d1, end=self.d2) <
# -- needs to be tested
currdataindex = gettime(allEpoch=self.allEpoch, epochStart=self.epochd1,
epochEnd=self.epochd2)
# _______________________________________
# get the actual current data
if np.size(currdataindex) > 1:
curr_aveU = self.ncfile['aveE'][
currdataindex] # pulling depth averaged Eastward current
curr_aveV = self.ncfile['aveN'][
currdataindex] # pulling depth averaged Northward current
curr_spd = self.ncfile['currentSpeed'][currdataindex] # currents speed [m/s]
curr_dir = self.ncfile['currentDirection'][
currdataindex] # current from direction [deg]
self.curr_time = nc.num2date(self.allEpoch[currdataindex], self.ncfile['time'].units,
self.ncfile['time'].calendar,
only_use_cftime_datetimes=False)
# for num in range(0, len(self.curr_time)):
# self.curr_time[num] = self.roundtime(self.curr_time[num], roundto=roundto * 60)
curr_coords = gp.FRFcoord(self.ncfile['longitude'][0], self.ncfile['latitude'][0])
self.curpacket = {
'name': str(self.ncfile.title),
'time': self.curr_time,
'epochtime': self.allEpoch[currdataindex],
'aveU': curr_aveU,
'aveV': curr_aveV,
'speed': curr_spd,
'dir': curr_dir,
'lat': self.ncfile['latitude'][0],
'lon': self.ncfile['longitude'][0],
'xFRF': curr_coords['xFRF'],
'yFRF': curr_coords['yFRF'],
'depth': self.ncfile['depth'][:],
# Depth is calculated by: depth = -xducerD + blank + (binSize/2) + (numBins *
# binSize)
'meanP': self.ncfile['meanPressure'][currdataindex]}
return self.curpacket
else:
print('ERROR: There is no current data for this time period!!!')
self.curpacket = None
return self.curpacket
def getWind(self, gaugenumber=0, collectionlength=10):
"""This function retrieves the wind data.
Collection length is the time over which the wind record exists ie data is collected in 10 minute increments
data is rounded to the nearst [collectionlength] (default 10 min).
Args:
collectionlength: Default value = 10)
gaugenumber: (Default value = 0)
gauge number in ['derived', 'Derived', 0]
'932 wind gauge' in [1]
'832 wind gauge' in [2]
'732 wind gauge' in [3]
Returns:
dict, will return None if an error is encountered
'name' (str): station name
'time' (obj): datetime object time stamp
'vecspeed' (array): Vector Averaged Wind Speed
'windspeed' (array): Mean Wind Speed
'windspeed_corrected' (array): corrected 10m windspeed
'winddir' (array): Wind direction from true north
'windgust' (array): 5 second largest mean wind speed
'qcflagS' (array): QC flag for speed
'qcflagD' (array): qcflag for directions
'stdspeed' (array): std dev of 10 min wind record
'minspeed' (array): min speed in 10 min avg
'maxspeed' (array): max speed in 10 min avg
'sustspeed' (array): 1 min largest mean wind speed
'lat' (float): latitude
'lon' (float): longitde
'gaugeht' (float): gauge height for uncorrected wind measurements
"""
# Making gauges flexible
# different Gauges
if gaugenumber in ['derived', 'Derived', 0]:
self.dataloc = 'meteorology/wind/derived/derived.ncml' # 932 wind gauge
gname = 'Derived wind gauge '
elif gaugenumber == 1:
self.dataloc = 'meteorology/wind/D932/D932.ncml' # 932 wind gauge
gname = '932 wind gauge'
elif gaugenumber == 2:
gname = '832 wind gauge'
self.dataloc = 'meteorology/wind/D832/D832.ncml'
elif gaugenumber == 3:
gname = '732 wind gauge'
self.dataloc = 'meteorology/wind/D732/D732.ncml'
else:
raise NameError('Specifiy proper Gauge number')
self.ncfile, self.allEpoch = getnc(dataLoc=self.dataloc, callingClass=self.callingClass,
dtRound=collectionlength * 60)
self.winddataindex = gettime(allEpoch=self.allEpoch, epochStart=self.epochd1,
epochEnd=self.epochd2)
# remove nan's that shouldn't be there
# ______________________________________
if np.size(self.winddataindex) > 0 and self.winddataindex is not None:
# TODO: why are we removing nan's here. this should be resolved down stream if they're causing problems
# do they even come up as nans? i thought they returned as masked arrays
self.winddataindex = self.winddataindex[~np.isnan(self.ncfile['windDirection'][self.winddataindex])]
if np.size(self.winddataindex) == 0:
# return None is he wind direction is associated with the wind is no good!
return None
# MPG: moved inside if statement b/c call to gettime possibly returns None.
self.winddataindex = self.winddataindex[~np.isnan(self.ncfile['windDirection'][self.winddataindex])]
windvecspd = self.ncfile['vectorSpeed'][self.winddataindex]
windspeed = self.ncfile['windSpeed'][self.winddataindex] # wind speed
winddir = self.ncfile['windDirection'][self.winddataindex] # wind direction
windgust = self.ncfile['windGust'][self.winddataindex] # 5 sec largest mean speed
stdspeed = self.ncfile['stdWindSpeed'][self.winddataindex] # std dev of 10 min avg
qcflagS = self.ncfile['qcFlagS'][self.winddataindex] # qc flag
qcflagD = self.ncfile['qcFlagD'][self.winddataindex]
minspeed = self.ncfile['minWindSpeed'][self.winddataindex] # min wind speed in 10 min avg
maxspeed = self.ncfile['maxWindSpeed'][self.winddataindex] # max wind speed in 10 min avg
sustspeed = self.ncfile['sustWindSpeed'][self.winddataindex] # 1 minute largest mean wind speed
gaugeht = self.ncfile.geospatial_vertical_max
self.windtime = nc.num2date(self.allEpoch[self.winddataindex],
self.ncfile['time'].units,
only_use_cftime_datetimes=False)
# correcting for wind elevations from Johnson (1999) - Simple Expressions for
# correcting wind speed data
# for elevation
if gaugeht <= 20:
windspeed_corrected = windspeed * (10 / gaugeht) ** (1 / 7)
else:
windspeed_corrected = 'No Corrections done for gauges over 20m, please read: ' \
'\nJohnson (1999) - ' \
'Simple Expressions for correcting wind speed data for ' \
'elevation'
windpacket = {
'name': str(self.ncfile.title), # station name
'time': self.windtime, # time
'epochtime': self.allEpoch[self.winddataindex],
'vecspeed': windvecspd, # Vector Averaged Wind Speed
'windspeed': windspeed, # Mean Wind Speed
'windspeed_corrected': windspeed_corrected, # corrected windspeed
'winddir': winddir, # Wind direction from true nort
'windgust': windgust, # 5 second largest mean wind speed
'qcflagS': qcflagS, # QC flag
'qcflagD': qcflagD,
'stdspeed': stdspeed, # std dev of 10 min wind record
'minspeed': minspeed, # min speed in 10 min avg
'maxspeed': maxspeed, # max speed in 10 min avg
'sustspeed': sustspeed, # 1 min largest mean wind speed
'lat': self.ncfile['latitude'][:], # latitude
'lon': self.ncfile['longitude'][:], # longitde
'gaugeht': gaugeht,
}
if (windpacket['qcflagD'] == 3).all() or (windpacket['qcflagS'] == 3).all():
print("Wind querey returned all bad data for speed or direction")
windpacket = None
return windpacket
else:
print(' ---- ERROR: Problem finding wind !!!')
return None
def getWL(self, collectionlength=6):
    """Retrieve the water level data from the server.

    WL data on server is NAVD88

    collection length is the time over which the wind record exists
    ie data is collected in 10 minute increments
    data is rounded to the nearst [collectionlength] (default 6 min)

    A query that matches exactly one sample returns a packet holding
    length-one arrays (the index is promoted to 1-D, as getWaveSpec does)
    instead of raising.

    Args:
        collectionlength (int): dictates what value to round time to (Default value = 6)

    Returns:
        dictionary with keys (None when no data fall in the time period)
            'name': gauge name - taken from title
            'WL': measured water level (NAVD88) [m]
            'time': datetime object
            'epochtime': time in seconds since 1970-01-01 (float)
            'lat': latitude
            'lon': longitude
            'residual': water level residual
            'predictedWL': predicted tide

    """
    # this is the back end of the url for waterlevel
    self.dataloc = 'oceanography/waterlevel/eopNoaaTide/eopNoaaTide.ncml'
    self.ncfile, self.allEpoch = getnc(dataLoc=self.dataloc, callingClass=self.callingClass,
                                       dtRound=collectionlength * 60, start=self.d1,
                                       end=self.d2)
    self.WLdataindex = gettime(allEpoch=self.allEpoch, epochStart=self.epochd1,
                               epochEnd=self.epochd2)

    if self.WLdataindex is None or np.size(self.WLdataindex) == 0:
        print('ERROR: there is no WATER level Data for this time period!!!')
        self.WLpacket = None
        return self.WLpacket

    # a single match comes back 0-d; make it 1-D so every field below is an array
    self.WLdataindex = np.atleast_1d(self.WLdataindex)
    self.WLtime = nc.num2date(self.allEpoch[self.WLdataindex], self.ncfile['time'].units,
                              only_use_cftime_datetimes=False)
    self.WLpacket = {
        'name': str(self.ncfile.title),
        'WL': self.ncfile['waterLevel'][self.WLdataindex],
        # why does this call take so long for even 10 data points?
        'time': self.WLtime,
        'epochtime': self.allEpoch[self.WLdataindex],
        'lat': self.ncfile['latitude'][:],
        'lon': self.ncfile['longitude'][:],
        'predictedWL': self.ncfile['predictedWaterLevel'][self.WLdataindex],
    }
    # this is faster to calculate myself, than pull from server
    self.WLpacket['residual'] = self.WLpacket['WL'] - self.WLpacket['predictedWL']
    return self.WLpacket
def getGaugeWL(self, gaugenumber=5, roundto=1):
    """Pull the water level record at a particular gauge for the requested time window.

    Side effects: sets ``self.ncfile``, ``self.allEpoch``, ``self.wldataindex`` and, when
    data are found, ``self.snaptime``.

    Args:
        gaugenumber (int/str): describes the gauge location (default=5 End of pier)
        roundto (int): interval in minutes that record times are rounded to; handed to
            ``getnc`` converted to seconds (default 1 min)

    Returns:
        wlpacket (dict) with keys below
            'name': gagename
            'time': datetime of the measurements
            'epochtime': epochtime of the measurements
            'wl': water level at the gage (NAVD88?)
            'lat': latitude of the gage
            'lon': longitude of the gage
            'xFRF': xFRF position of the gage
            'yFRF': yFRF position of the gage
        When there are no data in the time window (or a RuntimeError/AssertionError occurs
        while retrieving them) a message is printed and a reduced dict holding only
        'lat', 'lon' and 'name' is returned.  An empty (size 0) index yields None.
    """
    # Making gauges flexible
    # presumably this resolves the gauge id and sets self.dataloc, which is read below
    self._wlGageURLlookup(gaugenumber)
    # parsing out data of interest in time
    self.ncfile, self.allEpoch = getnc(dataLoc=self.dataloc, callingClass=self.callingClass,
                                       dtRound=roundto * 60)

    def _gauge_lat_lon():
        """Return (lat, lon) from whichever variable naming convention the file uses."""
        try:
            return self.ncfile['latitude'][:], self.ncfile['longitude'][:]
        except (IndexError, KeyError):
            # the original code treats IndexError as "variable name not in this file";
            # KeyError is accepted as well.  Fall back to the short names.
            return self.ncfile['lat'][:], self.ncfile['lon'][:]

    try:
        self.wldataindex = gettime(allEpoch=self.allEpoch, epochStart=self.epochd1,
                                   epochEnd=self.epochd2)
        # Explicit None test: comparing np.array(index).all() against None depends on how
        # numpy reduces object arrays (NumPy >= 2 returns a bool, hiding the None) and an
        # assert statement disappears under ``python -O``.
        if self.wldataindex is not None:
            if np.size(self.wldataindex) < 1:
                # an empty index has always produced no packet at all
                return None
            sample_epochs = self.allEpoch[self.wldataindex]
            self.snaptime = nc.num2date(sample_epochs, self.ncfile['time'].units,
                                        only_use_cftime_datetimes=False)
            # resolve lat/lon once so the FRF coordinates and the packet entries always
            # come from the same (existing) variables
            gauge_lat, gauge_lon = _gauge_lat_lon()
            wl_coords = gp.FRFcoord(gauge_lon, gauge_lat)
            return {'time': self.snaptime,
                    'epochtime': sample_epochs,
                    'name': str(self.ncfile.title),
                    'xFRF': wl_coords['xFRF'],
                    'yFRF': wl_coords['yFRF'],
                    'lat': gauge_lat,
                    'lon': gauge_lon,
                    'wl': self.ncfile['waterLevel'][self.wldataindex]}
    except (RuntimeError, AssertionError):
        # best effort: fall through to the reduced "no data" packet below
        pass

    print(
        ' ---- Problem Retrieving water level data from %s\n    - in this time period '
        'start: %s  End: %s'
        % (gaugenumber, self.d1, self.d2))
    gauge_lat, gauge_lon = _gauge_lat_lon()
    return {'lat': gauge_lat,
            'lon': gauge_lon,
            'name': str(self.ncfile.title), }
def getBathyTransectFromNC(self, profilenumbers=None, method=1, forceReturnAll=False):
    """Get survey transect (bathymetry) data from the server.

    Selection logic:
        * ``forceReturnAll`` true: every point whose time falls inside the window is
          returned as is.
        * otherwise one reference point is chosen - the first point inside the window,
          or, when the window is empty, the point selected by ``method`` - and every point
          sharing that point's ``surveyNumber`` is returned (i.e. one whole survey).

    Side effects: sets ``self.dataloc``, ``self.ncfile``, ``self.allEpoch`` and
    ``self.bathydataindex``.

    Args:
        profilenumbers: profile number(s) to keep; all of them must exist in the selected
            points (Default value = None, keep everything)
        method: bathymetry selection method used when no survey lies inside the window
            (Default value = 1)
            method == 1 - > 'Bathymetry is taken as closest in HISTORY - operational'
            method == 0 - > 'Bathymetry is taken as closest in TIME - NON-operational'
        forceReturnAll (bool): (Default Value = False)
            This will force the survey to take and return all indices between start and
            end, not the single survey

    Returns:
        dictionary with keys, will return None if call fails
            'xFRF': x coordinate in frf
            'yFRF': y coordiante in Frf
            'elevation': bathy elevation
            'epochtime': time as stored in self.allEpoch
            'time': time in date time object
            'lat': lat,
            'lon': lon,
            'northing': NC northing
            'easting': NC easting
            'profileNumber': FRF profile number
            'surveyNumber': FRF survey Number
            'Ellipsoid': which ellipsoid is used

    Raises:
        ValueError: the window is empty and either no survey precedes the start date
            (method 1) or ``method`` is neither 0 nor 1
        NotImplementedError: the selected index is empty
        AssertionError: a requested profile number is not among the selected points
    """
    # location of the survey transects on the server
    self.dataloc = 'geomorphology/elevationTransects/survey/surveyTransects.ncml'
    self.ncfile, self.allEpoch = getnc(dataLoc=self.dataloc, callingClass=self.callingClass,
                                       dtRound=1 * 60)
    try:
        self.bathydataindex = gettime(allEpoch=self.allEpoch, epochStart=self.epochd1,
                                      epochEnd=self.epochd2)
    except IOError:  # when data are not on CHL thredds
        # returning None object is convention and must be followed/handled down the line
        self.bathydataindex = None

    def _latest_index_before_start():
        """Index of the most recent survey point strictly before epochd1, else None."""
        offsets = np.asarray(self.ncfile['time'][:]) - self.epochd1
        earlier = np.flatnonzero(offsets < 0)
        if earlier.size == 0:
            return None
        # negative offsets are historical; the largest of them is the closest in history
        return earlier[np.argmax(offsets[earlier])]

    # evaluate the flag once so both places that consult it agree
    return_all = bool(forceReturnAll)
    # np.size(None) is 1, so a None index must be counted as zero matches explicitly
    n_in_window = 0 if self.bathydataindex is None else np.size(self.bathydataindex)

    # ---- pick the point(s) of interest
    if return_all:
        idx = self.bathydataindex
    elif n_in_window >= 1:
        # a survey exists between d1 and d2: its first point identifies the survey
        idx = np.ravel(self.bathydataindex)[0]
    elif method == 1:
        # 'Bathymetry is taken as closest in HISTORY - operational'
        idx = _latest_index_before_start()
        if idx is None:
            raise ValueError('there is no survey before the requested start date')
    elif method == 0:
        # 'Bathymetry is taken as closest in TIME - NON-operational'
        # compare epoch against epoch (self.d1 is not an epoch number)
        idx = np.argmin(np.abs(np.asarray(self.ncfile['time'][:]) - self.epochd1))
    else:
        raise ValueError('method must be 0 (closest in time) or 1 (closest in history), '
                         'got %r' % (method,))

    if not return_all:
        # find the whole survey (via surveyNumber) and assign idx to return the whole survey
        survey_numbers = self.ncfile['surveyNumber'][:]
        idx = np.argwhere(survey_numbers == survey_numbers[idx]).squeeze()

    if idx is not None and np.size(idx) == 0:
        latest = _latest_index_before_start()
        if latest is not None:
            print('The closest in history to your start date is %s\n' % nc.num2date(
                self.ncfile['time'][latest], self.ncfile['time'].units,
                only_use_cftime_datetimes=False))
        raise NotImplementedError('Please End new simulation with the date above')

    # ---- isolate specific profile numbers if necessary
    if profilenumbers is not None and idx is not None:
        # squeeze() above can leave a 0-d index, which cannot be boolean-masked
        idx = np.atleast_1d(idx)
        selected_profiles = self.ncfile['profileNumber'][idx]
        if not pd.Series(profilenumbers).isin(np.unique(selected_profiles)).all():
            # AssertionError kept for callers; raised explicitly so it survives ``-O``
            raise AssertionError("given profiles don't match profiles in database")
        # boolean true/false of time and profile number
        idx = idx[np.isin(selected_profiles, profilenumbers)]

    # ---- now retrieve data with idx
    if idx is None or np.size(idx) == 0:
        return None
    survey_time = nc.num2date(self.ncfile['time'][idx], self.ncfile['time'].units,
                              only_use_cftime_datetimes=False)
    return {'xFRF': self.ncfile['xFRF'][idx],
            'yFRF': self.ncfile['yFRF'][idx],
            'elevation': self.ncfile['elevation'][idx],
            'epochtime': self.allEpoch[idx],
            'time': survey_time,
            'lat': self.ncfile['lat'][idx],
            'lon': self.ncfile['lon'][idx],
            'northing': self.ncfile['northing'][idx],
            'easting': self.ncfile['easting'][idx],
            'profileNumber': self.ncfile['profileNumber'][idx],
            'surveyNumber': self.ncfile['surveyNumber'][idx],
            'Ellipsoid': self.ncfile['Ellipsoid'][idx], }
def getBathyTransectProfNum(self, method=1):
"""This function gets the bathymetric data from the server, currently designed for the bathy duck experiment.
Just gets profile numbers only.
Args:
method (int): approach to select which method of how to select bathymetry
method == 1 - > 'Bathymetry is taken as closest in HISTORY - operational'
method == 0 - > 'Bathymetry is taken as closest in TIME - NON-operational' (
Default value = 1)
Returns:
prof_nums (array): an array of profile numbers over which a single survey was taken
"""
# do check here on profile numbers
# acceptableProfileNumbers = [None, ]
self.dataloc = 'geomorphology/elevationTransects/survey/surveyTransects.ncml' # location
# of the gridded surveys
self.ncfile, self.allEpoch = getnc(dataLoc=self.dataloc, callingClass=self.callingClass,
dtRound=1 * 60)
try:
self.bathydataindex = gettime(allEpoch=self.allEpoch, epochStart=self.epochd1,
epochEnd=self.epochd2)