Skip to content

Commit

Permalink
Merge pull request #93 from NCAR/devel
Browse files — browse the repository at this point in the history
Merge to master for version 0.2.8
Branch information:
sherimickelson authored May 2, 2019
2 parents 1cbe2f1 + ccc6e58 commit 6f5c11e
Show file tree
Hide file tree
Showing 15 changed files with 839 additions and 150 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ temp
build
dist
PyConform.egg-info
.idea/
18 changes: 0 additions & 18 deletions .project

This file was deleted.

8 changes: 0 additions & 8 deletions .pydevproject

This file was deleted.

2 changes: 0 additions & 2 deletions .settings/org.eclipse.core.resources.prefs

This file was deleted.

83 changes: 59 additions & 24 deletions scripts/iconform
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ from dreqPy import dreq
import uuid


version = 'v'+str(datetime.datetime.now().year)+str(datetime.datetime.now().month).zfill(2)+str(datetime.datetime.now().day).zfill(2)
#version = 'v'+str(datetime.datetime.now().year)+str(datetime.datetime.now().month).zfill(2)+str(datetime.datetime.now().day).zfill(2)
version = 'v20190309'

# Map netcdf types to python types
#data_types = {'char': 'char', 'byte': 'int8', 'short': 'int16', 'int': 'int32',
Expand Down Expand Up @@ -191,30 +192,54 @@ def fill_missing_glob_attributes(attr, table, v, grids):
attr.pop("external_variables")

if "branch_method" in attr.keys():
if "none" not in attr["branch_method"]:
if "no parent" not in attr["branch_method"]:
if "branch_time_in_child" in attr.keys():
if len(attr["branch_time_in_child"])>0:
attr["branch_time_in_child"] = float(attr["branch_time_in_child"].split('D')[0])
try:
attr["branch_time_in_child"] = float(attr["branch_time_in_child"].split('D')[0])
except ValueError as e:
attr["branch_time_in_child"] = attr["branch_time_in_child"].split('D')[0]
if "branch_time_in_parent" in attr.keys():
if len(attr["branch_time_in_parent"])>0:
attr["branch_time_in_parent"] = float(attr["branch_time_in_parent"].split('D')[0])
if "parent_mip_era" in attr.keys():
try:
attr["branch_time_in_parent"] = float(attr["branch_time_in_parent"].split('D')[0])
except ValueError as e:
attr["branch_time_in_parent"] = attr["branch_time_in_parent"].split('D')[0]
if "parent_mip_era" in attr.keys() and len(attr["parent_activity_id"]) > 2:
attr["parent_mip_era"] = attr["mip_era"]
if "parent_source_id" in attr.keys():
else:
attr["parent_mip_era"] = "no parent"
if "parent_source_id" in attr.keys() and len(attr["parent_activity_id"]) > 2:
attr["parent_source_id"] = attr["source_id"]
if "parent_time_units" in attr.keys():
else:
attr["parent_source_id"] = ""
if "parent_time_units" in attr.keys() and len(attr["parent_activity_id"]) > 2:
attr["parent_time_units"] = "days since 0001-01-01 00:00:00"
else:
attr["parent_time_units"] = "none"
else:
if "branch_time_in_child" in attr.keys():
attr["branch_time_in_child"] = float(attr["branch_time_in_child"].split('D')[0])
if "branch_time_in_parent" in attr.keys():
attr["branch_time_in_parent"] = 0.0
if "parent_mip_era" in attr.keys():
attr["parent_mip_era"] = "no parent"
if "parent_source_id" in attr.keys():
attr["parent_source_id"] = "no parent"
if "parent_time_units" in attr.keys():
attr["parent_time_units"] = "no parent"

else:
if "branch_time_in_child" in attr.keys():
attr["branch_time_in_child"] = "none"
attr["branch_time_in_child"] = "no parent"
if "branch_time_in_parent" in attr.keys():
attr["branch_time_in_parent"] = "none"
attr["branch_time_in_parent"] = "no parent"
if "parent_mip_era" in attr.keys():
attr["parent_mip_era"] = "none"
attr["parent_mip_era"] = "no parent"
if "parent_source_id" in attr.keys():
attr["parent_source_id"] = "none"
attr["parent_source_id"] = "no parent"
if "parent_time_units" in attr.keys():
attr["parent_time_units"] = "none"
attr["parent_time_units"] = "no parent"

if "variant_label" in attr.keys():
pre = attr["variant_label"].split('r')[1]
Expand Down Expand Up @@ -349,10 +374,12 @@ def defineVar(v, varName, attr, table_info, definition, ig, experiment, out_dir)
dst = date_strings[v["frequency"]]
else:
dst = ''
vid = v['variable_id']

f_name = ("{0}/{1}/{2}/{3}/{4}/{5}/{6}/{7}/{8}/{9}/{10}/{11}_{12}_{13}_{14}_{15}_{16}{17}.nc".format(
out_dir, mip_era, activity_id, institution_id, source_id, experiment, ripf, mipTable,
varName, grid, version,
varName, mipTable, source_id, experiment, ripf, grid, dst))
vid, grid, version,
vid, mipTable, source_id, experiment, ripf, grid, dst))
var = {}

# put together the dictionary entry for this variable
Expand All @@ -363,6 +390,7 @@ def defineVar(v, varName, attr, table_info, definition, ig, experiment, out_dir)
var["file"] = {}
var["file"]["attributes"] = attributes
var["file"]["attributes"]["variant_label"] = ripf
var["attributes"]["comment"] = definition
var["file"]["filename"] = f_name
var["file"]["format"] = f_format
if compression is not None:
Expand Down Expand Up @@ -496,21 +524,25 @@ def create_output(exp_dict, definitions, input_glob, attributes, output_path, ar
ts_key = None
mip = d['mipTable']
if mip in definitions.keys():
ig = ""
if v in definitions[mip].keys():
if "N/A" in definitions[mip][v].upper():
v_def = ""
else:
v_def = definitions[mip][v]
ig = input_glob[mip][v]
else:
v_def = ""
ig = ""
if v in definitions[mip].keys():
v_def = definitions[mip][v]
ig = input_glob[mip][v]
# if v in definitions[mip].keys():
# if "N/A" in definitions[mip][v].upper():
# v_def = ""
# else:
# v_def = definitions[mip][v]
# ig = input_glob[mip][v]
# else:
# v_def = ""
var_list[v] = defineVar(d, v, attributes, table_info, v_def, ig, experiment, out_dir)
realm = d["realm"].replace(' ','_')
ts_key = var_list[v]["file"]["attributes"]["activity_id"]+'_'+var_list[v]["attributes"]["mipTable"]+'_'+realm
if ts_key not in TableSpec.keys():
TableSpec[ts_key] = {}
TableSpec[ts_key][v] = var_list[v]
#TableSpec[ts_key][v] = var_list[v]
TableSpec[ts_key][var_list[v]["file"]["attributes"]["variable_id"]] = var_list[v]
t_realm = "NoRealm"
for k1,v1 in definitions.iteritems():
if 'Coords' in k1:
Expand All @@ -528,6 +560,8 @@ def create_output(exp_dict, definitions, input_glob, attributes, output_path, ar
else:
if 'definition' not in TableSpec[ts_key][dim].keys():
print "MISSING "+dim+" in "+'Coords_'+t_realm+" (for variable "+v+")"
else:
print 'missing:',v
else:
AllMissing[t].append(v)

Expand Down Expand Up @@ -642,6 +676,7 @@ def main(argv=None):
for gaFile in args.globalAttrFile.split(","):
if os.path.isfile(gaFile):
if "json" in gaFile:
print 'opening ',gaFile
with open(gaFile) as gaF:
ga = json.load(gaF)
for k in ga.keys():
Expand Down
17 changes: 15 additions & 2 deletions source/pyconform/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,8 @@ class FileDesc(object):
file, a dict of DimensionDesc objects, and a dict of VariableDesc objects.
"""

def __init__(self, name, format='NETCDF4_CLASSIC', deflate=2, variables=(), attributes={}): # @ReservedAssignment
def __init__(self, name, format='NETCDF4_CLASSIC', deflate=2, variables=(), attributes={},
autoparse_time_variable=None): # @ReservedAssignment
"""
Initializer
Expand All @@ -392,6 +393,8 @@ def __init__(self, name, format='NETCDF4_CLASSIC', deflate=2, variables=(), attr
deflate (int): Level of lossless compression to use in all variables within the file (0-9)
variables (tuple): Tuple of VariableDesc objects describing the file variables
attributes (dict): Dict of global attributes in the file
autoparse_time_variable (str): The name of an output variable that should be used
to represent the 'time' when autoparsing the output filename
"""
self._name = name

Expand Down Expand Up @@ -431,6 +434,13 @@ def __init__(self, name, format='NETCDF4_CLASSIC', deflate=2, variables=(), attr
raise TypeError(err_msg)
self._attributes = deepcopy(attributes)

if autoparse_time_variable:
if autoparse_time_variable not in self._variables:
err_msg = ('Variable {!r} does not exist in describe file {!r}, but is required '
'for autoparsing the filename'.format(autoparse_time_variable, name))
raise ValueError(err_msg)
self.autoparse_time_variable = autoparse_time_variable

@property
def name(self):
"""Name of the file"""
Expand Down Expand Up @@ -773,6 +783,9 @@ def __init__(self, name='output', dsdict=OrderedDict()):
if 'deflate' in fdict:
files[fname]['deflate'] = fdict['deflate']

if 'autoparse_time_variable' in fdict:
files[fname]['autoparse_time_variable'] = fdict['autoparse_time_variable']

if 'attributes' in fdict:
files[fname]['attributes'] = fdict['attributes']

Expand All @@ -794,7 +807,7 @@ def __init__(self, name='output', dsdict=OrderedDict()):
# Get the variable descriptors for each variable required to be in the file
vlist = OrderedDict([(vname, variables[vname]) for vname in fdict['variables']])

# Get the unique list of dimension names for required by these variables
# Get the unique list of dimension names required by these variables
fdims = set()
for vname in vlist:
vdesc = vlist[vname]
Expand Down
69 changes: 38 additions & 31 deletions source/pyconform/flownodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -637,6 +637,11 @@ def __init__(self, filedesc, inputs=()):
raise TypeError(('WriteNode {!r} cannot accept input from type {}, must be a '
'ValidateNode').format(filedesc.name, type(inp)))

# Extract hidden variables (names starting with '_') from list of input nodes
hidden_labels = [inp.label for inp in inputs if inp.label[0] == '_']
self._hidden_inputs = [inp for inp in inputs if inp.label in hidden_labels]
inputs = [inp for inp in inputs if inp.label not in hidden_labels]

# Call base class (label is filename)
super(WriteNode, self).__init__(filedesc.name, *inputs)

Expand All @@ -647,7 +652,7 @@ def __init__(self, filedesc, inputs=()):
for inp in inputs:
if inp.label not in self._filedesc.variables:
raise ValueError(('WriteNode {!r} takes input from variable {!r} that is not '
'contained in the descibed file').format(filedesc.name, inp.label))
'contained in the described file').format(filedesc.name, inp.label))

# Construct the proper filename
fname = self._autoparse_filename_(self.label)
Expand Down Expand Up @@ -678,28 +683,31 @@ def _autoparse_filename_(self, fname):
if '{' in fname:

possible_tvars = []
for var in self._filedesc.variables:
vdesc = self._filedesc.variables[var]
if var in ('time', 'time1', 'time2', 'time3'):
possible_tvars.append(var)
elif vdesc.cfunits().is_time_reference() and len(vdesc.dimensions) == 1:
possible_tvars.append(var)
elif 'standard_name' in vdesc.attributes and vdesc.attributes['standard_name'] == 'time':
possible_tvars.append(var)
elif 'axis' in vdesc.attributes and vdesc.attributes['axis'] == 'T':
possible_tvars.append(var)
possible_inputs = list(self.inputs)
if self._filedesc.autoparse_time_variable:
possible_tvars.append(self._filedesc.autoparse_time_variable)
possible_inputs += self._hidden_inputs
else:
for var in self._filedesc.variables:
vdesc = self._filedesc.variables[var]
if var in ('time', 'time1', 'time2', 'time3'):
possible_tvars.append(var)
elif vdesc.cfunits().is_time_reference() and len(vdesc.dimensions) == 1:
possible_tvars.append(var)
elif 'standard_name' in vdesc.attributes and vdesc.attributes['standard_name'] == 'time':
possible_tvars.append(var)
elif 'axis' in vdesc.attributes and vdesc.attributes['axis'] == 'T':
possible_tvars.append(var)
if len(possible_tvars) == 0:
msg = 'Could not identify a time variable to autoparse filename {!r}'.format(
fname)
msg = 'Could not identify a time variable to autoparse filename {!r}'.format(fname)
warn(msg, DateTimeAutoParseWarning)
return fname
possible_tnodes = {vnode.label:vnode for vnode in possible_inputs
if vnode.label in possible_tvars}
if len(possible_tnodes) == 0:
raise ValueError('Time variable input missing for file {!r}'.format(fname))
tnode = possible_tnodes['time'] if 'time' in possible_tnodes else possible_tnodes.values()[0]

tvar = 'time' if 'time' in possible_tvars else possible_tvars[0]
tnodes = [vnode for vnode in self.inputs if vnode.label == tvar]
if len(tnodes) == 0:
raise ValueError(
'Time variable input missing for file {!r}'.format(fname))
tnode = tnodes[0]
t1 = tnode[0:1]
t2 = tnode[-1:]

Expand Down Expand Up @@ -750,8 +758,11 @@ def _open_(self, deflate=None):
try:
makedirs(fdir)
except:
raise IOError(
'Failed to create directory for output file {!r}'.format(fname))
if exists(fdir):
print('Already created directory for output file {!r}'.format(fname))
else:
raise IOError(
'Failed to create directory for output file {!r}'.format(fname))

# Try to open the output file for writing
try:
Expand Down Expand Up @@ -806,8 +817,7 @@ def _open_(self, deflate=None):
for vnode in self.inputs:
vname = vnode.label
vdesc = self._filedesc.variables[vname]
vattrs = OrderedDict((k, v)
for k, v in vnode.attributes.iteritems())
vattrs = OrderedDict((k, v) for k, v in vnode.attributes.iteritems())

vdtype = vdesc.dtype
fillval = vattrs.get('_FillValue', None)
Expand All @@ -824,8 +834,8 @@ def _open_(self, deflate=None):
'Override deflate value range from 0 to 9')
zlib = deflate > 0
clev = deflate if zlib else 1
ncvar = self._file.createVariable(
vname, vdtype, vdims, fill_value=fillval, zlib=zlib, complevel=clev)
ncvar = self._file.createVariable(vname, vdtype, vdims, fill_value=fillval,
zlib=zlib, complevel=clev)

for aname in vattrs:
if aname not in self._unwritten_attributes:
Expand Down Expand Up @@ -926,19 +936,16 @@ def execute(self, chunks={}, deflate=None):
# Open the file and write the header information
self._open_(deflate=deflate)

# Create data structure to keep track of which variable chunks we have
# written
# Create data structure to keep track of which variable chunks we have written
vchunks = {vnode.label: set() for vnode in self.inputs}

# Compute the Global Dimension Sizes dictionary from the input variable
# nodes
# Compute the Global Dimension Sizes dictionary from the input variable nodes
inputdims = []
for vnode in self.inputs:
for d in self._filedesc.variables[vnode.label].dimensions:
if d not in inputdims:
inputdims.append(d)
gdims = OrderedDict(
(d, self._filedesc.dimensions[d].size) for d in inputdims)
gdims = OrderedDict((d, self._filedesc.dimensions[d].size) for d in inputdims)

# Iterate over the global dimension space
for chunk in WriteNode._chunk_iter_(gdims, chunks=chunks):
Expand Down
Loading

0 comments on commit 6f5c11e

Please sign in to comment.