Skip to content

Commit

Permalink
Merge pull request #93 from NCAR/devel
Browse files — browse the repository at this point in the history
Merge to master for version 0.2.8
Branch information:
sherimickelson authored May 2, 2019
2 parents 1cbe2f1 + ccc6e58 commit 6f5c11e
Show file tree
Hide file tree
Showing 15 changed files with 839 additions and 150 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ temp
build
dist
PyConform.egg-info
.idea/
18 changes: 0 additions & 18 deletions .project

This file was deleted.

8 changes: 0 additions & 8 deletions .pydevproject

This file was deleted.

2 changes: 0 additions & 2 deletions .settings/org.eclipse.core.resources.prefs

This file was deleted.

83 changes: 59 additions & 24 deletions scripts/iconform
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ from dreqPy import dreq
import uuid


version = 'v'+str(datetime.datetime.now().year)+str(datetime.datetime.now().month).zfill(2)+str(datetime.datetime.now().day).zfill(2)
#version = 'v'+str(datetime.datetime.now().year)+str(datetime.datetime.now().month).zfill(2)+str(datetime.datetime.now().day).zfill(2)
version = 'v20190309'

# Map netcdf types to python types
#data_types = {'char': 'char', 'byte': 'int8', 'short': 'int16', 'int': 'int32',
Expand Down Expand Up @@ -191,30 +192,54 @@ def fill_missing_glob_attributes(attr, table, v, grids):
attr.pop("external_variables")

if "branch_method" in attr.keys():
if "none" not in attr["branch_method"]:
if "no parent" not in attr["branch_method"]:
if "branch_time_in_child" in attr.keys():
if len(attr["branch_time_in_child"])>0:
attr["branch_time_in_child"] = float(attr["branch_time_in_child"].split('D')[0])
try:
attr["branch_time_in_child"] = float(attr["branch_time_in_child"].split('D')[0])
except ValueError as e:
attr["branch_time_in_child"] = attr["branch_time_in_child"].split('D')[0]
if "branch_time_in_parent" in attr.keys():
if len(attr["branch_time_in_parent"])>0:
attr["branch_time_in_parent"] = float(attr["branch_time_in_parent"].split('D')[0])
if "parent_mip_era" in attr.keys():
try:
attr["branch_time_in_parent"] = float(attr["branch_time_in_parent"].split('D')[0])
except ValueError as e:
attr["branch_time_in_parent"] = attr["branch_time_in_parent"].split('D')[0]
if "parent_mip_era" in attr.keys() and len(attr["parent_activity_id"]) > 2:
attr["parent_mip_era"] = attr["mip_era"]
if "parent_source_id" in attr.keys():
else:
attr["parent_mip_era"] = "no parent"
if "parent_source_id" in attr.keys() and len(attr["parent_activity_id"]) > 2:
attr["parent_source_id"] = attr["source_id"]
if "parent_time_units" in attr.keys():
else:
attr["parent_source_id"] = ""
if "parent_time_units" in attr.keys() and len(attr["parent_activity_id"]) > 2:
attr["parent_time_units"] = "days since 0001-01-01 00:00:00"
else:
attr["parent_time_units"] = "none"
else:
if "branch_time_in_child" in attr.keys():
attr["branch_time_in_child"] = float(attr["branch_time_in_child"].split('D')[0])
if "branch_time_in_parent" in attr.keys():
attr["branch_time_in_parent"] = 0.0
if "parent_mip_era" in attr.keys():
attr["parent_mip_era"] = "no parent"
if "parent_source_id" in attr.keys():
attr["parent_source_id"] = "no parent"
if "parent_time_units" in attr.keys():
attr["parent_time_units"] = "no parent"

else:
if "branch_time_in_child" in attr.keys():
attr["branch_time_in_child"] = "none"
attr["branch_time_in_child"] = "no parent"
if "branch_time_in_parent" in attr.keys():
attr["branch_time_in_parent"] = "none"
attr["branch_time_in_parent"] = "no parent"
if "parent_mip_era" in attr.keys():
attr["parent_mip_era"] = "none"
attr["parent_mip_era"] = "no parent"
if "parent_source_id" in attr.keys():
attr["parent_source_id"] = "none"
attr["parent_source_id"] = "no parent"
if "parent_time_units" in attr.keys():
attr["parent_time_units"] = "none"
attr["parent_time_units"] = "no parent"

if "variant_label" in attr.keys():
pre = attr["variant_label"].split('r')[1]
Expand Down Expand Up @@ -349,10 +374,12 @@ def defineVar(v, varName, attr, table_info, definition, ig, experiment, out_dir)
dst = date_strings[v["frequency"]]
else:
dst = ''
vid = v['variable_id']

f_name = ("{0}/{1}/{2}/{3}/{4}/{5}/{6}/{7}/{8}/{9}/{10}/{11}_{12}_{13}_{14}_{15}_{16}{17}.nc".format(
out_dir, mip_era, activity_id, institution_id, source_id, experiment, ripf, mipTable,
varName, grid, version,
varName, mipTable, source_id, experiment, ripf, grid, dst))
vid, grid, version,
vid, mipTable, source_id, experiment, ripf, grid, dst))
var = {}

# put together the dictionary entry for this variable
Expand All @@ -363,6 +390,7 @@ def defineVar(v, varName, attr, table_info, definition, ig, experiment, out_dir)
var["file"] = {}
var["file"]["attributes"] = attributes
var["file"]["attributes"]["variant_label"] = ripf
var["attributes"]["comment"] = definition
var["file"]["filename"] = f_name
var["file"]["format"] = f_format
if compression is not None:
Expand Down Expand Up @@ -496,21 +524,25 @@ def create_output(exp_dict, definitions, input_glob, attributes, output_path, ar
ts_key = None
mip = d['mipTable']
if mip in definitions.keys():
ig = ""
if v in definitions[mip].keys():
if "N/A" in definitions[mip][v].upper():
v_def = ""
else:
v_def = definitions[mip][v]
ig = input_glob[mip][v]
else:
v_def = ""
ig = ""
if v in definitions[mip].keys():
v_def = definitions[mip][v]
ig = input_glob[mip][v]
# if v in definitions[mip].keys():
# if "N/A" in definitions[mip][v].upper():
# v_def = ""
# else:
# v_def = definitions[mip][v]
# ig = input_glob[mip][v]
# else:
# v_def = ""
var_list[v] = defineVar(d, v, attributes, table_info, v_def, ig, experiment, out_dir)
realm = d["realm"].replace(' ','_')
ts_key = var_list[v]["file"]["attributes"]["activity_id"]+'_'+var_list[v]["attributes"]["mipTable"]+'_'+realm
if ts_key not in TableSpec.keys():
TableSpec[ts_key] = {}
TableSpec[ts_key][v] = var_list[v]
#TableSpec[ts_key][v] = var_list[v]
TableSpec[ts_key][var_list[v]["file"]["attributes"]["variable_id"]] = var_list[v]
t_realm = "NoRealm"
for k1,v1 in definitions.iteritems():
if 'Coords' in k1:
Expand All @@ -528,6 +560,8 @@ def create_output(exp_dict, definitions, input_glob, attributes, output_path, ar
else:
if 'definition' not in TableSpec[ts_key][dim].keys():
print "MISSING "+dim+" in "+'Coords_'+t_realm+" (for variable "+v+")"
else:
print 'missing:',v
else:
AllMissing[t].append(v)

Expand Down Expand Up @@ -642,6 +676,7 @@ def main(argv=None):
for gaFile in args.globalAttrFile.split(","):
if os.path.isfile(gaFile):
if "json" in gaFile:
print 'opening ',gaFile
with open(gaFile) as gaF:
ga = json.load(gaF)
for k in ga.keys():
Expand Down
17 changes: 15 additions & 2 deletions source/pyconform/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,8 @@ class FileDesc(object):
file, a dict of DimensionDesc objects, and a dict of VariableDesc objects.
"""

def __init__(self, name, format='NETCDF4_CLASSIC', deflate=2, variables=(), attributes={}): # @ReservedAssignment
def __init__(self, name, format='NETCDF4_CLASSIC', deflate=2, variables=(), attributes={},
autoparse_time_variable=None): # @ReservedAssignment
"""
Initializer
Expand All @@ -392,6 +393,8 @@ def __init__(self, name, format='NETCDF4_CLASSIC', deflate=2, variables=(), attr
deflate (int): Level of lossless compression to use in all variables within the file (0-9)
variables (tuple): Tuple of VariableDesc objects describing the file variables
attributes (dict): Dict of global attributes in the file
autoparse_time_variable (str): The name of an output variable that should be used
to represent the 'time' when autoparsing the output filename
"""
self._name = name

Expand Down Expand Up @@ -431,6 +434,13 @@ def __init__(self, name, format='NETCDF4_CLASSIC', deflate=2, variables=(), attr
raise TypeError(err_msg)
self._attributes = deepcopy(attributes)

if autoparse_time_variable:
if autoparse_time_variable not in self._variables:
err_msg = ('Variable {!r} does not exist in describe file {!r}, but is required '
'for autoparsing the filename'.format(autoparse_time_variable, name))
raise ValueError(err_msg)
self.autoparse_time_variable = autoparse_time_variable

@property
def name(self):
"""Name of the file"""
Expand Down Expand Up @@ -773,6 +783,9 @@ def __init__(self, name='output', dsdict=OrderedDict()):
if 'deflate' in fdict:
files[fname]['deflate'] = fdict['deflate']

if 'autoparse_time_variable' in fdict:
files[fname]['autoparse_time_variable'] = fdict['autoparse_time_variable']

if 'attributes' in fdict:
files[fname]['attributes'] = fdict['attributes']

Expand All @@ -794,7 +807,7 @@ def __init__(self, name='output', dsdict=OrderedDict()):
# Get the variable descriptors for each variable required to be in the file
vlist = OrderedDict([(vname, variables[vname]) for vname in fdict['variables']])

# Get the unique list of dimension names for required by these variables
# Get the unique list of dimension names required by these variables
fdims = set()
for vname in vlist:
vdesc = vlist[vname]
Expand Down
69 changes: 38 additions & 31 deletions source/pyconform/flownodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -637,6 +637,11 @@ def __init__(self, filedesc, inputs=()):
raise TypeError(('WriteNode {!r} cannot accept input from type {}, must be a '
'ValidateNode').format(filedesc.name, type(inp)))

# Extract hidden variables (names starting with '_') from list of input nodes
hidden_labels = [inp.label for inp in inputs if inp.label[0] == '_']
self._hidden_inputs = [inp for inp in inputs if inp.label in hidden_labels]
inputs = [inp for inp in inputs if inp.label not in hidden_labels]

# Call base class (label is filename)
super(WriteNode, self).__init__(filedesc.name, *inputs)

Expand All @@ -647,7 +652,7 @@ def __init__(self, filedesc, inputs=()):
for inp in inputs:
if inp.label not in self._filedesc.variables:
raise ValueError(('WriteNode {!r} takes input from variable {!r} that is not '
'contained in the descibed file').format(filedesc.name, inp.label))
'contained in the described file').format(filedesc.name, inp.label))

# Construct the proper filename
fname = self._autoparse_filename_(self.label)
Expand Down Expand Up @@ -678,28 +683,31 @@ def _autoparse_filename_(self, fname):
if '{' in fname:

possible_tvars = []
for var in self._filedesc.variables:
vdesc = self._filedesc.variables[var]
if var in ('time', 'time1', 'time2', 'time3'):
possible_tvars.append(var)
elif vdesc.cfunits().is_time_reference() and len(vdesc.dimensions) == 1:
possible_tvars.append(var)
elif 'standard_name' in vdesc.attributes and vdesc.attributes['standard_name'] == 'time':
possible_tvars.append(var)
elif 'axis' in vdesc.attributes and vdesc.attributes['axis'] == 'T':
possible_tvars.append(var)
possible_inputs = list(self.inputs)
if self._filedesc.autoparse_time_variable:
possible_tvars.append(self._filedesc.autoparse_time_variable)
possible_inputs += self._hidden_inputs
else:
for var in self._filedesc.variables:
vdesc = self._filedesc.variables[var]
if var in ('time', 'time1', 'time2', 'time3'):
possible_tvars.append(var)
elif vdesc.cfunits().is_time_reference() and len(vdesc.dimensions) == 1:
possible_tvars.append(var)
elif 'standard_name' in vdesc.attributes and vdesc.attributes['standard_name'] == 'time':
possible_tvars.append(var)
elif 'axis' in vdesc.attributes and vdesc.attributes['axis'] == 'T':
possible_tvars.append(var)
if len(possible_tvars) == 0:
msg = 'Could not identify a time variable to autoparse filename {!r}'.format(
fname)
msg = 'Could not identify a time variable to autoparse filename {!r}'.format(fname)
warn(msg, DateTimeAutoParseWarning)
return fname
possible_tnodes = {vnode.label:vnode for vnode in possible_inputs
if vnode.label in possible_tvars}
if len(possible_tnodes) == 0:
raise ValueError('Time variable input missing for file {!r}'.format(fname))
tnode = possible_tnodes['time'] if 'time' in possible_tnodes else possible_tnodes.values()[0]

tvar = 'time' if 'time' in possible_tvars else possible_tvars[0]
tnodes = [vnode for vnode in self.inputs if vnode.label == tvar]
if len(tnodes) == 0:
raise ValueError(
'Time variable input missing for file {!r}'.format(fname))
tnode = tnodes[0]
t1 = tnode[0:1]
t2 = tnode[-1:]

Expand Down Expand Up @@ -750,8 +758,11 @@ def _open_(self, deflate=None):
try:
makedirs(fdir)
except:
raise IOError(
'Failed to create directory for output file {!r}'.format(fname))
if exists(fdir):
print('Already created directory for output file {!r}'.format(fname))
else:
raise IOError(
'Failed to create directory for output file {!r}'.format(fname))

# Try to open the output file for writing
try:
Expand Down Expand Up @@ -806,8 +817,7 @@ def _open_(self, deflate=None):
for vnode in self.inputs:
vname = vnode.label
vdesc = self._filedesc.variables[vname]
vattrs = OrderedDict((k, v)
for k, v in vnode.attributes.iteritems())
vattrs = OrderedDict((k, v) for k, v in vnode.attributes.iteritems())

vdtype = vdesc.dtype
fillval = vattrs.get('_FillValue', None)
Expand All @@ -824,8 +834,8 @@ def _open_(self, deflate=None):
'Override deflate value range from 0 to 9')
zlib = deflate > 0
clev = deflate if zlib else 1
ncvar = self._file.createVariable(
vname, vdtype, vdims, fill_value=fillval, zlib=zlib, complevel=clev)
ncvar = self._file.createVariable(vname, vdtype, vdims, fill_value=fillval,
zlib=zlib, complevel=clev)

for aname in vattrs:
if aname not in self._unwritten_attributes:
Expand Down Expand Up @@ -926,19 +936,16 @@ def execute(self, chunks={}, deflate=None):
# Open the file and write the header information
self._open_(deflate=deflate)

# Create data structure to keep track of which variable chunks we have
# written
# Create data structure to keep track of which variable chunks we have written
vchunks = {vnode.label: set() for vnode in self.inputs}

# Compute the Global Dimension Sizes dictionary from the input variable
# nodes
# Compute the Global Dimension Sizes dictionary from the input variable nodes
inputdims = []
for vnode in self.inputs:
for d in self._filedesc.variables[vnode.label].dimensions:
if d not in inputdims:
inputdims.append(d)
gdims = OrderedDict(
(d, self._filedesc.dimensions[d].size) for d in inputdims)
gdims = OrderedDict((d, self._filedesc.dimensions[d].size) for d in inputdims)

# Iterate over the global dimension space
for chunk in WriteNode._chunk_iter_(gdims, chunks=chunks):
Expand Down
Loading

0 comments on commit 6f5c11e

Please sign in to comment.