From b657862dd19fefe5734086802ba5b528d3c3e3c6 Mon Sep 17 00:00:00 2001 From: Aleksandar Jelenak Date: Sat, 29 Sep 2018 11:03:15 -0400 Subject: [PATCH] Add specifying library version for created HDF5 file --- h5json/hdf5db.py | 178 ++++++++++++++++++------------------ h5json/jsontoh5/jsontoh5.py | 26 ++++-- 2 files changed, 107 insertions(+), 97 deletions(-) diff --git a/h5json/hdf5db.py b/h5json/hdf5db.py index 38c1265..ef546e7 100644 --- a/h5json/hdf5db.py +++ b/h5json/hdf5db.py @@ -16,7 +16,7 @@ if six.PY3: unicode = str - + """ @@ -75,7 +75,7 @@ import json import logging -from .hdf5dtype import getTypeItem, createDataType, getItemSize +from .hdf5dtype import getTypeItem, createDataType, getItemSize # global dictionary to direct back to the Hdf5db instance by filename # (needed for visititems callback) @@ -138,7 +138,7 @@ def getVersionInfo(): def __init__(self, filePath, dbFilePath=None, readonly=False, app_logger=None, root_uuid=None, update_timestamps=True, - userid=None): + userid=None, libver='latest'): if app_logger: self.log = app_logger else: @@ -158,13 +158,13 @@ def __init__(self, filePath, dbFilePath=None, readonly=False, else: mode = 'r+' self.readonly = False - + self.log.info("init -- filePath: " + filePath + " mode: " + mode) self.update_timestamps = update_timestamps - self.f = h5py.File(filePath, mode, libver='latest') + self.f = h5py.File(filePath, mode, libver=libver) self.root_uuid = root_uuid @@ -1338,9 +1338,9 @@ def makeNullTermStringAttribute(self, obj, attr_name, strLength, value): if strLength < len(value): self.log.warning("makeNullTermStringAttribute: value string longer than length") #value = value[:strLength] # truncate to length - - - if six.PY3 and type(attr_name) is str: + + + if six.PY3 and type(attr_name) is str: try: attr_name = attr_name.encode('ascii') except UnicodeDecodeError: @@ -1432,7 +1432,7 @@ def makeAttribute(self, obj, attr_name, shape, attr_type, value): # create numpy array npdata = np.zeros(shape, dtype=dt) - + if rank == 0: npdata[()] = self.toNumPyValue(attr_type, value, npdata[()]) else: @@ -1689,9 +1689,9 @@ def toNumPyValue(self, typeItem, src, des): try: src.encode('ascii') except UnicodeDecodeError: - raise TypeError("non-ascii value not allowed with H5T_CSET_ASCII") + raise TypeError("non-ascii value not allowed with H5T_CSET_ASCII") des = src - + else: msg = "Unexpected type class: " + typeClass self.log.info(msg) @@ -1870,9 +1870,9 @@ def listToRef(self, data): self.log.info(msg) raise IOError(errno.EINVAL, msg) return out - + """ - Convert list that may contain bytes type elements to list of string elements + Convert list that may contain bytes type elements to list of string elements """ def bytesArrayToList(self, data): if type(data) in (bytes, str, unicode): @@ -1886,16 +1886,16 @@ def bytesArrayToList(self, data): else: is_list = False else: - is_list = True + is_list = True elif type(data) in (list, tuple): is_list = True else: is_list = False - + if is_list: out = [] for item in data: - out.append(self.bytesArrayToList(item)) # recursive call + out.append(self.bytesArrayToList(item)) # recursive call elif type(data) is bytes: if six.PY3: out = data.decode("utf-8") @@ -1903,9 +1903,9 @@ def bytesArrayToList(self, data): out = data else: out = data - + return out - + """ Get item description of region reference value """ @@ -2058,7 +2058,7 @@ def createRegionReference(self, item): h5py.h5s.SpaceID.select_hyperslab(space_id, start, count, op=h5py.h5s.SELECT_OR) # now that we've selected the desired region in the space, 
return a region reference - + if six.PY3: dset_name = dset.name.encode('utf-8') else: @@ -2091,12 +2091,12 @@ def getDatasetValuesByUuid(self, obj_uuid, slices=Ellipsis, format="json"): msg = "only json and binary formats are supported" self.log.info(msg) raise IOError(errno.EINVAL, msg) - + if dset is None: msg = "Dataset: " + obj_uuid + " not found" self.log.info(msg) raise IOError(errno.ENXIO, msg) - + values = None dt = dset.dtype typeItem = getTypeItem(dt) @@ -2105,13 +2105,13 @@ def getDatasetValuesByUuid(self, obj_uuid, slices=Ellipsis, format="json"): msg = "Only JSON is supported for for this data type" self.log.info(msg) raise IOError(errno.EINVAL, msg) - + if dset.shape is None: # null space dataset (with h5py 2.6.0) - return None - + return None + rank = len(dset.shape) - + if rank == 0: # check for null dataspace try: @@ -2131,7 +2131,7 @@ def getDatasetValuesByUuid(self, obj_uuid, slices=Ellipsis, format="json"): msg = "Unexpected error: getDatasetValuesByUuid: number of dims in selection not same as rank" self.log.error(msg) raise IOError(errno.EIO, msg) - + if dt.kind == 'O': if format != "json": msg = "Only JSON is supported for for this data type" @@ -2170,31 +2170,31 @@ def getDatasetValuesByUuid(self, obj_uuid, slices=Ellipsis, format="json"): values = dset[slices].tobytes() else: values = dset[slices] - + # just use tolist to dump - if format == "json": + if format == "json": values = values.tolist() else: #values = base64.b64encode(dset[slices].tobytes()) values = values.tobytes() - + return values - + """ doDatasetQueryByUuid: return rows based on query string Return rows from a dataset that matches query string. - + Note: Only supported for compound_type/one-dimensional datasets """ def doDatasetQueryByUuid(self, obj_uuid, query, start=0, stop=-1, step=1, limit=None): self.log.info("doQueryByUuid - uuid: " + obj_uuid + " query:" + query) self.log.info("start: " + str(start) + " stop: " + str(stop) + " step: " + str(step) + " limit: " + str(limit)) - dset = self.getDatasetObjByUuid(obj_uuid) + dset = self.getDatasetObjByUuid(obj_uuid) if dset is None: msg = "Dataset: " + obj_uuid + " not found" self.log.info(msg) raise IOError(errno.ENXIO, msg) - + values = [] dt = dset.dtype typeItem = getTypeItem(dt) @@ -2203,33 +2203,33 @@ def doDatasetQueryByUuid(self, obj_uuid, query, start=0, stop=-1, step=1, limit= msg = "Only compound type datasets can be used as query target" self.log.info(msg) raise IOError(errno.EINVAL, msg) - + if dset.shape is None: # null space dataset (with h5py 2.6.0) - return None - + return None + rank = len(dset.shape) if rank != 1: msg = "One one-dimensional datasets can be used as query target" self.log.info(msg) raise IOError(errno.EINVAL, msg) - + values = [] indexes = [] count = 0 - + num_elements = dset.shape[0] if stop == -1: stop = num_elements elif stop > num_elements: stop = num_elements block_size = self._getBlockSize(dset) - self.log.info("block_size: " + str(block_size)) - + self.log.info("block_size: " + str(block_size)) + field_names = list(dset.dtype.fields.keys()) - eval_str = self._getEvalStr(query, field_names) - + eval_str = self._getEvalStr(query, field_names) + while start < stop: if limit and (count == limit): break # no more rows for this batch @@ -2248,21 +2248,21 @@ def doDatasetQueryByUuid(self, obj_uuid, query, start=0, stop=-1, step=1, limit= count += 1 if limit and (count == limit): break # no more rows for this batch - + start = end # go to next block - - + + # values = self.getDataValue(item_type, values, 
dimension=1, dims=(len(values),)) - - self.log.info("got " + str(count) + " query matches") + + self.log.info("got " + str(count) + " query matches") return (indexes, values) - + """ _getBlockSize: Get number of rows to read from disk - + heurestic to get reasonable sized chunk of data to fetch. make multiple of chunk_size if possible - """ + """ def _getBlockSize(self, dset): target_block_size = 256 * 1000 if dset.chunks: @@ -2274,12 +2274,12 @@ def _getBlockSize(self, dset): else: block_size = target_block_size return block_size - + """ _getEvalStr: Get eval string for given query - + Gets Eval string to use with numpy where method. - """ + """ def _getEvalStr(self, query, field_names): i = 0 eval_str = "" @@ -2310,7 +2310,7 @@ def _getEvalStr(self, query, field_names): eval_str += "rows['" + var_name + "']" var_name = None var_count += 1 - + if end_quote_char: if ch == end_quote_char: # end of literal @@ -2352,7 +2352,7 @@ def _getEvalStr(self, query, field_names): msg = "Mismatched paren" self.log.info("EINVAL: " + msg) raise IOError(errno.EINVAL, msg) - + return eval_str """ @@ -2365,7 +2365,7 @@ def getDatasetPointSelectionByUuid(self, obj_uuid, points): msg = "Dataset: " + obj_uuid + " not found" self.log.info(msg) raise IOError(errno.ENXIO, msg) - + rank = len(dset.shape) values = np.zeros(len(points), dtype=dset.dtype) try: @@ -2389,22 +2389,22 @@ def getDatasetPointSelectionByUuid(self, obj_uuid, points): """ def setDatasetValuesByUuid(self, obj_uuid, data, slices=None, format="json"): dset = self.getDatasetObjByUuid(obj_uuid) - + if format not in ("json", "binary"): msg = "only json and binary formats are supported" self.log.info(msg) raise IOError(errno.EINVAL, msg) - + if format == "binary" and type(data) is not bytes: msg ="data must be of type bytes for binary writing" self.log.info(msg) raise IOError(errno.EINVAL, msg) - + if dset is None: msg = "Dataset: " + obj_uuid + " not found" self.log.info(msg) - raise IOError(errno.ENXIO, msg) - + raise IOError(errno.ENXIO, msg) + dt = dset.dtype typeItem = getTypeItem(dt) itemSize = getItemSize(typeItem) @@ -2412,11 +2412,11 @@ def setDatasetValuesByUuid(self, obj_uuid, data, slices=None, format="json"): arraySize = 1 for extent in dset.shape: arraySize *= arraySize - + if itemSize == "H5T_VARIABLE" and format == "binary": msg = "Only JSON is supported for for this data type" self.log.info(msg) - raise IOError(errno.EINVAL, msg) + raise IOError(errno.EINVAL, msg) if slices is None: slices = [] @@ -2425,24 +2425,24 @@ def setDatasetValuesByUuid(self, obj_uuid, data, slices=None, format="json"): s = slice(0, dset.shape[dim], 1) slices.append(s) slices = tuple(slices) - - + + if type(slices) != tuple: msg = "setDatasetValuesByUuid: bad type for dim parameter" self.log.error(msg) raise IOError(erno.EIO, msg) - + if len(slices) != rank: msg = "number of dims in selection not same as rank" self.log.info(msg) raise IOError(errno.EINVAL, msg) - + npoints = 1 np_shape = [] for i in range(rank): s = slices[i] - + if s.start < 0 or s.step <= 0 or s.stop < s.start: msg = "invalid slice specification" self.log.info(msg) @@ -2452,17 +2452,17 @@ def setDatasetValuesByUuid(self, obj_uuid, data, slices=None, format="json"): self.log.info(msg) raise IOError(errno.EINVAL, msg) np_shape.append(s.stop - s.start) - + count = (s.stop - s.start) // s.step if count <= 0: msg = "invalid slice specification" self.log.info(msg) - raise IOError(errno.EINVAL, msg) - - npoints *= count - + raise IOError(errno.EINVAL, msg) + + npoints *= count + np_shape = 
tuple(np_shape) # for comparison with ndarray shape - + self.log.info("selection shape:" + str(np_shape)) @@ -2481,9 +2481,9 @@ def setDatasetValuesByUuid(self, obj_uuid, data, slices=None, format="json"): if format == "binary": msg = "Only JSON is supported for for this data type" self.log.info(msg) - raise IOError(errno.EINVAL, msg) + raise IOError(errno.EINVAL, msg) data = self.listToRef(data) - + if format == "binary": if npoints*itemSize != len(data): msg = "Expected: " + str(npoints*itemSize) + " bytes, but got: " + str(len(data)) @@ -2524,16 +2524,16 @@ def setDatasetValuesByUuid(self, obj_uuid, data, slices=None, format="json"): if selection_extent == 1: np_index += 1 continue # skip singleton selection - + # selection/data mismatch! msg = "data shape doesn't match selection shape" msg += "--data shape: " + str(arr.shape) msg += "--selection shape: " + str(np_shape) - + self.log.info(msg) raise IOError(errno.EINVAL, msg) - - # write temp numpy array to dataset + + # write temp numpy array to dataset if rank == 1: s = slices[0] try: @@ -2558,29 +2558,29 @@ def setDatasetValuesByUuid(self, obj_uuid, data, slices=None, format="json"): """ def setDatasetValuesByPointSelection(self, obj_uuid, data, points, format="json"): dset = self.getDatasetObjByUuid(obj_uuid) - + if format not in ("json", "binary"): msg = "only json and binary formats are supported" self.log.info(msg) raise IOError(errno.EINVAL, msg) - + if format == "binary" and type(data) is not bytes: msg ="data must be of type bytes for binary writing" self.log.info(msg) raise IOError(errno.EINVAL, msg) - + if dset is None: msg = "Dataset: " + obj_uuid + " not found" self.log.info(msg) - raise IOError(errno.ENXIO, msg) - + raise IOError(errno.ENXIO, msg) + dt = dset.dtype typeItem = getTypeItem(dt) itemSize = getItemSize(typeItem) if itemSize == "H5T_VARIABLE" and format == "binary": msg = "Only JSON is supported for for this data type" self.log.info(msg) - raise IOError(errno.EINVAL, msg) + raise IOError(errno.EINVAL, msg) rank = len(dset.shape) @@ -2592,7 +2592,7 @@ def setDatasetValuesByPointSelection(self, obj_uuid, data, points, format="json" #for i in range(len(data)): # converted_data.append(self.toTuple(data[i])) #data = converted_data - + if format == "json": try: @@ -2608,7 +2608,7 @@ def setDatasetValuesByPointSelection(self, obj_uuid, data, points, format="json" msg = "setDatasetValuesByPointSelection, out of range error" self.log.info(msg) raise IOError(errno.EINVAL, msg) - + else: #binary arr = np.fromstring(data, dtype=dset.dtype) @@ -2779,7 +2779,7 @@ def createDataset(self, datatype, datashape, max_shape=None, else: # create the dataset - + try: newDataset = datasets.create_dataset( obj_uuid, shape=datashape, maxshape=max_shape, diff --git a/h5json/jsontoh5/jsontoh5.py b/h5json/jsontoh5/jsontoh5.py index 82707a8..9edc199 100755 --- a/h5json/jsontoh5/jsontoh5.py +++ b/h5json/jsontoh5/jsontoh5.py @@ -13,7 +13,7 @@ if six.PY3: unicode = str - + import sys import json import argparse @@ -232,10 +232,18 @@ def writeFile(self): self.createAttributes() # create attributes for objects self.createLinks() # link it all together + def main(): - parser = argparse.ArgumentParser(usage='%(prog)s [-h] ') - parser.add_argument('in_filename', nargs='+', help='JSon file to be converted to h5') - parser.add_argument('out_filename', nargs='+', help='name of HDF5 output file') + parser = argparse.ArgumentParser( + usage='%(prog)s [-h] ', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument('in_filename', 
nargs='+',
+                        help='JSON file to be converted to HDF5')
+    parser.add_argument('out_filename', nargs='+',
+                        help='name of HDF5 output file')
+    parser.add_argument('--libver', default='latest',
+                        choices=['earliest', 'latest'],
+                        help='HDF5 library version to use when storing objects')
     args = parser.parse_args()
 
     # create logger
@@ -249,7 +257,7 @@ def main():
     log.addHandler(handler)
 
     text = open(args.in_filename[0]).read()
-    
+
     # parse the json file
     h5json = json.loads(text)
 
@@ -258,11 +266,12 @@ def main():
     root_uuid = h5json["root"]
 
     filename = args.out_filename[0]
-    
+
     # create the file, will raise IOError if there's a problem
-    Hdf5db.createHDF5File(filename) 
+    Hdf5db.createHDF5File(filename)
 
-    with Hdf5db(filename, root_uuid=root_uuid, update_timestamps=False, app_logger=log) as db:
+    with Hdf5db(filename, root_uuid=root_uuid, update_timestamps=False,
+                app_logger=log, libver=args.libver) as db:
        h5writer = Writeh5(db, h5json)
         h5writer.writeFile()
 
@@ -275,5 +284,6 @@ def main():
 
     print("done!")
 
+
 if __name__ == "__main__":
     main()
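
A note on the new keyword: libver is handed straight to h5py.File, so it
follows h5py's semantics: 'earliest' keeps the created file readable by the
oldest HDF5 library versions, while 'latest' lets the linked library use its
newest on-disk format features. A minimal sketch of the equivalent direct
h5py call (the file name is hypothetical):

    import h5py

    # 'earliest' maximizes backward compatibility of the created file;
    # 'latest' enables the newest format features of the linked HDF5 library.
    with h5py.File("example.h5", "w", libver="earliest") as f:
        f.create_dataset("x", data=[1, 2, 3])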
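
The same conversion can be driven from Python along the path main() takes
above; a sketch assuming the package is importable as h5json and that
"mydata.json" was produced by h5tojson (input and output file names are
hypothetical):

    import json
    import logging

    from h5json.hdf5db import Hdf5db
    from h5json.jsontoh5.jsontoh5 import Writeh5

    log = logging.getLogger("jsontoh5.example")

    with open("mydata.json") as f:
        h5json_doc = json.load(f)

    # create the output file, then populate it with the chosen libver
    Hdf5db.createHDF5File("out.h5")
    with Hdf5db("out.h5", root_uuid=h5json_doc["root"],
                update_timestamps=False, app_logger=log,
                libver="earliest") as db:
        Writeh5(db, h5json_doc).writeFile()

From the command line the equivalent is (running the module directly; an
installed entry point, if any, may be named differently):

    python -m h5json.jsontoh5.jsontoh5 --libver earliest mydata.json out.h5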