From e135c40adb1ccdcaf02f7c9b650f4a1097a1a3b9 Mon Sep 17 00:00:00 2001 From: jreadey Date: Mon, 22 Apr 2024 13:32:14 -0700 Subject: [PATCH 1/4] add MultiManager to h5pyd __init__.py --- h5pyd/__init__.py | 2 +- h5pyd/_hl/dataset.py | 7 +++- test/hl/common.py | 52 ++++++++++++++++++++++++----- test/hl/multi_benchmark.py | 16 ++++++--- test/hl/test_dataset.py | 4 +-- test/hl/test_dataset_extend.py | 2 -- test/hl/test_dataset_fancyselect.py | 1 - test/hl/test_datatype.py | 1 - 8 files changed, 64 insertions(+), 21 deletions(-) diff --git a/h5pyd/__init__.py b/h5pyd/__init__.py index 572c10c..e337fa0 100644 --- a/h5pyd/__init__.py +++ b/h5pyd/__init__.py @@ -21,7 +21,7 @@ from ._hl.files import File, is_hdf5 from ._hl.folders import Folder from ._hl.group import Group, SoftLink, ExternalLink, UserDefinedLink, HardLink -from ._hl.dataset import Dataset +from ._hl.dataset import Dataset, MultiManager from ._hl.table import Table from ._hl.datatype import Datatype from ._hl.attrs import AttributeManager diff --git a/h5pyd/_hl/dataset.py b/h5pyd/_hl/dataset.py index 62cfcf0..57c6a8b 100644 --- a/h5pyd/_hl/dataset.py +++ b/h5pyd/_hl/dataset.py @@ -19,6 +19,7 @@ import base64 import numpy import os +import logging from concurrent.futures import ThreadPoolExecutor from concurrent.futures import as_completed @@ -1741,10 +1742,14 @@ class MultiManager(): # Avoid overtaxing HSDS max_workers = 16 - def __init__(self, datasets=None): + def __init__(self, datasets=None, logger=None): if (datasets is None) or (len(datasets) == 0): raise ValueError("MultiManager requires non-empty list of datasets") self.datasets = datasets + if logger is None: + self.log = logging + else: + self.log = logging.getLogger(logger) def read_dset_tl(self, args): """ diff --git a/test/hl/common.py b/test/hl/common.py index 5d3f4e2..3bab462 100644 --- a/test/hl/common.py +++ b/test/hl/common.py @@ -12,7 +12,6 @@ from __future__ import absolute_import -import sys import os import os.path as op import tempfile @@ -38,6 +37,31 @@ del fname del testfile +def getTestFileName(basename, subfolder=None): + """ + Get filepath for a test case given a testname + """ + + if config.get("use_h5py"): + filename = "out" + if not op.isdir(filename): + os.mkdir(filename) + if subfolder: + filename = op.join(filename, subfolder) + if not op.isdir(filename): + os.mkdir(filename) + filename = op.join(filename, f"{basename}.h5") + else: + if "H5PYD_TEST_FOLDER" in os.environ: + filename = os.environ["H5PYD_TEST_FOLDER"] + else: + # default to the root folder + filename = "/" + if subfolder: + filename = op.join(filename, subfolder) + filename = op.join(filename, f"{basename}.h5") + return filename + class TestCase(ut.TestCase): @@ -201,23 +225,33 @@ def assertNumpyBehavior(self, dset, arr, s): with self.assertRaises(exc): dset[s] - def getFileName(self, basename): + def getFileName(self, basename, subfolder=None): """ Get filepath for a test case given a testname """ + # Just call the external function + filename = getTestFileName(basename, subfolder=subfolder) + + if config.get("use_h5py"): - if not op.isdir("out"): - os.mkdir("out") - filename = "out/" + basename + ".h5" + filename = "out" + if not op.isdir(filename): + os.mkdir(filename) + if subfolder: + filename = op.join(filename, subfolder) + if not op.isdir(filename): + os.mkdir(filename) + filename = op.join(filename, f"{basename}.h5") else: if "H5PYD_TEST_FOLDER" in os.environ: - domain = os.environ["H5PYD_TEST_FOLDER"] + filename = os.environ["H5PYD_TEST_FOLDER"] else: # default to the root folder - domain = "/" - filename = op.join(domain, basename) - filename += ".h5" + filename = "/" + if subfolder: + filename = op.join(filename, subfolder) + filename = op.join(filename, f"{basename}.h5") return filename def getPathFromDomain(self, domain): diff --git a/test/hl/multi_benchmark.py b/test/hl/multi_benchmark.py index 44c210e..1f4af58 100644 --- a/test/hl/multi_benchmark.py +++ b/test/hl/multi_benchmark.py @@ -6,8 +6,9 @@ import subprocess import re -from h5pyd._hl.dataset import MultiManager +from h5pyd import MultiManager import h5pyd as h5py +from common import getTestFileName # Flag to stop resource usage collection thread after a benchmark finishes stop_stat_collection = False @@ -244,12 +245,18 @@ def run_benchmark(test_name, test_func, stats, datasets, num_iters): if __name__ == '__main__': print("Executing multi read/write benchmark") shape = (100, 100, 100) - count = 64 + count = 4 # 64 num_iters = 50 dt = np.int32 stats = {} - fs = [h5py.File("/home/test_user1/h5pyd_multi_bm_" + str(i), mode='w') for i in range(count)] + fs = [] + + for i in range(count): + filename = getTestFileName(f"bm_{i:04d}", subfolder="multi_bm") + f = h5py.File(filename, mode='w') + fs.append(f) + data_in = np.zeros(shape, dtype=dt) datasets = [f.create_dataset("data", shape, dtype=dt, data=data_in) for f in fs] @@ -266,7 +273,8 @@ def run_benchmark(test_name, test_func, stats, datasets, num_iters): print("Testing with shared HTTP connection...") - f = h5py.File("/home/test_user1/h5pyd_multi_bm_shared", mode='w') + filename = getTestFileName("bm_shared", subfolder="multi_bm") + f = h5py.File(filename, mode='w') datasets = [f.create_dataset("data" + str(i), data=data_in, dtype=dt) for i in range(count)] run_benchmark("Read Multi (Shared HttpConn)", read_datasets_multi, stats, datasets, num_iters) diff --git a/test/hl/test_dataset.py b/test/hl/test_dataset.py index 7c5c96b..cfad3fe 100644 --- a/test/hl/test_dataset.py +++ b/test/hl/test_dataset.py @@ -23,11 +23,10 @@ import sys import numpy as np import platform -import warnings from common import ut, TestCase -from h5pyd._hl.dataset import MultiManager import config +from h5pyd import MultiManager if config.get("use_h5py"): from h5py import File, Dataset @@ -37,6 +36,7 @@ import h5pyd as h5py + def is_empty_dataspace(obj): shape_json = obj.shape_json if "class" not in shape_json: diff --git a/test/hl/test_dataset_extend.py b/test/hl/test_dataset_extend.py index d318786..d2c6f48 100644 --- a/test/hl/test_dataset_extend.py +++ b/test/hl/test_dataset_extend.py @@ -11,8 +11,6 @@ ############################################################################## import logging -import numpy as np -import math import config diff --git a/test/hl/test_dataset_fancyselect.py b/test/hl/test_dataset_fancyselect.py index a43e867..e506610 100644 --- a/test/hl/test_dataset_fancyselect.py +++ b/test/hl/test_dataset_fancyselect.py @@ -11,7 +11,6 @@ ############################################################################## import numpy as np -import math import config diff --git a/test/hl/test_datatype.py b/test/hl/test_datatype.py index 00c0c50..b5dceac 100644 --- a/test/hl/test_datatype.py +++ b/test/hl/test_datatype.py @@ -11,7 +11,6 @@ ############################################################################## import numpy as np -import math import logging import config From 0648dc1ab5d9a34d697d7faedc389ae4e2282f71 Mon Sep 17 00:00:00 2001 From: jreadey Date: Mon, 22 Apr 2024 13:53:21 -0700 Subject: [PATCH 2/4] fix flake8 warnings --- test/hl/common.py | 40 +++++++++++++++++++------------------- test/hl/multi_benchmark.py | 2 +- test/hl/test_dataset.py | 2 +- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/test/hl/common.py b/test/hl/common.py index 3bab462..81ab5cf 100644 --- a/test/hl/common.py +++ b/test/hl/common.py @@ -37,30 +37,31 @@ del fname del testfile + def getTestFileName(basename, subfolder=None): - """ - Get filepath for a test case given a testname - """ + """ + Get filepath for a test case given a testname + """ - if config.get("use_h5py"): - filename = "out" + if config.get("use_h5py"): + filename = "out" + if not op.isdir(filename): + os.mkdir(filename) + if subfolder: + filename = op.join(filename, subfolder) if not op.isdir(filename): os.mkdir(filename) - if subfolder: - filename = op.join(filename, subfolder) - if not op.isdir(filename): - os.mkdir(filename) - filename = op.join(filename, f"{basename}.h5") + filename = op.join(filename, f"{basename}.h5") + else: + if "H5PYD_TEST_FOLDER" in os.environ: + filename = os.environ["H5PYD_TEST_FOLDER"] else: - if "H5PYD_TEST_FOLDER" in os.environ: - filename = os.environ["H5PYD_TEST_FOLDER"] - else: - # default to the root folder - filename = "/" - if subfolder: - filename = op.join(filename, subfolder) - filename = op.join(filename, f"{basename}.h5") - return filename + # default to the root folder + filename = "/" + if subfolder: + filename = op.join(filename, subfolder) + filename = op.join(filename, f"{basename}.h5") + return filename class TestCase(ut.TestCase): @@ -233,7 +234,6 @@ def getFileName(self, basename, subfolder=None): # Just call the external function filename = getTestFileName(basename, subfolder=subfolder) - if config.get("use_h5py"): filename = "out" if not op.isdir(filename): diff --git a/test/hl/multi_benchmark.py b/test/hl/multi_benchmark.py index 1f4af58..3133466 100644 --- a/test/hl/multi_benchmark.py +++ b/test/hl/multi_benchmark.py @@ -245,7 +245,7 @@ def run_benchmark(test_name, test_func, stats, datasets, num_iters): if __name__ == '__main__': print("Executing multi read/write benchmark") shape = (100, 100, 100) - count = 4 # 64 + count = 64 num_iters = 50 dt = np.int32 stats = {} diff --git a/test/hl/test_dataset.py b/test/hl/test_dataset.py index cfad3fe..2cb7f3a 100644 --- a/test/hl/test_dataset.py +++ b/test/hl/test_dataset.py @@ -36,9 +36,9 @@ import h5pyd as h5py - def is_empty_dataspace(obj): shape_json = obj.shape_json + if "class" not in shape_json: raise KeyError() if shape_json["class"] == 'H5S_NULL': From 7da15121394783b050241e1738445e50fd3ae7ee Mon Sep 17 00:00:00 2001 From: jreadey Date: Wed, 24 Apr 2024 11:04:05 -0700 Subject: [PATCH 3/4] remove dup code --- test/hl/common.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/test/hl/common.py b/test/hl/common.py index 81ab5cf..9abccce 100644 --- a/test/hl/common.py +++ b/test/hl/common.py @@ -234,24 +234,6 @@ def getFileName(self, basename, subfolder=None): # Just call the external function filename = getTestFileName(basename, subfolder=subfolder) - if config.get("use_h5py"): - filename = "out" - if not op.isdir(filename): - os.mkdir(filename) - if subfolder: - filename = op.join(filename, subfolder) - if not op.isdir(filename): - os.mkdir(filename) - filename = op.join(filename, f"{basename}.h5") - else: - if "H5PYD_TEST_FOLDER" in os.environ: - filename = os.environ["H5PYD_TEST_FOLDER"] - else: - # default to the root folder - filename = "/" - if subfolder: - filename = op.join(filename, subfolder) - filename = op.join(filename, f"{basename}.h5") return filename def getPathFromDomain(self, domain): From 5a4754cb58bf118ed97908c01e30802085516973 Mon Sep 17 00:00:00 2001 From: jreadey Date: Wed, 24 Apr 2024 11:17:04 -0700 Subject: [PATCH 4/4] print error if sub-folder not present for multi_benchmark --- h5pyd/_hl/dataset.py | 4 ++-- test/hl/multi_benchmark.py | 7 ++++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/h5pyd/_hl/dataset.py b/h5pyd/_hl/dataset.py index 57c6a8b..e62edde 100644 --- a/h5pyd/_hl/dataset.py +++ b/h5pyd/_hl/dataset.py @@ -1798,7 +1798,7 @@ def __getitem__(self, args): except Exception as e: msg = f"{e}: Defaulting Number of SN_COREs to 1" - self.log.warning(msg) + self.log.debug(msg) num_endpoints = 1 if (num_endpoints > 1): @@ -1853,7 +1853,7 @@ def __setitem__(self, args, vals): raise ValueError("Malformed port range specification; must be sequential ports") except Exception as e: - print(f"{e}: Defaulting Number of SNs to 1") + self.log.debug(f"{e}: Defaulting Number of SNs to 1") num_endpoints = 1 # TODO: Handle the case where some or all datasets share an HTTPConn object diff --git a/test/hl/multi_benchmark.py b/test/hl/multi_benchmark.py index 3133466..64d5a8b 100644 --- a/test/hl/multi_benchmark.py +++ b/test/hl/multi_benchmark.py @@ -1,5 +1,6 @@ import numpy as np import time +import sys from concurrent.futures import ThreadPoolExecutor from concurrent.futures import as_completed @@ -254,7 +255,11 @@ def run_benchmark(test_name, test_func, stats, datasets, num_iters): for i in range(count): filename = getTestFileName(f"bm_{i:04d}", subfolder="multi_bm") - f = h5py.File(filename, mode='w') + try: + f = h5py.File(filename, mode='w') + except IOError: + print(f"unable to create domain at: {filename} - does the parent folder exist?") + sys.exit(1) fs.append(f) data_in = np.zeros(shape, dtype=dt)