From a54d95a6ee8882e1afcd798bbd291fa4488c1e00 Mon Sep 17 00:00:00 2001 From: John Readey Date: Wed, 24 Apr 2024 14:44:27 -0700 Subject: [PATCH] add MultiManager to h5pyd __init__.py (#185) * add MultiManager to h5pyd __init__.py * fix flake8 warnings * remove dup code * print error if sub-folder not present for multi_benchmark --- h5pyd/__init__.py | 2 +- h5pyd/_hl/dataset.py | 11 ++++++-- test/hl/common.py | 44 ++++++++++++++++++++--------- test/hl/multi_benchmark.py | 19 +++++++++++-- test/hl/test_dataset.py | 4 +-- test/hl/test_dataset_extend.py | 2 -- test/hl/test_dataset_fancyselect.py | 1 - test/hl/test_datatype.py | 1 - 8 files changed, 57 insertions(+), 27 deletions(-) diff --git a/h5pyd/__init__.py b/h5pyd/__init__.py index 572c10c..e337fa0 100644 --- a/h5pyd/__init__.py +++ b/h5pyd/__init__.py @@ -21,7 +21,7 @@ from ._hl.files import File, is_hdf5 from ._hl.folders import Folder from ._hl.group import Group, SoftLink, ExternalLink, UserDefinedLink, HardLink -from ._hl.dataset import Dataset +from ._hl.dataset import Dataset, MultiManager from ._hl.table import Table from ._hl.datatype import Datatype from ._hl.attrs import AttributeManager diff --git a/h5pyd/_hl/dataset.py b/h5pyd/_hl/dataset.py index 62cfcf0..e62edde 100644 --- a/h5pyd/_hl/dataset.py +++ b/h5pyd/_hl/dataset.py @@ -19,6 +19,7 @@ import base64 import numpy import os +import logging from concurrent.futures import ThreadPoolExecutor from concurrent.futures import as_completed @@ -1741,10 +1742,14 @@ class MultiManager(): # Avoid overtaxing HSDS max_workers = 16 - def __init__(self, datasets=None): + def __init__(self, datasets=None, logger=None): if (datasets is None) or (len(datasets) == 0): raise ValueError("MultiManager requires non-empty list of datasets") self.datasets = datasets + if logger is None: + self.log = logging + else: + self.log = logging.getLogger(logger) def read_dset_tl(self, args): """ @@ -1793,7 +1798,7 @@ def __getitem__(self, args): except Exception as e: msg = f"{e}: Defaulting Number of SN_COREs to 1" - self.log.warning(msg) + self.log.debug(msg) num_endpoints = 1 if (num_endpoints > 1): @@ -1848,7 +1853,7 @@ def __setitem__(self, args, vals): raise ValueError("Malformed port range specification; must be sequential ports") except Exception as e: - print(f"{e}: Defaulting Number of SNs to 1") + self.log.debug(f"{e}: Defaulting Number of SNs to 1") num_endpoints = 1 # TODO: Handle the case where some or all datasets share an HTTPConn object diff --git a/test/hl/common.py b/test/hl/common.py index 5d3f4e2..9abccce 100644 --- a/test/hl/common.py +++ b/test/hl/common.py @@ -12,7 +12,6 @@ from __future__ import absolute_import -import sys import os import os.path as op import tempfile @@ -39,6 +38,32 @@ del testfile +def getTestFileName(basename, subfolder=None): + """ + Get filepath for a test case given a testname + """ + + if config.get("use_h5py"): + filename = "out" + if not op.isdir(filename): + os.mkdir(filename) + if subfolder: + filename = op.join(filename, subfolder) + if not op.isdir(filename): + os.mkdir(filename) + filename = op.join(filename, f"{basename}.h5") + else: + if "H5PYD_TEST_FOLDER" in os.environ: + filename = os.environ["H5PYD_TEST_FOLDER"] + else: + # default to the root folder + filename = "/" + if subfolder: + filename = op.join(filename, subfolder) + filename = op.join(filename, f"{basename}.h5") + return filename + + class TestCase(ut.TestCase): """ @@ -201,23 +226,14 @@ def assertNumpyBehavior(self, dset, arr, s): with self.assertRaises(exc): dset[s] - def getFileName(self, basename): + def getFileName(self, basename, subfolder=None): """ Get filepath for a test case given a testname """ - if config.get("use_h5py"): - if not op.isdir("out"): - os.mkdir("out") - filename = "out/" + basename + ".h5" - else: - if "H5PYD_TEST_FOLDER" in os.environ: - domain = os.environ["H5PYD_TEST_FOLDER"] - else: - # default to the root folder - domain = "/" - filename = op.join(domain, basename) - filename += ".h5" + # Just call the external function + filename = getTestFileName(basename, subfolder=subfolder) + return filename def getPathFromDomain(self, domain): diff --git a/test/hl/multi_benchmark.py b/test/hl/multi_benchmark.py index 44c210e..64d5a8b 100644 --- a/test/hl/multi_benchmark.py +++ b/test/hl/multi_benchmark.py @@ -1,13 +1,15 @@ import numpy as np import time +import sys from concurrent.futures import ThreadPoolExecutor from concurrent.futures import as_completed import subprocess import re -from h5pyd._hl.dataset import MultiManager +from h5pyd import MultiManager import h5pyd as h5py +from common import getTestFileName # Flag to stop resource usage collection thread after a benchmark finishes stop_stat_collection = False @@ -249,7 +251,17 @@ def run_benchmark(test_name, test_func, stats, datasets, num_iters): dt = np.int32 stats = {} - fs = [h5py.File("/home/test_user1/h5pyd_multi_bm_" + str(i), mode='w') for i in range(count)] + fs = [] + + for i in range(count): + filename = getTestFileName(f"bm_{i:04d}", subfolder="multi_bm") + try: + f = h5py.File(filename, mode='w') + except IOError: + print(f"unable to create domain at: {filename} - does the parent folder exist?") + sys.exit(1) + fs.append(f) + data_in = np.zeros(shape, dtype=dt) datasets = [f.create_dataset("data", shape, dtype=dt, data=data_in) for f in fs] @@ -266,7 +278,8 @@ def run_benchmark(test_name, test_func, stats, datasets, num_iters): print("Testing with shared HTTP connection...") - f = h5py.File("/home/test_user1/h5pyd_multi_bm_shared", mode='w') + filename = getTestFileName("bm_shared", subfolder="multi_bm") + f = h5py.File(filename, mode='w') datasets = [f.create_dataset("data" + str(i), data=data_in, dtype=dt) for i in range(count)] run_benchmark("Read Multi (Shared HttpConn)", read_datasets_multi, stats, datasets, num_iters) diff --git a/test/hl/test_dataset.py b/test/hl/test_dataset.py index 7c5c96b..2cb7f3a 100644 --- a/test/hl/test_dataset.py +++ b/test/hl/test_dataset.py @@ -23,11 +23,10 @@ import sys import numpy as np import platform -import warnings from common import ut, TestCase -from h5pyd._hl.dataset import MultiManager import config +from h5pyd import MultiManager if config.get("use_h5py"): from h5py import File, Dataset @@ -39,6 +38,7 @@ def is_empty_dataspace(obj): shape_json = obj.shape_json + if "class" not in shape_json: raise KeyError() if shape_json["class"] == 'H5S_NULL': diff --git a/test/hl/test_dataset_extend.py b/test/hl/test_dataset_extend.py index d318786..d2c6f48 100644 --- a/test/hl/test_dataset_extend.py +++ b/test/hl/test_dataset_extend.py @@ -11,8 +11,6 @@ ############################################################################## import logging -import numpy as np -import math import config diff --git a/test/hl/test_dataset_fancyselect.py b/test/hl/test_dataset_fancyselect.py index a43e867..e506610 100644 --- a/test/hl/test_dataset_fancyselect.py +++ b/test/hl/test_dataset_fancyselect.py @@ -11,7 +11,6 @@ ############################################################################## import numpy as np -import math import config diff --git a/test/hl/test_datatype.py b/test/hl/test_datatype.py index 00c0c50..b5dceac 100644 --- a/test/hl/test_datatype.py +++ b/test/hl/test_datatype.py @@ -11,7 +11,6 @@ ############################################################################## import numpy as np -import math import logging import config