From a7fcc563bf4d040c20d8fae6f0cc0bc46e8fb091 Mon Sep 17 00:00:00 2001 From: mattjala Date: Wed, 27 Mar 2024 15:57:24 -0500 Subject: [PATCH 1/4] Fix int32 vlens being parsed as int64 --- h5pyd/_hl/files.py | 2 +- h5pyd/_hl/h5type.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/h5pyd/_hl/files.py b/h5pyd/_hl/files.py index 06ae737..ee8dbf2 100644 --- a/h5pyd/_hl/files.py +++ b/h5pyd/_hl/files.py @@ -215,7 +215,7 @@ def __init__( # remove the trailing slash on endpoint if it exists if endpoint.endswith('/'): endpoint = endpoint.strip('/') - + if username is None: if "H5SERV_USERNAME" in os.environ: username = os.environ["H5SERV_USERNAME"] diff --git a/h5pyd/_hl/h5type.py b/h5pyd/_hl/h5type.py index fed3da7..eb2afe2 100644 --- a/h5pyd/_hl/h5type.py +++ b/h5pyd/_hl/h5type.py @@ -142,7 +142,6 @@ def special_dtype(**kwds): name, val = kwds.popitem() if name == 'vlen': - return np.dtype('O', metadata={'vlen': val}) if name == 'enum': @@ -441,6 +440,10 @@ def getTypeItem(dt): type_info['length'] = 'H5T_VARIABLE' type_info['charSet'] = 'H5T_CSET_UTF8' type_info['strPad'] = 'H5T_STR_NULLTERM' + elif vlen_check == np.int32: + type_info['class'] = 'H5T_VLEN' + type_info['size'] = 'H5T_VARIABLE' + type_info['base'] = 'H5T_STD_I32' elif vlen_check in (int, np.int64): type_info['class'] = 'H5T_VLEN' type_info['size'] = 'H5T_VARIABLE' From 06feab2f20c6e57169dcdcd189f79430e4bb89e6 Mon Sep 17 00:00:00 2001 From: mattjala Date: Thu, 28 Mar 2024 20:29:08 -0500 Subject: [PATCH 2/4] Specify charset for json PUT requests --- h5pyd/_hl/httpconn.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/h5pyd/_hl/httpconn.py b/h5pyd/_hl/httpconn.py index cb161ba..7807de5 100644 --- a/h5pyd/_hl/httpconn.py +++ b/h5pyd/_hl/httpconn.py @@ -571,7 +571,11 @@ def PUT(self, req, body=None, format="json", params=None, headers=None): # binary write data = body else: + headers["Content-Type"] = "application/json" + # Must be explicitly specified for Windows + headers["Content-Type"] += "; charset=utf-8" data = json.dumps(body) + self.log.info("PUT: {} format: {} [{} bytes]".format(req, format, len(data))) try: From 4804c3b797669908a43bbeb55c337a0fa51cb8e6 Mon Sep 17 00:00:00 2001 From: mattjala Date: Mon, 1 Apr 2024 11:02:44 -0500 Subject: [PATCH 3/4] Standardize windows paths --- h5pyd/_hl/files.py | 6 ++++++ h5pyd/_hl/httpconn.py | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/h5pyd/_hl/files.py b/h5pyd/_hl/files.py index ee8dbf2..e811b7b 100644 --- a/h5pyd/_hl/files.py +++ b/h5pyd/_hl/files.py @@ -15,6 +15,7 @@ import os import time import json +import pathlib from .objectid import GroupID from .group import Group @@ -203,6 +204,11 @@ def __init__( if not domain: raise IOError(400, "no domain provided") + domain_path = pathlib.PurePath(domain) + if isinstance(domain_path, pathlib.PureWindowsPath): + # Standardize path root to POSIX-style path + domain = '/' + '/'.join(domain_path.parts[1:]) + if domain[0] != "/": raise IOError(400, "relative paths are not valid") diff --git a/h5pyd/_hl/httpconn.py b/h5pyd/_hl/httpconn.py index 7807de5..1a88dcf 100644 --- a/h5pyd/_hl/httpconn.py +++ b/h5pyd/_hl/httpconn.py @@ -572,10 +572,10 @@ def PUT(self, req, body=None, format="json", params=None, headers=None): data = body else: headers["Content-Type"] = "application/json" - # Must be explicitly specified for Windows - headers["Content-Type"] += "; charset=utf-8" data = json.dumps(body) + # Must be explicitly specified for Windows + headers["Content-Type"] += "; charset=utf-8" self.log.info("PUT: {} format: {} [{} bytes]".format(req, format, len(data))) try: From 73c3dda7c7c001be89e82568919d76f2827d576b Mon Sep 17 00:00:00 2001 From: mattjala Date: Mon, 1 Apr 2024 11:22:05 -0500 Subject: [PATCH 4/4] Use curl for hsload test files --- h5pyd/_hl/httpconn.py | 2 -- test/apps/load_files.py | 16 +++++++++++++--- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/h5pyd/_hl/httpconn.py b/h5pyd/_hl/httpconn.py index 1a88dcf..a9bbeba 100644 --- a/h5pyd/_hl/httpconn.py +++ b/h5pyd/_hl/httpconn.py @@ -574,8 +574,6 @@ def PUT(self, req, body=None, format="json", params=None, headers=None): headers["Content-Type"] = "application/json" data = json.dumps(body) - # Must be explicitly specified for Windows - headers["Content-Type"] += "; charset=utf-8" self.log.info("PUT: {} format: {} [{} bytes]".format(req, format, len(data))) try: diff --git a/test/apps/load_files.py b/test/apps/load_files.py index 2c58934..abf1303 100644 --- a/test/apps/load_files.py +++ b/test/apps/load_files.py @@ -15,7 +15,7 @@ import sys import config import h5pyd - +from platform import system # # Main # @@ -60,9 +60,19 @@ # wget from S3 http_path = test_file_http_path + filename print("downloading:", http_path) - rc = os.system(f"wget -q https://s3.amazonaws.com/hdfgroup/data/hdf5test/{filename} -P {data_dir}") + + if system() == "Windows": + get_cmd = f"curl.exe -o {filename}\ + https://s3.amazonaws.com/hdfgroup/data/hdf5test/{filename}\ + --create-dirs --output-dir {data_dir}" + else: + get_cmd = f"wget -q\ + https://s3.amazonaws.com/hdfgroup/data/hdf5test/{filename}\ + -P {data_dir}" + + rc = os.system(f"{get_cmd}") if rc != 0: - sys.exit("Failed to retreive test data file") + sys.exit(f"Failed to retreive test data file with error code {rc}") # run hsload for each file print(f"running hsload for {hdf5_path} to {test_folder}") rc = os.system(f"python ../../h5pyd/_apps/hsload.py {hdf5_path} {test_folder}")