From 9093c5d5bc518d61fe64f20fbe8fd45663cbe649 Mon Sep 17 00:00:00 2001 From: Serwan Asaad Date: Tue, 20 Feb 2024 21:20:49 +0100 Subject: [PATCH 01/28] add functionality to select new data folder based on idx --- qualang_tools/results/data_handler.py | 193 ++++++++++++++++++ tests/data_handler/test_create_data_folder.py | 76 +++++++ tests/data_handler/test_data_handler.py | 16 ++ .../test_extract_data_folder_properties.py | 21 ++ .../test_get_latest_data_folder.py | 125 ++++++++++++ tests/data_handler/test_validate_datetime.py | 15 ++ 6 files changed, 446 insertions(+) create mode 100644 qualang_tools/results/data_handler.py create mode 100644 tests/data_handler/test_create_data_folder.py create mode 100644 tests/data_handler/test_data_handler.py create mode 100644 tests/data_handler/test_extract_data_folder_properties.py create mode 100644 tests/data_handler/test_get_latest_data_folder.py create mode 100644 tests/data_handler/test_validate_datetime.py diff --git a/qualang_tools/results/data_handler.py b/qualang_tools/results/data_handler.py new file mode 100644 index 00000000..2c4369b4 --- /dev/null +++ b/qualang_tools/results/data_handler.py @@ -0,0 +1,193 @@ +from pathlib import Path +from typing import Sequence, Dict, Union, Optional +import re +from datetime import datetime +import re + +DEFAULT_FOLDER_PATTERN = "%Y-%m-%d/#{idx}_{name}_%H%M%S" + + +def _validate_datetime(datetime_str: str, datetime_format: str) -> bool: + """Validate a datetime string with a given format. + + :param datetime_str: The datetime string to validate. + :param datetime_format: The format of the datetime string. + :return: True if the datetime string is valid, False otherwise. 
+ """ + try: + datetime.strptime(datetime_str, datetime_format) + except ValueError: + return False + return True + + +def extract_data_folder_properties( + data_folder: Path, pattern: str = DEFAULT_FOLDER_PATTERN, root_data_folder: Path = None +) -> Optional[Dict[str, Union[str, int]]]: + """Extract properties from a data folder. + + :param data_folder: The data folder to extract properties from. + :param pattern: The pattern to extract the properties from, e.g. "#{idx}_{name}_%H%M%S". + :param root_data_folder: The root data folder to extract the relative path from. + If not provided, "relative_path" is not included in the properties. + + :return: A dictionary with the extracted properties. + Dictionary keys: + - idx: The index of the data folder. + - name: The name of the data folder. + - datetime attributes "year", "month", "day", "hour", "minute", "second". + - absolute_path: The absolute path of the data folder. + - relative_path: The relative path of the data folder w.r.t the root_data_folder. 
"""
+    pattern = pattern.replace("{idx}", r"(?P<idx>\d+)")
+    pattern = pattern.replace("{name}", r"(?P<name>\w+)")
+    pattern = pattern.replace("%Y", r"(?P<year>\d{4})")
+    pattern = pattern.replace("%m", r"(?P<month>\d{2})")
+    pattern = pattern.replace("%d", r"(?P<day>\d{2})")
+    pattern = pattern.replace("%H", r"(?P<hour>\d{2})")
+    pattern = pattern.replace("%M", r"(?P<minute>\d{2})")
+    pattern = pattern.replace("%S", r"(?P<second>\d{2})")
+
+    if root_data_folder is not None:
+        folder_path_str = str(data_folder.relative_to(root_data_folder))
+    else:
+        folder_path_str = data_folder.name
+
+    regex_match = re.match(pattern, folder_path_str)
+    if regex_match is None:
+        return None
+    properties = regex_match.groupdict()
+    properties = {key: int(value) if value.isdigit() else value for key, value in properties.items()}
+    properties["absolute_path"] = str(data_folder)
+    if root_data_folder is not None:
+        properties["relative_path"] = str(data_folder.relative_to(root_data_folder))
+    return properties
+
+
+def get_latest_data_folder(
+    root_data_folder: Path,
+    folder_pattern: str = DEFAULT_FOLDER_PATTERN,
+    relative_path: Path = Path("."),
+    current_folder_pattern: str = None,
+) -> Optional[Dict[str, Union[str, int]]]:
+    """Get the latest data folder in a given root data folder.
+
+    Typically this is the folder within a date folder with the highest index.
+
+    :param root_data_folder: The root data folder to search for the latest data folder.
+    :param folder_pattern: The pattern of the data folder, e.g. "%Y-%m-%d/#{idx}_{name}_%H%M%S".
+    :param relative_path: The relative path to the data folder. Used for recursive calls.
+    :param current_folder_pattern: The current folder pattern. Used for recursive calls.
+    :return: A dictionary with the properties of the latest data folder.
+        Dictionary keys:
+        - idx: The index of the data folder.
+        - name: The name of the data folder.
+        - datetime attributes "year", "month", "day", "hour", "minute", "second".
+        - absolute_path: The absolute path of the data folder.
+ - relative_path: The relative path of the data folder w.r.t the root_data_folder. + """ + if isinstance(root_data_folder, str): + root_data_folder = Path(root_data_folder) + + if not root_data_folder.exists(): + raise NotADirectoryError(f"Root data folder {root_data_folder} does not exist.") + + if current_folder_pattern is None: + current_folder_pattern = folder_pattern + + current_folder_pattern, *remaining_folder_pattern = current_folder_pattern.split("/", maxsplit=1) + + folder_path = root_data_folder / relative_path + + if not remaining_folder_pattern: + if "{idx}" not in current_folder_pattern: + raise ValueError("The folder pattern must contain '{idx}' at the end.") + # Get the latest idx + folders = [f for f in folder_path.iterdir() if f.is_dir()] + folders = [ + f for f in folders if extract_data_folder_properties(f, folder_pattern, root_data_folder=root_data_folder) + ] + + if not folders: + return None + + latest_folder = max(folders, key=lambda f: f.name) + return extract_data_folder_properties( + data_folder=latest_folder, pattern=folder_pattern, root_data_folder=root_data_folder + ) + elif "{idx}" in current_folder_pattern: + raise ValueError("The folder pattern must only contain '{idx}' in the last part.") + else: + # Filter out elements that aren't folders + folders = filter(lambda f: f.is_dir(), folder_path.iterdir()) + # Filter folders that match the datetime of the current folder pattern + folders = filter(lambda f: _validate_datetime(f.name, current_folder_pattern), folders) + + if not folders: + return None + + # Sort folders by name (either datetime or index) + sorted_folders = sorted(folders, key=lambda f: f.name, reverse=True) + + # Iterate over the folders, recursively call determine_latest_data_folder_idx + for folder in sorted_folders: + sub_folder_idx = get_latest_data_folder( + root_data_folder, + folder_pattern=folder_pattern, + current_folder_pattern=remaining_folder_pattern[0], + relative_path=relative_path / folder.name, + ) + if 
sub_folder_idx is not None: + return sub_folder_idx + return None + + +def create_data_folder( + root_data_folder: Path, + name: str, + idx: Optional[int] = None, + folder_pattern: str = DEFAULT_FOLDER_PATTERN, + use_datetime: Optional[datetime] = None, + create: bool = True, +) -> Dict[str, Union[str, int]]: + """Create a new data folder in a given root data folder. + + First checks the index of the latest data folder and increments by one. + + :param root_data_folder: The root data folder to create the new data folder in. + :param name: The name of the new data folder. + :param idx: The index of the new data folder. If not provided, the index is determined automatically. + :param folder_pattern: The pattern of the data folder, e.g. "%Y-%m-%d/#{idx}_{name}_%H%M%S". + :param use_datetime: The datetime to use for the folder name. + :param create: Whether to create the folder or not. + """ + if isinstance(root_data_folder, str): + root_data_folder = Path(root_data_folder) + + if not root_data_folder.exists(): + raise NotADirectoryError(f"Root data folder {root_data_folder} does not exist.") + + # Determine the latest folder index and increment by one + latest_folder_properties = get_latest_data_folder(root_data_folder, folder_pattern=folder_pattern) + + if use_datetime is None: + use_datetime = datetime.now() + + if latest_folder_properties is None: + # Create new folder with index 1 + idx = 1 + else: + idx = latest_folder_properties["idx"] + 1 + + relative_folder_name = folder_pattern.format(idx=idx, name=name) + relative_folder_name = use_datetime.strftime(relative_folder_name) + + data_folder = root_data_folder / relative_folder_name + + if data_folder.exists(): + raise FileExistsError(f"Data folder {data_folder} already exists.") + + if create: + data_folder.mkdir(parents=True) + + return extract_data_folder_properties(data_folder, folder_pattern, root_data_folder) diff --git a/tests/data_handler/test_create_data_folder.py 
b/tests/data_handler/test_create_data_folder.py new file mode 100644 index 00000000..678d09df --- /dev/null +++ b/tests/data_handler/test_create_data_folder.py @@ -0,0 +1,76 @@ +import pytest +from datetime import datetime +from pathlib import Path +from qualang_tools.results.data_handler import create_data_folder, DEFAULT_FOLDER_PATTERN + + +def test_create_data_folder(tmp_path): + with pytest.raises(NotADirectoryError): + create_data_folder(tmp_path / "nonexisting", name="test") + + +def test_create_data_folder_empty(tmp_path): + now = datetime.now() + + properties = create_data_folder(tmp_path, name="my_test", use_datetime=now) + + path = DEFAULT_FOLDER_PATTERN.format(idx=1, name="my_test") + path = now.strftime(path) + + properties_expected = { + "idx": 1, + "name": "my_test", + "year": now.year, + "month": now.month, + "day": now.day, + "hour": now.hour, + "minute": now.minute, + "second": now.second, + "absolute_path": str(tmp_path / path), + "relative_path": path, + } + + assert properties == properties_expected + + +def test_create_successive_data_folder(tmp_path): + now = datetime.now() + + properties = create_data_folder(tmp_path, name="my_test", use_datetime=now) + path = DEFAULT_FOLDER_PATTERN.format(idx=1, name="my_test") + path = now.strftime(path) + + properties_expected = { + "idx": 1, + "name": "my_test", + "year": now.year, + "month": now.month, + "day": now.day, + "hour": now.hour, + "minute": now.minute, + "second": now.second, + "absolute_path": str(tmp_path / path), + "relative_path": path, + } + + assert properties == properties_expected + + properties = create_data_folder(tmp_path, name="my_test", use_datetime=now) + + path = DEFAULT_FOLDER_PATTERN.format(idx=2, name="my_test") + path = now.strftime(path) + + properties_expected = { + "idx": 2, + "name": "my_test", + "year": now.year, + "month": now.month, + "day": now.day, + "hour": now.hour, + "minute": now.minute, + "second": now.second, + "absolute_path": str(tmp_path / path), + 
"relative_path": path, + } + + assert properties == properties_expected diff --git a/tests/data_handler/test_data_handler.py b/tests/data_handler/test_data_handler.py new file mode 100644 index 00000000..fd08440f --- /dev/null +++ b/tests/data_handler/test_data_handler.py @@ -0,0 +1,16 @@ +import pytest +from datetime import datetime + +from qualang_tools.results.data_handler import * + + +def test_determine_data_folder_nonextisting(tmp_path): + with pytest.raises(NotADirectoryError): + determine_data_folder(root_data_folder=tmp_path / "nonexisting", name="test") + + +def test_determine_data_folder_default_structure(tmp_path): + date_time = datetime.now() + data_folder = determine_data_folder(root_data_folder=tmp_path, name="test", idx=123) + + assert data_folder == tmp_path / date_time.strftime("%Y-%m-%d") / f"#123_test_{date_time.strftime('%H%M%S')}" diff --git a/tests/data_handler/test_extract_data_folder_properties.py b/tests/data_handler/test_extract_data_folder_properties.py new file mode 100644 index 00000000..4ad767e0 --- /dev/null +++ b/tests/data_handler/test_extract_data_folder_properties.py @@ -0,0 +1,21 @@ +import pytest +from pathlib import Path +from qualang_tools.results.data_handler import extract_data_folder_properties + + +def test_extract_data_folder_properties(): + properties = extract_data_folder_properties(Path("#123_test_123456"), "#{idx}_{name}_%H%M%S") + expected_properties = { + "idx": 123, + "name": "test", + "hour": 12, + "minute": 34, + "second": 56, + "absolute_path": "#123_test_123456", + } + assert properties == expected_properties + + properties = extract_data_folder_properties(Path("#123_my_test_123456"), "#{idx}_{name}_%H%M%S") + expected_properties["name"] = "my_test" + expected_properties["absolute_path"] = "#123_my_test_123456" + assert properties == expected_properties diff --git a/tests/data_handler/test_get_latest_data_folder.py b/tests/data_handler/test_get_latest_data_folder.py new file mode 100644 index 
00000000..e2029a7e --- /dev/null +++ b/tests/data_handler/test_get_latest_data_folder.py @@ -0,0 +1,125 @@ +from qualang_tools.results.data_handler import get_latest_data_folder + + +def test_get_latest_data_folder_empty(tmp_path): + assert get_latest_data_folder(tmp_path) is None + + +def test_get_latest_data_folder_default_structure(tmp_path): + date_folder = tmp_path / "2021-01-05" + date_folder.mkdir() + (date_folder / "#123_test_123456").mkdir() + + properties = get_latest_data_folder(tmp_path) + expected_properties = { + "idx": 123, + "name": "test", + "year": 2021, + "month": 1, + "day": 5, + "hour": 12, + "minute": 34, + "second": 56, + "absolute_path": str(date_folder / "#123_test_123456"), + "relative_path": f"{date_folder.name}/#123_test_123456", + } + + assert properties == expected_properties + + +def test_get_latest_data_folder_two_items(tmp_path): + date_folder = tmp_path / "2021-01-05" + date_folder.mkdir() + (date_folder / "#123_test_123456").mkdir() + (date_folder / "#124_test_123457").mkdir() + + properties = get_latest_data_folder(tmp_path) + expected_properties = { + "idx": 124, + "name": "test", + "year": 2021, + "month": 1, + "day": 5, + "hour": 12, + "minute": 34, + "second": 57, + "absolute_path": str(date_folder / "#124_test_123457"), + "relative_path": f"{date_folder.name}/#124_test_123457", + } + + assert properties == expected_properties + + +def test_get_latest_data_folder_two_items_different_date(tmp_path): + date_folder = tmp_path / "2021-01-05" + date_folder.mkdir() + (date_folder / "#123_test_123456").mkdir() + + date_folder = tmp_path / "2021-01-06" + date_folder.mkdir() + (date_folder / "#124_test_123457").mkdir() + + properties = get_latest_data_folder(tmp_path) + expected_properties = { + "idx": 124, + "name": "test", + "year": 2021, + "month": 1, + "day": 6, + "hour": 12, + "minute": 34, + "second": 57, + "absolute_path": str(date_folder / "#124_test_123457"), + "relative_path": f"{date_folder.name}/#124_test_123457", + } + + 
assert properties == expected_properties + + +def test_get_latest_data_folder_different_date_empty_last_folder(tmp_path): + date_folder = tmp_path / "2021-01-05" + date_folder.mkdir() + (date_folder / "#123_test_123456").mkdir() + + date_folder = tmp_path / "2021-01-06" + date_folder.mkdir() + + properties = get_latest_data_folder(tmp_path) + expected_properties = { + "idx": 123, + "name": "test", + "year": 2021, + "month": 1, + "day": 5, + "hour": 12, + "minute": 34, + "second": 56, + "absolute_path": str(tmp_path / "2021-01-05/#123_test_123456"), + "relative_path": "2021-01-05/#123_test_123456", + } + + assert properties == expected_properties + + +def test_get_latest_data_folder_switched_idxs(tmp_path): + date_folder = tmp_path / "2021-01-05" + (date_folder / "#124_test_123456").mkdir(parents=True) + + date_folder = tmp_path / "2021-01-06" + (date_folder / "#123_test_123457").mkdir(parents=True) + + properties = get_latest_data_folder(tmp_path) + expected_properties = { + "idx": 123, + "name": "test", + "year": 2021, + "month": 1, + "day": 6, + "hour": 12, + "minute": 34, + "second": 57, + "absolute_path": str(date_folder / "#123_test_123457"), + "relative_path": f"{date_folder.name}/#123_test_123457", + } + + assert properties == expected_properties diff --git a/tests/data_handler/test_validate_datetime.py b/tests/data_handler/test_validate_datetime.py new file mode 100644 index 00000000..9d568123 --- /dev/null +++ b/tests/data_handler/test_validate_datetime.py @@ -0,0 +1,15 @@ +from qualang_tools.results.data_handler import _validate_datetime + + +def test_validate_datetime_empty(): + assert not _validate_datetime("", "%Y-%m-%d") + + +def test_validate_datetime_empty_format(): + assert not _validate_datetime("2021-01-01", "") + assert _validate_datetime("", "") + + +def test_validate_datetime_basic(): + assert _validate_datetime("2021-01-01", "%Y-%m-%d") + assert not _validate_datetime("2021-01-01", "%Y-%m-%d %H:%M:%S") From 
cdbd19e1a60ef70184feadc3690d5ef3daf898b3 Mon Sep 17 00:00:00 2001 From: Serwan Asaad Date: Wed, 21 Feb 2024 14:09:31 +0100 Subject: [PATCH 02/28] started adding save_data --- .../results/data_handler/__init__.py | 0 .../data_folder_tools.py} | 0 .../results/data_handler/data_storage.py | 91 +++++++++++++++++++ .../test_extract_data_folder_properties.py | 2 +- .../data_handler/test_iterate_nested_dict.py | 22 +++++ tests/data_handler/test_validate_datetime.py | 2 +- 6 files changed, 115 insertions(+), 2 deletions(-) create mode 100644 qualang_tools/results/data_handler/__init__.py rename qualang_tools/results/{data_handler.py => data_handler/data_folder_tools.py} (100%) create mode 100644 qualang_tools/results/data_handler/data_storage.py create mode 100644 tests/data_handler/test_iterate_nested_dict.py diff --git a/qualang_tools/results/data_handler/__init__.py b/qualang_tools/results/data_handler/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/qualang_tools/results/data_handler.py b/qualang_tools/results/data_handler/data_folder_tools.py similarity index 100% rename from qualang_tools/results/data_handler.py rename to qualang_tools/results/data_handler/data_folder_tools.py diff --git a/qualang_tools/results/data_handler/data_storage.py b/qualang_tools/results/data_handler/data_storage.py new file mode 100644 index 00000000..550e9ed7 --- /dev/null +++ b/qualang_tools/results/data_handler/data_storage.py @@ -0,0 +1,91 @@ +from abc import ABC, abstractmethod +from typing import * +import json +from pathlib import Path + +from matplotlib import pyplot as plt + + +def iterate_nested_dict( + d: Dict[str, Any], parent_keys: Optional[List[str]] = None +) -> Generator[Tuple[List[str], Any], None, None]: + """Iterate over a nested dictionary + + :param d: The dictionary to iterate over + :param parent_keys: The keys of the parent dictionary. 
Used for recursion + + :return: A generator that yields a tuple of the keys and the value + + """ + if parent_keys is None: + parent_keys = [] + for k, v in d.items(): + keys = parent_keys + [k] + yield keys, v + if isinstance(v, dict): + yield from iterate_nested_dict(v, parent_keys=keys) + + +class DataProcessor(ABC): + @abstractmethod + def process(self, data): + pass + + @abstractmethod + def post_process(self, data_folder: Path): + pass + + +class MatplotlibPlotSaver(DataProcessor): + def __init__(self, file_format="png"): + self.file_format = file_format + + def process(self, data): + for key, val in iterate_nested_dict(data): + if isinstance(val, plt.Figure): + val.savefig(f"{key}.{self.file_format}") + return data + + +def save_data( + data_folder: Path, + data, + metadata=None, + data_filename="data.json", + metadata_filename="metadata.json", + data_processors=(), +): + """Save data to a folder + + :param data_folder: The folder where the data will be saved + :param data: The data to be saved + :param metadata: Metadata to be saved + :param data_filename: The filename of the data + :param metadata_filename: The filename of the metadata + :param data_processors: A list of data processors to be applied to the data + """ + if isinstance(data_folder, str): + data_folder = Path(data_folder) + + if not data_folder.exists(): + raise NotADirectoryError(f"Save_data: data_folder {data_folder} does not exist") + + if not isinstance(data, dict): + raise TypeError("save_data: 'data' must be a dictionary") + + processed_data = data + for data_processor in data_processors: + processed_data = data_processor.process(processed_data) + + with (data_folder / data_filename).open("w") as f: + json.dump(processed_data, f) + + if metadata is not None: + if not isinstance(metadata, dict): + raise TypeError("save_data: 'metadata' must be a dictionary") + + with (data_folder / metadata_filename).open("w") as f: + json.dump(metadata, f) + + for data_processor in data_processors: + 
data_processor.post_process(data_folder=data_folder) diff --git a/tests/data_handler/test_extract_data_folder_properties.py b/tests/data_handler/test_extract_data_folder_properties.py index 4ad767e0..0e3c7d48 100644 --- a/tests/data_handler/test_extract_data_folder_properties.py +++ b/tests/data_handler/test_extract_data_folder_properties.py @@ -1,6 +1,6 @@ import pytest from pathlib import Path -from qualang_tools.results.data_handler import extract_data_folder_properties +from qualang_tools.results.data_handler.data_folder_tools import extract_data_folder_properties def test_extract_data_folder_properties(): diff --git a/tests/data_handler/test_iterate_nested_dict.py b/tests/data_handler/test_iterate_nested_dict.py new file mode 100644 index 00000000..bf51cbb1 --- /dev/null +++ b/tests/data_handler/test_iterate_nested_dict.py @@ -0,0 +1,22 @@ +from qualang_tools.results.data_handler.data_storage import iterate_nested_dict + + +def test_iterate_nested_dict_empty(): + d = {} + + result = list(iterate_nested_dict(d)) + assert result == [] + + +def test_iterate_nested_dict_basic(): + d = {"a": 1, "b": 2, "c": 3} + + result = list(iterate_nested_dict(d)) + assert result == [(["a"], 1), (["b"], 2), (["c"], 3)] + + +def test_iterate_nested_dict_nested(): + d = {"a": 1, "b": {"c": 2, "d": 3}} + + result = list(iterate_nested_dict(d)) + assert result == [(["a"], 1), (["b"], d["b"]), (["b", "c"], 2), (["b", "d"], 3)] diff --git a/tests/data_handler/test_validate_datetime.py b/tests/data_handler/test_validate_datetime.py index 9d568123..5352fb46 100644 --- a/tests/data_handler/test_validate_datetime.py +++ b/tests/data_handler/test_validate_datetime.py @@ -1,4 +1,4 @@ -from qualang_tools.results.data_handler import _validate_datetime +from qualang_tools.results.data_handler.data_folder_tools import _validate_datetime def test_validate_datetime_empty(): From 5e5957efae2cc934cc55db3b98a92466fa694783 Mon Sep 17 00:00:00 2001 From: Serwan Asaad Date: Wed, 21 Feb 2024 19:27:59 
+0100 Subject: [PATCH 03/28] basic data handler --- .../results/data_handler/data_folder_tools.py | 23 ++--- .../results/data_handler/data_handler.py | 96 +++++++++++++++++++ .../results/data_handler/data_processors.py | 75 +++++++++++++++ .../results/data_handler/data_storage.py | 91 ------------------ tests/data_handler/test_data_handler.py | 96 +++++++++++++++++-- .../test_data_processor_matplotlib.py | 39 ++++++++ .../test_extract_data_folder_properties.py | 4 +- tests/data_handler/test_save_data.py | 26 +++++ 8 files changed, 338 insertions(+), 112 deletions(-) create mode 100644 qualang_tools/results/data_handler/data_handler.py create mode 100644 qualang_tools/results/data_handler/data_processors.py delete mode 100644 qualang_tools/results/data_handler/data_storage.py create mode 100644 tests/data_handler/test_data_processor_matplotlib.py create mode 100644 tests/data_handler/test_save_data.py diff --git a/qualang_tools/results/data_handler/data_folder_tools.py b/qualang_tools/results/data_handler/data_folder_tools.py index 2c4369b4..75e4a578 100644 --- a/qualang_tools/results/data_handler/data_folder_tools.py +++ b/qualang_tools/results/data_handler/data_folder_tools.py @@ -36,7 +36,7 @@ def extract_data_folder_properties( - idx: The index of the data folder. - name: The name of the data folder. - datetime attributes "year", "month", "day", "hour", "minute", "second". - - absolute_path: The absolute path of the data folder. + - path: The absolute path of the data folder. - relative_path: The relative path of the data folder w.r.t the root_data_folder. 
""" pattern = pattern.replace("{idx}", r"(?P\d+)") @@ -58,7 +58,7 @@ def extract_data_folder_properties( return None properties = regex_match.groupdict() properties = {key: int(value) if value.isdigit() else value for key, value in properties.items()} - properties["absolute_path"] = str(data_folder) + properties["path"] = str(data_folder) if root_data_folder is not None: properties["relative_path"] = str(data_folder.relative_to(root_data_folder)) return properties @@ -83,7 +83,7 @@ def get_latest_data_folder( - idx: The index of the data folder. - name: The name of the data folder. - datetime attributes "year", "month", "day", "hour", "minute", "second". - - absolute_path: The absolute path of the data folder. + - path: The absolute path of the data folder. - relative_path: The relative path of the data folder w.r.t the root_data_folder. """ if isinstance(root_data_folder, str): @@ -167,17 +167,18 @@ def create_data_folder( if not root_data_folder.exists(): raise NotADirectoryError(f"Root data folder {root_data_folder} does not exist.") - # Determine the latest folder index and increment by one - latest_folder_properties = get_latest_data_folder(root_data_folder, folder_pattern=folder_pattern) - if use_datetime is None: use_datetime = datetime.now() - if latest_folder_properties is None: - # Create new folder with index 1 - idx = 1 - else: - idx = latest_folder_properties["idx"] + 1 + if idx is None: + # Determine the latest folder index and increment by one + latest_folder_properties = get_latest_data_folder(root_data_folder, folder_pattern=folder_pattern) + + if latest_folder_properties is None: + # Create new folder with index 1 + idx = 1 + else: + idx = latest_folder_properties["idx"] + 1 relative_folder_name = folder_pattern.format(idx=idx, name=name) relative_folder_name = use_datetime.strftime(relative_folder_name) diff --git a/qualang_tools/results/data_handler/data_handler.py b/qualang_tools/results/data_handler/data_handler.py new file mode 100644 index 
00000000..7be22702 --- /dev/null +++ b/qualang_tools/results/data_handler/data_handler.py @@ -0,0 +1,96 @@ +from datetime import datetime +from pathlib import Path +import json +from typing import Any, Dict, Optional, Sequence, Union + +from .data_processors import DEFAULT_DATA_PROCESSORS, DataProcessor +from .data_folder_tools import DEFAULT_FOLDER_PATTERN, create_data_folder + + +def save_data( + data_folder: Path, + data: Dict[str, Any], + metadata: Optional[Dict[str, Any]] = None, + data_filename: str = "data.json", + metadata_filename: str = "metadata.json", + data_processors: Sequence[DataProcessor] = (), +) -> None: + """Save data to a folder + + :param data_folder: The folder where the data will be saved + :param data: The data to be saved + :param metadata: Metadata to be saved + :param data_filename: The filename of the data + :param metadata_filename: The filename of the metadata + :param data_processors: A list of data processors to be applied to the data + """ + if isinstance(data_folder, str): + data_folder = Path(data_folder) + + if not data_folder.exists(): + raise NotADirectoryError(f"Save_data: data_folder {data_folder} does not exist") + + if not isinstance(data, dict): + raise TypeError("save_data: 'data' must be a dictionary") + + processed_data = data.copy() + for data_processor in data_processors: + processed_data = data_processor.process(processed_data) + + json_data = json.dumps(processed_data, indent=4) + (data_folder / data_filename).write_text(json_data) + + if metadata is not None: + if not isinstance(metadata, dict): + raise TypeError("save_data: 'metadata' must be a dictionary") + + with (data_folder / metadata_filename).open("w") as f: + json.dump(metadata, f) + + for data_processor in data_processors: + data_processor.post_process(data_folder=data_folder) + + +class DataHandler: + default_data_processors = DEFAULT_DATA_PROCESSORS + root_data_folder: Path = None + folder_pattern: str = DEFAULT_FOLDER_PATTERN + data_filename: str = 
"data.json" + metadata_filename: str = "metadata.json" + + def __init__( + self, + data_processors: Optional[Sequence[DataProcessor]] = None, + root_data_folder: Optional[Union[str, Path]] = None, + folder_pattern: Optional[str] = None, + ): + if data_processors is not None: + self.data_processors = data_processors + else: + self.data_processors = [processor() for processor in self.default_data_processors] + + if root_data_folder is not None: + self.root_data_folder = root_data_folder + if folder_pattern is not None: + self.folder_pattern = folder_pattern + + def create_data_folder(self, name, idx=None, use_datetime: Optional[datetime] = None, create=True): + """Create a new data folder in the root data folder""" + return create_data_folder( + root_data_folder=self.root_data_folder, + folder_pattern=self.folder_pattern, + name=name, + idx=idx, + ) + + def save_data(self, name, data, metadata=None, idx=None, use_datetime: Optional[datetime] = None): + data_folder_properties = self.create_data_folder(name, idx=idx, use_datetime=use_datetime) + + return save_data( + data_folder=data_folder_properties["path"], + data=data, + metadata=metadata, + data_filename=self.data_filename, + metadata_filename=self.metadata_filename, + data_processors=self.data_processors, + ) diff --git a/qualang_tools/results/data_handler/data_processors.py b/qualang_tools/results/data_handler/data_processors.py new file mode 100644 index 00000000..ef123cb6 --- /dev/null +++ b/qualang_tools/results/data_handler/data_processors.py @@ -0,0 +1,75 @@ +from pathlib import Path +from abc import ABC +from typing import Dict, Any, Generator, List, Tuple, Optional + +from matplotlib import pyplot as plt + +DEFAULT_DATA_PROCESSORS = [] + + +def iterate_nested_dict( + d: Dict[str, Any], parent_keys: Optional[List[str]] = None +) -> Generator[Tuple[List[str], Any], None, None]: + """Iterate over a nested dictionary + + :param d: The dictionary to iterate over + :param parent_keys: The keys of the parent 
dictionary. Used for recursion + + :return: A generator that yields a tuple of the keys and the value + + """ + if parent_keys is None: + parent_keys = [] + for k, v in d.items(): + keys = parent_keys + [k] + yield keys, v + if isinstance(v, dict): + yield from iterate_nested_dict(v, parent_keys=keys) + + +def update_nested_dict(d, keys, value): + subdict = d + for key in keys[:-1]: + subdict = subdict[key] + + subdict[keys[-1]] = value + + +class DataProcessor(ABC): + def process(self, data): + return data + + def post_process(self, data_folder: Path): + pass + + +class MatplotlibPlotSaver(DataProcessor): + def __init__(self, file_format="png"): + self.file_format = file_format + self.data_figures = {} + + @property + def file_suffix(self): + suffix = self.file_format + if not suffix.startswith("."): + suffix = "." + suffix + return suffix + + def process(self, data): + self.data_figures = {} + + for keys, val in iterate_nested_dict(data): + if isinstance(val, plt.Figure): + path = Path("/".join(keys)).with_suffix(self.file_suffix) + + self.data_figures[path] = val + update_nested_dict(data, keys, f"./{path}") + + return data + + def post_process(self, data_folder: Path): + for path, fig in self.data_figures.items(): + fig.savefig(data_folder / path) + + +DEFAULT_DATA_PROCESSORS.append(MatplotlibPlotSaver) diff --git a/qualang_tools/results/data_handler/data_storage.py b/qualang_tools/results/data_handler/data_storage.py deleted file mode 100644 index 550e9ed7..00000000 --- a/qualang_tools/results/data_handler/data_storage.py +++ /dev/null @@ -1,91 +0,0 @@ -from abc import ABC, abstractmethod -from typing import * -import json -from pathlib import Path - -from matplotlib import pyplot as plt - - -def iterate_nested_dict( - d: Dict[str, Any], parent_keys: Optional[List[str]] = None -) -> Generator[Tuple[List[str], Any], None, None]: - """Iterate over a nested dictionary - - :param d: The dictionary to iterate over - :param parent_keys: The keys of the parent 
dictionary. Used for recursion - - :return: A generator that yields a tuple of the keys and the value - - """ - if parent_keys is None: - parent_keys = [] - for k, v in d.items(): - keys = parent_keys + [k] - yield keys, v - if isinstance(v, dict): - yield from iterate_nested_dict(v, parent_keys=keys) - - -class DataProcessor(ABC): - @abstractmethod - def process(self, data): - pass - - @abstractmethod - def post_process(self, data_folder: Path): - pass - - -class MatplotlibPlotSaver(DataProcessor): - def __init__(self, file_format="png"): - self.file_format = file_format - - def process(self, data): - for key, val in iterate_nested_dict(data): - if isinstance(val, plt.Figure): - val.savefig(f"{key}.{self.file_format}") - return data - - -def save_data( - data_folder: Path, - data, - metadata=None, - data_filename="data.json", - metadata_filename="metadata.json", - data_processors=(), -): - """Save data to a folder - - :param data_folder: The folder where the data will be saved - :param data: The data to be saved - :param metadata: Metadata to be saved - :param data_filename: The filename of the data - :param metadata_filename: The filename of the metadata - :param data_processors: A list of data processors to be applied to the data - """ - if isinstance(data_folder, str): - data_folder = Path(data_folder) - - if not data_folder.exists(): - raise NotADirectoryError(f"Save_data: data_folder {data_folder} does not exist") - - if not isinstance(data, dict): - raise TypeError("save_data: 'data' must be a dictionary") - - processed_data = data - for data_processor in data_processors: - processed_data = data_processor.process(processed_data) - - with (data_folder / data_filename).open("w") as f: - json.dump(processed_data, f) - - if metadata is not None: - if not isinstance(metadata, dict): - raise TypeError("save_data: 'metadata' must be a dictionary") - - with (data_folder / metadata_filename).open("w") as f: - json.dump(metadata, f) - - for data_processor in 
data_processors: - data_processor.post_process(data_folder=data_folder) diff --git a/tests/data_handler/test_data_handler.py b/tests/data_handler/test_data_handler.py index fd08440f..46f9f506 100644 --- a/tests/data_handler/test_data_handler.py +++ b/tests/data_handler/test_data_handler.py @@ -1,16 +1,96 @@ import pytest from datetime import datetime -from qualang_tools.results.data_handler import * +from qualang_tools.results.data_handler.data_handler import * +from qualang_tools.results.data_handler.data_processors import DataProcessor -def test_determine_data_folder_nonextisting(tmp_path): - with pytest.raises(NotADirectoryError): - determine_data_folder(root_data_folder=tmp_path / "nonexisting", name="test") +def test_data_handler_basic(tmp_path): + data_handler = DataHandler(root_data_folder=tmp_path) + data = {"a": 1, "b": 2, "c": 3} -def test_determine_data_folder_default_structure(tmp_path): - date_time = datetime.now() - data_folder = determine_data_folder(root_data_folder=tmp_path, name="test", idx=123) + now = datetime.now() - assert data_folder == tmp_path / date_time.strftime("%Y-%m-%d") / f"#123_test_{date_time.strftime('%H%M%S')}" + data_handler.save_data("my_data", data, use_datetime=now) + + expected_data_folder = DEFAULT_FOLDER_PATTERN.format(name="my_data", idx=1) + expected_data_folder = now.strftime(expected_data_folder) + + assert (tmp_path / expected_data_folder / "data.json").exists() + + file_data = json.loads((tmp_path / expected_data_folder / "data.json").read_text()) + assert file_data == data + + +def test_data_handler_metadata(tmp_path): + data_handler = DataHandler(root_data_folder=tmp_path) + + data = {"a": 1, "b": 2, "c": 3} + + metadata = {"meta": "data"} + + now = datetime.now() + + data_handler.save_data("my_data", data, metadata=metadata, use_datetime=now) + + expected_data_folder = DEFAULT_FOLDER_PATTERN.format(name="my_data", idx=1) + expected_data_folder = now.strftime(expected_data_folder) + + assert (tmp_path / 
expected_data_folder / "data.json").exists() + assert (tmp_path / expected_data_folder / "metadata.json").exists() + + file_data = json.loads((tmp_path / expected_data_folder / "data.json").read_text()) + file_metadata = json.loads((tmp_path / expected_data_folder / "metadata.json").read_text()) + + assert file_data == data + assert file_metadata == metadata + + +def test_data_handler_custom_processors(tmp_path): + class TestProcessor(DataProcessor): + def process(self, data): + data["a"] = 42 + return data + + data_handler = DataHandler(root_data_folder=tmp_path, data_processors=[TestProcessor()]) + + data = {"a": 1, "b": 2, "c": 3} + + now = datetime.now() + + data_handler.save_data("my_data", data, use_datetime=now) + + expected_data_folder = DEFAULT_FOLDER_PATTERN.format(name="my_data", idx=1) + expected_data_folder = now.strftime(expected_data_folder) + + assert (tmp_path / expected_data_folder / "data.json").exists() + + file_data = json.loads((tmp_path / expected_data_folder / "data.json").read_text()) + assert file_data == {"a": 42, "b": 2, "c": 3} + + +def test_data_handler_matplotlib_processor(tmp_path): + data_handler = DataHandler(root_data_folder=tmp_path) + + import matplotlib.pyplot as plt + + fig, ax = plt.subplots() + ax.plot([1, 2, 3], [1, 2, 3]) + + data = {"a": 1, "b": 2, "c": 3, "my_fig": fig} + + now = datetime.now() + + data_handler.save_data("my_data", data, use_datetime=now) + + expected_data_folder = DEFAULT_FOLDER_PATTERN.format(name="my_data", idx=1) + expected_data_folder = now.strftime(expected_data_folder) + + assert (tmp_path / expected_data_folder / "data.json").exists() + + file_data = json.loads((tmp_path / expected_data_folder / "data.json").read_text()) + + assert file_data == {"a": 1, "b": 2, "c": 3, "my_fig": "./my_fig.png"} + + assert (tmp_path / expected_data_folder / "my_fig.png").exists() diff --git a/tests/data_handler/test_data_processor_matplotlib.py b/tests/data_handler/test_data_processor_matplotlib.py new file mode 
100644 index 00000000..b4b51d3c --- /dev/null +++ b/tests/data_handler/test_data_processor_matplotlib.py @@ -0,0 +1,39 @@ +import pytest +import json + +from qualang_tools.results.data_handler.data_storage import save_data +from qualang_tools.results.data_handler.data_processors import MatplotlibPlotSaver + + +@pytest.fixture +def fig(): + import matplotlib.pyplot as plt + + fig, ax = plt.subplots() + ax.plot([1, 2, 3], [1, 2, 3]) + return fig + + +def test_matplotlib_plot_saver_process(fig): + matplotlib_plot_saver = MatplotlibPlotSaver() + data = {"a": 1, "b": 2, "c": fig} + data = matplotlib_plot_saver.process(data) + + assert data == {"a": 1, "b": 2, "c": "./c.png"} + + +def test_save_plot_basic(tmp_path, fig): + data = {"a": 1, "b": 2, "c": fig} + + with pytest.raises(TypeError): + save_data(data_folder=tmp_path, data=data) + + assert len(list(tmp_path.iterdir())) == 0 + + save_data(data_folder=tmp_path, data=data, data_processors=[MatplotlibPlotSaver()]) + + assert set(f.name for f in tmp_path.iterdir()) == set(["data.json", "c.png"]) + + file_data = json.loads((tmp_path / "data.json").read_text()) + + assert file_data == {"a": 1, "b": 2, "c": "./c.png"} diff --git a/tests/data_handler/test_extract_data_folder_properties.py b/tests/data_handler/test_extract_data_folder_properties.py index 0e3c7d48..8fa46d06 100644 --- a/tests/data_handler/test_extract_data_folder_properties.py +++ b/tests/data_handler/test_extract_data_folder_properties.py @@ -11,11 +11,11 @@ def test_extract_data_folder_properties(): "hour": 12, "minute": 34, "second": 56, - "absolute_path": "#123_test_123456", + "path": "#123_test_123456", } assert properties == expected_properties properties = extract_data_folder_properties(Path("#123_my_test_123456"), "#{idx}_{name}_%H%M%S") expected_properties["name"] = "my_test" - expected_properties["absolute_path"] = "#123_my_test_123456" + expected_properties["path"] = "#123_my_test_123456" assert properties == expected_properties diff --git 
a/tests/data_handler/test_save_data.py b/tests/data_handler/test_save_data.py new file mode 100644 index 00000000..76126fa5 --- /dev/null +++ b/tests/data_handler/test_save_data.py @@ -0,0 +1,26 @@ +import json +from qualang_tools.results.data_handler.data_storage import save_data + + +def test_save_data_basic(tmp_path): + data = {"a": 1, "b": 2, "c": 3} + save_data(data_folder=tmp_path, data=data) + + assert list(f.name for f in tmp_path.iterdir()) == ["data.json"] + + file_data = json.loads((tmp_path / "data.json").read_text()) + + assert file_data == data + + +def test_save_data_metadata(tmp_path): + data = {"a": 1, "b": 2, "c": 3} + metadata = {"meta": "data"} + save_data(data_folder=tmp_path, data=data, metadata=metadata) + assert set(f.name for f in tmp_path.iterdir()) == set(["data.json", "metadata.json"]) + + file_data = json.loads((tmp_path / "data.json").read_text()) + file_metadata = json.loads((tmp_path / "metadata.json").read_text()) + + assert file_data == data + assert file_metadata == metadata From 36d40d49026330bbb6b0a0eda3eeac9feadebce1 Mon Sep 17 00:00:00 2001 From: Serwan Asaad Date: Thu, 22 Feb 2024 08:26:11 +0100 Subject: [PATCH 04/28] add numpy array processor --- .../results/data_handler/data_processors.py | 50 ++++++++++++- ...otlib.py => test_matplotlib_plot_saver.py} | 0 tests/data_handler/test_numpy_array_saver.py | 70 +++++++++++++++++++ 3 files changed, 117 insertions(+), 3 deletions(-) rename tests/data_handler/{test_data_processor_matplotlib.py => test_matplotlib_plot_saver.py} (100%) create mode 100644 tests/data_handler/test_numpy_array_saver.py diff --git a/qualang_tools/results/data_handler/data_processors.py b/qualang_tools/results/data_handler/data_processors.py index ef123cb6..526c8891 100644 --- a/qualang_tools/results/data_handler/data_processors.py +++ b/qualang_tools/results/data_handler/data_processors.py @@ -1,8 +1,8 @@ from pathlib import Path from abc import ABC from typing import Dict, Any, Generator, List, Tuple, 
Optional - from matplotlib import pyplot as plt +import numpy as np DEFAULT_DATA_PROCESSORS = [] @@ -44,8 +44,11 @@ def post_process(self, data_folder: Path): class MatplotlibPlotSaver(DataProcessor): - def __init__(self, file_format="png"): - self.file_format = file_format + file_format: str = "png" + + def __init__(self, file_format=None): + if file_format is not None: + self.file_format = file_format self.data_figures = {} @property @@ -73,3 +76,44 @@ def post_process(self, data_folder: Path): DEFAULT_DATA_PROCESSORS.append(MatplotlibPlotSaver) + + +class NumpyArraySaver(DataProcessor): + min_size: int = 100 + merge_arrays: bool = True + merged_array_name: str = "arrays.npz" + + def __init__(self, min_size=None, merge_arrays=None, merged_array_name=None): + if min_size is not None: + self.min_size = min_size + if merge_arrays is not None: + self.merge_arrays = merge_arrays + if merged_array_name is not None: + self.merged_array_name = merged_array_name + + self.data_arrays = {} + + def process(self, data): + self.data_arrays = {} + + for keys, val in iterate_nested_dict(data): + if not isinstance(val, np.ndarray): + continue + elif self.min_size is not False and val.size < self.min_size: + continue + + path = Path("/".join(keys)) + self.data_arrays[path] = val + if self.merge_arrays: + update_nested_dict(data, keys, f"./{self.merged_array_name}#{path}") + else: + update_nested_dict(data, keys, f"./{path.with_suffix('.npy')}") + return data + + def post_process(self, data_folder: Path): + if self.merge_arrays: + arrays = {str(path): arr for path, arr in self.data_arrays.items()} + np.savez(data_folder / self.merged_array_name, **arrays) + else: + for path, arr in self.data_arrays.items(): + np.save(data_folder / path.with_suffix(".npy"), arr) diff --git a/tests/data_handler/test_data_processor_matplotlib.py b/tests/data_handler/test_matplotlib_plot_saver.py similarity index 100% rename from tests/data_handler/test_data_processor_matplotlib.py rename to 
tests/data_handler/test_matplotlib_plot_saver.py diff --git a/tests/data_handler/test_numpy_array_saver.py b/tests/data_handler/test_numpy_array_saver.py new file mode 100644 index 00000000..f165fcd0 --- /dev/null +++ b/tests/data_handler/test_numpy_array_saver.py @@ -0,0 +1,70 @@ +import numpy as np + +from qualang_tools.results.data_handler.data_processors import DEFAULT_DATA_PROCESSORS, NumpyArraySaver + + +def test_numpy_array_saver_process_merged_below_min_size(): + data = {"a": np.array([1, 2, 3]), "b": np.array([4, 5, 6]), "c": 3} + + data_processor = NumpyArraySaver() + processed_data = data.copy() + processed_data = data_processor.process(processed_data) + assert processed_data == data + + +def test_numpy_array_saver_process_merged(): + data = {"a": np.array([1, 2, 3]), "b": np.array([4, 5, 6]), "c": 3} + + data_processor = NumpyArraySaver(min_size=False) + + processed_data = data.copy() + processed_data = data_processor.process(processed_data) + + assert processed_data == { + "a": "./arrays.npz#a", + "b": "./arrays.npz#b", + "c": 3, + } + + +def test_numpy_array_saver_process_separate(): + data = {"a": np.array([1, 2, 3]), "b": np.array([4, 5, 6]), "c": 3} + + data_processor = NumpyArraySaver(min_size=False) + processed_data = data_processor.process(data) + assert processed_data == { + "a": "./arrays.npz#a", + "b": "./arrays.npz#b", + "c": 3, + } + + +def test_numpy_array_saver_post_process_merged(tmp_path): + data = {"a": np.array([1, 2, 3]), "b": np.array([4, 5, 6]), "c": 3} + + data_processor = NumpyArraySaver(min_size=False) + + processed_data = data.copy() + data_processor.process(processed_data) + + data_processor.post_process(data_folder=tmp_path) + + assert (tmp_path / "arrays.npz").exists() + loaded_data = np.load(tmp_path / "arrays.npz") + assert list(loaded_data.keys()) == ["a", "b"] + assert np.array_equal(loaded_data["a"], data["a"]) + assert np.array_equal(loaded_data["b"], data["b"]) + + +def 
test_numpy_array_saver_post_process_separate(tmp_path): + data = {"a": np.array([1, 2, 3]), "b": np.array([4, 5, 6]), "c": 3} + + data_processor = NumpyArraySaver(min_size=False, merge_arrays=False) + data_processor.process(data.copy()) + + data_processor.post_process(data_folder=tmp_path) + + assert (tmp_path / "a.npy").exists() + assert (tmp_path / "b.npy").exists() + assert np.array_equal(np.load(tmp_path / "a.npy"), data["a"]) + assert np.array_equal(np.load(tmp_path / "b.npy"), data["b"]) From 9b90b0e0b5713c648b3665e227f28c494eda0da4 Mon Sep 17 00:00:00 2001 From: Serwan Asaad Date: Thu, 22 Feb 2024 09:52:33 +0100 Subject: [PATCH 05/28] add xarray data handler --- .../results/data_handler/data_processors.py | 65 +++++++++++++ tests/data_handler/test_xarray_saver.py | 92 +++++++++++++++++++ 2 files changed, 157 insertions(+) create mode 100644 tests/data_handler/test_xarray_saver.py diff --git a/qualang_tools/results/data_handler/data_processors.py b/qualang_tools/results/data_handler/data_processors.py index 526c8891..d2bf33a2 100644 --- a/qualang_tools/results/data_handler/data_processors.py +++ b/qualang_tools/results/data_handler/data_processors.py @@ -117,3 +117,68 @@ def post_process(self, data_folder: Path): else: for path, arr in self.data_arrays.items(): np.save(data_folder / path.with_suffix(".npy"), arr) + + +class XarraySaver(DataProcessor): + merge_arrays: bool = False + merged_array_name: str = "xarrays" + file_format: str = "hdf5" + + def __init__(self, merge_arrays=None, merged_array_name=None, file_format=None): + if merge_arrays is not None: + self.merge_arrays = merge_arrays + if merged_array_name is not None: + self.merged_array_name = merged_array_name + if file_format is not None: + self.file_format = file_format + + self.data_arrays = {} + + @property + def file_suffix(self) -> str: + suffixes = {"nc": ".nc", "netcdf": ".nc", "h5": ".h5", "hdf5": ".h5", "zarr": ".zarr"} + return suffixes[self.file_format.lower()] + + def process(self, 
data): + import xarray as xr + + self.data_arrays = {} + + for keys, val in iterate_nested_dict(data): + if not isinstance(val, xr.Dataset): + continue + + path = Path("/".join(keys)) + self.data_arrays[path] = val + if self.merge_arrays: + merged_array_name = Path(self.merged_array_name).with_suffix(self.file_suffix) + update_nested_dict(data, keys, f"./{merged_array_name}#{path}") + else: + update_nested_dict(data, keys, f"./{path.with_suffix(self.file_suffix)}") + return data + + def save_merged_netcdf_arrays(self, path: Path, arrays: dict): + for array_path, array in self.data_arrays.items(): + try: + array.to_netcdf(path, mode="a", group=str(array_path)) + except ValueError as e: + raise ValueError( + f"Error saving merged array {path}. You may neet to first run `pip install netcdf4`" + ) from e + + def post_process(self, data_folder: Path): + if self.file_suffix not in [".nc", ".h5"]: + raise NotImplementedError(f"File format {self.file_format} is not supported") + + if self.merge_arrays: + for path, array in self.data_arrays.items(): + merged_path = data_folder / Path(self.merged_array_name).with_suffix(self.file_suffix) + try: + array.to_netcdf(merged_path, mode="a", group=str(path)) + except ValueError as e: + raise ValueError( + f"Error saving merged array {merged_path}. 
You may neet to first run `pip install netcdf4`" + ) from e + else: + for path, array in self.data_arrays.items(): + array.to_netcdf(data_folder / path.with_suffix(self.file_suffix)) diff --git a/tests/data_handler/test_xarray_saver.py b/tests/data_handler/test_xarray_saver.py new file mode 100644 index 00000000..20f6d11f --- /dev/null +++ b/tests/data_handler/test_xarray_saver.py @@ -0,0 +1,92 @@ +from qualang_tools.results.data_handler.data_processors import XarraySaver + + +def test_xarray_saver_no_xarrays(): + xarray_saver = XarraySaver() + data = {"a": 1, "b": 2, "c": 3} + assert xarray_saver.process(data) == data + + +def test_xarray_data_saver_suffixes(): + xarray_saver = XarraySaver() + assert xarray_saver.file_format == "hdf5" + assert xarray_saver.file_suffix == ".h5" + + xarray_saver = XarraySaver(file_format="hdf5") + assert xarray_saver.file_suffix == ".h5" + + xarray_saver = XarraySaver(file_format="nc") + assert xarray_saver.file_suffix == ".nc" + + xarray_saver = XarraySaver(file_format="netcdf") + assert xarray_saver.file_suffix == ".nc" + + xarray_saver = XarraySaver(file_format="zarr") + assert xarray_saver.file_suffix == ".zarr" + + +def test_xarray_saver_merge_netcdf(tmp_path): + try: + # Test won't work if netCDF4 is not installed + import netCDF4 + except ImportError: + return + + import xarray as xr + + data = {"a": 1, "b": 2, "c": xr.Dataset(), "d": xr.Dataset()} + + xarray_saver = XarraySaver(merge_arrays=True, file_format="nc") + processed_data = xarray_saver.process(data.copy()) + + assert processed_data == {"a": 1, "b": 2, "c": "./xarrays.nc#c", "d": "./xarrays.nc#d"} + + xarray_saver.post_process(data_folder=tmp_path) + + assert (tmp_path / "xarrays.nc").exists() + + xr.load_dataset(tmp_path / "xarrays.nc", group="c") + xr.load_dataset(tmp_path / "xarrays.nc", group="d") + + +def test_xarray_saver_merge_hdf5(tmp_path): + try: + # Test won't work if netCDF4 is not installed + import netCDF4 + except ImportError: + return + + import 
xarray as xr + + data = {"a": 1, "b": 2, "c": xr.Dataset(), "d": xr.Dataset()} + + xarray_saver = XarraySaver(merge_arrays=True, file_format="h5") + processed_data = xarray_saver.process(data.copy()) + + assert processed_data == {"a": 1, "b": 2, "c": "./xarrays.h5#c", "d": "./xarrays.h5#d"} + + xarray_saver.post_process(data_folder=tmp_path) + + assert (tmp_path / "xarrays.h5").exists() + + xr.load_dataset(tmp_path / "xarrays.h5", group="c") + xr.load_dataset(tmp_path / "xarrays.h5", group="d") + + +def test_xarray_saver_no_merge_netcdf(tmp_path): + import xarray as xr + + data = {"a": 1, "b": 2, "c": xr.Dataset(), "d": xr.Dataset()} + + xarray_saver = XarraySaver(merge_arrays=False) + processed_data = xarray_saver.process(data.copy()) + + assert processed_data == {"a": 1, "b": 2, "c": "./c.h5", "d": "./d.h5"} + + xarray_saver.post_process(data_folder=tmp_path) + + assert (tmp_path / "c.h5").exists() + assert (tmp_path / "d.h5").exists() + + xr.load_dataset(tmp_path / "c.h5") + xr.load_dataset(tmp_path / "d.h5") From 2c61e3a219a695eb2486609e7a91ce0ae11bdb1f Mon Sep 17 00:00:00 2001 From: Serwan Asaad Date: Thu, 22 Feb 2024 14:19:50 +0100 Subject: [PATCH 06/28] working tests, added init --- qualang_tools/results/__init__.py | 4 +++- .../results/data_handler/__init__.py | 6 +++++ .../results/data_handler/data_folder_tools.py | 7 ++++-- .../results/data_handler/data_handler.py | 3 +++ .../results/data_handler/data_processors.py | 3 +++ tests/data_handler/test_create_data_folder.py | 9 ++++--- tests/data_handler/test_data_handler.py | 4 +++- .../test_get_latest_data_folder.py | 12 +++++----- .../data_handler/test_iterate_nested_dict.py | 2 +- .../test_matplotlib_plot_saver.py | 2 +- tests/data_handler/test_save_data.py | 2 +- tests/data_handler/test_xarray_saver.py | 24 +++++++++---------- 12 files changed, 48 insertions(+), 30 deletions(-) diff --git a/qualang_tools/results/__init__.py b/qualang_tools/results/__init__.py index e343302c..0b145c32 100644 --- 
a/qualang_tools/results/__init__.py +++ b/qualang_tools/results/__init__.py @@ -2,4 +2,6 @@ from qualang_tools.results.results import progress_counter from qualang_tools.results.results import wait_until_job_is_paused -__all__ = ["fetching_tool", "progress_counter", "wait_until_job_is_paused"] +from qualang_tools.results.data_handler import DataHandler, data_processors + +__all__ = ["fetching_tool", "progress_counter", "wait_until_job_is_paused", "DataHandler", "data_processors"] diff --git a/qualang_tools/results/data_handler/__init__.py b/qualang_tools/results/data_handler/__init__.py index e69de29b..cb496969 100644 --- a/qualang_tools/results/data_handler/__init__.py +++ b/qualang_tools/results/data_handler/__init__.py @@ -0,0 +1,6 @@ +from .data_folder_tools import * +from . import data_processors +from .data_processors import DEFAULT_DATA_PROCESSORS +from .data_handler import * + +__all__ = [*data_folder_tools.__all__, data_processors, DEFAULT_DATA_PROCESSORS, *data_handler.__all__] diff --git a/qualang_tools/results/data_handler/data_folder_tools.py b/qualang_tools/results/data_handler/data_folder_tools.py index 75e4a578..4d756d66 100644 --- a/qualang_tools/results/data_handler/data_folder_tools.py +++ b/qualang_tools/results/data_handler/data_folder_tools.py @@ -1,8 +1,11 @@ from pathlib import Path -from typing import Sequence, Dict, Union, Optional +from typing import Dict, Union, Optional import re from datetime import datetime -import re + + +__all__ = ["DEFAULT_FOLDER_PATTERN", "extract_data_folder_properties", "get_latest_data_folder", "create_data_folder"] + DEFAULT_FOLDER_PATTERN = "%Y-%m-%d/#{idx}_{name}_%H%M%S" diff --git a/qualang_tools/results/data_handler/data_handler.py b/qualang_tools/results/data_handler/data_handler.py index 7be22702..c933171e 100644 --- a/qualang_tools/results/data_handler/data_handler.py +++ b/qualang_tools/results/data_handler/data_handler.py @@ -7,6 +7,9 @@ from .data_folder_tools import DEFAULT_FOLDER_PATTERN, 
create_data_folder +__all__ = ["save_data", "DataHandler"] + + def save_data( data_folder: Path, data: Dict[str, Any], diff --git a/qualang_tools/results/data_handler/data_processors.py b/qualang_tools/results/data_handler/data_processors.py index d2bf33a2..cb07cfea 100644 --- a/qualang_tools/results/data_handler/data_processors.py +++ b/qualang_tools/results/data_handler/data_processors.py @@ -4,6 +4,9 @@ from matplotlib import pyplot as plt import numpy as np +__all__ = ["DEFAULT_DATA_PROCESSORS", "DataProcessor", "MatplotlibPlotSaver", "NumpyArraySaver", "XarraySaver"] + + DEFAULT_DATA_PROCESSORS = [] diff --git a/tests/data_handler/test_create_data_folder.py b/tests/data_handler/test_create_data_folder.py index 678d09df..4f50a709 100644 --- a/tests/data_handler/test_create_data_folder.py +++ b/tests/data_handler/test_create_data_folder.py @@ -1,7 +1,6 @@ import pytest from datetime import datetime -from pathlib import Path -from qualang_tools.results.data_handler import create_data_folder, DEFAULT_FOLDER_PATTERN +from qualang_tools.results.data_handler.data_folder_tools import create_data_folder, DEFAULT_FOLDER_PATTERN def test_create_data_folder(tmp_path): @@ -26,7 +25,7 @@ def test_create_data_folder_empty(tmp_path): "hour": now.hour, "minute": now.minute, "second": now.second, - "absolute_path": str(tmp_path / path), + "path": str(tmp_path / path), "relative_path": path, } @@ -49,7 +48,7 @@ def test_create_successive_data_folder(tmp_path): "hour": now.hour, "minute": now.minute, "second": now.second, - "absolute_path": str(tmp_path / path), + "path": str(tmp_path / path), "relative_path": path, } @@ -69,7 +68,7 @@ def test_create_successive_data_folder(tmp_path): "hour": now.hour, "minute": now.minute, "second": now.second, - "absolute_path": str(tmp_path / path), + "path": str(tmp_path / path), "relative_path": path, } diff --git a/tests/data_handler/test_data_handler.py b/tests/data_handler/test_data_handler.py index 46f9f506..e762de67 100644 --- 
a/tests/data_handler/test_data_handler.py +++ b/tests/data_handler/test_data_handler.py @@ -1,7 +1,9 @@ +import json import pytest from datetime import datetime -from qualang_tools.results.data_handler.data_handler import * +from qualang_tools.results.data_handler.data_folder_tools import DEFAULT_FOLDER_PATTERN +from qualang_tools.results.data_handler.data_handler import DataHandler from qualang_tools.results.data_handler.data_processors import DataProcessor diff --git a/tests/data_handler/test_get_latest_data_folder.py b/tests/data_handler/test_get_latest_data_folder.py index e2029a7e..ceb71e3a 100644 --- a/tests/data_handler/test_get_latest_data_folder.py +++ b/tests/data_handler/test_get_latest_data_folder.py @@ -1,4 +1,4 @@ -from qualang_tools.results.data_handler import get_latest_data_folder +from qualang_tools.results.data_handler.data_folder_tools import get_latest_data_folder def test_get_latest_data_folder_empty(tmp_path): @@ -20,7 +20,7 @@ def test_get_latest_data_folder_default_structure(tmp_path): "hour": 12, "minute": 34, "second": 56, - "absolute_path": str(date_folder / "#123_test_123456"), + "path": str(date_folder / "#123_test_123456"), "relative_path": f"{date_folder.name}/#123_test_123456", } @@ -43,7 +43,7 @@ def test_get_latest_data_folder_two_items(tmp_path): "hour": 12, "minute": 34, "second": 57, - "absolute_path": str(date_folder / "#124_test_123457"), + "path": str(date_folder / "#124_test_123457"), "relative_path": f"{date_folder.name}/#124_test_123457", } @@ -69,7 +69,7 @@ def test_get_latest_data_folder_two_items_different_date(tmp_path): "hour": 12, "minute": 34, "second": 57, - "absolute_path": str(date_folder / "#124_test_123457"), + "path": str(date_folder / "#124_test_123457"), "relative_path": f"{date_folder.name}/#124_test_123457", } @@ -94,7 +94,7 @@ def test_get_latest_data_folder_different_date_empty_last_folder(tmp_path): "hour": 12, "minute": 34, "second": 56, - "absolute_path": str(tmp_path / 
"2021-01-05/#123_test_123456"), + "path": str(tmp_path / "2021-01-05/#123_test_123456"), "relative_path": "2021-01-05/#123_test_123456", } @@ -118,7 +118,7 @@ def test_get_latest_data_folder_switched_idxs(tmp_path): "hour": 12, "minute": 34, "second": 57, - "absolute_path": str(date_folder / "#123_test_123457"), + "path": str(date_folder / "#123_test_123457"), "relative_path": f"{date_folder.name}/#123_test_123457", } diff --git a/tests/data_handler/test_iterate_nested_dict.py b/tests/data_handler/test_iterate_nested_dict.py index bf51cbb1..a6015241 100644 --- a/tests/data_handler/test_iterate_nested_dict.py +++ b/tests/data_handler/test_iterate_nested_dict.py @@ -1,4 +1,4 @@ -from qualang_tools.results.data_handler.data_storage import iterate_nested_dict +from qualang_tools.results.data_handler.data_processors import iterate_nested_dict def test_iterate_nested_dict_empty(): diff --git a/tests/data_handler/test_matplotlib_plot_saver.py b/tests/data_handler/test_matplotlib_plot_saver.py index b4b51d3c..d5c16cdc 100644 --- a/tests/data_handler/test_matplotlib_plot_saver.py +++ b/tests/data_handler/test_matplotlib_plot_saver.py @@ -1,7 +1,7 @@ import pytest import json -from qualang_tools.results.data_handler.data_storage import save_data +from qualang_tools.results.data_handler.data_handler import save_data from qualang_tools.results.data_handler.data_processors import MatplotlibPlotSaver diff --git a/tests/data_handler/test_save_data.py b/tests/data_handler/test_save_data.py index 76126fa5..aaec1351 100644 --- a/tests/data_handler/test_save_data.py +++ b/tests/data_handler/test_save_data.py @@ -1,5 +1,5 @@ import json -from qualang_tools.results.data_handler.data_storage import save_data +from qualang_tools.results.data_handler.data_handler import save_data def test_save_data_basic(tmp_path): diff --git a/tests/data_handler/test_xarray_saver.py b/tests/data_handler/test_xarray_saver.py index 20f6d11f..275afb02 100644 --- a/tests/data_handler/test_xarray_saver.py +++ 
b/tests/data_handler/test_xarray_saver.py @@ -1,6 +1,16 @@ +import pytest +import sys from qualang_tools.results.data_handler.data_processors import XarraySaver +def netcdf4_installed(): + try: + import netCDF4 + except ImportError: + return False + return True + + def test_xarray_saver_no_xarrays(): xarray_saver = XarraySaver() data = {"a": 1, "b": 2, "c": 3} @@ -25,13 +35,8 @@ def test_xarray_data_saver_suffixes(): assert xarray_saver.file_suffix == ".zarr" +@pytest.mark.skipif(not netcdf4_installed(), reason="netCDF4 not installed") def test_xarray_saver_merge_netcdf(tmp_path): - try: - # Test won't work if netCDF4 is not installed - import netCDF4 - except ImportError: - return - import xarray as xr data = {"a": 1, "b": 2, "c": xr.Dataset(), "d": xr.Dataset()} @@ -49,13 +54,8 @@ def test_xarray_saver_merge_netcdf(tmp_path): xr.load_dataset(tmp_path / "xarrays.nc", group="d") +@pytest.mark.skipif(not netcdf4_installed(), reason="netCDF4 not installed") def test_xarray_saver_merge_hdf5(tmp_path): - try: - # Test won't work if netCDF4 is not installed - import netCDF4 - except ImportError: - return - import xarray as xr data = {"a": 1, "b": 2, "c": xr.Dataset(), "d": xr.Dataset()} From d4e7be921fd381ad8ac4ba11c6862f3ab509efb7 Mon Sep 17 00:00:00 2001 From: Serwan Asaad Date: Thu, 22 Feb 2024 14:24:13 +0100 Subject: [PATCH 07/28] add DataHandler.path --- .../results/data_handler/data_handler.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/qualang_tools/results/data_handler/data_handler.py b/qualang_tools/results/data_handler/data_handler.py index c933171e..7281ec61 100644 --- a/qualang_tools/results/data_handler/data_handler.py +++ b/qualang_tools/results/data_handler/data_handler.py @@ -66,6 +66,7 @@ def __init__( data_processors: Optional[Sequence[DataProcessor]] = None, root_data_folder: Optional[Union[str, Path]] = None, folder_pattern: Optional[str] = None, + path: Optional[Path] = None, ): if data_processors is not 
None: self.data_processors = data_processors @@ -77,20 +78,30 @@ def __init__( if folder_pattern is not None: self.folder_pattern = folder_pattern - def create_data_folder(self, name, idx=None, use_datetime: Optional[datetime] = None, create=True): + self.path = path + self.path_properties = None + + def create_data_folder( + self, name: str, idx: Optional[int] = None, use_datetime: Optional[datetime] = None, create: bool = True + ) -> Dict[str, Union[str, int]]: """Create a new data folder in the root data folder""" - return create_data_folder( + self.path_properties = create_data_folder( root_data_folder=self.root_data_folder, folder_pattern=self.folder_pattern, + use_datetime=use_datetime, name=name, idx=idx, + create=create, ) + self.path = self.path_properties["path"] + return self.path_properties def save_data(self, name, data, metadata=None, idx=None, use_datetime: Optional[datetime] = None): - data_folder_properties = self.create_data_folder(name, idx=idx, use_datetime=use_datetime) + if self.path is None: + self.create_data_folder(name, idx=idx, use_datetime=use_datetime) return save_data( - data_folder=data_folder_properties["path"], + data_folder=self.path, data=data, metadata=metadata, data_filename=self.data_filename, From 695ca2159b2a5b8d7a1d53675cac3621f4e69034 Mon Sep 17 00:00:00 2001 From: Serwan Asaad Date: Thu, 22 Feb 2024 14:46:40 +0100 Subject: [PATCH 08/28] docs + small changes --- qualang_tools/results/README.md | 53 +++++++++++++++++++ .../results/data_handler/data_handler.py | 4 +- .../results/data_handler/data_processors.py | 9 ++-- tests/data_handler/test_data_handler.py | 15 ++++++ 4 files changed, 77 insertions(+), 4 deletions(-) diff --git a/qualang_tools/results/README.md b/qualang_tools/results/README.md index 4c9c4cb2..10a06b84 100644 --- a/qualang_tools/results/README.md +++ b/qualang_tools/results/README.md @@ -156,3 +156,56 @@ for i in range(len(freqs_external)): # Loop over the LO frequencies # Process and plot the results ... 
``` + + +## Data handler +The `DataHandler` is used to easily save data once a measurement has been performed. +It saves data into an automatically generated folder with folder structure: +`{root_data_folder}/%Y-%m-%d/#{idx}_{name}_%H%M%S`. +- `root_data_folder` is the root folder for all data, defined once at the start +- `%Y-%m-%d`: All datasets are first ordered by date +- `{idx}`: Datasets are identified by an incrementer (starting at `#1`). + Whenever a save is performed, the index of the last saved dataset is determined and + increased by 1. +- `name`: Each data folder has a name +- `%H%M%S`: The time is also specified. +This structure can be changed in `DataHandler.folder_structure`. + +Data is generally saved using the command `data_handler.save_data("msmt_name", data)`, +where `data` is a dictionary. +The data is saved to the json file `data.json` in the data folder, but nonserialisable +types are saved into separate files. The following nonserialisable types are currently +supported: +- Matplotlib figures +- Numpy arrays +- Xarrays + +### Usage example +```python +# Assume a measurement has been performed, and all results are collected here +data = { + "T1": 5e-6, + "T1_figure": plt.figure(), + "IQ_array": np.array([[1, 2, 3], [4, 5, 6]]) +} + +# Initialize the DataHandler +data_handler = DataHandler(root_data_folder="C:/data") + +# Save results +data_folder = data_handler.save_data("T1_measurement", data=data) +print(data_folder) +# C:/data/2024-02-24/#152_T1_measurement_095214 +# This assumes the save was performed at 2024-02-24 at 09:52:14 +``` +After calling `data_handler.save_data()`, three files are created in `data_folder`: +- `T1_figure.png` +- `arrays.npz` containing all the numpy arrays +- `data.json` which contains: + ``` + { + "T1": 5e-06, + "T1_figure": "./T1_figure.png", + "IQ_array": "./arrays.npz#IQ_array" + } + ``` \ No newline at end of file diff --git a/qualang_tools/results/data_handler/data_handler.py 
b/qualang_tools/results/data_handler/data_handler.py index 7281ec61..af1cce67 100644 --- a/qualang_tools/results/data_handler/data_handler.py +++ b/qualang_tools/results/data_handler/data_handler.py @@ -17,7 +17,7 @@ def save_data( data_filename: str = "data.json", metadata_filename: str = "metadata.json", data_processors: Sequence[DataProcessor] = (), -) -> None: +) -> Path: """Save data to a folder :param data_folder: The folder where the data will be saved @@ -53,6 +53,8 @@ def save_data( for data_processor in data_processors: data_processor.post_process(data_folder=data_folder) + return data_folder + class DataHandler: default_data_processors = DEFAULT_DATA_PROCESSORS diff --git a/qualang_tools/results/data_handler/data_processors.py b/qualang_tools/results/data_handler/data_processors.py index cb07cfea..9546dbed 100644 --- a/qualang_tools/results/data_handler/data_processors.py +++ b/qualang_tools/results/data_handler/data_processors.py @@ -82,7 +82,6 @@ def post_process(self, data_folder: Path): class NumpyArraySaver(DataProcessor): - min_size: int = 100 merge_arrays: bool = True merged_array_name: str = "arrays.npz" @@ -102,8 +101,6 @@ def process(self, data): for keys, val in iterate_nested_dict(data): if not isinstance(val, np.ndarray): continue - elif self.min_size is not False and val.size < self.min_size: - continue path = Path("/".join(keys)) self.data_arrays[path] = val @@ -122,6 +119,9 @@ def post_process(self, data_folder: Path): np.save(data_folder / path.with_suffix(".npy"), arr) +DEFAULT_DATA_PROCESSORS.append(NumpyArraySaver) + + class XarraySaver(DataProcessor): merge_arrays: bool = False merged_array_name: str = "xarrays" @@ -185,3 +185,6 @@ def post_process(self, data_folder: Path): else: for path, array in self.data_arrays.items(): array.to_netcdf(data_folder / path.with_suffix(self.file_suffix)) + + +DEFAULT_DATA_PROCESSORS.append(XarraySaver) diff --git a/tests/data_handler/test_data_handler.py b/tests/data_handler/test_data_handler.py 
index e762de67..823bd7c3 100644 --- a/tests/data_handler/test_data_handler.py +++ b/tests/data_handler/test_data_handler.py @@ -96,3 +96,18 @@ def test_data_handler_matplotlib_processor(tmp_path): assert file_data == {"a": 1, "b": 2, "c": 3, "my_fig": "./my_fig.png"} assert (tmp_path / expected_data_folder / "my_fig.png").exists() + + +def test_custom(tmp_path): + from matplotlib import pyplot as plt + import numpy as np + + data = {"T1": 5e-6, "T1_figure": plt.figure(), "IQ_array": np.array([[1, 2, 3], [4, 5, 6]])} + + # Initialize the DataHandler + data_handler = DataHandler(root_data_folder=tmp_path) + + # Save results + data_folder = data_handler.save_data("T1_measurement", data=data) + + print(list(data_folder.iterdir())) From 6e7556abae62580f88d95427355b2554a8e9c8e4 Mon Sep 17 00:00:00 2001 From: Serwan Asaad Date: Fri, 23 Feb 2024 11:05:16 +0100 Subject: [PATCH 09/28] Added initialization name --- .../results/data_handler/data_handler.py | 34 ++++++- tests/data_handler/test_data_handler.py | 95 ++++++++++++++++--- 2 files changed, 110 insertions(+), 19 deletions(-) diff --git a/qualang_tools/results/data_handler/data_handler.py b/qualang_tools/results/data_handler/data_handler.py index af1cce67..cb4328f2 100644 --- a/qualang_tools/results/data_handler/data_handler.py +++ b/qualang_tools/results/data_handler/data_handler.py @@ -65,15 +65,19 @@ class DataHandler: def __init__( self, + name: Optional[str] = None, data_processors: Optional[Sequence[DataProcessor]] = None, root_data_folder: Optional[Union[str, Path]] = None, folder_pattern: Optional[str] = None, path: Optional[Path] = None, ): + self.name = name if data_processors is not None: self.data_processors = data_processors else: - self.data_processors = [processor() for processor in self.default_data_processors] + self.data_processors = [ + processor() for processor in self.default_data_processors + ] if root_data_folder is not None: self.root_data_folder = root_data_folder @@ -84,23 +88,43 @@ def 
__init__( self.path_properties = None def create_data_folder( - self, name: str, idx: Optional[int] = None, use_datetime: Optional[datetime] = None, create: bool = True + self, + name: Optional[str] = None, + idx: Optional[int] = None, + use_datetime: Optional[datetime] = None, + create: bool = True, ) -> Dict[str, Union[str, int]]: """Create a new data folder in the root data folder""" + if name is not None: + self.name = name + if self.name is None: + raise ValueError("DataHandler: name must be specified") + self.path_properties = create_data_folder( root_data_folder=self.root_data_folder, folder_pattern=self.folder_pattern, use_datetime=use_datetime, - name=name, + name=self.name, idx=idx, create=create, ) self.path = self.path_properties["path"] return self.path_properties - def save_data(self, name, data, metadata=None, idx=None, use_datetime: Optional[datetime] = None): + def save_data( + self, + data, + name=None, + metadata=None, + idx=None, + use_datetime: Optional[datetime] = None, + ): + if name is not None: + self.name = name + if self.name is None: + raise ValueError("DataHandler: name must be specified") if self.path is None: - self.create_data_folder(name, idx=idx, use_datetime=use_datetime) + self.create_data_folder(name=self.name, idx=idx, use_datetime=use_datetime) return save_data( data_folder=self.path, diff --git a/tests/data_handler/test_data_handler.py b/tests/data_handler/test_data_handler.py index 823bd7c3..ddcf118c 100644 --- a/tests/data_handler/test_data_handler.py +++ b/tests/data_handler/test_data_handler.py @@ -14,7 +14,7 @@ def test_data_handler_basic(tmp_path): now = datetime.now() - data_handler.save_data("my_data", data, use_datetime=now) + data_handler.save_data(data, "my_data", use_datetime=now) expected_data_folder = DEFAULT_FOLDER_PATTERN.format(name="my_data", idx=1) expected_data_folder = now.strftime(expected_data_folder) @@ -34,7 +34,7 @@ def test_data_handler_metadata(tmp_path): now = datetime.now() - 
data_handler.save_data("my_data", data, metadata=metadata, use_datetime=now) + data_handler.save_data(data, "my_data", metadata=metadata, use_datetime=now) expected_data_folder = DEFAULT_FOLDER_PATTERN.format(name="my_data", idx=1) expected_data_folder = now.strftime(expected_data_folder) @@ -43,7 +43,9 @@ def test_data_handler_metadata(tmp_path): assert (tmp_path / expected_data_folder / "metadata.json").exists() file_data = json.loads((tmp_path / expected_data_folder / "data.json").read_text()) - file_metadata = json.loads((tmp_path / expected_data_folder / "metadata.json").read_text()) + file_metadata = json.loads( + (tmp_path / expected_data_folder / "metadata.json").read_text() + ) assert file_data == data assert file_metadata == metadata @@ -55,13 +57,15 @@ def process(self, data): data["a"] = 42 return data - data_handler = DataHandler(root_data_folder=tmp_path, data_processors=[TestProcessor()]) + data_handler = DataHandler( + root_data_folder=tmp_path, data_processors=[TestProcessor()] + ) data = {"a": 1, "b": 2, "c": 3} now = datetime.now() - data_handler.save_data("my_data", data, use_datetime=now) + data_handler.save_data(data, "my_data", use_datetime=now) expected_data_folder = DEFAULT_FOLDER_PATTERN.format(name="my_data", idx=1) expected_data_folder = now.strftime(expected_data_folder) @@ -84,7 +88,7 @@ def test_data_handler_matplotlib_processor(tmp_path): now = datetime.now() - data_handler.save_data("my_data", data, use_datetime=now) + data_handler.save_data(data, "my_data", use_datetime=now) expected_data_folder = DEFAULT_FOLDER_PATTERN.format(name="my_data", idx=1) expected_data_folder = now.strftime(expected_data_folder) @@ -98,16 +102,79 @@ def test_data_handler_matplotlib_processor(tmp_path): assert (tmp_path / expected_data_folder / "my_fig.png").exists() -def test_custom(tmp_path): - from matplotlib import pyplot as plt - import numpy as np +def test_data_handler_no_name_create_folder(tmp_path): + data_handler = 
DataHandler(root_data_folder=tmp_path) + assert data_handler.name is None + + now = datetime.now() + + with pytest.raises(ValueError): + data_handler.create_data_folder(use_datetime=now) + + +def test_data_handler_initialized_name_create_folder(tmp_path): + data_handler = DataHandler(name="my_data", root_data_folder=tmp_path) + assert data_handler.name == "my_data" + + now = datetime.now() + + data_handler.create_data_folder(use_datetime=now) + + expected_data_folder = DEFAULT_FOLDER_PATTERN.format(name="my_data", idx=1) + expected_data_folder = now.strftime(expected_data_folder) + + assert (tmp_path / expected_data_folder).exists() + + +def test_data_handler_overwrite_initialized_name_create_folder(tmp_path): + data_handler = DataHandler(name="my_data", root_data_folder=tmp_path) + assert data_handler.name == "my_data" + + now = datetime.now() + + data_handler.create_data_folder(name="my_new_data", use_datetime=now) + assert data_handler.name == "my_new_data" + + expected_data_folder = DEFAULT_FOLDER_PATTERN.format(name="my_new_data", idx=1) + expected_data_folder = now.strftime(expected_data_folder) - data = {"T1": 5e-6, "T1_figure": plt.figure(), "IQ_array": np.array([[1, 2, 3], [4, 5, 6]])} + assert (tmp_path / expected_data_folder).exists() - # Initialize the DataHandler + +def test_data_handler_no_name_save_data(tmp_path): data_handler = DataHandler(root_data_folder=tmp_path) + assert data_handler.name is None + + with pytest.raises(ValueError): + data_handler.save_data({"a": 1, "b": 2, "c": 3}) + + +def test_data_handler_initialized_name_save_data(tmp_path): + data_handler = DataHandler(name="my_data", root_data_folder=tmp_path) + assert data_handler.name == "my_data" + + now = datetime.now() - # Save results - data_folder = data_handler.save_data("T1_measurement", data=data) + data_handler.save_data({"a": 1, "b": 2, "c": 3}, use_datetime=now) - print(list(data_folder.iterdir())) + expected_data_folder = DEFAULT_FOLDER_PATTERN.format(name="my_data", idx=1) + 
expected_data_folder = now.strftime(expected_data_folder) + + assert (tmp_path / expected_data_folder / "data.json").exists() + + +def test_data_handler_overwrite_initialized_name_save_data(tmp_path): + data_handler = DataHandler(name="my_data", root_data_folder=tmp_path) + assert data_handler.name == "my_data" + + now = datetime.now() + + data_handler.save_data( + {"a": 1, "b": 2, "c": 3}, name="my_new_data", use_datetime=now + ) + assert data_handler.name == "my_new_data" + + expected_data_folder = DEFAULT_FOLDER_PATTERN.format(name="my_new_data", idx=1) + expected_data_folder = now.strftime(expected_data_folder) + + assert (tmp_path / expected_data_folder / "data.json").exists() From 46f368192dc7be2c4cff21267b7142ded9a8092b Mon Sep 17 00:00:00 2001 From: Serwan Asaad Date: Fri, 23 Feb 2024 11:53:56 +0100 Subject: [PATCH 10/28] Proper sorting of data folders --- .../results/data_handler/data_folder_tools.py | 23 +++++++++++-------- tests/data_handler/test_create_data_folder.py | 19 ++++++++++++++- .../test_get_latest_data_folder.py | 23 ++++++++++++++++++- 3 files changed, 53 insertions(+), 12 deletions(-) diff --git a/qualang_tools/results/data_handler/data_folder_tools.py b/qualang_tools/results/data_handler/data_folder_tools.py index 4d756d66..3b2e00f1 100644 --- a/qualang_tools/results/data_handler/data_folder_tools.py +++ b/qualang_tools/results/data_handler/data_folder_tools.py @@ -29,7 +29,7 @@ def extract_data_folder_properties( ) -> Optional[Dict[str, Union[str, int]]]: """Extract properties from a data folder. - :param data_folder: The data folder to extract properties from. + :param data_folder: The data folder to extract properties from. Should be an absolute path. :param pattern: The pattern to extract the properties from, e.g. "#{idx}_{name}_%H%M%S". :param root_data_folder: The root data folder to extract the relative path from. If not provided, "relative_path" is not included in the properties. 
@@ -51,6 +51,7 @@ def extract_data_folder_properties( pattern = pattern.replace("%M", r"(?P\d{2})") pattern = pattern.replace("%S", r"(?P\d{2})") + data_folder = Path(data_folder) if root_data_folder is not None: folder_path_str = str(data_folder.relative_to(root_data_folder)) else: @@ -105,19 +106,21 @@ def get_latest_data_folder( if not remaining_folder_pattern: if "{idx}" not in current_folder_pattern: raise ValueError("The folder pattern must contain '{idx}' at the end.") - # Get the latest idx - folders = [f for f in folder_path.iterdir() if f.is_dir()] - folders = [ - f for f in folders if extract_data_folder_properties(f, folder_pattern, root_data_folder=root_data_folder) - ] + folders = {} + for f in folder_path.iterdir(): + if not f.is_dir(): + continue + properties = extract_data_folder_properties(f, folder_pattern, root_data_folder=root_data_folder) + if properties is None: + continue + + folders[f] = properties if not folders: return None - latest_folder = max(folders, key=lambda f: f.name) - return extract_data_folder_properties( - data_folder=latest_folder, pattern=folder_pattern, root_data_folder=root_data_folder - ) + latest_folder, latest_properties = max(folders.items(), key=lambda f: f[1]["idx"]) + return latest_properties elif "{idx}" in current_folder_pattern: raise ValueError("The folder pattern must only contain '{idx}' in the last part.") else: diff --git a/tests/data_handler/test_create_data_folder.py b/tests/data_handler/test_create_data_folder.py index 4f50a709..c9e10811 100644 --- a/tests/data_handler/test_create_data_folder.py +++ b/tests/data_handler/test_create_data_folder.py @@ -1,6 +1,9 @@ import pytest from datetime import datetime -from qualang_tools.results.data_handler.data_folder_tools import create_data_folder, DEFAULT_FOLDER_PATTERN +from qualang_tools.results.data_handler.data_folder_tools import ( + create_data_folder, + DEFAULT_FOLDER_PATTERN, +) def test_create_data_folder(tmp_path): @@ -73,3 +76,17 @@ def 
test_create_successive_data_folder(tmp_path): } assert properties == properties_expected + + +def test_performance_get_idxs(tmp_path): + from qualang_tools.results.data_handler.data_folder_tools import ( + get_latest_data_folder, + ) + + now = datetime.now() + + for k in range(1, 1000): + properties = create_data_folder(tmp_path, name="my_test", use_datetime=now) + properties_latest = get_latest_data_folder(tmp_path) + + assert properties["idx"] == properties_latest["idx"] == k \ No newline at end of file diff --git a/tests/data_handler/test_get_latest_data_folder.py b/tests/data_handler/test_get_latest_data_folder.py index ceb71e3a..0dc57419 100644 --- a/tests/data_handler/test_get_latest_data_folder.py +++ b/tests/data_handler/test_get_latest_data_folder.py @@ -1,4 +1,8 @@ -from qualang_tools.results.data_handler.data_folder_tools import get_latest_data_folder +from datetime import datetime +from qualang_tools.results.data_handler.data_folder_tools import ( + get_latest_data_folder, + DEFAULT_FOLDER_PATTERN, +) def test_get_latest_data_folder_empty(tmp_path): @@ -123,3 +127,20 @@ def test_get_latest_data_folder_switched_idxs(tmp_path): } assert properties == expected_properties + + +def test_create_data_folders_correct_order(tmp_path): + from qualang_tools.results.data_handler.data_folder_tools import ( + get_latest_data_folder, + ) + + now = datetime.now() + + for idx in range(1, 105): + path = DEFAULT_FOLDER_PATTERN.format(idx=idx, name="my_test") + path = now.strftime(path) + (tmp_path / path).mkdir(parents=True) + + properties_latest = get_latest_data_folder(tmp_path) + + assert properties_latest["idx"] == idx From 6cbc66e1572b3ffd793fb0124c5e0f1b5dee5d63 Mon Sep 17 00:00:00 2001 From: Serwan Asaad Date: Sun, 25 Feb 2024 10:50:30 +0100 Subject: [PATCH 11/28] add documentation --- .../results/data_handler/data_folder_tools.py | 2 + .../results/data_handler/data_handler.py | 102 +++++++++++++++++- 2 files changed, 99 insertions(+), 5 deletions(-) diff --git 
a/qualang_tools/results/data_handler/data_folder_tools.py b/qualang_tools/results/data_handler/data_folder_tools.py index 3b2e00f1..c8e6422b 100644 --- a/qualang_tools/results/data_handler/data_folder_tools.py +++ b/qualang_tools/results/data_handler/data_folder_tools.py @@ -1,3 +1,5 @@ +"""Tools for handling data folders.""" + from pathlib import Path from typing import Dict, Union, Optional import re diff --git a/qualang_tools/results/data_handler/data_handler.py b/qualang_tools/results/data_handler/data_handler.py index cb4328f2..516d7dc7 100644 --- a/qualang_tools/results/data_handler/data_handler.py +++ b/qualang_tools/results/data_handler/data_handler.py @@ -18,7 +18,19 @@ def save_data( metadata_filename: str = "metadata.json", data_processors: Sequence[DataProcessor] = (), ) -> Path: - """Save data to a folder + """ + Save data to a folder + + The data (assumed to be a dict) is saved as a json file to "{data_folder}/{data_filename}", which typically + follows the format "%Y-%m-%d/#{idx}_{name}_%H%M%S/data.json". + Non-serialisable contents in data such as figures and arrays are saved into separate files and the paths are + referenced from the data dictionary. + The optional metadata (assumed to be a dict) is saved as a json file to "{data_folder}/{metadata_filename}". + + This function also applies a list of data processors to the data before saving it. The data processors are + applied in the order they are provided. + + This function is used by the DataHandler class to save data to a folder. 
:param data_folder: The folder where the data will be saved :param data: The data to be saved @@ -26,6 +38,7 @@ def save_data( :param data_filename: The filename of the data :param metadata_filename: The filename of the metadata :param data_processors: A list of data processors to be applied to the data + :return: The path of the saved data folder """ if isinstance(data_folder, str): data_folder = Path(data_folder) @@ -57,6 +70,32 @@ def save_data( class DataHandler: + """A class to handle data saving. + + This class provides functionality to save data to a specified data folder. + It allows for the creation of a new data folder, saving data to the folder, + and applying data processors to the saved data. + + :param name: The name of the data handler. + :type name: str, optional + :param data_processors: The data processors to be applied to the saved data. + :type data_processors: Sequence[DataProcessor], optional + :param root_data_folder: The root folder where the data will be saved. + :type root_data_folder: str or Path, optional + :param folder_pattern: The pattern used to create the data folder. + :type folder_pattern: str, optional + :param path: The path to the data folder. + :type path: Path, optional + + Example usage: + + .. 
code-block:: python + + data_handler = DataHandler("T1_experiment") + data = {"T1": 1e-6, "T1_arr": np.array([1, 2, 3]), "T1_fig": plt.figure()} + data_handler.save_data(data) + """ + default_data_processors = DEFAULT_DATA_PROCESSORS root_data_folder: Path = None folder_pattern: str = DEFAULT_FOLDER_PATTERN @@ -75,9 +114,7 @@ def __init__( if data_processors is not None: self.data_processors = data_processors else: - self.data_processors = [ - processor() for processor in self.default_data_processors - ] + self.data_processors = [processor() for processor in self.default_data_processors] if root_data_folder is not None: self.root_data_folder = root_data_folder @@ -94,7 +131,33 @@ def create_data_folder( use_datetime: Optional[datetime] = None, create: bool = True, ) -> Dict[str, Union[str, int]]: - """Create a new data folder in the root data folder""" + """Create a new data folder in the root data folder. + + This method creates a new data folder in the root data folder specified + in the `root_data_folder` attribute. The name of the data folder can be + specified using the `name` parameter. An index can also be provided using + the `idx` parameter. If a datetime object is provided using the `use_datetime` + parameter, it will be used in the folder name. By default, the data folder + is created. + + :param name: The name of the data folder. + :type name: str, optional + :param idx: The index of the data folder. + :type idx: int, optional + :param use_datetime: The datetime to be used in the folder name. + :type use_datetime: datetime, optional + :param create: Whether to create the data folder or not. + :type create: bool, optional + :return: The properties of the created data folder. + :rtype: dict + :raises ValueError: If the name is not specified. + + Example usage: + + .. 
code-block:: python + + data_handler.create_data_folder(name="T1_experiment", idx=1, use_datetime=datetime.now()) + """ if name is not None: self.name = name if self.name is None: @@ -119,6 +182,35 @@ def save_data( idx=None, use_datetime: Optional[datetime] = None, ): + """Save the data to the data folder. + + This method saves the provided data to the data folder specified in the + `path` attribute. The name of the data folder can be specified using the + `name` parameter. The metadata associated with the data can be provided + using the `metadata` parameter. An index can also be provided using the + `idx` parameter. If a datetime object is provided using the `use_datetime` + parameter, it will be used in the folder name. + + :param data: The data to be saved. + :type data: any + :param name: The name of the data folder. + :type name: str, optional + :param metadata: The metadata associated with the data. + :type metadata: any, optional + :param idx: The index of the data folder. + :type idx: int, optional + :param use_datetime: The datetime to be used in the folder name. + :type use_datetime: datetime, optional + :raises ValueError: If the name is not specified. + :return: The result of saving the data. + :rtype: any + + Example usage: + + .. 
code-block:: python + + data_handler.save_data(data, name="T1_experiment", metadata=metadata, idx=1, use_datetime=datetime.now()) + """ if name is not None: self.name = name if self.name is None: From 4cad629f9dd184e7c4db2d68be1bc8a5aecbfbda Mon Sep 17 00:00:00 2001 From: Serwan Asaad Date: Sun, 25 Feb 2024 10:52:30 +0100 Subject: [PATCH 12/28] add optional xarray --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index c4839d78..dfa18f3b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ waitress = { version = "^2.0.0", optional = true } dill = { version = "^0.3.4", optional = true } pypiwin32 = { version = "^223", optional = true } ipython = { version = "^7.31.1", optional = true } +xarray = { version = "^2024.0.0", optional = true } scikit-learn = "^1.0.2" [tool.poetry.dev-dependencies] From c452b7a1c5dac402e28b1231217355eac5d42d7f Mon Sep 17 00:00:00 2001 From: Serwan Asaad Date: Sun, 25 Feb 2024 11:08:31 +0100 Subject: [PATCH 13/28] lower min xarray version --- poetry.lock | 65 ++++++++++++++++++++++++++++++++++---------------- pyproject.toml | 2 +- 2 files changed, 46 insertions(+), 21 deletions(-) diff --git a/poetry.lock b/poetry.lock index 501fdc5b..0249e70f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. 
[[package]] name = "appnope" @@ -118,7 +118,7 @@ uvloop = ["uvloop (>=0.15.2)"] name = "brotli" version = "1.0.9" description = "Python bindings for the Brotli compression library" -optional = false +optional = true python-versions = "*" files = [ {file = "Brotli-1.0.9-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:268fe94547ba25b58ebc724680609c8ee3e5a843202e9a381f6f9c5e8bdb5c70"}, @@ -256,7 +256,7 @@ files = [ name = "dash" version = "2.0.0" description = "A Python framework for building reactive web-apps. Developed by Plotly." -optional = false +optional = true python-versions = ">=3.6" files = [ {file = "dash-2.0.0-py3-none-any.whl", hash = "sha256:23f331533663641a5c70a15c46da26d29ef5335f9aeb8bc03d09de865fc7cd62"}, @@ -296,7 +296,7 @@ pandas = ["numpy", "pandas"] name = "dash-core-components" version = "2.0.0" description = "Core component suite for Dash" -optional = false +optional = true python-versions = "*" files = [ {file = "dash_core_components-2.0.0-py3-none-any.whl", hash = "sha256:52b8e8cce13b18d0802ee3acbc5e888cb1248a04968f962d63d070400af2e346"}, @@ -331,7 +331,7 @@ files = [ name = "dash-html-components" version = "2.0.0" description = "Vanilla HTML components for Dash" -optional = false +optional = true python-versions = "*" files = [ {file = "dash_html_components-2.0.0-py3-none-any.whl", hash = "sha256:b42cc903713c9706af03b3f2548bda4be7307a7cf89b7d6eae3da872717d1b63"}, @@ -342,7 +342,7 @@ files = [ name = "dash-table" version = "5.0.0" description = "Dash table" -optional = false +optional = true python-versions = "*" files = [ {file = "dash_table-5.0.0-py3-none-any.whl", hash = "sha256:19036fa352bb1c11baf38068ec62d172f0515f73ca3276c79dee49b95ddc16c9"}, @@ -503,7 +503,7 @@ graph = ["objgraph (>=1.7.2)"] name = "docutils" version = "0.14" description = "Docutils -- Python Documentation Utilities" -optional = false +optional = true python-versions = "*" files = [ {file = "docutils-0.14-py2-none-any.whl", hash = 
"sha256:7a4bd47eaf6596e1295ecb11361139febe29b084a87bf005bf899f9a42edc3c6"}, @@ -531,7 +531,7 @@ pyflakes = ">=2.4.0,<2.5.0" name = "flask" version = "2.0.2" description = "A simple framework for building complex web applications." -optional = false +optional = true python-versions = ">=3.6" files = [ {file = "Flask-2.0.2-py3-none-any.whl", hash = "sha256:cb90f62f1d8e4dc4621f52106613488b5ba826b2e1e10a33eac92f723093ab6a"}, @@ -552,7 +552,7 @@ dotenv = ["python-dotenv"] name = "flask-compress" version = "1.10.1" description = "Compress responses in your Flask app with gzip, deflate or brotli." -optional = false +optional = true python-versions = "*" files = [ {file = "Flask-Compress-1.10.1.tar.gz", hash = "sha256:28352387efbbe772cfb307570019f81957a13ff718d994a9125fa705efb73680"}, @@ -751,7 +751,7 @@ test = ["ipykernel", "nbformat", "nose (>=0.10.1)", "numpy (>=1.17)", "pygments" name = "itsdangerous" version = "2.0.1" description = "Safely pass data to untrusted environments and back." -optional = false +optional = true python-versions = ">=3.6" files = [ {file = "itsdangerous-2.0.1-py3-none-any.whl", hash = "sha256:5174094b9637652bdb841a3029700391451bd092ba3db90600dea710ba28e97c"}, @@ -780,7 +780,7 @@ testing = ["Django (<3.1)", "colorama", "docopt", "pytest (<7.0.0)"] name = "jinja2" version = "3.0.3" description = "A very fast and expressive template engine." -optional = false +optional = true python-versions = ">=3.6" files = [ {file = "Jinja2-3.0.3-py3-none-any.whl", hash = "sha256:077ce6014f7b40d03b47d1f1ca4b0fc8328a692bd284016f806ed0eaca390ad8"}, @@ -861,7 +861,7 @@ files = [ name = "markupsafe" version = "2.0.1" description = "Safely add untrusted strings to HTML/XML markup." 
-optional = false +optional = true python-versions = ">=3.6" files = [ {file = "MarkupSafe-2.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d8446c54dc28c01e5a2dbac5a25f071f6653e6e40f3a8818e8b45d790fe6ef53"}, @@ -1857,17 +1857,18 @@ files = [ [[package]] name = "traitlets" -version = "5.1.1" +version = "5.14.1" description = "Traitlets Python configuration system" optional = true -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "traitlets-5.1.1-py3-none-any.whl", hash = "sha256:2d313cc50a42cd6c277e7d7dc8d4d7fedd06a2c215f78766ae7b1a66277e0033"}, - {file = "traitlets-5.1.1.tar.gz", hash = "sha256:059f456c5a7c1c82b98c2e8c799f39c9b8128f6d0d46941ee118daace9eb70c7"}, + {file = "traitlets-5.14.1-py3-none-any.whl", hash = "sha256:2e5a030e6eff91737c643231bfcf04a65b0132078dad75e4936700b213652e74"}, + {file = "traitlets-5.14.1.tar.gz", hash = "sha256:8585105b371a04b8316a43d5ce29c098575c2e477850b62b848b964f1444527e"}, ] [package.extras] -test = ["pytest"] +docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"] +test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0,<7.5)", "pytest-mock", "pytest-mypy-testing"] [[package]] name = "typing-extensions" @@ -1900,7 +1901,7 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] name = "waitress" version = "2.0.0" description = "Waitress WSGI server" -optional = false +optional = true python-versions = ">=3.6.0" files = [ {file = "waitress-2.0.0-py3-none-any.whl", hash = "sha256:29af5a53e9fb4e158f525367678b50053808ca6c21ba585754c77d790008c746"}, @@ -1926,7 +1927,7 @@ files = [ name = "werkzeug" version = "2.0.2" description = "The comprehensive WSGI web application library." 
-optional = false +optional = true python-versions = ">=3.6" files = [ {file = "Werkzeug-2.0.2-py3-none-any.whl", hash = "sha256:63d3dc1cf60e7b7e35e97fa9861f7397283b75d765afcaefd993d6046899de8f"}, @@ -1936,6 +1937,30 @@ files = [ [package.extras] watchdog = ["watchdog"] +[[package]] +name = "xarray" +version = "2023.1.0" +description = "N-D labeled arrays and datasets in Python" +optional = true +python-versions = ">=3.8" +files = [ + {file = "xarray-2023.1.0-py3-none-any.whl", hash = "sha256:7e530b1deafdd43e5c2b577d0944e6b528fbe88045fd849e49a8d11871ecd522"}, + {file = "xarray-2023.1.0.tar.gz", hash = "sha256:7bee552751ff1b29dab8b7715726e5ecb56691ac54593cf4881dff41978ce0cd"}, +] + +[package.dependencies] +numpy = ">=1.20" +packaging = ">=21.3" +pandas = ">=1.3" + +[package.extras] +accel = ["bottleneck", "flox", "numbagg", "scipy"] +complete = ["bottleneck", "cfgrib", "cftime", "dask[complete]", "flox", "fsspec", "h5netcdf", "matplotlib", "nc-time-axis", "netCDF4", "numbagg", "pooch", "pydap", "rasterio", "scipy", "seaborn", "zarr"] +docs = ["bottleneck", "cfgrib", "cftime", "dask[complete]", "flox", "fsspec", "h5netcdf", "ipykernel", "ipython", "jupyter-client", "matplotlib", "nbsphinx", "nc-time-axis", "netCDF4", "numbagg", "pooch", "pydap", "rasterio", "scanpydoc", "scipy", "seaborn", "sphinx-autosummary-accessors", "sphinx-rtd-theme", "zarr"] +io = ["cfgrib", "cftime", "fsspec", "h5netcdf", "netCDF4", "pooch", "pydap", "rasterio", "scipy", "zarr"] +parallel = ["dask[complete]"] +viz = ["matplotlib", "nc-time-axis", "seaborn"] + [extras] configbuilder = ["dash", "dash-bootstrap-components", "dash-core-components", "dash-cytoscape", "dash-dangerously-set-inner-html", "dash-html-components", "dash-table", "docutils", "pandas", "waitress"] interplot = ["dill", "ipython", "pypiwin32"] @@ -1943,4 +1968,4 @@ interplot = ["dill", "ipython", "pypiwin32"] [metadata] lock-version = "2.0" python-versions = ">=3.8,<4.0" -content-hash = 
"fcee840fb928cf78fe30cab9cb07f6694042ad275fc9a21f5a7070ab62fa9a71" +content-hash = "747d53032388a16c4b1d9f0c64149a99ce79cc86b37136a21471ce266d934314" diff --git a/pyproject.toml b/pyproject.toml index dfa18f3b..6ac8921a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ waitress = { version = "^2.0.0", optional = true } dill = { version = "^0.3.4", optional = true } pypiwin32 = { version = "^223", optional = true } ipython = { version = "^7.31.1", optional = true } -xarray = { version = "^2024.0.0", optional = true } +xarray = { version = "^2023.0.0", optional = true } scikit-learn = "^1.0.2" [tool.poetry.dev-dependencies] From 087388fe18be965ee3e044ef34c2104873f6ed35 Mon Sep 17 00:00:00 2001 From: Serwan Asaad Date: Sun, 25 Feb 2024 11:15:42 +0100 Subject: [PATCH 14/28] add xarray as to poetry extras --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 6ac8921a..a23403b3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,6 +49,7 @@ setuptools = "^69.0.2" [tool.poetry.extras] interplot = ["dill", "pypiwin32", "ipython"] configbuilder = ["pandas", "dash", "dash-html-components", "dash-core-components", "dash-bootstrap-components", "dash-cytoscape", "dash-table", "dash-dangerously-set-inner-html", "docutils", "waitress"] +data_handler = ["xarray", "netCDF4"] [tool.black] line-length = 120 From 798ce6393f2851d3bb0c276ccd750535fca1b7c5 Mon Sep 17 00:00:00 2001 From: Serwan Asaad Date: Sun, 25 Feb 2024 11:18:55 +0100 Subject: [PATCH 15/28] modify workflow to allow xarray --- .github/workflows/on-pull-request.yml | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/on-pull-request.yml b/.github/workflows/on-pull-request.yml index 722b220e..c2f43089 100644 --- a/.github/workflows/on-pull-request.yml +++ b/.github/workflows/on-pull-request.yml @@ -35,7 +35,7 @@ jobs: poetry- - name: Set up the project - run: poetry install --extras 
configbuilder + run: poetry install --extras "configbuilder data_handler" - name: Check formatting run: poetry run poe check-format diff --git a/pyproject.toml b/pyproject.toml index a23403b3..c71549dc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,7 +49,7 @@ setuptools = "^69.0.2" [tool.poetry.extras] interplot = ["dill", "pypiwin32", "ipython"] configbuilder = ["pandas", "dash", "dash-html-components", "dash-core-components", "dash-bootstrap-components", "dash-cytoscape", "dash-table", "dash-dangerously-set-inner-html", "docutils", "waitress"] -data_handler = ["xarray", "netCDF4"] +data_handler = ["xarray"] [tool.black] line-length = 120 From 5af5273566d06eae601449025a6ac02da06f9c65 Mon Sep 17 00:00:00 2001 From: Serwan Asaad Date: Sun, 25 Feb 2024 11:20:51 +0100 Subject: [PATCH 16/28] remove underscore for workflow --- .github/workflows/on-pull-request.yml | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/on-pull-request.yml b/.github/workflows/on-pull-request.yml index c2f43089..d76b0a7a 100644 --- a/.github/workflows/on-pull-request.yml +++ b/.github/workflows/on-pull-request.yml @@ -35,7 +35,7 @@ jobs: poetry- - name: Set up the project - run: poetry install --extras "configbuilder data_handler" + run: poetry install --extras "configbuilder datahandler" - name: Check formatting run: poetry run poe check-format diff --git a/pyproject.toml b/pyproject.toml index c71549dc..706a4e89 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,7 +49,7 @@ setuptools = "^69.0.2" [tool.poetry.extras] interplot = ["dill", "pypiwin32", "ipython"] configbuilder = ["pandas", "dash", "dash-html-components", "dash-core-components", "dash-bootstrap-components", "dash-cytoscape", "dash-table", "dash-dangerously-set-inner-html", "docutils", "waitress"] -data_handler = ["xarray"] +datahandler = ["xarray"] [tool.black] line-length = 120 From 8d52ad9af6a924e1cd62a47a93f549319dcb6f97 Mon Sep 17 00:00:00 2001 From: 
Serwan Asaad Date: Sun, 25 Feb 2024 11:25:01 +0100 Subject: [PATCH 17/28] update lock file --- poetry.lock | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index 0249e70f..5d46cd1f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1963,9 +1963,10 @@ viz = ["matplotlib", "nc-time-axis", "seaborn"] [extras] configbuilder = ["dash", "dash-bootstrap-components", "dash-core-components", "dash-cytoscape", "dash-dangerously-set-inner-html", "dash-html-components", "dash-table", "docutils", "pandas", "waitress"] +datahandler = ["xarray"] interplot = ["dill", "ipython", "pypiwin32"] [metadata] lock-version = "2.0" python-versions = ">=3.8,<4.0" -content-hash = "747d53032388a16c4b1d9f0c64149a99ce79cc86b37136a21471ce266d934314" +content-hash = "e5b4865de8ce5e3f72f199425eb6459646164c5b3aa7de6fd5c1dae170aaa8ee" From 84a6b4e75a7eeb276b32072d691810d27dd4e4b9 Mon Sep 17 00:00:00 2001 From: Serwan Asaad Date: Sun, 25 Feb 2024 11:50:38 +0100 Subject: [PATCH 18/28] remove min_size numpy array --- .../results/data_handler/data_processors.py | 4 +--- tests/data_handler/test_numpy_array_saver.py | 17 ++++------------- 2 files changed, 5 insertions(+), 16 deletions(-) diff --git a/qualang_tools/results/data_handler/data_processors.py b/qualang_tools/results/data_handler/data_processors.py index 9546dbed..68acd79d 100644 --- a/qualang_tools/results/data_handler/data_processors.py +++ b/qualang_tools/results/data_handler/data_processors.py @@ -85,9 +85,7 @@ class NumpyArraySaver(DataProcessor): merge_arrays: bool = True merged_array_name: str = "arrays.npz" - def __init__(self, min_size=None, merge_arrays=None, merged_array_name=None): - if min_size is not None: - self.min_size = min_size + def __init__(self, merge_arrays=None, merged_array_name=None): if merge_arrays is not None: self.merge_arrays = merge_arrays if merged_array_name is not None: diff --git a/tests/data_handler/test_numpy_array_saver.py 
b/tests/data_handler/test_numpy_array_saver.py index f165fcd0..c6f38910 100644 --- a/tests/data_handler/test_numpy_array_saver.py +++ b/tests/data_handler/test_numpy_array_saver.py @@ -3,19 +3,10 @@ from qualang_tools.results.data_handler.data_processors import DEFAULT_DATA_PROCESSORS, NumpyArraySaver -def test_numpy_array_saver_process_merged_below_min_size(): - data = {"a": np.array([1, 2, 3]), "b": np.array([4, 5, 6]), "c": 3} - - data_processor = NumpyArraySaver() - processed_data = data.copy() - processed_data = data_processor.process(processed_data) - assert processed_data == data - - def test_numpy_array_saver_process_merged(): data = {"a": np.array([1, 2, 3]), "b": np.array([4, 5, 6]), "c": 3} - data_processor = NumpyArraySaver(min_size=False) + data_processor = NumpyArraySaver() processed_data = data.copy() processed_data = data_processor.process(processed_data) @@ -30,7 +21,7 @@ def test_numpy_array_saver_process_merged(): def test_numpy_array_saver_process_separate(): data = {"a": np.array([1, 2, 3]), "b": np.array([4, 5, 6]), "c": 3} - data_processor = NumpyArraySaver(min_size=False) + data_processor = NumpyArraySaver() processed_data = data_processor.process(data) assert processed_data == { "a": "./arrays.npz#a", @@ -42,7 +33,7 @@ def test_numpy_array_saver_process_separate(): def test_numpy_array_saver_post_process_merged(tmp_path): data = {"a": np.array([1, 2, 3]), "b": np.array([4, 5, 6]), "c": 3} - data_processor = NumpyArraySaver(min_size=False) + data_processor = NumpyArraySaver() processed_data = data.copy() data_processor.process(processed_data) @@ -59,7 +50,7 @@ def test_numpy_array_saver_post_process_merged(tmp_path): def test_numpy_array_saver_post_process_separate(tmp_path): data = {"a": np.array([1, 2, 3]), "b": np.array([4, 5, 6]), "c": 3} - data_processor = NumpyArraySaver(min_size=False, merge_arrays=False) + data_processor = NumpyArraySaver(merge_arrays=False) data_processor.process(data.copy()) 
data_processor.post_process(data_folder=tmp_path) From c8ee97a2dd5a3b25b0dbb59e6d184ee9be036eff Mon Sep 17 00:00:00 2001 From: Serwan Asaad Date: Sun, 25 Feb 2024 11:50:56 +0100 Subject: [PATCH 19/28] add test xarray skip if not installed --- tests/data_handler/test_xarray_saver.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/data_handler/test_xarray_saver.py b/tests/data_handler/test_xarray_saver.py index 275afb02..9d6ed875 100644 --- a/tests/data_handler/test_xarray_saver.py +++ b/tests/data_handler/test_xarray_saver.py @@ -3,20 +3,22 @@ from qualang_tools.results.data_handler.data_processors import XarraySaver -def netcdf4_installed(): +def module_installed(module_name): try: - import netCDF4 + exec(f"import {module_name}") except ImportError: return False return True +@pytest.mark.skipif(not module_installed("xarray"), reason="xarray not installed") def test_xarray_saver_no_xarrays(): xarray_saver = XarraySaver() data = {"a": 1, "b": 2, "c": 3} assert xarray_saver.process(data) == data +@pytest.mark.skipif(not module_installed("xarray"), reason="xarray not installed") def test_xarray_data_saver_suffixes(): xarray_saver = XarraySaver() assert xarray_saver.file_format == "hdf5" @@ -35,7 +37,7 @@ def test_xarray_data_saver_suffixes(): assert xarray_saver.file_suffix == ".zarr" -@pytest.mark.skipif(not netcdf4_installed(), reason="netCDF4 not installed") +@pytest.mark.skipif(not (module_installed("xarray") and module_installed("netCDF4")), reason="xarray not installed") def test_xarray_saver_merge_netcdf(tmp_path): import xarray as xr @@ -54,7 +56,7 @@ def test_xarray_saver_merge_netcdf(tmp_path): xr.load_dataset(tmp_path / "xarrays.nc", group="d") -@pytest.mark.skipif(not netcdf4_installed(), reason="netCDF4 not installed") +@pytest.mark.skipif(not (module_installed("xarray") and module_installed("netCDF4")), reason="xarray not installed") def test_xarray_saver_merge_hdf5(tmp_path): import xarray as xr @@ -73,6 +75,7 @@ def 
test_xarray_saver_merge_hdf5(tmp_path): xr.load_dataset(tmp_path / "xarrays.h5", group="d") +@pytest.mark.skipif(not module_installed("xarray"), reason="xarray not installed") def test_xarray_saver_no_merge_netcdf(tmp_path): import xarray as xr From 7d620b4f0d6a1140af1781e9a442807cfc785519 Mon Sep 17 00:00:00 2001 From: Serwan Asaad Date: Sun, 25 Feb 2024 11:52:24 +0100 Subject: [PATCH 20/28] Reduce performance test duration --- tests/data_handler/test_create_data_folder.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/data_handler/test_create_data_folder.py b/tests/data_handler/test_create_data_folder.py index c9e10811..de0567d6 100644 --- a/tests/data_handler/test_create_data_folder.py +++ b/tests/data_handler/test_create_data_folder.py @@ -85,8 +85,8 @@ def test_performance_get_idxs(tmp_path): now = datetime.now() - for k in range(1, 1000): + for k in range(1, 110): properties = create_data_folder(tmp_path, name="my_test", use_datetime=now) properties_latest = get_latest_data_folder(tmp_path) - assert properties["idx"] == properties_latest["idx"] == k \ No newline at end of file + assert properties["idx"] == properties_latest["idx"] == k From bc07ff1527915e4324d816e8cccd07b596c98995 Mon Sep 17 00:00:00 2001 From: Serwan Asaad Date: Sun, 25 Feb 2024 13:07:29 +0100 Subject: [PATCH 21/28] added `additional_files` --- qualang_tools/results/README.md | 46 +++++++++++++++++-- .../results/data_handler/data_handler.py | 19 +++++++- tests/data_handler/test_data_handler.py | 28 +++++++++++ 3 files changed, 88 insertions(+), 5 deletions(-) diff --git a/qualang_tools/results/README.md b/qualang_tools/results/README.md index 10a06b84..aa8282d8 100644 --- a/qualang_tools/results/README.md +++ b/qualang_tools/results/README.md @@ -180,10 +180,11 @@ supported: - Numpy arrays - Xarrays -### Usage example + +### Basic example ```python # Assume a measurement has been performed, and all results are collected here -data = { +T1_data = { "T1": 5e-6, 
"T1_figure": plt.figure(), "IQ_array": np.array([[1, 2, 3], [4, 5, 6]]) @@ -193,7 +194,7 @@ data = { data_handler = DataHandler(root_data_folder="C:/data") # Save results -data_folder = data_handler.save_data("T1_measurement", data=data) +data_folder = data_handler.save_data(data=T1_data, name="T1_measurement") print(data_folder) # C:/data/2024-02-24/#152_T1_measurement_095214 # This assumes the save was performed at 2024-02-24 at 09:52:14 @@ -208,4 +209,41 @@ After calling `data_handler.save_data()`, three files are created in `data_folde "T1_figure": "./T1_figure.png", "IQ_array": "./arrays.npz#IQ_array" } - ``` \ No newline at end of file + ``` + +### Creating a data folder +A data folder can be created in two ways: +```python +# Method 1: explicitly creating data folder +data_folder_properties = data_handler.create_data_folder(name="new_data_folder") + +# Method 2: Create when saving results +data_folder = data_handler.save_data("T1_measurement", data=T1_data) +``` +Note that the methods return different results. +The method `DataHandler.save_data` simply returns the path to the newly-created data folder, whereas `DataHandler.create_data_folder` returns a dict with additional information on the data folder such as the `idx`. +This additional information can also be accessed after calling `DataHandler.save_data` through the attribute `DataHandler.path_properties`. + +### Manually adding additional files to data folder +After a data folder has been created, its path can be accessed from `DataHandler.path`. +This allows you to add additional files: + +```python +data_folder = data_handler.save_data(data) +assert data_folder == data_handler.path # data_folder is added to data_handler.path + +(data_handler.path / "test_file.txt").write_text("I'm adding a file to the data folder") +``` + +### Auto-saving additional files to data folder +In many cases certain files need to be added every time a data folder is created. 
+Instead of having to manually add these files each time, they can be specified beforehand: + +```python +DataHandler.additional_files = { + "configuration.py": "configuration.py" +} +``` +Each key is a path from the current working directory, and the corresponding value is the target filepath w.r.t. the data folder. +The key does not have to be a relative filepath, it can also be an absolute path. +This can be useful if you want to autosave a specific file on a fixed location somewhere on your hard drive. \ No newline at end of file diff --git a/qualang_tools/results/data_handler/data_handler.py b/qualang_tools/results/data_handler/data_handler.py index 516d7dc7..f949b918 100644 --- a/qualang_tools/results/data_handler/data_handler.py +++ b/qualang_tools/results/data_handler/data_handler.py @@ -1,7 +1,9 @@ from datetime import datetime from pathlib import Path import json +import shutil from typing import Any, Dict, Optional, Sequence, Union +import warnings from .data_processors import DEFAULT_DATA_PROCESSORS, DataProcessor from .data_folder_tools import DEFAULT_FOLDER_PATTERN, create_data_folder @@ -101,6 +103,7 @@ class DataHandler: folder_pattern: str = DEFAULT_FOLDER_PATTERN data_filename: str = "data.json" metadata_filename: str = "metadata.json" + additional_files: Dict[str, str] = {} def __init__( self, @@ -108,6 +111,7 @@ def __init__( data_processors: Optional[Sequence[DataProcessor]] = None, root_data_folder: Optional[Union[str, Path]] = None, folder_pattern: Optional[str] = None, + additional_files: Optional[Dict[str, str]] = None, path: Optional[Path] = None, ): self.name = name @@ -120,6 +124,8 @@ def __init__( self.root_data_folder = root_data_folder if folder_pattern is not None: self.folder_pattern = folder_pattern + if additional_files is not None: + self.additional_files = additional_files self.path = path self.path_properties = None @@ -162,6 +168,8 @@ def create_data_folder( self.name = name if self.name is None: raise ValueError("DataHandler: 
name must be specified") + if self.root_data_folder is None: + raise ValueError("DataHandler: root_data_folder must be specified") self.path_properties = create_data_folder( root_data_folder=self.root_data_folder, @@ -218,7 +226,7 @@ def save_data( if self.path is None: self.create_data_folder(name=self.name, idx=idx, use_datetime=use_datetime) - return save_data( + data_folder = save_data( data_folder=self.path, data=data, metadata=metadata, @@ -226,3 +234,12 @@ def save_data( metadata_filename=self.metadata_filename, data_processors=self.data_processors, ) + + for source_name, destination_name in self.additional_files.items(): + if not Path(source_name).exists(): + warnings.warn(f"Additional file {source_name} does not exist, not copying", UserWarning) + continue + + shutil.copy(source_name, data_folder / destination_name) + + return data_folder \ No newline at end of file diff --git a/tests/data_handler/test_data_handler.py b/tests/data_handler/test_data_handler.py index ddcf118c..9d217240 100644 --- a/tests/data_handler/test_data_handler.py +++ b/tests/data_handler/test_data_handler.py @@ -1,6 +1,7 @@ import json import pytest from datetime import datetime +import warnings from qualang_tools.results.data_handler.data_folder_tools import DEFAULT_FOLDER_PATTERN from qualang_tools.results.data_handler.data_handler import DataHandler @@ -178,3 +179,30 @@ def test_data_handler_overwrite_initialized_name_save_data(tmp_path): expected_data_folder = now.strftime(expected_data_folder) assert (tmp_path / expected_data_folder / "data.json").exists() + + +def test_data_handler_additional_file(tmp_path): + root_data_folder = tmp_path / "my_data" + root_data_folder.mkdir() + + data_handler = DataHandler( + "my_data", root_data_folder=root_data_folder, additional_files={tmp_path / "test.txt": "test.txt"} + ) + data = {"test": 1} + metadata = {"test": 2} + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + data_handler.save_data(data, 
metadata=metadata) + + assert any(str(w_elem.message).endswith("does not exist, not copying") for w_elem in w) + + (tmp_path / "test.txt").write_text("test_contents") + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + data_folder = data_handler.save_data(data, metadata=metadata) + + assert not any(str(w_elem.message).endswith("does not exist, not copying") for w_elem in w) + + assert (data_folder / "test.txt").read_text() == "test_contents" From 2bbef32d8f5e9147acedd1368dcf1c836d349c00 Mon Sep 17 00:00:00 2001 From: Serwan Asaad Date: Sun, 25 Feb 2024 13:09:43 +0100 Subject: [PATCH 22/28] black formatting --- qualang_tools/results/data_handler/data_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qualang_tools/results/data_handler/data_handler.py b/qualang_tools/results/data_handler/data_handler.py index f949b918..ad34623d 100644 --- a/qualang_tools/results/data_handler/data_handler.py +++ b/qualang_tools/results/data_handler/data_handler.py @@ -242,4 +242,4 @@ def save_data( shutil.copy(source_name, data_folder / destination_name) - return data_folder \ No newline at end of file + return data_folder From 99c1853f68296d22426ae26d7e47f8f5c8acfb5b Mon Sep 17 00:00:00 2001 From: Serwan Asaad Date: Sun, 25 Feb 2024 13:13:21 +0100 Subject: [PATCH 23/28] added info on auto using filename as name --- qualang_tools/results/README.md | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/qualang_tools/results/README.md b/qualang_tools/results/README.md index aa8282d8..f9315ca9 100644 --- a/qualang_tools/results/README.md +++ b/qualang_tools/results/README.md @@ -218,7 +218,7 @@ A data folder can be created in two ways: data_folder_properties = data_handler.create_data_folder(name="new_data_folder") # Method 2: Create when saving results -data_folder = data_handler.save_data("T1_measurement", data=T1_data) +data_folder = data_handler.save_data(data=T1_data, name="T1_measurement") ``` 
Note that the methods return different results. The method `DataHandler.save_data` simply returns the path to the newly-created data folder, whereas `DataHandler.create_data_folder` returns a dict with additional information on the data folder such as the `idx`. @@ -246,4 +246,13 @@ DataHandler.additional_files = { ``` Each key is a path from the current working directory, and the corresponding value is the target filepath w.r.t. the data folder. The key does not have to be a relative filepath, it can also be an absolute path. -This can be useful if you want to autosave a specific file on a fixed location somewhere on your hard drive. \ No newline at end of file +This can be useful if you want to autosave a specific file on a fixed location somewhere on your hard drive. + +### Use filename as name +Instead of manually specifying the name for a data folder, often the current filename is a good choice. +This can be done by creating the data handler as such: + +```python +from pathlib import Path +data_handler = DataHandler(name=Path(__file__).stem) +``` \ No newline at end of file From c5ed7183bbc3817a29cc15199b9df463500c0cca Mon Sep 17 00:00:00 2001 From: Serwan Asaad Date: Mon, 26 Feb 2024 14:44:11 +0100 Subject: [PATCH 24/28] Update changelog and readme --- CHANGELOG.md | 1 + README.md | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 00429e63..08a1dd53 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) ## [Unreleased] ### Added - simulator - ``create_simulator_controller_connections`` can now be used to create the connections between a subset of a large cluster. +- results - ``DataHandler`` can be used to save data (values, matplotlib figures, numpy/xarray arrays) to the local file storage. ### Changed - config/waveform_tools - Added sampling rate argument with default value set to 1GS/s to the waveforms. 
diff --git a/README.md b/README.md index 1aed52fc..26e62a39 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ It includes: * [QUA Loops Tools](qualang_tools/loops/README.md) - This library includes tools for parametrizing QUA for_ loops using the numpy (linspace, arange, logspace) methods or by directly inputting a numpy array. * [Plotting Tools](qualang_tools/plot/README.md) - This library includes tools to help handling plots from QUA programs. -* [Result Tools](qualang_tools/results/README.md) - This library includes tools for handling and fetching results from QUA programs. +* [Result Tools](qualang_tools/results/README.md) - This library includes tools for handling and fetching results from QUA programs, and saving them to the local file storage. * [Units Tools](qualang_tools/units/README.md) - This library includes tools for using units (MHz, us, mV...) and converting data to other units (demodulated data to volts for instance). * [Analysis Tools](qualang_tools/analysis/README.md) - This library includes tools for analyzing data from experiments. It currently has a two-states discriminator for analyzing the ground and excited IQ blobs. 
From 9bd4419301d2e4835ea31162d030135dd0e1ef56 Mon Sep 17 00:00:00 2001 From: Serwan Asaad Date: Mon, 26 Feb 2024 16:35:39 +0100 Subject: [PATCH 25/28] Fix attempt: windows \ to / --- qualang_tools/results/data_handler/data_folder_tools.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/qualang_tools/results/data_handler/data_folder_tools.py b/qualang_tools/results/data_handler/data_folder_tools.py index c8e6422b..c487db87 100644 --- a/qualang_tools/results/data_handler/data_folder_tools.py +++ b/qualang_tools/results/data_handler/data_folder_tools.py @@ -59,6 +59,8 @@ def extract_data_folder_properties( else: folder_path_str = data_folder.name + folder_path_str = folder_path_str.replace("\\", "/") + regex_match = re.match(pattern, folder_path_str) if regex_match is None: return None From 47e308a411943b4ff519d746c535b21657c0632e Mon Sep 17 00:00:00 2001 From: Serwan Asaad Date: Mon, 26 Feb 2024 16:48:11 +0100 Subject: [PATCH 26/28] fix: create create_data without creating --- .../results/data_handler/data_folder_tools.py | 18 ++++++++++--- tests/data_handler/test_create_data_folder.py | 26 +++++++++++++++++++ .../test_get_latest_data_folder.py | 4 ++- 3 files changed, 44 insertions(+), 4 deletions(-) diff --git a/qualang_tools/results/data_handler/data_folder_tools.py b/qualang_tools/results/data_handler/data_folder_tools.py index c487db87..ebf865a0 100644 --- a/qualang_tools/results/data_handler/data_folder_tools.py +++ b/qualang_tools/results/data_handler/data_folder_tools.py @@ -198,7 +198,19 @@ def create_data_folder( if data_folder.exists(): raise FileExistsError(f"Data folder {data_folder} already exists.") - if create: - data_folder.mkdir(parents=True) + if not create: + return { + "idx": idx, + "name": name, + "path": data_folder, + "relative_path": data_folder.relative_to(root_data_folder), + **{attr: getattr(use_datetime, attr) for attr in ["year", "month", "day", "hour", "minute", "second"]}, + } - return extract_data_folder_properties(data_folder, 
folder_pattern, root_data_folder) + data_folder.mkdir(parents=True) + + properties = extract_data_folder_properties(data_folder, folder_pattern, root_data_folder) + if properties is None: + raise ValueError(f"Could not extract properties from data folder {data_folder}.") + + return properties diff --git a/tests/data_handler/test_create_data_folder.py b/tests/data_handler/test_create_data_folder.py index de0567d6..b3659b81 100644 --- a/tests/data_handler/test_create_data_folder.py +++ b/tests/data_handler/test_create_data_folder.py @@ -90,3 +90,29 @@ def test_performance_get_idxs(tmp_path): properties_latest = get_latest_data_folder(tmp_path) assert properties["idx"] == properties_latest["idx"] == k + + +def test_create_data_folder_without_creating(tmp_path): + now = datetime.now() + + for k in range(3): + properties = create_data_folder(tmp_path, name="my_test", use_datetime=now, create=False) + path = DEFAULT_FOLDER_PATTERN.format(idx=1, name="my_test") + path = now.strftime(path) + path = Path(path) + + properties_expected = { + "idx": 1, + "name": "my_test", + "year": now.year, + "month": now.month, + "day": now.day, + "hour": now.hour, + "minute": now.minute, + "second": now.second, + "path": tmp_path / path, + "relative_path": path, + } + + assert properties == properties_expected + assert not (list(tmp_path.iterdir())) diff --git a/tests/data_handler/test_get_latest_data_folder.py b/tests/data_handler/test_get_latest_data_folder.py index 0dc57419..72cf6efa 100644 --- a/tests/data_handler/test_get_latest_data_folder.py +++ b/tests/data_handler/test_get_latest_data_folder.py @@ -1,6 +1,8 @@ from datetime import datetime +from pathlib import Path from qualang_tools.results.data_handler.data_folder_tools import ( get_latest_data_folder, + create_data_folder, DEFAULT_FOLDER_PATTERN, ) @@ -129,7 +131,7 @@ def test_get_latest_data_folder_switched_idxs(tmp_path): assert properties == expected_properties -def test_create_data_folders_correct_order(tmp_path): +def 
test_get_latest_data_folder_correct_order(tmp_path): from qualang_tools.results.data_handler.data_folder_tools import ( get_latest_data_folder, ) From 3f22f6447de9527bb113e49a0cc371c31a7181ef Mon Sep 17 00:00:00 2001 From: Serwan Asaad Date: Mon, 26 Feb 2024 16:49:16 +0100 Subject: [PATCH 27/28] fix import pathlib --- tests/data_handler/test_create_data_folder.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/data_handler/test_create_data_folder.py b/tests/data_handler/test_create_data_folder.py index b3659b81..6767dc95 100644 --- a/tests/data_handler/test_create_data_folder.py +++ b/tests/data_handler/test_create_data_folder.py @@ -1,4 +1,5 @@ import pytest +from pathlib import Path from datetime import datetime from qualang_tools.results.data_handler.data_folder_tools import ( create_data_folder, From a9e14ea68ddc3984e6f2030ddf63110b0a42fee8 Mon Sep 17 00:00:00 2001 From: Serwan Asaad Date: Mon, 26 Feb 2024 19:14:06 +0100 Subject: [PATCH 28/28] Allow multiple saves --- qualang_tools/results/README.md | 19 +++++++++++ .../results/data_handler/data_folder_tools.py | 14 ++++---- .../results/data_handler/data_handler.py | 6 ++++ tests/data_handler/test_create_data_folder.py | 12 +++---- tests/data_handler/test_data_handler.py | 33 ++++++++++++++----- .../test_extract_data_folder_properties.py | 4 +-- .../test_get_latest_data_folder.py | 20 +++++------ 7 files changed, 74 insertions(+), 34 deletions(-) diff --git a/qualang_tools/results/README.md b/qualang_tools/results/README.md index f9315ca9..50654bc1 100644 --- a/qualang_tools/results/README.md +++ b/qualang_tools/results/README.md @@ -224,6 +224,25 @@ Note that the methods return different results. The method `DataHandler.save_data` simply returns the path to the newly-created data folder, whereas `DataHandler.create_data_folder` returns a dict with additional information on the data folder such as the `idx`. 
This additional information can also be accessed after calling `DataHandler.save_data` through the attribute `DataHandler.path_properties`. +### Saving multiple times +A `DataHandler` object can be used to save multiple times to different data folders: +```python + +data_handler = DataHandler(root_data_folder="C:/data") + +T1_data = {...} + +# Save results +data_folder = data_handler.save_data(data=T1_data, name="T1_measurement") +# C:/data/2024-02-24/#1_T1_measurement_095214 + +T1_modified_data = {...} + +data_folder = data_handler.save_data(data=T1_modified_data, name="T1_measurement") +# C:/data/2024-02-24/#2_T1_measurement_095217 +``` +The second call to `DataHandler.save_data` creates a new data folder where the incrementer is increased by 1. + ### Manually adding additional files to data folder After a data folder has been created, its path can be accessed from `DataHandler.path`. This allows you to add additional files: diff --git a/qualang_tools/results/data_handler/data_folder_tools.py b/qualang_tools/results/data_handler/data_folder_tools.py index ebf865a0..2cf80b72 100644 --- a/qualang_tools/results/data_handler/data_folder_tools.py +++ b/qualang_tools/results/data_handler/data_folder_tools.py @@ -28,7 +28,7 @@ def _validate_datetime(datetime_str: str, datetime_format: str) -> bool: def extract_data_folder_properties( data_folder: Path, pattern: str = DEFAULT_FOLDER_PATTERN, root_data_folder: Path = None -) -> Optional[Dict[str, Union[str, int]]]: +) -> Optional[Dict[str, Union[str, int, Path]]]: """Extract properties from a data folder. :param data_folder: The data folder to extract properties from. Should be an absolute path. 
@@ -66,9 +66,9 @@ def extract_data_folder_properties( return None properties = regex_match.groupdict() properties = {key: int(value) if value.isdigit() else value for key, value in properties.items()} - properties["path"] = str(data_folder) + properties["path"] = data_folder if root_data_folder is not None: - properties["relative_path"] = str(data_folder.relative_to(root_data_folder)) + properties["relative_path"] = data_folder.relative_to(root_data_folder) return properties @@ -141,14 +141,14 @@ def get_latest_data_folder( # Iterate over the folders, recursively call determine_latest_data_folder_idx for folder in sorted_folders: - sub_folder_idx = get_latest_data_folder( + sub_folder_properties = get_latest_data_folder( root_data_folder, folder_pattern=folder_pattern, current_folder_pattern=remaining_folder_pattern[0], relative_path=relative_path / folder.name, ) - if sub_folder_idx is not None: - return sub_folder_idx + if sub_folder_properties is not None: + return sub_folder_properties return None @@ -159,7 +159,7 @@ def create_data_folder( folder_pattern: str = DEFAULT_FOLDER_PATTERN, use_datetime: Optional[datetime] = None, create: bool = True, -) -> Dict[str, Union[str, int]]: +) -> Dict[str, Union[str, int, Path]]: """Create a new data folder in a given root data folder. First checks the index of the latest data folder and increments by one. diff --git a/qualang_tools/results/data_handler/data_handler.py b/qualang_tools/results/data_handler/data_handler.py index ad34623d..015f78e7 100644 --- a/qualang_tools/results/data_handler/data_handler.py +++ b/qualang_tools/results/data_handler/data_handler.py @@ -199,6 +199,10 @@ def save_data( `idx` parameter. If a datetime object is provided using the `use_datetime` parameter, it will be used in the folder name. + A new data folder is created if + - the `path` attribute is not set + - the `path` attribute is set and the data folder already contains data + :param data: The data to be saved. 
:type data: any :param name: The name of the data folder. @@ -225,6 +229,8 @@ def save_data( raise ValueError("DataHandler: name must be specified") if self.path is None: self.create_data_folder(name=self.name, idx=idx, use_datetime=use_datetime) + elif self.path is not None and (self.path / self.data_filename).exists(): + self.create_data_folder(name=self.name, idx=idx, use_datetime=use_datetime) data_folder = save_data( data_folder=self.path, diff --git a/tests/data_handler/test_create_data_folder.py b/tests/data_handler/test_create_data_folder.py index 6767dc95..a316dcc6 100644 --- a/tests/data_handler/test_create_data_folder.py +++ b/tests/data_handler/test_create_data_folder.py @@ -29,8 +29,8 @@ def test_create_data_folder_empty(tmp_path): "hour": now.hour, "minute": now.minute, "second": now.second, - "path": str(tmp_path / path), - "relative_path": path, + "path": tmp_path / path, + "relative_path": Path(path), } assert properties == properties_expected @@ -52,8 +52,8 @@ def test_create_successive_data_folder(tmp_path): "hour": now.hour, "minute": now.minute, "second": now.second, - "path": str(tmp_path / path), - "relative_path": path, + "path": tmp_path / path, + "relative_path": Path(path), } assert properties == properties_expected @@ -72,8 +72,8 @@ def test_create_successive_data_folder(tmp_path): "hour": now.hour, "minute": now.minute, "second": now.second, - "path": str(tmp_path / path), - "relative_path": path, + "path": tmp_path / path, + "relative_path": Path(path), } assert properties == properties_expected diff --git a/tests/data_handler/test_data_handler.py b/tests/data_handler/test_data_handler.py index 9d217240..e93e1b6d 100644 --- a/tests/data_handler/test_data_handler.py +++ b/tests/data_handler/test_data_handler.py @@ -44,9 +44,7 @@ def test_data_handler_metadata(tmp_path): assert (tmp_path / expected_data_folder / "metadata.json").exists() file_data = json.loads((tmp_path / expected_data_folder / "data.json").read_text()) - file_metadata = 
json.loads( - (tmp_path / expected_data_folder / "metadata.json").read_text() - ) + file_metadata = json.loads((tmp_path / expected_data_folder / "metadata.json").read_text()) assert file_data == data assert file_metadata == metadata @@ -58,9 +56,7 @@ def process(self, data): data["a"] = 42 return data - data_handler = DataHandler( - root_data_folder=tmp_path, data_processors=[TestProcessor()] - ) + data_handler = DataHandler(root_data_folder=tmp_path, data_processors=[TestProcessor()]) data = {"a": 1, "b": 2, "c": 3} @@ -170,9 +166,7 @@ def test_data_handler_overwrite_initialized_name_save_data(tmp_path): now = datetime.now() - data_handler.save_data( - {"a": 1, "b": 2, "c": 3}, name="my_new_data", use_datetime=now - ) + data_handler.save_data({"a": 1, "b": 2, "c": 3}, name="my_new_data", use_datetime=now) assert data_handler.name == "my_new_data" expected_data_folder = DEFAULT_FOLDER_PATTERN.format(name="my_new_data", idx=1) @@ -206,3 +200,24 @@ def test_data_handler_additional_file(tmp_path): assert not any(str(w_elem.message).endswith("does not exist, not copying") for w_elem in w) assert (data_folder / "test.txt").read_text() == "test_contents" + + +def test_data_handler_multiple_saves(tmp_path): + data_handler = DataHandler(root_data_folder=tmp_path) + + data = {"a": 1, "b": 2, "c": 3} + now = datetime.now() + + data_handler.save_data(data, "my_data", use_datetime=now) + + expected_data_folder = DEFAULT_FOLDER_PATTERN.format(name="my_data", idx=1) + expected_data_folder = now.strftime(expected_data_folder) + + assert data_handler.path == (tmp_path / expected_data_folder) + + data_handler.save_data(data, "my_data", use_datetime=now) + + expected_data_folder = DEFAULT_FOLDER_PATTERN.format(name="my_data", idx=2) + expected_data_folder = now.strftime(expected_data_folder) + + assert data_handler.path == (tmp_path / expected_data_folder) diff --git a/tests/data_handler/test_extract_data_folder_properties.py 
b/tests/data_handler/test_extract_data_folder_properties.py index 8fa46d06..02ec7a2d 100644 --- a/tests/data_handler/test_extract_data_folder_properties.py +++ b/tests/data_handler/test_extract_data_folder_properties.py @@ -11,11 +11,11 @@ def test_extract_data_folder_properties(): "hour": 12, "minute": 34, "second": 56, - "path": "#123_test_123456", + "path": Path("#123_test_123456"), } assert properties == expected_properties properties = extract_data_folder_properties(Path("#123_my_test_123456"), "#{idx}_{name}_%H%M%S") expected_properties["name"] = "my_test" - expected_properties["path"] = "#123_my_test_123456" + expected_properties["path"] = Path("#123_my_test_123456") assert properties == expected_properties diff --git a/tests/data_handler/test_get_latest_data_folder.py b/tests/data_handler/test_get_latest_data_folder.py index 72cf6efa..3edbc303 100644 --- a/tests/data_handler/test_get_latest_data_folder.py +++ b/tests/data_handler/test_get_latest_data_folder.py @@ -26,8 +26,8 @@ def test_get_latest_data_folder_default_structure(tmp_path): "hour": 12, "minute": 34, "second": 56, - "path": str(date_folder / "#123_test_123456"), - "relative_path": f"{date_folder.name}/#123_test_123456", + "path": date_folder / "#123_test_123456", + "relative_path": Path(f"{date_folder.name}/#123_test_123456"), } assert properties == expected_properties @@ -49,8 +49,8 @@ def test_get_latest_data_folder_two_items(tmp_path): "hour": 12, "minute": 34, "second": 57, - "path": str(date_folder / "#124_test_123457"), - "relative_path": f"{date_folder.name}/#124_test_123457", + "path": date_folder / "#124_test_123457", + "relative_path": Path(f"{date_folder.name}/#124_test_123457"), } assert properties == expected_properties @@ -75,8 +75,8 @@ def test_get_latest_data_folder_two_items_different_date(tmp_path): "hour": 12, "minute": 34, "second": 57, - "path": str(date_folder / "#124_test_123457"), - "relative_path": f"{date_folder.name}/#124_test_123457", + "path": date_folder / 
"#124_test_123457", + "relative_path": Path(f"{date_folder.name}/#124_test_123457"), } assert properties == expected_properties @@ -100,8 +100,8 @@ def test_get_latest_data_folder_different_date_empty_last_folder(tmp_path): "hour": 12, "minute": 34, "second": 56, - "path": str(tmp_path / "2021-01-05/#123_test_123456"), - "relative_path": "2021-01-05/#123_test_123456", + "path": tmp_path / "2021-01-05/#123_test_123456", + "relative_path": Path("2021-01-05/#123_test_123456"), } assert properties == expected_properties @@ -124,8 +124,8 @@ def test_get_latest_data_folder_switched_idxs(tmp_path): "hour": 12, "minute": 34, "second": 57, - "path": str(date_folder / "#123_test_123457"), - "relative_path": f"{date_folder.name}/#123_test_123457", + "path": date_folder / "#123_test_123457", + "relative_path": Path(f"{date_folder.name}/#123_test_123457"), } assert properties == expected_properties