diff --git a/conda.yaml b/conda.yaml index ae44d61a..c8015bf2 100644 --- a/conda.yaml +++ b/conda.yaml @@ -19,4 +19,4 @@ dependencies: - h5py - scipy - pip: - - cyclopts + - cyclopts>=3.0.0 diff --git a/linc_convert/modalities/__init__.py b/linc_convert/modalities/__init__.py index 98ea30fa..0c741d82 100644 --- a/linc_convert/modalities/__init__.py +++ b/linc_convert/modalities/__init__.py @@ -1,5 +1,4 @@ """Converters for all imaging modalities.""" __all__ = ["df", "lsm", "wk", "psoct"] -from . import df, lsm, wk, psoct - +from . import df, lsm, psoct, wk diff --git a/linc_convert/modalities/df/multi_slice.py b/linc_convert/modalities/df/multi_slice.py index e9380f3b..0cedf329 100644 --- a/linc_convert/modalities/df/multi_slice.py +++ b/linc_convert/modalities/df/multi_slice.py @@ -22,7 +22,8 @@ from linc_convert.utils.j2k import WrappedJ2K, get_pixelsize from linc_convert.utils.math import ceildiv, floordiv from linc_convert.utils.orientation import center_affine, orientation_to_affine -from linc_convert.utils.zarr import make_compressor +from linc_convert.utils.zarr.compressor import make_compressor +from linc_convert.utils.zarr.zarr_config import ZarrConfig HOME = "/space/aspasia/2/users/linc/000003" @@ -38,13 +39,9 @@ @ms.default def convert( inp: list[str], - out: str | None = None, *, - chunk: int = 1024, - compressor: str = "blosc", - compressor_opt: str = "{}", + zarr_config: ZarrConfig, max_load: int = 16384, - nii: bool = False, orientation: str = "coronal", center: bool = True, thickness: float | None = None, @@ -84,18 +81,8 @@ def convert( ---------- inp Path to the input slices - out - Path to the output Zarr directory [.ome.zarr] - chunk - Output chunk size - compressor : {blosc, zlib, raw} - Compression method - compressor_opt - Compression options max_load Maximum input chunk size - nii - Convert to nifti-zarr. True if path ends in ".nii.zarr" orientation Orientation of the slice center @@ -103,6 +90,12 @@ def convert( thickness Slice thickness """ + out: str = zarr_config.out + chunk: int = zarr_config.chunk[0] + compressor: str = zarr_config.compressor + compressor_opt: str = zarr_config.compressor_opt + nii: bool = zarr_config.nii + # Default output path if not out: out = os.path.splitext(inp[0])[0] diff --git a/linc_convert/modalities/df/single_slice.py b/linc_convert/modalities/df/single_slice.py index 7bc7782d..e48a725c 100644 --- a/linc_convert/modalities/df/single_slice.py +++ b/linc_convert/modalities/df/single_slice.py @@ -21,7 +21,8 @@ from linc_convert.utils.j2k import WrappedJ2K, get_pixelsize from linc_convert.utils.math import ceildiv from linc_convert.utils.orientation import center_affine, orientation_to_affine -from linc_convert.utils.zarr import make_compressor +from linc_convert.utils.zarr.compressor import make_compressor +from linc_convert.utils.zarr.zarr_config import ZarrConfig ss = App(name="singleslice", help_format="markdown") df.command(ss) @@ -30,13 +31,9 @@ @ss.default def convert( inp: str, - out: str | None = None, *, - chunk: int = 1024, - compressor: str = "blosc", - compressor_opt: str = "{}", + zarr_config: ZarrConfig, max_load: int = 16384, - nii: bool = False, orientation: str = "coronal", center: bool = True, thickness: float | None = None, @@ -71,18 +68,8 @@ def convert( ---------- inp Path to the input JP2 file - out - Path to the output Zarr directory [.ome.zarr] - chunk - Output chunk size - compressor : {blosc, zlib, raw} - Compression method - compressor_opt - Compression options max_load Maximum input chunk size - nii - Convert to nifti-zarr. True if path ends in ".nii.zarr" orientation Orientation of the slice center @@ -90,6 +77,12 @@ def convert( thickness Slice thickness """ + out: str = zarr_config.out + chunk: int = zarr_config.chunk + compressor: str = zarr_config.compressor + compressor_opt: str = zarr_config.compressor_opt + nii: bool = zarr_config.nii + if not out: out = os.path.splitext(inp)[0] out += ".nii.zarr" if nii else ".ome.zarr" diff --git a/linc_convert/modalities/lsm/mosaic.py b/linc_convert/modalities/lsm/mosaic.py index e02c0dba..d58b20cc 100644 --- a/linc_convert/modalities/lsm/mosaic.py +++ b/linc_convert/modalities/lsm/mosaic.py @@ -22,7 +22,8 @@ from linc_convert.modalities.lsm.cli import lsm from linc_convert.utils.math import ceildiv from linc_convert.utils.orientation import center_affine, orientation_to_affine -from linc_convert.utils.zarr import make_compressor +from linc_convert.utils.zarr.compressor import make_compressor +from linc_convert.utils.zarr.zarr_config import ZarrConfig mosaic = cyclopts.App(name="mosaic", help_format="markdown") lsm.command(mosaic) @@ -31,13 +32,9 @@ @mosaic.default def convert( inp: str, - out: str = None, *, - chunk: int = 128, - compressor: str = "blosc", - compressor_opt: str = "{}", + zarr_config: ZarrConfig, max_load: int = 512, - nii: bool = False, orientation: str = "coronal", center: bool = True, thickness: float | None = None, @@ -72,18 +69,8 @@ def convert( Path to the root directory, which contains a collection of subfolders named `*_z{:02d}_y{:02d}*`, each containing a collection of files named `*_plane{:03d}_c{:d}.tiff`. - out - Path to the output Zarr directory [.ome.zarr] - chunk - Output chunk size - compressor : {blosc, zlib, raw} - Compression method - compressor_opt - Compression options max_load Maximum input chunk size when building pyramid - nii - Convert to nifti-zarr. True if path ends in ".nii.zarr". orientation Orientation of the slice center @@ -91,6 +78,12 @@ def convert( voxel_size Voxel size along the X, Y and Z dimension, in micron. """ + out: str = zarr_config.out + chunk: int = zarr_config.chunk[0] + compressor: str = zarr_config.compressor + compressor_opt: str = zarr_config.compressor_opt + nii: bool = zarr_config.nii + if isinstance(compressor_opt, str): compressor_opt = ast.literal_eval(compressor_opt) diff --git a/linc_convert/modalities/psoct/multi_slice.py b/linc_convert/modalities/psoct/multi_slice.py index ca787cd1..08911e92 100644 --- a/linc_convert/modalities/psoct/multi_slice.py +++ b/linc_convert/modalities/psoct/multi_slice.py @@ -30,7 +30,8 @@ from linc_convert.utils.math import ceildiv from linc_convert.utils.orientation import center_affine, orientation_to_affine from linc_convert.utils.unit import to_nifti_unit, to_ome_unit -from linc_convert.utils.zarr import make_compressor +from linc_convert.utils.zarr.compressor import make_compressor +from linc_convert.utils.zarr.zarr_config import ZarrConfig, _ZarrConfig multi_slice = cyclopts.App(name="multi_slice", help_format="markdown") psoct.command(multi_slice) @@ -40,13 +41,12 @@ def _automap(func: Callable) -> Callable: """Automatically maps the array in the mat file.""" @wraps(func) - def wrapper(inp: list[str], out: str = None, **kwargs: dict) -> callable: - if out is None: - out = os.path.splitext(inp[0])[0] - out += ".nii.zarr" if kwargs.get("nii", False) else ".ome.zarr" - kwargs["nii"] = kwargs.get("nii", False) or out.endswith(".nii.zarr") + def wrapper(inp: list[str], zarr_config: _ZarrConfig, **kwargs: dict) -> callable: + if zarr_config.out is None: + zarr_config.out = os.path.splitext(inp[0])[0] + zarr_config.out += ".nii.zarr" if zarr_config.nii else ".ome.zarr" dat = _mapmat(inp, kwargs.get("key", None)) - return func(dat, out, **kwargs) + return func(dat, zarr_config=zarr_config, **kwargs) return wrapper @@ -163,17 +163,13 @@ def make_wrapper(fname: str) -> callable: @_automap def convert( inp: list[str], - out: Optional[str] = None, *, + zarr_config: ZarrConfig, key: Optional[str] = None, meta: str = None, - chunk: int = 128, - compressor: str = "blosc", - compressor_opt: str = "{}", max_load: int = 128, max_levels: int = 5, no_pool: Optional[int] = None, - nii: bool = False, orientation: str = "RAS", center: bool = True, dtype: str | None = None, @@ -192,26 +188,16 @@ def convert( ---------- inp Path to the input mat file - out - Path to the output Zarr directory [.ome.zarr] key Key of the array to be extracted, default to first key found meta Path to the metadata file - chunk - Output chunk size - compressor : {blosc, zlib, raw} - Compression method - compressor_opt - Compression options max_load Maximum input chunk size max_levels Maximum number of pyramid levels no_pool Index of dimension to not pool when building pyramid. - nii - Convert to nifti-zarr. True if path ends in ".nii.zarr" orientation Orientation of the volume center @@ -219,6 +205,12 @@ def convert( dtype Data type to write into """ + out: str = zarr_config.out + chunk: int = zarr_config.chunk[0] + compressor: str = zarr_config.compressor + compressor_opt: str = zarr_config.compressor_opt + nii: bool = zarr_config.nii + if isinstance(compressor_opt, str): compressor_opt = ast.literal_eval(compressor_opt) diff --git a/linc_convert/modalities/psoct/single_volume.py b/linc_convert/modalities/psoct/single_volume.py index 529f32b4..4b2770f3 100644 --- a/linc_convert/modalities/psoct/single_volume.py +++ b/linc_convert/modalities/psoct/single_volume.py @@ -31,7 +31,8 @@ from linc_convert.utils.math import ceildiv from linc_convert.utils.orientation import center_affine, orientation_to_affine from linc_convert.utils.unit import to_nifti_unit, to_ome_unit -from linc_convert.utils.zarr import make_compressor +from linc_convert.utils.zarr.compressor import make_compressor +from linc_convert.utils.zarr.zarr_config import ZarrConfig, _ZarrConfig single_volume = cyclopts.App(name="single_volume", help_format="markdown") psoct.command(single_volume) @@ -41,13 +42,13 @@ def _automap(func: Callable) -> Callable: """Automatically map the array in the mat file.""" @wraps(func) - def wrapper(inp: str, out: str = None, **kwargs: dict) -> None: - if out is None: - out = os.path.splitext(inp[0])[0] - out += ".nii.zarr" if kwargs.get("nii", False) else ".ome.zarr" - kwargs["nii"] = kwargs.get("nii", False) or out.endswith(".nii.zarr") + def wrapper(inp: str, zarr_config: _ZarrConfig, **kwargs: dict) -> None: + if zarr_config.out is None: + zarr_config.out = os.path.splitext(inp[0])[0] + zarr_config.out += ".nii.zarr" if zarr_config.nii else ".ome.zarr" + # kwargs["nii"] = kwargs.get("nii", False) or out.endswith(".nii.zarr") with _mapmat(inp, kwargs.get("key", None)) as dat: - return func(dat, out, **kwargs) + return func(dat, zarr_config=zarr_config, **kwargs) return wrapper @@ -86,17 +87,13 @@ def _mapmat(fname: str, key: str = None) -> None: @_automap def convert( inp: str, - out: Optional[str] = None, *, + zarr_config: ZarrConfig, key: Optional[str] = None, meta: str = None, - chunk: int = 128, - compressor: str = "blosc", - compressor_opt: str = "{}", max_load: int = 128, max_levels: int = 5, no_pool: Optional[int] = None, - nii: bool = False, orientation: str = "RAS", center: bool = True, ) -> None: @@ -135,6 +132,12 @@ def convert( center Set RAS[0, 0, 0] at FOV center """ + out: str = zarr_config.out + chunk: int = zarr_config.chunk + compressor: str = zarr_config.compressor + compressor_opt: str = zarr_config.compressor_opt + nii: bool = zarr_config.nii + if isinstance(compressor_opt, str): compressor_opt = ast.literal_eval(compressor_opt) diff --git a/linc_convert/modalities/wk/webknossos_annotation.py b/linc_convert/modalities/wk/webknossos_annotation.py index 33f7b0dd..ba4b6581 100644 --- a/linc_convert/modalities/wk/webknossos_annotation.py +++ b/linc_convert/modalities/wk/webknossos_annotation.py @@ -16,7 +16,7 @@ # internals from linc_convert.modalities.wk.cli import wk from linc_convert.utils.math import ceildiv -from linc_convert.utils.zarr import make_compressor +from linc_convert.utils.zarr.compressor import make_compressor webknossos = cyclopts.App(name="webknossos", help_format="markdown") wk.command(webknossos) diff --git a/linc_convert/utils/zarr/__init__.py b/linc_convert/utils/zarr/__init__.py new file mode 100644 index 00000000..f1882e95 --- /dev/null +++ b/linc_convert/utils/zarr/__init__.py @@ -0,0 +1 @@ +"""Zarr utilities.""" diff --git a/linc_convert/utils/zarr.py b/linc_convert/utils/zarr/compressor.py similarity index 93% rename from linc_convert/utils/zarr.py rename to linc_convert/utils/zarr/compressor.py index a2c031cb..76531bcd 100644 --- a/linc_convert/utils/zarr.py +++ b/linc_convert/utils/zarr/compressor.py @@ -1,4 +1,4 @@ -"""Zarr utilities.""" +"""Functions for zarr compression.""" import numcodecs import numcodecs.abc diff --git a/linc_convert/utils/zarr/zarr_config.py b/linc_convert/utils/zarr/zarr_config.py new file mode 100644 index 00000000..68578c87 --- /dev/null +++ b/linc_convert/utils/zarr/zarr_config.py @@ -0,0 +1,55 @@ +"""Configuration related to output Zarr Archive.""" + +from dataclasses import dataclass +from typing import Annotated, Literal, Optional + +from cyclopts import Parameter + + +@dataclass +class _ZarrConfig: + """ + Configuration related to output Zarr Archive. + + Parameters + ---------- + out + Path to the output Zarr directory [.ome.zarr] + chunk + Output chunk size. + Behavior depends on the number of values provided: + * one: used for all spatial dimensions + * three: used for spatial dimensions ([z, y, x]) + * four: used for channels and spatial dimensions ([c, z, y, x]) + shard + Output shard size. + If `"auto"`, find shard size that ensures files smaller than 2TB, + assuming a compression ratio or 2. + version + Zarr version to use. If `shard` is used, 3 is required. + compressor : {blosc, zlib|gzip, raw} + Compression method + compressor_opt + Compression options + nii + Convert to nifti-zarr. True if path ends in ".nii.zarr". + driver : {"zarr-python", "tensorstore", "zarrita"} + library used for Zarr IO Operation + + """ + + out: Optional[str] = None + chunk: tuple[int] = (128,) + shard: list[int | str] | None = None + version: Literal[2, 3] = 3 + compressor: str = "blosc" + compressor_opt: str = "{}" + nii: bool = False + driver: Literal["zarr-python", "tensorstore", "zarrita"] = "zarr-python" + + def __post_init__(self) -> None: + print(self) + self.nii |= self.out.endswith(".nii.zarr") + + +ZarrConfig = Annotated[_ZarrConfig, Parameter(name="*")] diff --git a/pyproject.toml b/pyproject.toml index 977e5843..a703051c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ packages = [{include = "linc_convert"}] [tool.poetry.dependencies] python = "^3.10" -cyclopts = "^2.0.0" +cyclopts = "^3.0.0" numpy = "*" nibabel = "*" zarr = "^2.0.0" diff --git a/tests/test_df.py b/tests/test_df.py index bcac8077..692dca99 100644 --- a/tests/test_df.py +++ b/tests/test_df.py @@ -7,6 +7,7 @@ from helper import _cmp_zarr_archives from linc_convert.modalities.df import multi_slice +from linc_convert.utils.zarr.zarr_config import _ZarrConfig def _write_test_data(directory: str) -> None: @@ -25,5 +26,5 @@ def test_df(tmp_path): output_zarr = tmp_path / "output.zarr" files = glob.glob(os.path.join(tmp_path, "*.jp2")) files.sort() - multi_slice.convert(files, str(output_zarr)) + multi_slice.convert(files, zarr_config=_ZarrConfig(str(output_zarr))) assert _cmp_zarr_archives(str(output_zarr), "data/df.zarr.zip") diff --git a/tests/test_lsm.py b/tests/test_lsm.py index 601da020..7f05752b 100644 --- a/tests/test_lsm.py +++ b/tests/test_lsm.py @@ -5,6 +5,7 @@ from helper import _cmp_zarr_archives from linc_convert.modalities.lsm import mosaic +from linc_convert.utils.zarr.zarr_config import _ZarrConfig def _write_test_data(directory: str) -> None: @@ -24,5 +25,5 @@ def _write_test_data(directory: str) -> None: def test_lsm(tmp_path): _write_test_data(tmp_path) output_zarr = tmp_path / "output.zarr" - mosaic.convert(str(tmp_path), str(output_zarr)) + mosaic.convert(str(tmp_path), zarr_config=_ZarrConfig(str(output_zarr))) assert _cmp_zarr_archives(str(output_zarr), "data/lsm.zarr.zip") diff --git a/tests/test_wk.py b/tests/test_wk.py index f4c3704a..31294af2 100644 --- a/tests/test_wk.py +++ b/tests/test_wk.py @@ -1,4 +1,3 @@ - import os import numpy as np @@ -10,95 +9,92 @@ def _write_test_data(directory: str) -> None: - wkw_dir = f'{directory}/wkw' - ome_dir = f'{directory}/ome' + wkw_dir = f"{directory}/wkw" + ome_dir = f"{directory}/ome" store = zarr.storage.DirectoryStore(ome_dir) omz = zarr.group(store=store, overwrite=True) for level in range(5): - size = 2**(4-level) + size = 2 ** (4 - level) wkw_array = np.zeros((size, size, 5), dtype=np.uint8) ome_array = np.zeros((1, 5, size, size), dtype=np.uint8) - wkw_filepath = os.path.join(wkw_dir, get_mask_name(level)) + wkw_filepath = os.path.join(wkw_dir, get_mask_name(level)) with wkw.Dataset.create(wkw_filepath, wkw.Header(np.uint8)) as dataset: dataset.write((0, 0, 0), wkw_array) - omz.create_dataset(f'{level}', shape=[1, 5, size, size]) - array = omz[f'{level}'] - array[...] = ome_array + omz.create_dataset(f"{level}", shape=[1, 5, size, size]) + array = omz[f"{level}"] + array[...] = ome_array - multiscales = [{ - 'version': '0.4', - 'axes': [ + multiscales = [ + { + "version": "0.4", + "axes": [ {"name": "c", "type": "space", "unit": "millimeter"}, {"name": "z", "type": "space", "unit": "millimeter"}, {"name": "y", "type": "space", "unit": "micrometer"}, - {"name": "x", "type": "space", "unit": "micrometer"} + {"name": "x", "type": "space", "unit": "micrometer"}, ], - 'datasets': [], - 'type': 'jpeg2000', - 'name': '', - }] + "datasets": [], + "type": "jpeg2000", + "name": "", + } + ] for n in range(5): - multiscales[0]['datasets'].append({}) - level = multiscales[0]['datasets'][-1] + multiscales[0]["datasets"].append({}) + level = multiscales[0]["datasets"][-1] level["path"] = str(n) level["coordinateTransformations"] = [ { "type": "scale", "scale": [ - 1.0, - 1.0, + 1.0, + 1.0, float(2**n), float(2**n), - ] + ], }, { "type": "translation", "translation": [ - 0.0, - 0.0, - float(2**n - 1) *0.5, - float(2**n - 1) *0.5, - ] - } + 0.0, + 0.0, + float(2**n - 1) * 0.5, + float(2**n - 1) * 0.5, + ], + }, ] omz.attrs["multiscales"] = multiscales - - def test_wk(tmp_path): _write_test_data(tmp_path) wkw_dir = str(tmp_path / "wkw") ome_dir = str(tmp_path / "ome") - basename = os.path.basename(ome_dir)[:-9] - initials = wkw_dir.split('/')[-2][:2] - output_zarr = os.path.join(tmp_path, basename + '_dsec_' + initials + '.ome.zarr') + basename = os.path.basename(ome_dir)[:-9] + initials = wkw_dir.split("/")[-2][:2] + output_zarr = os.path.join(tmp_path, basename + "_dsec_" + initials + ".ome.zarr") print("starting the convert process") webknossos_annotation.convert(wkw_dir, ome_dir, tmp_path, "{}") - z = zarr.open(output_zarr, mode='r') + z = zarr.open(output_zarr, mode="r") for level in range(5): print("output_zarr has", np.shape(z[level]), np.unique(z[level])) - z = zarr.open('data/wk.zarr.zip', mode='r') + z = zarr.open("data/wk.zarr.zip", mode="r") for level in range(5): print("trusted result has", np.shape(z[level]), np.unique(z[level])) - assert _cmp_zarr_archives(str(output_zarr), "data/wk.zarr.zip") - def get_mask_name(level): if level == 0: - return '1' + return "1" else: - return f'{2**level}-{2**level}-1' - + return f"{2**level}-{2**level}-1"