Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixing Chunking issues with codec #461

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 36 additions & 6 deletions recOrder/cli/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,18 @@
from iohub.ngff_meta import TransformationMeta
from numpy.typing import DTypeLike

# Upper bound, in bytes, on the size of a single chunk: create_empty_hcs_zarr
# recomputes the chunking when the requested chunks exceed it and raises if
# the result still does. The value equals 2**31 - 1 (largest signed 32-bit int).
# NOTE(review): presumably the compression codec's buffer-size limit — confirm.
CODEC_MAX_BYTES = 2147483647
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Magic numbers should either be derived from an expression or accompanied by a comment. For long numbers, the digits should be grouped with underscores, e.g. 2_147_483_647.



def create_empty_hcs_zarr(
store_path: Path,
position_keys: list[Tuple[str]],
shape: Tuple[int],
chunks: Tuple[int],
scale: Tuple[float],
channel_names: list[str],
dtype: DTypeLike,
chunks: Tuple[int] = None,
max_chunk_size_bytes= 500e6,
) -> None:
"""If the plate does not exist, create an empty zarr plate.

Expand All @@ -26,23 +29,50 @@ def create_empty_hcs_zarr(
Parameters
----------
store_path : Path
hcs plate path
The path to the hcs plate.
position_keys : list[Tuple[str]]
Position keys, will append if not present in the plate.
e.g. [("A", "1", "0"), ("A", "1", "1")]
The position keys to append if not present in the plate.
Example: [("A", "1", "0"), ("A", "1", "1")]
shape : Tuple[int]
chunks : Tuple[int]
The shape of the plate.
scale : Tuple[float]
The scale of the plate.
channel_names : list[str]
Channel names, will append if not present in metadata.
The channel names to append if not present in the metadata.
dtype : DTypeLike
The data type of the plate.
chunks : Tuple[int], optional
The chunk size of the plate (ZYX). If None, it will be calculated based on the shape (ZYX) and max_chunk_size_bytes, by default None.
max_chunk_size_bytes : float, optional
The maximum chunk size in bytes, by default 500e6.
"""

# Create plate
output_plate = open_ome_zarr(
str(store_path), layout="hcs", mode="a", channel_names=channel_names
)

bytes_per_pixel = np.dtype(dtype).itemsize

# Limiting the chunking to max_chunk_size_bytes and CODEC_MAX_BYTES
if chunks is None or np.prod(chunks) * bytes_per_pixel > CODEC_MAX_BYTES:
chunk_zyx_shape = list(shape[-3:])
# chunk_zyx_shape[-3] > 1 ensures while loop will not stall if single
# XY image is larger than max_chunk_size_bytes
while (
chunk_zyx_shape[-3] > 1
and np.prod(chunk_zyx_shape) * bytes_per_pixel > max_chunk_size_bytes
):
chunk_zyx_shape[-3] = np.ceil(chunk_zyx_shape[-3] / 2).astype(int)
chunk_zyx_shape = tuple(chunk_zyx_shape)
chunks = 2 * (1,) + chunk_zyx_shape

# Raise warning if chunks are too large
if np.prod(chunks) * bytes_per_pixel > CODEC_MAX_BYTES:
raise Warning(
f"Chunks size is too large. Chunks size < {CODEC_MAX_BYTES} bytes. Changing chunks to {chunks}"
)

# Create positions
for position_key in position_keys:
position_key_string = "/".join(position_key)
Expand Down
52 changes: 43 additions & 9 deletions recOrder/tests/util_tests/test_create_empty.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,21 @@ def test_create_empty_hcs_zarr():
("A", "0", "3"),
("B", "10", "4"),
]
shape = (1, 2, 1, 1024, 1024)

shape = (1, 2, 100, 1024, 1024)
chunks = (1, 1, 1, 256, 256)
scale = (1, 1, 1, 0.5, 0.5)
channel_names = ["Channel1", "Channel2"]
dtype = np.uint16

create_empty_hcs_zarr(
store_path, position_keys, shape, chunks, scale, channel_names, dtype
store_path=store_path,
position_keys=position_keys,
shape=shape,
chunks=chunks,
scale=scale,
channel_names=channel_names,
dtype=dtype,
)

# Verify existence of positions and channels
Expand All @@ -32,13 +39,13 @@ def test_create_empty_hcs_zarr():
# Repeat creation should not fail
more_channel_names = ["Channel3"]
create_empty_hcs_zarr(
store_path,
position_keys,
shape,
chunks,
scale,
more_channel_names,
dtype,
store_path=store_path,
position_keys=position_keys,
shape=shape,
chunks=chunks,
scale=scale,
channel_names=more_channel_names,
dtype=dtype,
)

# Verify existence of appended channel names
Expand All @@ -49,3 +56,30 @@ def test_create_empty_hcs_zarr():
position_path /= element
with open_ome_zarr(position_path, mode="r") as position:
assert position.channel_names == channel_names

# Creation with larger chunks should not fail
store_path = Path("./test_store3.zarr")

# Target size in bytes (2,147,483,648 bytes = 2**31 bytes = 2 GiB)
target_size_bytes = 2147483648

# Size of each element in bytes
element_size_bytes = np.uint16().itemsize

# Calculate the total number of elements needed
total_elements = target_size_bytes // element_size_bytes

# Find the cube root of the total number of elements to get one dimension
one_dimension = int(round(total_elements ** (1 / 3)))

# Chunk > target_size_bytes
chunks = (1, 1, one_dimension + 10, one_dimension, one_dimension)
create_empty_hcs_zarr(
store_path=store_path,
position_keys=position_keys,
shape=shape,
chunks=chunks,
scale=scale,
channel_names=channel_names,
dtype=dtype,
)