Skip to content

Commit

Permalink
Merge pull request #898 from fractal-analytics-platform/897_chunking_…
Browse files Browse the repository at this point in the history
…params

Add parameter for chunk sizes in converter & set default Z chunking to 10
  • Loading branch information
jluethi authored Jan 14, 2025
2 parents 4fb7463 + 11e3fac commit 7c297bc
Show file tree
Hide file tree
Showing 5 changed files with 203 additions and 1 deletion.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@

* Tasks:
* Remove overlap checking for output ROIs in Cellpose task to address performance issues (\#889).
* Expose chunking options in Cellvoyager OME-Zarr converter & set default Z chunking to 10 (\#898).
* Library:
* Expose kwargs for build_pyramid (\#895).
* Testing:
* Fix issues with coverage report listing `.venv` Python modules for other dependencies (\#892).
* Add `persist-credentials: false` to all `actions/checkout@v4` GitHub Action steps (\#893).
Expand Down
68 changes: 68 additions & 0 deletions fractal_tasks_core/__FRACTAL_MANIFEST__.json
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,35 @@
},
"args_schema_parallel": {
"$defs": {
"ChunkSizes": {
"description": "Chunk size settings for OME-Zarrs.",
"properties": {
"t": {
"title": "T",
"type": "integer"
},
"c": {
"default": 1,
"title": "C",
"type": "integer"
},
"z": {
"default": 10,
"title": "Z",
"type": "integer"
},
"y": {
"title": "Y",
"type": "integer"
},
"x": {
"title": "X",
"type": "integer"
}
},
"title": "ChunkSizes",
"type": "object"
},
"InitArgsCellVoyager": {
"description": "Arguments to be passed from cellvoyager converter init to compute",
"properties": {
Expand Down Expand Up @@ -260,6 +289,11 @@
"$ref": "#/$defs/InitArgsCellVoyager",
"title": "Init Args",
"description": "Initialization arguments provided by `create_cellvoyager_ome_zarr_init`."
},
"chunk_sizes": {
"$ref": "#/$defs/ChunkSizes",
"title": "Chunk Sizes",
"description": "Used to overwrite the default chunk sizes for the OME-Zarr. By default, the task will chunk the same as the microscope field of view size, with 10 z planes per chunk. For example, that can mean c: 1, z: 10, y: 2160, x: 2560."
}
},
"required": [
Expand Down Expand Up @@ -489,6 +523,35 @@
},
"args_schema_parallel": {
"$defs": {
"ChunkSizes": {
"description": "Chunk size settings for OME-Zarrs.",
"properties": {
"t": {
"title": "T",
"type": "integer"
},
"c": {
"default": 1,
"title": "C",
"type": "integer"
},
"z": {
"default": 10,
"title": "Z",
"type": "integer"
},
"y": {
"title": "Y",
"type": "integer"
},
"x": {
"title": "X",
"type": "integer"
}
},
"title": "ChunkSizes",
"type": "object"
},
"InitArgsCellVoyager": {
"description": "Arguments to be passed from cellvoyager converter init to compute",
"properties": {
Expand Down Expand Up @@ -548,6 +611,11 @@
"$ref": "#/$defs/InitArgsCellVoyager",
"title": "Init Args",
"description": "Initialization arguments provided by `create_cellvoyager_ome_zarr_init`."
},
"chunk_sizes": {
"$ref": "#/$defs/ChunkSizes",
"title": "Chunk Sizes",
"description": "Used to overwrite the default chunk sizes for the OME-Zarr. By default, the task will chunk the same as the microscope field of view size, with 10 z planes per chunk. For example, that can mean c: 1, z: 10, y: 2160, x: 2560."
}
},
"required": [
Expand Down
16 changes: 15 additions & 1 deletion fractal_tasks_core/tasks/cellvoyager_to_ome_zarr_compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import zarr
from anndata import read_zarr
from dask.array.image import imread
from pydantic import Field
from pydantic import validate_call

from fractal_tasks_core.cellvoyager.filenames import (
Expand All @@ -32,6 +33,7 @@
from fractal_tasks_core.roi import (
convert_ROI_table_to_indices,
)
from fractal_tasks_core.tasks.io_models import ChunkSizes
from fractal_tasks_core.tasks.io_models import InitArgsCellVoyager


Expand Down Expand Up @@ -59,6 +61,7 @@ def cellvoyager_to_ome_zarr_compute(
# Fractal parameters
zarr_url: str,
init_args: InitArgsCellVoyager,
chunk_sizes: ChunkSizes = Field(default_factory=ChunkSizes),
):
"""
Convert Yokogawa output (png, tif) to zarr file.
Expand All @@ -76,6 +79,10 @@ def cellvoyager_to_ome_zarr_compute(
(standard argument for Fractal tasks, managed by Fractal server).
init_args: Initialization arguments provided by
`create_cellvoyager_ome_zarr_init`.
chunk_sizes: Used to overwrite the default chunk sizes for the
OME-Zarr. By default, the task will chunk the same as the
microscope field of view size, with 10 z planes per chunk.
For example, that can mean c: 1, z: 10, y: 2160, x: 2560.
"""
zarr_url = zarr_url.rstrip("/")
# Read attributes from NGFF metadata
Expand Down Expand Up @@ -134,7 +141,14 @@ def cellvoyager_to_ome_zarr_compute(
sample = imread(tmp_images.pop())

# Initialize zarr
chunksize = (1, 1, sample.shape[1], sample.shape[2])
chunksize_default = {
"c": 1,
"z": 10,
"y": sample.shape[1],
"x": sample.shape[2],
}
chunksize = chunk_sizes.get_chunksize(chunksize_default=chunksize_default)
# chunksize["z"] =
canvas_zarr = zarr.create(
shape=(len(wavelength_ids), max_z, max_y, max_x),
chunks=chunksize,
Expand Down
55 changes: 55 additions & 0 deletions fractal_tasks_core/tasks/io_models.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from typing import Dict
from typing import Literal
from typing import Optional
from typing import Tuple

from pydantic import BaseModel
from pydantic import Field
Expand Down Expand Up @@ -183,3 +185,56 @@ def channel_is_present(self: Self) -> Self:
f"Input item has type={_type} but channel={channel}."
)
return self


class ChunkSizes(BaseModel):
    """
    Chunk size settings for OME-Zarrs.

    Attributes:
        t: Chunk size of time axis.
        c: Chunk size of channel axis.
        z: Chunk size of Z axis.
        y: Chunk size of y axis.
        x: Chunk size of x axis.
    """

    t: Optional[int] = None
    c: Optional[int] = 1
    z: Optional[int] = 10
    y: Optional[int] = None
    x: Optional[int] = None

    def get_chunksize(
        self, chunksize_default: Optional[Dict[str, int]] = None
    ) -> Tuple[int, ...]:
        """
        Merge per-axis defaults with the values stored on this instance.

        Every attribute of this instance that is not `None` takes
        precedence over the corresponding entry of `chunksize_default`.
        Axes that end up with no value from either source are left out of
        the result, so the returned tuple may have fewer than five items.

        Args:
            chunksize_default: Optional mapping from axis name (`t`, `c`,
                `z`, `y`, `x`) to a fallback chunk size.

        Returns:
            The resolved chunk sizes, ordered as t, c, z, y, x.

        Raises:
            ValueError: If `chunksize_default` contains a key that is not
                one of the five axis names.
        """
        axis_order = ["t", "c", "z", "y", "x"]
        allowed = {"t", "c", "z", "y", "x"}

        resolved = {}
        if chunksize_default:
            unknown = set(chunksize_default.keys()) - allowed
            if unknown:
                raise ValueError(
                    f"Invalid keys in chunksize_default: {unknown}. "
                    f"Only {allowed} are allowed."
                )
            # All keys are known axis names at this point, so the defaults
            # can be copied over as they are.
            resolved.update(chunksize_default)

        # Explicit (non-None) values on the model win over the defaults.
        for axis in axis_order:
            own_value = getattr(self, axis)
            if own_value is not None:
                resolved[axis] = own_value

        # Emit in canonical tczyx order, skipping axes without a value.
        return tuple(resolved[axis] for axis in axis_order if axis in resolved)
62 changes: 62 additions & 0 deletions tests/tasks/test_unit_chunksizes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import pytest

from fractal_tasks_core.tasks.io_models import ChunkSizes


def test_valid_chunksize_default():
    """Defaults lacking a z entry are completed by the model's own z."""
    defaults = {"t": 10, "c": 1, "y": 2160, "x": 2560}
    sizes = ChunkSizes(t=5, c=2)
    # z is absent from the defaults, so the untouched ChunkSizes value
    # (z = 10) is what ends up in the result.
    expected = (5, 2, 10, 2160, 2560)
    assert sizes.get_chunksize(defaults) == expected


def test_chunksize_default_with_overrides():
    """Explicit ChunkSizes values replace defaults; None falls back."""
    defaults = {"t": 10, "c": 1, "z": 15, "y": 2160, "x": 2560}
    sizes = ChunkSizes(t=5, c=None, z=20)
    # t and z come from the model, c (None on the model) from the defaults.
    expected = (5, 1, 20, 2160, 2560)
    assert sizes.get_chunksize(defaults) == expected


def test_chunksize_default_with_extra_keys():
    """A key outside of t/c/z/y/x in the defaults raises a ValueError."""
    defaults = {"a": 100, "c": 1, "x": 2560}
    sizes = ChunkSizes(t=5, c=2)
    expected_message = "Invalid keys in chunksize_default: {'a'}"
    with pytest.raises(ValueError, match=expected_message):
        sizes.get_chunksize(defaults)


def test_chunksize_empty_default():
    """Without defaults, only the values held by ChunkSizes are returned."""
    sizes = ChunkSizes(t=5, c=2)
    # y and x have no value anywhere, so they are omitted from the tuple.
    assert sizes.get_chunksize() == (5, 2, 10)


def test_chunksize_empty_chunksizes():
    """With every ChunkSizes field None, the defaults are used as given."""
    defaults = {"c": 1, "z": 64}
    sizes = ChunkSizes(c=None, z=None)
    assert sizes.get_chunksize(defaults) == (1, 64)


def test_chunksize_default_with_empty_chunksize():
    """No defaults and no ChunkSizes values give an empty tuple."""
    sizes = ChunkSizes(c=None, z=None)
    assert sizes.get_chunksize() == ()


def test_partial_chunksize_default():
    """Axes missing from both the defaults and ChunkSizes are skipped."""
    defaults = {"z": 10, "y": 2160}
    sizes = ChunkSizes(t=5, c=None)
    # c is None on the model and absent from the defaults; x is unset
    # everywhere. Both are dropped from the result.
    assert sizes.get_chunksize(defaults) == (5, 10, 2160)

0 comments on commit 7c297bc

Please sign in to comment.