From 0f537bbad0feaabe197e354426167a73022691c9 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Wed, 20 Dec 2023 14:14:29 -0500 Subject: [PATCH 01/36] Initial method setup --- .../zarr/large_image_source_zarr/__init__.py | 145 +++++++++++++++++- test/test_sink.py | 9 +- 2 files changed, 148 insertions(+), 6 deletions(-) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index 39bfb4f8f..f120467bf 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -1,6 +1,9 @@ import math import os import threading +import uuid +from pathlib import Path + from importlib.metadata import PackageNotFoundError from importlib.metadata import version as _importlib_version @@ -10,10 +13,10 @@ import large_image from large_image.cache_util import LruCacheMetaclass, methodcache -from large_image.constants import TILE_FORMAT_NUMPY, SourcePriority +from large_image.constants import NEW_IMAGE_PATH_FLAG, TILE_FORMAT_NUMPY, SourcePriority from large_image.exceptions import TileSourceError, TileSourceFileNotFoundError from large_image.tilesource import FileTileSource -from large_image.tilesource.utilities import nearPowerOfTwo +from large_image.tilesource.utilities import _imageToNumpy, nearPowerOfTwo try: __version__ = _importlib_version(__name__) @@ -52,6 +55,8 @@ def __init__(self, path, **kwargs): """ super().__init__(path, **kwargs) + if str(path).startswith(NEW_IMAGE_PATH_FLAG): + return self._initNew(path, **kwargs) self._largeImagePath = str(self._getLargeImagePath()) self._zarr = None if not os.path.isfile(self._largeImagePath) and '//:' not in self._largeImagePath: @@ -81,6 +86,42 @@ def __init__(self, path, **kwargs): raise TileSourceError(msg) self._tileLock = threading.RLock() + def _initNew(self, path, **kwargs): + """ + Initialize the tile class for creating a new image. + """ + self._zarr_store = zarr.SQLiteStore(path) + self._zarr = zarr.open(self._zarr_store, mode='w') + # Make unpickleable + self._unpickleable = True + self._largeImagePath = None + self._image = None + self.sizeX = self.sizeY = self.levels = 0 + self.tileWidth = self.tileHeight = self._tileSize + self._frames = [0] + self._cacheValue = str(uuid.uuid4()) + self._output = None + self._editable = True + self._bandRanges = None + self._addLock = threading.RLock() + self._framecount = 0 + self._mm_x = 0 + self._mm_y = 0 + + def __del__(self): + try: + self._zarr.close() + except: + pass + + def _checkEditable(self): + """ + Raise an exception if this is not an editable image. + """ + if not self._editable: + msg = 'Not an editable image' + raise TileSourceError(msg) + def _getGeneralAxes(self, arr): """ Examine a zarr array an guess what the axes are. We assume the two @@ -396,6 +437,8 @@ def _getAssociatedImage(self, imageKey): @methodcache() def getTile(self, x, y, z, pilImageAllowed=False, numpyAllowed=False, **kwargs): + # if self._image is None then call _validateZarr + frame = self._getFrame(**kwargs) self._xyzInRange(x, y, z, frame, self._framecount) x0, y0, x1, y1, step = self._xyzToCorners(x, y, z) @@ -438,6 +481,96 @@ def getTile(self, x, y, z, pilImageAllowed=False, numpyAllowed=False, **kwargs): return self._outputTile(tile, TILE_FORMAT_NUMPY, x, y, z, pilImageAllowed, numpyAllowed, **kwargs) + def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): + """ + Add a numpy or image tile to the image, expanding the image as needed + to accommodate it. Note that x and y can be negative. If so, the + output image (and internal memory access of the image) will act as if + the 0, 0 point is the most negative position. Cropping is applied + after this offset. + + :param tile: a numpy array, PIL Image, or a binary string + with an image. The numpy array can have 2 or 3 dimensions. + :param x: location in destination for upper-left corner. + :param y: location in destination for upper-left corner. + :param mask: a 2-d numpy array (or 3-d if the last dimension is 1). + If specified, areas where the mask is false will not be altered. + :param axes: a string or list of strings specifying the names of axes + in the same order as the tile dimensions + :param kwargs: start locations for any additional axes + """ + + # default for axes='YXS' for 3d tile, 'YX' for 2d, + # call lower on axis names + + self._checkEditable() + tile, mode = _imageToNumpy(tile) + tile = tile.astype(float) + # interpretation = interpretation or mode + + # set self._image = None + + # print(self._zarr_store) + self._zarr.array('root', tile, overwrite=True) + # root = self._zarr.require_dataset('root', []) + # root = tile + # print(root) + + # if axes is None: + # axes = {} + # axes['x'] = x + # axes['y'] = y + + # find new zarr shape + # reverse_tile_shape = tile.shape.reverse() + # reverse_root_shape = self._zarr['root'].shape.reverse() + # new_root_shape = [ + # s for s in reverse_root_shape + # ] + # print(axes, tile.shape, self._zarr['root'].shape, new_root_shape) + + + + # TODO: with self._addLock: + # self._updateBandRanges(tile) + if mask is not None: + # TODO: apply mask + pass + + def write( + self, + path, + lossy=True, + alpha=True, + overwriteAllowed=True, + zarr_kwargs=None, + ): + """ + Output the current image to a file. + + :param path: output path. + :param lossy: if false, emit a lossless file. + :param alpha: True if an alpha channel is allowed. + :param overwriteAllowed: if False, raise an exception if the output + path exists. + :param zarr_kwargs: if not None, save the image using these kwargs to + the write_to_file function instead of the automatically chosen + ones. In this case, lossy is ignored and all zarr options must be + manually specified. + """ + if not overwriteAllowed and os.path.exists(path): + raise TileSourceError('Output path exists (%s)' % str(path)) + if Path(path).suffix.lower() != '.zarr': + raise TileSourceError('Output path must use ".zarr" suffix.') + # TODO: apply cropping + + if zarr_kwargs is None: + zarr_kwargs = dict() + arrays = dict(self._zarr.arrays()) + print(arrays) + if not len(arrays): + raise TileSourceError('No data; cannot write empty zarr.') + zarr.save(path, **arrays) def open(*args, **kwargs): """ @@ -451,3 +584,11 @@ def canRead(*args, **kwargs): Check if an input can be read by the module class. """ return ZarrFileTileSource.canRead(*args, **kwargs) + + +def new(*args, **kwargs): + """ + Create a new image, collecting the results from patches of numpy arrays or + smaller images. + """ + return ZarrFileTileSource(NEW_IMAGE_PATH_FLAG + str(uuid.uuid4()), *args, **kwargs) diff --git a/test/test_sink.py b/test/test_sink.py index 316a2c3ea..5b044b3de 100644 --- a/test/test_sink.py +++ b/test/test_sink.py @@ -5,7 +5,7 @@ import numpy as np import pytest -import large_image +import large_image_source_zarr possible_axes = { 'x': [1, 10], @@ -115,7 +115,7 @@ def frame_with_zeros(data, desired_size, start_location=None): @pytest.mark.parametrize('data_range', possible_data_ranges) def testImageGeneration(data_range): - source = large_image.new() + source = large_image_source_zarr.new() tile_grid = [ int(random.randint(*possible_axes['x'])), int(random.randint(*possible_axes['y'])), @@ -161,11 +161,12 @@ def testImageGeneration(data_range): np.putmask(expected, framed_mask, framed_tile) with tempfile.TemporaryDirectory() as tmp_dir: - # TODO: make destination use mdf5 extension - destination = pathlib.Path(tmp_dir, 'sample.tiff') + destination = pathlib.Path(tmp_dir, 'sample.zarr') source.write(destination, lossy=False) result, _ = source.getRegion(format='numpy') + print(result) + # trim unused space from expected expected = expected[:max_x, :max_y] From 5c6990bb294c72db0a07682003d71e23bdc9e304 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Thu, 4 Jan 2024 11:35:26 -0500 Subject: [PATCH 02/36] Populate addTile method; tests successful up to 3-D --- .../zarr/large_image_source_zarr/__init__.py | 99 ++++++++++++------- 1 file changed, 65 insertions(+), 34 deletions(-) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index f120467bf..a0467fd3d 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -103,6 +103,7 @@ def _initNew(self, path, **kwargs): self._output = None self._editable = True self._bandRanges = None + self._tileLock = threading.RLock() self._addLock = threading.RLock() self._framecount = 0 self._mm_x = 0 @@ -437,7 +438,8 @@ def _getAssociatedImage(self, imageKey): @methodcache() def getTile(self, x, y, z, pilImageAllowed=False, numpyAllowed=False, **kwargs): - # if self._image is None then call _validateZarr + if self._image is None: + self._validateZarr() frame = self._getFrame(**kwargs) self._xyzInRange(x, y, z, frame, self._framecount) @@ -495,47 +497,75 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): :param y: location in destination for upper-left corner. :param mask: a 2-d numpy array (or 3-d if the last dimension is 1). If specified, areas where the mask is false will not be altered. - :param axes: a string or list of strings specifying the names of axes + :param axes: a string or list of strings specifying the names of axes in the same order as the tile dimensions :param kwargs: start locations for any additional axes """ - # default for axes='YXS' for 3d tile, 'YX' for 2d, - # call lower on axis names + placement = { + 'x': x, + 'y': y, + } + if axes is None: + if len(tile.shape) == 2: + axes = 'yx' + elif len(tile.shape) == 3: + axes = 'yxs' + else: + axes = '' + if isinstance(axes, str): + axes = axes.lower() + elif isinstance(axes, list): + axes = [lower(x) for x in axes] + else: + raise ValueError(f'Invalid type for axes. Must be str or list[str].') + + if len(axes) != len(tile.shape): + raise ValueError(f'Invalid value for axes: {axes}. \ + Length {len(axes)} does not match number of tile dimensions {len(tile.shape)}.') self._checkEditable() tile, mode = _imageToNumpy(tile) tile = tile.astype(float) - # interpretation = interpretation or mode - - # set self._image = None - - # print(self._zarr_store) - self._zarr.array('root', tile, overwrite=True) - # root = self._zarr.require_dataset('root', []) - # root = tile - # print(root) - - # if axes is None: - # axes = {} - # axes['x'] = x - # axes['y'] = y - - # find new zarr shape - # reverse_tile_shape = tile.shape.reverse() - # reverse_root_shape = self._zarr['root'].shape.reverse() - # new_root_shape = [ - # s for s in reverse_root_shape - # ] - # print(axes, tile.shape, self._zarr['root'].shape, new_root_shape) + self._image = None + current_arrays = dict(self._zarr.arrays()) + if 'root' in current_arrays: + root = current_arrays['root'] + else: + root = self._zarr.create_dataset('root', data=tile) - - # TODO: with self._addLock: - # self._updateBandRanges(tile) - if mask is not None: - # TODO: apply mask - pass + new_dims = {a: max(root.shape[i], placement.get(a, 0) + tile.shape[i]) for i, a in enumerate(axes)} + root_data = np.pad( + root, + [(0, d-root.shape[i]) for i, d in enumerate(new_dims.values())], + mode='empty' + ) + root = self._zarr.create_dataset('root', data=root_data, overwrite=True) + + tile_data = np.pad( + tile, + [(placement.get(a, 0), d-placement.get(a, 0)-tile.shape[i]) + for i, (a, d) in enumerate(new_dims.items())], + mode='empty' + ) + + if mask is None: + mask = np.ones(tile.shape[:-1]) + mask_data = np.pad( + mask, + [ + (placement.get('y', 0), new_dims['y'] - placement.get('y', 0) - mask.shape[0]), + (placement.get('x', 0), new_dims['x'] - placement.get('x', 0) - mask.shape[1]) + ], + mode='constant', + constant_values=0 + ) + while len(mask_data.shape) < len(root_data.shape): + mask_data = np.expand_dims(mask_data, axis=-1) + mask_data = np.repeat(mask_data, root_data.shape[len(mask_data.shape) - 1], axis=-1) + mask_data = mask_data.astype(bool) + np.copyto(root_data, tile_data, where=mask_data) def write( self, @@ -567,10 +597,11 @@ def write( if zarr_kwargs is None: zarr_kwargs = dict() arrays = dict(self._zarr.arrays()) - print(arrays) if not len(arrays): raise TileSourceError('No data; cannot write empty zarr.') - zarr.save(path, **arrays) + zarr.save(str(path), **arrays) + + self._validateZarr() def open(*args, **kwargs): """ From 29d8b197d2f2e9cab0bb429335f955d0f14a0f49 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Thu, 11 Jan 2024 12:32:32 -0500 Subject: [PATCH 03/36] Use test source to copy 5D images to Zarr sink --- .../zarr/large_image_source_zarr/__init__.py | 127 ++++++---- test/test_sink.py | 227 +++--------------- 2 files changed, 121 insertions(+), 233 deletions(-) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index a0467fd3d..fa6a00975 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -1,11 +1,12 @@ import math import os +import shutil +import tempfile import threading import uuid -from pathlib import Path - from importlib.metadata import PackageNotFoundError from importlib.metadata import version as _importlib_version +from pathlib import Path import numpy as np import packaging.version @@ -90,12 +91,12 @@ def _initNew(self, path, **kwargs): """ Initialize the tile class for creating a new image. """ - self._zarr_store = zarr.SQLiteStore(path) + self._tempfile = tempfile.NamedTemporaryFile(suffix=path) + self._zarr_store = zarr.SQLiteStore(self._tempfile.name) self._zarr = zarr.open(self._zarr_store, mode='w') # Make unpickleable self._unpickleable = True self._largeImagePath = None - self._image = None self.sizeX = self.sizeY = self.levels = 0 self.tileWidth = self.tileHeight = self._tileSize self._frames = [0] @@ -112,7 +113,8 @@ def _initNew(self, path, **kwargs): def __del__(self): try: self._zarr.close() - except: + self._tempfile.close() + except BaseException: pass def _checkEditable(self): @@ -438,7 +440,7 @@ def _getAssociatedImage(self, imageKey): @methodcache() def getTile(self, x, y, z, pilImageAllowed=False, numpyAllowed=False, **kwargs): - if self._image is None: + if self._levels is None: self._validateZarr() frame = self._getFrame(**kwargs) @@ -501,10 +503,19 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): in the same order as the tile dimensions :param kwargs: start locations for any additional axes """ + # If default zarr chunking, adjust chunking (should only happen once) + # min for y and x is 256, max is 2k + # for s, use length of s. + # for any other axes, let zarr determine appropriate chunking + + # check band bookkeeping + + # also don't change dtypes placement = { 'x': x, 'y': y, + **kwargs, } if axes is None: if len(tile.shape) == 2: @@ -516,64 +527,82 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): if isinstance(axes, str): axes = axes.lower() elif isinstance(axes, list): - axes = [lower(x) for x in axes] + axes = [x.lower() for x in axes] else: - raise ValueError(f'Invalid type for axes. Must be str or list[str].') - - if len(axes) != len(tile.shape): - raise ValueError(f'Invalid value for axes: {axes}. \ - Length {len(axes)} does not match number of tile dimensions {len(tile.shape)}.') + err = 'Invalid type for axes. Must be str or list[str].' + raise ValueError(err) self._checkEditable() tile, mode = _imageToNumpy(tile) - tile = tile.astype(float) - self._image = None + self._levels = None # reset zarr validation + + while len(tile.shape) < len(axes): + tile = np.expand_dims(tile, axis=0) current_arrays = dict(self._zarr.arrays()) if 'root' in current_arrays: root = current_arrays['root'] else: root = self._zarr.create_dataset('root', data=tile) - - new_dims = {a: max(root.shape[i], placement.get(a, 0) + tile.shape[i]) for i, a in enumerate(axes)} + + new_dims = { + a: max( + root.shape[i], + placement.get( + a, + 0) + + tile.shape[i]) for i, + a in enumerate(axes)} root_data = np.pad( root, - [(0, d-root.shape[i]) for i, d in enumerate(new_dims.values())], - mode='empty' + [(0, d - root.shape[i]) for i, d in enumerate(new_dims.values())], + mode='empty', ) - root = self._zarr.create_dataset('root', data=root_data, overwrite=True) tile_data = np.pad( tile, - [(placement.get(a, 0), d-placement.get(a, 0)-tile.shape[i]) - for i, (a, d) in enumerate(new_dims.items())], - mode='empty' + [(placement.get(a, 0), d - placement.get(a, 0) - tile.shape[i]) + for i, (a, d) in enumerate(new_dims.items())], + mode='empty', ) if mask is None: mask = np.ones(tile.shape[:-1]) + while len(mask.shape) < len(tile_data.shape): + mask = np.expand_dims(mask, axis=-1) + mask = np.repeat(mask, tile_data.shape[len(mask.shape) - 1], axis=-1) mask_data = np.pad( mask, - [ - (placement.get('y', 0), new_dims['y'] - placement.get('y', 0) - mask.shape[0]), - (placement.get('x', 0), new_dims['x'] - placement.get('x', 0) - mask.shape[1]) - ], + [(placement.get(a, 0), d - placement.get(a, 0) - mask.shape[i]) + for i, (a, d) in enumerate(new_dims.items())], mode='constant', - constant_values=0 + constant_values=0, ) - while len(mask_data.shape) < len(root_data.shape): - mask_data = np.expand_dims(mask_data, axis=-1) - mask_data = np.repeat(mask_data, root_data.shape[len(mask_data.shape) - 1], axis=-1) mask_data = mask_data.astype(bool) np.copyto(root_data, tile_data, where=mask_data) + with self._addLock: + root = self._zarr.create_dataset('root', data=root_data, overwrite=True) + + # Edit OME metadata + self._zarr.attrs.update({ + 'multiscales': [{ + 'version': '0.5-dev', + 'axes': [{ + 'name': a, + 'type': 'space' if a in ['x', 'y'] else 'other', + } for a in axes], + 'datasets': [{'path': 0}], + }], + 'omero': {'version': '0.5-dev'}, + }) + def write( self, path, lossy=True, alpha=True, overwriteAllowed=True, - zarr_kwargs=None, ): """ Output the current image to a file. @@ -583,25 +612,33 @@ def write( :param alpha: True if an alpha channel is allowed. :param overwriteAllowed: if False, raise an exception if the output path exists. - :param zarr_kwargs: if not None, save the image using these kwargs to - the write_to_file function instead of the automatically chosen - ones. In this case, lossy is ignored and all zarr options must be - manually specified. """ if not overwriteAllowed and os.path.exists(path): - raise TileSourceError('Output path exists (%s)' % str(path)) - if Path(path).suffix.lower() != '.zarr': - raise TileSourceError('Output path must use ".zarr" suffix.') + raise TileSourceError('Output path exists (%s).' % str(path)) # TODO: apply cropping - - if zarr_kwargs is None: - zarr_kwargs = dict() - arrays = dict(self._zarr.arrays()) - if not len(arrays): - raise TileSourceError('No data; cannot write empty zarr.') - zarr.save(str(path), **arrays) + # TODO: compute half, quarter, etc. resolutions self._validateZarr() + suffix = Path(path).suffix + if suffix in ['.db', '.sqlite']: + shutil.copy2(self._tempfile.name, path) + + # TODO: copy_store raises TypeError + elif suffix == '.zip': + zip_store = zarr.storage.ZipStore(path) + zarr.copy_store(self._zarr_store, zip_store) + zip_store.close() + elif suffix == '.zarr': + dir_store = zarr.storage.DirectoryStore(path) + zarr.copy_store(self._zarr_store, dir_store) + dir_store.close() + + else: + from large_image_converter import convert + + self._tempfile.flush() + convert(self._tempfile.name, path, overwrite=overwriteAllowed) + def open(*args, **kwargs): """ diff --git a/test/test_sink.py b/test/test_sink.py index 5b044b3de..b928d82f1 100644 --- a/test/test_sink.py +++ b/test/test_sink.py @@ -1,196 +1,47 @@ -import pathlib -import random -import tempfile - -import numpy as np -import pytest - +import large_image_source_test import large_image_source_zarr -possible_axes = { - 'x': [1, 10], - 'y': [1, 10], - 'c': [1, 40], - 'z': [1, 40], - 't': [1, 40], - 'p': [1, 20], - 'q': [1, 20], - 's': [3, 3], -} - -include_axes = { - 'c': False, - 'z': False, - 't': False, - 'p': False, - 'q': False, -} - -possible_data_ranges = [ - [0, 1, 2, float], - [0, 1, 2, np.float16], - [0, 1, 2, np.float32], - [0, 1, 2, np.float64], - [0, 2**8, -1, np.uint8], - [0, 2**8, -1, float], - [0, 2**8, -1, int], - [0, 2**16, -2, np.uint16], - [0, 2**16, -2, float], - [0, 2**32, -4, int], - [-2**7, 2**7, -1, np.int8], - [-2**7, 2**7, -1, float], - [-2**7, 2**7, -1, int], - [-2**15, 2**15, -2, np.int16], - [-2**15, 2**15, -2, float], - [-2**15, 2**15, -2, int], - [-2**31, 2**31, -4, np.int32], - [-2**31, 2**31, -4, float], - [-2**31, 2**31, -4, int], - [-1, 1, 2, float], - [-1, 1, 2, np.float16], - [-1, 1, 2, np.float32], - [-1, 1, 2, np.float64], -] - -max_tile_size = 100 -tile_overlap_ratio = 0.5 - - -# https://stackoverflow.com/questions/18915378/rounding-to-significant-figures-in-numpy -def signif(x, minval, maxval, digits): - if x == 0: - return 0 - return max(min(round(x, digits), max(1, maxval - 1)), minval) - -def get_dims(x, y, s, max=False): - tile_shape = [x, y] - for axis_name, include in include_axes.items(): - if include: - axis_min_max = possible_axes[axis_name] - if max: - tile_shape.append(axis_min_max[1]) - else: - tile_shape.append(random.randint(*axis_min_max)) - # s is last axis - tile_shape.append(s) - return tile_shape - - -def random_tile(data_range): - tile_shape = get_dims( - random.randint(1, max_tile_size), - random.randint(1, max_tile_size), - random.randint(*possible_axes['s']), - include_axes, +def testImageCopy(): + sink = large_image_source_zarr.new() + source = large_image_source_test.TestTileSource( + fractal=True, + maxLevel=4, + tileWidth=128, + tileHeight=128, + sizeX=512, + sizeY=1024, + frames='c=2,z=3', + # bands="red=400-12000,green=0-65535,blue=800-4000, + # ir1=200-24000,ir2=200-22000,gray=100-10000,other=0-65535" ) - tile = np.random.rand(*tile_shape) - tile *= (data_range[1] - data_range[0]) - tile += data_range[0] - tile = tile.astype(data_range[3]) # apply dtype - mask = np.random.randint(2, size=tile_shape[:-1]) - return (tile, mask) - - -def frame_with_zeros(data, desired_size, start_location=None): - if len(desired_size) == 0: - return data - if not start_location or len(start_location) == 0: - start_location = [0] - framed = [ - frame_with_zeros( - data[x - start_location[0]], - desired_size[1:], - start_location=start_location[1:], - ) - if ( # frame with zeros if x>=start and x= start_location[0] and - x < data.shape[0] + start_location[0] - ) # fill with zeros otherwise - else np.zeros(desired_size[1:]) - for x in range(desired_size[0]) - ] - return np.array(framed) - - -@pytest.mark.parametrize('data_range', possible_data_ranges) -def testImageGeneration(data_range): - source = large_image_source_zarr.new() - tile_grid = [ - int(random.randint(*possible_axes['x'])), - int(random.randint(*possible_axes['y'])), - ] - if data_range is None: - data_range = random.choice(possible_data_ranges) - - # create comparison matrix at max size and fill with zeros - expected_shape = get_dims( - tile_grid[1] * max_tile_size, tile_grid[0] * max_tile_size, 4, True, - ) - expected = np.ndarray(expected_shape) - expected.fill(0) - max_x, max_y = 0, 0 - - print( - f'placing {tile_grid[0] * tile_grid[1]} random tiles in available space: {expected_shape}') - print('tile overlap ratio:', tile_overlap_ratio) - print('data range:', data_range) - for x in range(tile_grid[0]): - for y in range(tile_grid[1]): - start_location = [ - int(x * max_tile_size * tile_overlap_ratio), - int(y * max_tile_size * tile_overlap_ratio), - ] - tile, mask = random_tile(data_range) - tile_shape = tile.shape - source.addTile(tile, *start_location, mask=mask) - max_x = max(max_x, start_location[1] + tile_shape[0]) - max_y = max(max_y, start_location[0] + tile_shape[1]) - - framed_tile = np.array(frame_with_zeros( - tile, - expected.shape, - start_location=start_location[::-1], - )) - framed_mask = np.array(frame_with_zeros( - mask.repeat(tile_shape[-1], -1).reshape(tile_shape), - expected.shape, - start_location=start_location[::-1], - )) - - np.putmask(expected, framed_mask, framed_tile) - - with tempfile.TemporaryDirectory() as tmp_dir: - destination = pathlib.Path(tmp_dir, 'sample.zarr') - source.write(destination, lossy=False) - result, _ = source.getRegion(format='numpy') - - print(result) - - # trim unused space from expected - expected = expected[:max_x, :max_y] - - # round to specified precision - precision_vector = np.vectorize(signif) - expected = precision_vector(expected, data_range[0], data_range[1], data_range[2]) - result = precision_vector(result, data_range[0], data_range[1], data_range[2]) - - # ignore alpha values for now - expected = expected.take(indices=range(-1), axis=-1) - result = result.take(indices=range(-1), axis=-1) - - # For debugging - # difference = numpy.subtract(result, expected) - # print(difference) - # print(expected[numpy.nonzero(difference)]) - # print(result[numpy.nonzero(difference)]) - assert np.array_equal(result, expected) - # resultFromFile, _ = large_image.open(destination).getRegion(format='numpy') - # print(resultFromFile.shape, result.shape) - # assert numpy.array_equal(result, resultFromFile) - print(f'Success; result matrix {result.shape} equals expected matrix {expected.shape}.') + metadata = source.getMetadata() + for frame in metadata.get('frames', []): + num_tiles = source.getSingleTile(frame=frame['Frame'])['iterator_range'][ + 'position' + ] + print(f'Copying {num_tiles} tiles for frame {frame}') + for tile in source.tileIterator(frame=frame['Frame'], format='numpy'): + t = tile['tile'] + x, y = tile['x'], tile['y'] + kwargs = { + 'z': frame['IndexZ'], + 'c': frame['IndexC'], + } + sink.addTile(t, x=x, y=y, axes='zcyxs', **kwargs) + + sink._validateZarr() + print('Final shape:', sink.getRegion(format='numpy')[0].shape) + + # sink.write('temp.tiff') + # sink.write('temp.sqlite') + sink.write('temp.zip') + # sink.write('temp.zarr') + # sink.write('temp.dz') + # sink.write('temp.szi') + # sink.write('temp.svs') if __name__ == '__main__': - testImageGeneration(None) + testImageCopy() From 599503f6aa827b90b6dc27dda6d4b05e6cdceb28 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Mon, 29 Jan 2024 10:15:42 -0500 Subject: [PATCH 04/36] Remove old TODOs --- sources/zarr/large_image_source_zarr/__init__.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index fa6a00975..e6b23092d 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -503,14 +503,7 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): in the same order as the tile dimensions :param kwargs: start locations for any additional axes """ - # If default zarr chunking, adjust chunking (should only happen once) - # min for y and x is 256, max is 2k - # for s, use length of s. - # for any other axes, let zarr determine appropriate chunking - - # check band bookkeeping - - # also don't change dtypes + # TODO: improve band bookkeeping placement = { 'x': x, @@ -582,6 +575,7 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): np.copyto(root_data, tile_data, where=mask_data) with self._addLock: + # This will rechunk data when necessary, according to new shape root = self._zarr.create_dataset('root', data=root_data, overwrite=True) # Edit OME metadata @@ -623,7 +617,6 @@ def write( if suffix in ['.db', '.sqlite']: shutil.copy2(self._tempfile.name, path) - # TODO: copy_store raises TypeError elif suffix == '.zip': zip_store = zarr.storage.ZipStore(path) zarr.copy_store(self._zarr_store, zip_store) From 10a1f7ebf89b77fbcd538db161610fcb09400634 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Mon, 29 Jan 2024 10:27:16 -0500 Subject: [PATCH 05/36] Refactor __init__ to simplify (split into multiple methods) --- sources/zarr/large_image_source_zarr/__init__.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index e6b23092d..74fae71ea 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -57,7 +57,12 @@ def __init__(self, path, **kwargs): super().__init__(path, **kwargs) if str(path).startswith(NEW_IMAGE_PATH_FLAG): - return self._initNew(path, **kwargs) + self._initNew(path, **kwargs) + else: + self._initOpen(**kwargs) + self._tileLock = threading.RLock() + + def _initOpen(self, **kwargs): self._largeImagePath = str(self._getLargeImagePath()) self._zarr = None if not os.path.isfile(self._largeImagePath) and '//:' not in self._largeImagePath: @@ -85,7 +90,6 @@ def __init__(self, path, **kwargs): except Exception: msg = 'File cannot be opened -- not an OME NGFF file or understandable zarr file.' raise TileSourceError(msg) - self._tileLock = threading.RLock() def _initNew(self, path, **kwargs): """ @@ -104,7 +108,6 @@ def _initNew(self, path, **kwargs): self._output = None self._editable = True self._bandRanges = None - self._tileLock = threading.RLock() self._addLock = threading.RLock() self._framecount = 0 self._mm_x = 0 From 12946a064b461473d21a5ce7ed4c1237b750b0c6 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Mon, 29 Jan 2024 10:38:10 -0500 Subject: [PATCH 06/36] Update Exception class in __del__ method --- sources/zarr/large_image_source_zarr/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index 74fae71ea..e5bdb1a38 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -117,7 +117,7 @@ def __del__(self): try: self._zarr.close() self._tempfile.close() - except BaseException: + except Exception: pass def _checkEditable(self): From d6979202201d81f8010d1f2d7d27595694b23f6c Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Thu, 1 Feb 2024 11:57:54 -0500 Subject: [PATCH 07/36] Enable cropping during `write`; does not modify internal data --- .../zarr/large_image_source_zarr/__init__.py | 65 +++++++++++++++++-- 1 file changed, 58 insertions(+), 7 deletions(-) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index e5bdb1a38..759b01ce4 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -594,6 +594,31 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): 'omero': {'version': '0.5-dev'}, }) + @property + def crop(self): + """ + Crop only applies to the output file, not the internal data access. + + It consists of x, y, w, h in pixels. + """ + return getattr(self, '_crop', None) + + @crop.setter + def crop(self, value): + self._checkEditable() + if value is None: + self._crop = None + return + x, y, w, h = value + x = int(x) + y = int(y) + w = int(w) + h = int(h) + if x < 0 or y < 0 or w <= 0 or h <= 0: + msg = 'Crop must have non-negative x, y and positive w, h' + raise TileSourceError(msg) + self._crop = (x, y, w, h) + def write( self, path, @@ -610,30 +635,56 @@ def write( :param overwriteAllowed: if False, raise an exception if the output path exists. """ + # TODO: compute half, quarter, etc. resolutions if not overwriteAllowed and os.path.exists(path): raise TileSourceError('Output path exists (%s).' % str(path)) - # TODO: apply cropping - # TODO: compute half, quarter, etc. resolutions self._validateZarr() suffix = Path(path).suffix + data_file = self._tempfile + data_store = self._zarr_store + + if self.crop: + x, y, w, h = self.crop + current_arrays = dict(self._zarr.arrays()) + # create new temp storage for cropped data + data_file = tempfile.NamedTemporaryFile() + data_store = zarr.SQLiteStore(data_file.name) + cropped_zarr = zarr.open(data_store, mode='w') + for arr_name in current_arrays: + arr = np.array(current_arrays[arr_name]) + cropped_arr = arr.take( + indices=range(x, x + w), + axis=self._axes.get('x'), + ).take( + indices=range(y, y + h), + axis=self._axes.get('y'), + ) + cropped_zarr.create_dataset(arr_name, data=cropped_arr, overwrite=True) + cropped_zarr.attrs.update(self._zarr.attrs) + + data_file.flush() + if suffix in ['.db', '.sqlite']: - shutil.copy2(self._tempfile.name, path) + shutil.copy2(data_file.name, path) elif suffix == '.zip': zip_store = zarr.storage.ZipStore(path) - zarr.copy_store(self._zarr_store, zip_store) + zarr.copy_store(data_store, zip_store) zip_store.close() + elif suffix == '.zarr': dir_store = zarr.storage.DirectoryStore(path) - zarr.copy_store(self._zarr_store, dir_store) + zarr.copy_store(data_store, dir_store) dir_store.close() else: from large_image_converter import convert - self._tempfile.flush() - convert(self._tempfile.name, path, overwrite=overwriteAllowed) + convert(data_file.name, path, overwrite=overwriteAllowed) + + if self.crop: + data_file.close() def open(*args, **kwargs): From f36f1a886d677234e70f7bf67caf6c9d1e03316d Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Tue, 5 Mar 2024 22:55:07 +0000 Subject: [PATCH 08/36] Protect `__del__` method and ensure both `close` operations are called --- sources/zarr/large_image_source_zarr/__init__.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index 759b01ce4..8e114bd66 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -114,11 +114,15 @@ def _initNew(self, path, **kwargs): self._mm_y = 0 def __del__(self): - try: - self._zarr.close() - self._tempfile.close() - except Exception: - pass + if not hasattr(self, '_derivedSource'): + try: + self._zarr.close() + except Exception: + pass + try: + self._tempfile.close() + except Exception: + pass def _checkEditable(self): """ From 6b6a6a411e70fec616f0ac520b51b8ea63298c97 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Tue, 5 Mar 2024 23:15:31 +0000 Subject: [PATCH 09/36] Set `_axes`, `_dtype`, `_bandCount`, `sizeX`, `sizeY`, `levels`, and `_framecount` in `addTile` --- .../zarr/large_image_source_zarr/__init__.py | 22 ++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index 8e114bd66..7d9afab58 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -512,6 +512,7 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): """ # TODO: improve band bookkeeping + self._checkEditable() placement = { 'x': x, 'y': y, @@ -531,11 +532,9 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): else: err = 'Invalid type for axes. Must be str or list[str].' raise ValueError(err) + self._axes = {k: i for i, k in enumerate(axes)} - self._checkEditable() tile, mode = _imageToNumpy(tile) - self._levels = None # reset zarr validation - while len(tile.shape) < len(axes): tile = np.expand_dims(tile, axis=0) @@ -551,8 +550,21 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): placement.get( a, 0) + - tile.shape[i]) for i, - a in enumerate(axes)} + tile.shape[i]) + for a, i in self._axes.items() + } + self._dtype = tile.dtype + self._bandCount = new_dims.get(axes[-1]) # last axis is assumed to be bands + self.sizeX = new_dims.get('x') + self.sizeY = new_dims.get('y') + self._framecount = np.prod([ + length + for axis, length in new_dims.items() + if axis in axes[:-3] + ]) + self.levels = int(max(1, math.ceil(math.log(max( + self.sizeX / self.tileWidth, self.sizeY / self.tileHeight)) / math.log(2)) + 1)) + root_data = np.pad( root, [(0, d - root.shape[i]) for i, d in enumerate(new_dims.values())], From d6dd5ade4267b924c892bc2536c62e5520236df2 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Wed, 6 Mar 2024 21:15:06 +0000 Subject: [PATCH 10/36] Attempt to improve performance of `addTile`: remove `np.pad` and unnecessary usage of `create_dataset` --- .../zarr/large_image_source_zarr/__init__.py | 68 ++++++++----------- 1 file changed, 27 insertions(+), 41 deletions(-) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index 7d9afab58..9bc016a4d 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -101,9 +101,9 @@ def _initNew(self, path, **kwargs): # Make unpickleable self._unpickleable = True self._largeImagePath = None + self._dims = {} self.sizeX = self.sizeY = self.levels = 0 self.tileWidth = self.tileHeight = self._tileSize - self._frames = [0] self._cacheValue = str(uuid.uuid4()) self._output = None self._editable = True @@ -538,21 +538,14 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): while len(tile.shape) < len(axes): tile = np.expand_dims(tile, axis=0) - current_arrays = dict(self._zarr.arrays()) - if 'root' in current_arrays: - root = current_arrays['root'] - else: - root = self._zarr.create_dataset('root', data=tile) - new_dims = { a: max( - root.shape[i], - placement.get( - a, - 0) + - tile.shape[i]) + self._dims.get(a, 0), + placement.get(a, 0) + tile.shape[i], + ) for a, i in self._axes.items() } + self._dims = new_dims self._dtype = tile.dtype self._bandCount = new_dims.get(axes[-1]) # last axis is assumed to be bands self.sizeX = new_dims.get('x') @@ -565,37 +558,30 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): self.levels = int(max(1, math.ceil(math.log(max( self.sizeX / self.tileWidth, self.sizeY / self.tileHeight)) / math.log(2)) + 1)) - root_data = np.pad( - root, - [(0, d - root.shape[i]) for i, d in enumerate(new_dims.values())], - mode='empty', - ) - - tile_data = np.pad( - tile, - [(placement.get(a, 0), d - placement.get(a, 0) - tile.shape[i]) - for i, (a, d) in enumerate(new_dims.items())], - mode='empty', - ) - - if mask is None: - mask = np.ones(tile.shape[:-1]) - while len(mask.shape) < len(tile_data.shape): - mask = np.expand_dims(mask, axis=-1) - mask = np.repeat(mask, tile_data.shape[len(mask.shape) - 1], axis=-1) - mask_data = np.pad( - mask, - [(placement.get(a, 0), d - placement.get(a, 0) - mask.shape[i]) - for i, (a, d) in enumerate(new_dims.items())], - mode='constant', - constant_values=0, - ) - mask_data = mask_data.astype(bool) - np.copyto(root_data, tile_data, where=mask_data) + if not mask: + mask = np.full(tile.shape, True) + full_mask = np.full(tuple(new_dims.values()), False) + mask_placement_slices = tuple([ + slice(placement.get(a, 0), placement.get(a, 0) + mask.shape[i], 1) + for i, a in enumerate(axes) + ]) + full_mask[mask_placement_slices] = mask + current_arrays = dict(self._zarr.arrays()) with self._addLock: - # This will rechunk data when necessary, according to new shape - root = self._zarr.create_dataset('root', data=root_data, overwrite=True) + if 'root' not in current_arrays: + chunking = tuple([ + self._tileSize if a in ['x', 'y'] else + 32 if a == 's' else 1 + for a in axes + ]) + self._zarr.create_dataset('root', data=tile, chunks=chunking) + else: + root = current_arrays['root'] + root.resize(*tuple(new_dims.values())) + data = root[:] + np.place(data, full_mask, tile) + root[:] = data # Edit OME metadata self._zarr.attrs.update({ From df8819f6a5fb6a90806b96e517981261b295c7b8 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Wed, 6 Mar 2024 21:29:10 +0000 Subject: [PATCH 11/36] Fix lint failure: remove trailing whitespace --- sources/zarr/large_image_source_zarr/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index 9bc016a4d..21a50e834 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -581,7 +581,7 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): root.resize(*tuple(new_dims.values())) data = root[:] np.place(data, full_mask, tile) - root[:] = data + root[:] = data # Edit OME metadata self._zarr.attrs.update({ From 0a832fdf5f4bab6f94da04534d3ab4096a472107 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Thu, 7 Mar 2024 16:10:01 +0000 Subject: [PATCH 12/36] Set `self._levels` to None in `addTile` --- sources/zarr/large_image_source_zarr/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index 21a50e834..526f30374 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -555,6 +555,7 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): for axis, length in new_dims.items() if axis in axes[:-3] ]) + self._levels = None self.levels = int(max(1, math.ceil(math.log(max( self.sizeX / self.tileWidth, self.sizeY / self.tileHeight)) / math.log(2)) + 1)) From cd0c715d61812c4e3e2eefddcf92dd659a65fe89 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Thu, 7 Mar 2024 19:05:05 +0000 Subject: [PATCH 13/36] Avoid creating full mask --- sources/zarr/large_image_source_zarr/__init__.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index 526f30374..9b8ecc4e7 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -561,12 +561,10 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): if not mask: mask = np.full(tile.shape, True) - full_mask = np.full(tuple(new_dims.values()), False) mask_placement_slices = tuple([ slice(placement.get(a, 0), placement.get(a, 0) + mask.shape[i], 1) for i, a in enumerate(axes) ]) - full_mask[mask_placement_slices] = mask current_arrays = dict(self._zarr.arrays()) with self._addLock: @@ -580,9 +578,13 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): else: root = current_arrays['root'] root.resize(*tuple(new_dims.values())) - data = root[:] + data = root[mask_placement_slices] + if mask is not None: + np.place(data, mask, tile) + else: + data = tile np.place(data, full_mask, tile) - root[:] = data + root[mask_placement_slices] = data # Edit OME metadata self._zarr.attrs.update({ From 41ea6b5e7938dc867122ad6d3da5836b58a47aed Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Fri, 8 Mar 2024 14:13:30 +0000 Subject: [PATCH 14/36] Use `np.where` instead of `np.place` --- sources/zarr/large_image_source_zarr/__init__.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index 9b8ecc4e7..e31617b03 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -578,13 +578,10 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): else: root = current_arrays['root'] root.resize(*tuple(new_dims.values())) - data = root[mask_placement_slices] - if mask is not None: - np.place(data, mask, tile) + if mask: + root[mask_placement_slices] = np.where(mask, tile, root[mask_placement_slices]) else: - data = tile - np.place(data, full_mask, tile) - root[mask_placement_slices] = data + root[mask_placement_slices] = tile # Edit OME metadata self._zarr.attrs.update({ From 0414f719b0035dfe2b6aaac1412ab0b62d7faa02 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Fri, 8 Mar 2024 14:46:19 +0000 Subject: [PATCH 15/36] Rename `mask_placement_slices` -> `placement_slices` and avoid mask creation --- sources/zarr/large_image_source_zarr/__init__.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index e31617b03..e5ba56ffd 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -559,10 +559,8 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): self.levels = int(max(1, math.ceil(math.log(max( self.sizeX / self.tileWidth, self.sizeY / self.tileHeight)) / math.log(2)) + 1)) - if not mask: - mask = np.full(tile.shape, True) - mask_placement_slices = tuple([ - slice(placement.get(a, 0), placement.get(a, 0) + mask.shape[i], 1) + placement_slices = tuple([ + slice(placement.get(a, 0), placement.get(a, 0) + tile.shape[i], 1) for i, a in enumerate(axes) ]) @@ -578,10 +576,10 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): else: root = current_arrays['root'] root.resize(*tuple(new_dims.values())) - if mask: - root[mask_placement_slices] = np.where(mask, tile, root[mask_placement_slices]) + if mask is not None: + root[placement_slices] = np.where(mask, tile, root[placement_slices]) else: - root[mask_placement_slices] = tile + root[placement_slices] = tile # Edit OME metadata self._zarr.attrs.update({ From 3da02f6086ae84e05eac7c0809a2a0854f43c514 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Mon, 11 Mar 2024 14:29:18 +0000 Subject: [PATCH 16/36] Include statement to add samples axis to mask if necessary --- sources/zarr/large_image_source_zarr/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index e5ba56ffd..4d2ae0f99 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -563,6 +563,8 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): slice(placement.get(a, 0), placement.get(a, 0) + tile.shape[i], 1) for i, a in enumerate(axes) ]) + if mask is not None and len(mask.shape) + 1 == len(tile.shape): + mask = mask[:, :, np.newaxis] current_arrays = dict(self._zarr.arrays()) with self._addLock: From d0d6129d7e2ae1bf02db39dad98af8658ae38dd0 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Wed, 13 Mar 2024 18:38:55 +0000 Subject: [PATCH 17/36] Refactor checking of `axes` arg; ensure "s" axis exists --- .../zarr/large_image_source_zarr/__init__.py | 23 ++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index 4d2ae0f99..2f152a314 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -518,23 +518,20 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): 'y': y, **kwargs, } - if axes is None: - if len(tile.shape) == 2: - axes = 'yx' - elif len(tile.shape) == 3: - axes = 'yxs' - else: - axes = '' - if isinstance(axes, str): - axes = axes.lower() - elif isinstance(axes, list): - axes = [x.lower() for x in axes] - else: + if not isinstance(tile, np.ndarray) or axes is None: + axes = 'yxs' + tile, mode = _imageToNumpy(tile) + elif not isinstance(axes, str) and not isinstance(axes, list): err = 'Invalid type for axes. Must be str or list[str].' raise ValueError(err) + axes = [x.lower() for x in axes] + if axes[-1] != 's': + axes.append('s') + if 'x' not in axes or 'y' not in axes: + err = 'Invalid value for axes. Must contain "y" and "x".' + raise ValueError(err) self._axes = {k: i for i, k in enumerate(axes)} - tile, mode = _imageToNumpy(tile) while len(tile.shape) < len(axes): tile = np.expand_dims(tile, axis=0) From 07e744fdef8bdba2f34e3c4f829fc4316d69abda Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Thu, 14 Mar 2024 14:34:47 +0000 Subject: [PATCH 18/36] Use samples axis length in chunking (instead of 32) and rechunk if length changes --- sources/zarr/large_image_source_zarr/__init__.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index 2f152a314..679d473b5 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -568,7 +568,7 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): if 'root' not in current_arrays: chunking = tuple([ self._tileSize if a in ['x', 'y'] else - 32 if a == 's' else 1 + new_dims.get('s') if a == 's' else 1 for a in axes ]) self._zarr.create_dataset('root', data=tile, chunks=chunking) @@ -580,6 +580,15 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): else: root[placement_slices] = tile + if root.chunks[-1] != new_dims.get('s'): + # rechunk if length of samples axis changes + chunking = tuple([ + self._tileSize if a in ['x', 'y'] else + new_dims.get('s') if a == 's' else 1 + for a in axes + ]) + self._zarr.create_dataset('root', data=root[:], chunks=chunking, overwrite=True) + # Edit OME metadata self._zarr.attrs.update({ 'multiscales': [{ From 5118fbc7121f470065f846a66b7ad49858c292a5 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Mon, 18 Mar 2024 17:03:32 +0000 Subject: [PATCH 19/36] Switch to using DirectoryStore internally (instead of SQLiteStore) --- .../zarr/large_image_source_zarr/__init__.py | 49 ++++++++++--------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index 679d473b5..092c64855 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -68,7 +68,7 @@ def _initOpen(self, **kwargs): if not os.path.isfile(self._largeImagePath) and '//:' not in self._largeImagePath: raise TileSourceFileNotFoundError(self._largeImagePath) from None try: - self._zarr = zarr.open(zarr.SQLiteStore(self._largeImagePath), mode='r') + self._zarr = zarr.open(zarr.DirectoryStore(self._largeImagePath), mode='r') except Exception: try: self._zarr = zarr.open(self._largeImagePath, mode='r') @@ -95,8 +95,8 @@ def _initNew(self, path, **kwargs): """ Initialize the tile class for creating a new image. """ - self._tempfile = tempfile.NamedTemporaryFile(suffix=path) - self._zarr_store = zarr.SQLiteStore(self._tempfile.name) + self._tempdir = tempfile.TemporaryDirectory(path) + self._zarr_store = zarr.DirectoryStore(self._tempdir.name) self._zarr = zarr.open(self._zarr_store, mode='w') # Make unpickleable self._unpickleable = True @@ -120,7 +120,7 @@ def __del__(self): except Exception: pass try: - self._tempfile.close() + shutil.rmtree(self._tempdir) except Exception: pass @@ -643,21 +643,27 @@ def write( :param overwriteAllowed: if False, raise an exception if the output path exists. """ - # TODO: compute half, quarter, etc. resolutions - if not overwriteAllowed and os.path.exists(path): - raise TileSourceError('Output path exists (%s).' % str(path)) + if os.path.exists(path): + if overwriteAllowed: + if os.path.isdir(path): + shutil.rmtree(path) + else: + os.remove(path) + else: + raise TileSourceError('Output path exists (%s).' % str(path)) + # TODO: compute half, quarter, etc. resolutions self._validateZarr() suffix = Path(path).suffix - data_file = self._tempfile + data_dir = self._tempdir data_store = self._zarr_store if self.crop: x, y, w, h = self.crop current_arrays = dict(self._zarr.arrays()) # create new temp storage for cropped data - data_file = tempfile.NamedTemporaryFile() - data_store = zarr.SQLiteStore(data_file.name) + data_dir = tempfile.TemporaryDirectory() + data_store = zarr.DirectoryStore(data_dir.name) cropped_zarr = zarr.open(data_store, mode='w') for arr_name in current_arrays: arr = np.array(current_arrays[arr_name]) @@ -671,28 +677,27 @@ def write( cropped_zarr.create_dataset(arr_name, data=cropped_arr, overwrite=True) cropped_zarr.attrs.update(self._zarr.attrs) - data_file.flush() + if suffix == '.zarr': + shutil.copytree(data_dir.name, path) - if suffix in ['.db', '.sqlite']: - shutil.copy2(data_file.name, path) + elif suffix in ['.db', '.sqlite']: + sqlite_store = zarr.SQLiteStore(path) + zarr.copy_store(data_store, sqlite_store, if_exists='replace') + sqlite_store.close() elif suffix == '.zip': - zip_store = zarr.storage.ZipStore(path) - zarr.copy_store(data_store, zip_store) + zip_store = zarr.ZipStore(path) + zarr.copy_store(data_store, zip_store, if_exists='replace') zip_store.close() - elif suffix == '.zarr': - dir_store = zarr.storage.DirectoryStore(path) - zarr.copy_store(data_store, dir_store) - dir_store.close() - else: from large_image_converter import convert - convert(data_file.name, path, overwrite=overwriteAllowed) + attrs_path = Path(data_dir.name) / '.zattrs' + convert(str(attrs_path), path, overwrite=overwriteAllowed) if self.crop: - data_file.close() + shutil.rmtree(data_dir.name) def open(*args, **kwargs): From e1e9831fb4e686a92ecf793761b99dd63a7b2d5f Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Mon, 18 Mar 2024 18:17:25 +0000 Subject: [PATCH 20/36] Refactor addTile: ensure placement from kwargs is used (including first iteration) --- .../zarr/large_image_source_zarr/__init__.py | 25 +++++++++++-------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index 092c64855..94ec03567 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -530,8 +530,10 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): if 'x' not in axes or 'y' not in axes: err = 'Invalid value for axes. Must contain "y" and "x".' raise ValueError(err) + for k in placement: + if k not in axes: + axes[0:0] = [k] self._axes = {k: i for i, k in enumerate(axes)} - while len(tile.shape) < len(axes): tile = np.expand_dims(tile, axis=0) @@ -555,31 +557,26 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): self._levels = None self.levels = int(max(1, math.ceil(math.log(max( self.sizeX / self.tileWidth, self.sizeY / self.tileHeight)) / math.log(2)) + 1)) - + if mask is not None and len(mask.shape) + 1 == len(tile.shape): + mask = mask[:, :, np.newaxis] placement_slices = tuple([ slice(placement.get(a, 0), placement.get(a, 0) + tile.shape[i], 1) for i, a in enumerate(axes) ]) - if mask is not None and len(mask.shape) + 1 == len(tile.shape): - mask = mask[:, :, np.newaxis] current_arrays = dict(self._zarr.arrays()) with self._addLock: + chunking = None if 'root' not in current_arrays: + root = np.empty(tuple(new_dims.values())) chunking = tuple([ self._tileSize if a in ['x', 'y'] else new_dims.get('s') if a == 's' else 1 for a in axes ]) - self._zarr.create_dataset('root', data=tile, chunks=chunking) else: root = current_arrays['root'] root.resize(*tuple(new_dims.values())) - if mask is not None: - root[placement_slices] = np.where(mask, tile, root[placement_slices]) - else: - root[placement_slices] = tile - if root.chunks[-1] != new_dims.get('s'): # rechunk if length of samples axis changes chunking = tuple([ @@ -587,7 +584,13 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): new_dims.get('s') if a == 's' else 1 for a in axes ]) - self._zarr.create_dataset('root', data=root[:], chunks=chunking, overwrite=True) + + if mask is not None: + root[placement_slices] = np.where(mask, tile, root[placement_slices]) + else: + root[placement_slices] = tile + if chunking: + self._zarr.create_dataset('root', data=root[:], chunks=chunking, overwrite=True) # Edit OME metadata self._zarr.attrs.update({ From 6706f6988764ffcbc02886e9ebd5e4d5333c7496 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Mon, 18 Mar 2024 18:18:36 +0000 Subject: [PATCH 21/36] Refactor addTile: move attribute changes within `addLock` context --- .../zarr/large_image_source_zarr/__init__.py | 29 ++++++++++--------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index 94ec03567..06c8e7475 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -544,19 +544,6 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): ) for a, i in self._axes.items() } - self._dims = new_dims - self._dtype = tile.dtype - self._bandCount = new_dims.get(axes[-1]) # last axis is assumed to be bands - self.sizeX = new_dims.get('x') - self.sizeY = new_dims.get('y') - self._framecount = np.prod([ - length - for axis, length in new_dims.items() - if axis in axes[:-3] - ]) - self._levels = None - self.levels = int(max(1, math.ceil(math.log(max( - self.sizeX / self.tileWidth, self.sizeY / self.tileHeight)) / math.log(2)) + 1)) if mask is not None and len(mask.shape) + 1 == len(tile.shape): mask = mask[:, :, np.newaxis] placement_slices = tuple([ @@ -605,6 +592,22 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): 'omero': {'version': '0.5-dev'}, }) + # Edit large_image attributes + self._dims = new_dims + self._dtype = tile.dtype + self._bandCount = new_dims.get(axes[-1]) # last axis is assumed to be bands + self.sizeX = new_dims.get('x') + self.sizeY = new_dims.get('y') + self._framecount = np.prod([ + length + for axis, length in new_dims.items() + if axis in axes[:-3] + ]) + self._levels = None + self.levels = int(max(1, math.ceil(math.log(max( + self.sizeX / self.tileWidth, self.sizeY / self.tileHeight)) / math.log(2)) + 1)) + + @property def crop(self): """ From b58fcda0fad6c1d9aac985047416c318533cb3f5 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Mon, 18 Mar 2024 18:20:48 +0000 Subject: [PATCH 22/36] style: whitespace changes to fix lint failure --- sources/zarr/large_image_source_zarr/__init__.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index 06c8e7475..1f7261e9a 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -571,13 +571,13 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): new_dims.get('s') if a == 's' else 1 for a in axes ]) - + if mask is not None: root[placement_slices] = np.where(mask, tile, root[placement_slices]) else: root[placement_slices] = tile if chunking: - self._zarr.create_dataset('root', data=root[:], chunks=chunking, overwrite=True) + self._zarr.create_dataset('root', data=root[:], chunks=chunking, overwrite=True) # Edit OME metadata self._zarr.attrs.update({ @@ -607,7 +607,6 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): self.levels = int(max(1, math.ceil(math.log(max( self.sizeX / self.tileWidth, self.sizeY / self.tileHeight)) / math.log(2)) + 1)) - @property def crop(self): """ From 756502b977233628d11f8239cc6e3fbbb99bd258 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Mon, 18 Mar 2024 18:44:03 +0000 Subject: [PATCH 23/36] Refactor addTile: expand mask dimensions with tile dimensions --- sources/zarr/large_image_source_zarr/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index 1f7261e9a..8f85f527b 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -527,6 +527,8 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): axes = [x.lower() for x in axes] if axes[-1] != 's': axes.append('s') + if mask is not None and len(axes) - 1 == len(mask.shape): + mask = mask[:, :, np.newaxis] if 'x' not in axes or 'y' not in axes: err = 'Invalid value for axes. Must contain "y" and "x".' raise ValueError(err) @@ -536,6 +538,8 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): self._axes = {k: i for i, k in enumerate(axes)} while len(tile.shape) < len(axes): tile = np.expand_dims(tile, axis=0) + while len(mask.shape) < len(axes): + mask = np.expand_dims(mask, axis=0) new_dims = { a: max( @@ -544,8 +548,6 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): ) for a, i in self._axes.items() } - if mask is not None and len(mask.shape) + 1 == len(tile.shape): - mask = mask[:, :, np.newaxis] placement_slices = tuple([ slice(placement.get(a, 0), placement.get(a, 0) + tile.shape[i], 1) for i, a in enumerate(axes) From a0b3fbcd5f1cdfd2263886b40d2b569c5f285c23 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Mon, 18 Mar 2024 18:56:48 +0000 Subject: [PATCH 24/36] fix: add NoneType protection to mask axis padding --- sources/zarr/large_image_source_zarr/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index 8f85f527b..5e6bf39b6 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -538,8 +538,9 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): self._axes = {k: i for i, k in enumerate(axes)} while len(tile.shape) < len(axes): tile = np.expand_dims(tile, axis=0) - while len(mask.shape) < len(axes): - mask = np.expand_dims(mask, axis=0) + if mask is not None: + while len(mask.shape) < len(axes): + mask = np.expand_dims(mask, axis=0) new_dims = { a: max( From 5317daaa3ab2d8b89734fe14470df3f5d9902c1d Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Tue, 19 Mar 2024 12:53:11 +0000 Subject: [PATCH 25/36] fix: Revert to using SQLiteStore when opening existing Zarr file --- sources/zarr/large_image_source_zarr/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index 5e6bf39b6..4bce48970 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -68,7 +68,7 @@ def _initOpen(self, **kwargs): if not os.path.isfile(self._largeImagePath) and '//:' not in self._largeImagePath: raise TileSourceFileNotFoundError(self._largeImagePath) from None try: - self._zarr = zarr.open(zarr.DirectoryStore(self._largeImagePath), mode='r') + self._zarr = zarr.open(zarr.SQLiteStore(self._largeImagePath), mode='r') except Exception: try: self._zarr = zarr.open(self._largeImagePath, mode='r') From 32c9c5819c60276749f3beebf0c5d36ef848eeab Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Tue, 19 Mar 2024 18:09:53 +0000 Subject: [PATCH 26/36] fix: move start of `self._addLock` context --- .../zarr/large_image_source_zarr/__init__.py | 35 +++++++++---------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index 4bce48970..88511f76c 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -535,27 +535,26 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): for k in placement: if k not in axes: axes[0:0] = [k] - self._axes = {k: i for i, k in enumerate(axes)} - while len(tile.shape) < len(axes): - tile = np.expand_dims(tile, axis=0) - if mask is not None: - while len(mask.shape) < len(axes): + with self._addLock: + self._axes = {k: i for i, k in enumerate(axes)} + while len(tile.shape) < len(axes): + tile = np.expand_dims(tile, axis=0) + while mask is not None and len(mask.shape) < len(axes): mask = np.expand_dims(mask, axis=0) - new_dims = { - a: max( - self._dims.get(a, 0), - placement.get(a, 0) + tile.shape[i], - ) - for a, i in self._axes.items() - } - placement_slices = tuple([ - slice(placement.get(a, 0), placement.get(a, 0) + tile.shape[i], 1) - for i, a in enumerate(axes) - ]) + new_dims = { + a: max( + self._dims.get(a, 0), + placement.get(a, 0) + tile.shape[i], + ) + for a, i in self._axes.items() + } + placement_slices = tuple([ + slice(placement.get(a, 0), placement.get(a, 0) + tile.shape[i], 1) + for i, a in enumerate(axes) + ]) - current_arrays = dict(self._zarr.arrays()) - with self._addLock: + current_arrays = dict(self._zarr.arrays()) chunking = None if 'root' not in current_arrays: root = np.empty(tuple(new_dims.values())) From 728faeee41b1f69424cded40351c246e6edb1099 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Tue, 19 Mar 2024 18:10:25 +0000 Subject: [PATCH 27/36] fix: set dtype of `np.empty` when initializing root --- sources/zarr/large_image_source_zarr/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index 88511f76c..ed677a04a 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -557,7 +557,7 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): current_arrays = dict(self._zarr.arrays()) chunking = None if 'root' not in current_arrays: - root = np.empty(tuple(new_dims.values())) + root = np.empty(tuple(new_dims.values()), dtype=tile.dtype) chunking = tuple([ self._tileSize if a in ['x', 'y'] else new_dims.get('s') if a == 's' else 1 From 14bee28e1d0ea69c455d0fe9ae412389f01fbcc2 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Tue, 19 Mar 2024 18:11:05 +0000 Subject: [PATCH 28/36] fix: call `validateZarr` in beginning of `getMetadata` if necessary --- sources/zarr/large_image_source_zarr/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index ed677a04a..4ad32d47f 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -383,6 +383,8 @@ def getMetadata(self): :returns: metadata dictionary. """ + if self._levels is None: + self._validateZarr() result = super().getMetadata() if self._framecount > 1: result['frames'] = frames = [] From ce1b1b77199be0114757a75248c6ed7be8e717f6 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Tue, 19 Mar 2024 18:11:51 +0000 Subject: [PATCH 29/36] fix: Set `_bandCount` from length of `s` axis if exists --- sources/zarr/large_image_source_zarr/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index 4ad32d47f..04f06eb75 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -334,6 +334,8 @@ def _validateZarr(self): baseArray.shape[self._axes.get('c')] in {1, 3, 4}): self._bandCount = baseArray.shape[self._axes['c']] self._axes['s'] = self._axes.pop('c') + elif 's' in self._axes: + self._bandCount = baseArray.shape[self._axes['s']] self._zarrFindLevels() self._getScale() stride = 1 From 5fac3a8dc1fcd46759145c39911c354d49db9348 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Tue, 19 Mar 2024 18:12:27 +0000 Subject: [PATCH 30/36] test: enable new algorithm progression tests --- test/test_examples.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_examples.py b/test/test_examples.py index 6ecc5d21f..a8db16a00 100644 --- a/test/test_examples.py +++ b/test/test_examples.py @@ -87,8 +87,8 @@ def test_sum_squares_import(): @pytest.mark.parametrize(('sink', 'outname', 'openpath'), [ ('multivips', 'sample', 'sample/results.yml'), - # ('zarr', 'sample.zip', 'sample.zip'), - # ('multizarr', 'sample', 'sample/results.yml'), + ('zarr', 'sample.zip', 'sample.zip'), + ('multizarr', 'sample', 'sample/results.yml'), ]) def test_algorithm_progression(sink, outname, openpath): import large_image From 23b70f74fb61c0296d3c9bb0eef187356d139274 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Tue, 19 Mar 2024 18:14:19 +0000 Subject: [PATCH 31/36] test: write basic use case tests for zarr sink in `test_sink.py` --- test/test_sink.py | 166 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 137 insertions(+), 29 deletions(-) diff --git a/test/test_sink.py b/test/test_sink.py index b928d82f1..46ced2382 100644 --- a/test/test_sink.py +++ b/test/test_sink.py @@ -1,27 +1,27 @@ + import large_image_source_test import large_image_source_zarr +import numpy as np +import pytest +import large_image -def testImageCopy(): - sink = large_image_source_zarr.new() - source = large_image_source_test.TestTileSource( - fractal=True, - maxLevel=4, - tileWidth=128, - tileHeight=128, - sizeX=512, - sizeY=1024, - frames='c=2,z=3', - # bands="red=400-12000,green=0-65535,blue=800-4000, - # ir1=200-24000,ir2=200-22000,gray=100-10000,other=0-65535" - ) +TMP_DIR = 'tmp/zarr_sink' +FILE_TYPES = [ + 'tiff', + 'sqlite', + 'db', + 'zip', + 'zarr', + # "dz", + # 'svi', + # 'svs', +] + +def copyFromSource(source, sink): metadata = source.getMetadata() for frame in metadata.get('frames', []): - num_tiles = source.getSingleTile(frame=frame['Frame'])['iterator_range'][ - 'position' - ] - print(f'Copying {num_tiles} tiles for frame {frame}') for tile in source.tileIterator(frame=frame['Frame'], format='numpy'): t = tile['tile'] x, y = tile['x'], tile['y'] @@ -29,19 +29,127 @@ def testImageCopy(): 'z': frame['IndexZ'], 'c': frame['IndexC'], } - sink.addTile(t, x=x, y=y, axes='zcyxs', **kwargs) + sink.addTile(t, x=x, y=y, **kwargs) + + +def testBasicAddTile(): + sink = large_image_source_zarr.new() + sink.addTile(np.random.random((10, 10)), 0, 0) + sink.addTile(np.random.random((10, 10, 2)), 10, 0) + + metadata = sink.getMetadata() + assert metadata.get('levels') == 1 + assert metadata.get('sizeX') == 20 + assert metadata.get('sizeY') == 10 + assert metadata.get('bandCount') == 2 + assert metadata.get('dtype') == 'float64' + + +def testExtraAxis(): + sink = large_image_source_zarr.new() + sink.addTile(np.random.random((256, 256)), 0, 0, z=1) + metadata = sink.getMetadata() + assert metadata.get('bandCount') == 1 + assert len(metadata.get('frames')) == 2 + + +@pytest.mark.parametrize('file_type', FILE_TYPES) +def testCrop(file_type, tmp_path): + output_file = tmp_path / f'test.{file_type}' + sink = large_image_source_zarr.new() + + # add tiles with some overlap + sink.addTile(np.random.random((10, 10)), 0, 0) + sink.addTile(np.random.random((10, 10)), 8, 0) + sink.addTile(np.random.random((10, 10)), 0, 8) + sink.addTile(np.random.random((10, 10)), 8, 8) + + region, _ = sink.getRegion(format='numpy') + shape = region.shape + assert shape == (18, 18, 1) + + sink.crop = (2, 2, 10, 10) + + # crop only applies when using write + sink.write(output_file) + if file_type == 'zarr': + output_file /= '.zattrs' + written = large_image.open(output_file) + region, _ = written.getRegion(format='numpy') + shape = region.shape + assert shape == (10, 10, 1) - sink._validateZarr() - print('Final shape:', sink.getRegion(format='numpy')[0].shape) - # sink.write('temp.tiff') - # sink.write('temp.sqlite') - sink.write('temp.zip') - # sink.write('temp.zarr') - # sink.write('temp.dz') - # sink.write('temp.szi') - # sink.write('temp.svs') +@pytest.mark.parametrize('file_type', FILE_TYPES) +def testImageCopySmall(file_type, tmp_path): + output_file = tmp_path / f'test.{file_type}' + sink = large_image_source_zarr.new() + source = large_image_source_test.TestTileSource( + fractal=True, + tileWidth=128, + tileHeight=128, + sizeX=512, + sizeY=1024, + frames='c=2,z=3', + ) + copyFromSource(source, sink) + + metadata = sink.getMetadata() + assert metadata.get('sizeX') == 512 + assert metadata.get('sizeY') == 1024 + assert metadata.get('dtype') == 'uint8' + assert metadata.get('levels') == 2 + assert metadata.get('bandCount') == 3 + assert len(metadata.get('frames')) == 6 + + # TODO: fix these failures; unexpected metadata when reading it back + sink.write(output_file) + if file_type == 'zarr': + output_file /= '.zattrs' + written = large_image.open(output_file) + new_metadata = written.metadata + + assert new_metadata.get('sizeX') == 512 + assert new_metadata.get('sizeY') == 1024 + assert new_metadata.get('dtype') == 'uint8' + assert new_metadata.get('levels') == 2 or new_metadata.get('levels') == 3 + assert new_metadata.get('bandCount') == 3 + assert len(new_metadata.get('frames')) == 6 + + +@pytest.mark.parametrize('file_type', FILE_TYPES) +def testImageCopySmallMultiband(file_type, tmp_path): + output_file = tmp_path / f'test.{file_type}' + sink = large_image_source_zarr.new() + source = large_image_source_test.TestTileSource( + fractal=True, + tileWidth=128, + tileHeight=128, + sizeX=512, + sizeY=1024, + frames='c=2,z=3', + bands='red=400-12000,green=0-65535,blue=800-4000,ir1=200-24000,ir2=200-22000,gray=100-10000,other=0-65535', + ) + copyFromSource(source, sink) + + metadata = sink.getMetadata() + assert metadata.get('sizeX') == 512 + assert metadata.get('sizeY') == 1024 + assert metadata.get('dtype') == 'uint16' + assert metadata.get('levels') == 2 + assert metadata.get('bandCount') == 7 + assert len(metadata.get('frames')) == 6 + # TODO: fix these failures; unexpected metadata when reading it back + sink.write(output_file) + if file_type == 'zarr': + output_file /= '.zattrs' + written = large_image.open(output_file) + new_metadata = written.getMetadata() -if __name__ == '__main__': - testImageCopy() + assert new_metadata.get('sizeX') == 512 + assert new_metadata.get('sizeY') == 1024 + assert new_metadata.get('dtype') == 'uint16' + assert new_metadata.get('levels') == 2 or new_metadata.get('levels') == 3 + assert new_metadata.get('bandCount') == 7 + assert len(new_metadata.get('frames')) == 6 From 414ba67ae3317c64576e61e47d24a8ccb8d5bc6e Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Tue, 19 Mar 2024 18:29:28 +0000 Subject: [PATCH 32/36] style: fix line too long; use string append --- test/test_sink.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/test_sink.py b/test/test_sink.py index 46ced2382..ab522fbf6 100644 --- a/test/test_sink.py +++ b/test/test_sink.py @@ -121,6 +121,8 @@ def testImageCopySmall(file_type, tmp_path): def testImageCopySmallMultiband(file_type, tmp_path): output_file = tmp_path / f'test.{file_type}' sink = large_image_source_zarr.new() + bands = 'red=400-12000,green=0-65535,blue=800-4000' + bands += ',ir1=200-24000,ir2=200-22000,gray=100-10000,other=0-65535' source = large_image_source_test.TestTileSource( fractal=True, tileWidth=128, @@ -128,7 +130,7 @@ def testImageCopySmallMultiband(file_type, tmp_path): sizeX=512, sizeY=1024, frames='c=2,z=3', - bands='red=400-12000,green=0-65535,blue=800-4000,ir1=200-24000,ir2=200-22000,gray=100-10000,other=0-65535', + bands=bands, ) copyFromSource(source, sink) From f35d06e2d560323c64a89a2d11c62b642a14d1d2 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Wed, 20 Mar 2024 14:36:24 +0000 Subject: [PATCH 33/36] style: avoid explicit string concatenation --- test/test_sink.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/test_sink.py b/test/test_sink.py index ab522fbf6..60a77000d 100644 --- a/test/test_sink.py +++ b/test/test_sink.py @@ -121,8 +121,10 @@ def testImageCopySmall(file_type, tmp_path): def testImageCopySmallMultiband(file_type, tmp_path): output_file = tmp_path / f'test.{file_type}' sink = large_image_source_zarr.new() - bands = 'red=400-12000,green=0-65535,blue=800-4000' - bands += ',ir1=200-24000,ir2=200-22000,gray=100-10000,other=0-65535' + bands = ( + 'red=400-12000,green=0-65535,blue=800-4000,' + 'ir1=200-24000,ir2=200-22000,gray=100-10000,other=0-65535' + ) source = large_image_source_test.TestTileSource( fractal=True, tileWidth=128, From 157c7319ee8e6f85c5d58125dea1d337e2f3f397 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Wed, 20 Mar 2024 14:43:10 +0000 Subject: [PATCH 34/36] test: add `testNew` and initialize `_levels` --- sources/zarr/large_image_source_zarr/__init__.py | 1 + test/test_sink.py | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index 04f06eb75..a4496e070 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -112,6 +112,7 @@ def _initNew(self, path, **kwargs): self._framecount = 0 self._mm_x = 0 self._mm_y = 0 + self._levels = [] def __del__(self): if not hasattr(self, '_derivedSource'): diff --git a/test/test_sink.py b/test/test_sink.py index 60a77000d..8a0396c7b 100644 --- a/test/test_sink.py +++ b/test/test_sink.py @@ -32,6 +32,12 @@ def copyFromSource(source, sink): sink.addTile(t, x=x, y=y, **kwargs) +def testNew(): + sink = large_image_source_zarr.new() + assert sink.metadata['levels'] == 0 + assert sink.getRegion(format='numpy')[0].shape[:2] == (0, 0) + + def testBasicAddTile(): sink = large_image_source_zarr.new() sink.addTile(np.random.random((10, 10)), 0, 0) From 62e657203e2b7947e368e90ded920e45bb21f8f8 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Wed, 20 Mar 2024 14:47:00 +0000 Subject: [PATCH 35/36] fix: unindent mask reshape clause --- sources/zarr/large_image_source_zarr/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index a4496e070..2b14a4806 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -532,8 +532,8 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): axes = [x.lower() for x in axes] if axes[-1] != 's': axes.append('s') - if mask is not None and len(axes) - 1 == len(mask.shape): - mask = mask[:, :, np.newaxis] + if mask is not None and len(axes) - 1 == len(mask.shape): + mask = mask[:, :, np.newaxis] if 'x' not in axes or 'y' not in axes: err = 'Invalid value for axes. Must contain "y" and "x".' raise ValueError(err) From 71d10683424225d60922271931a1acc5bee1f83f Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Wed, 20 Mar 2024 15:04:21 +0000 Subject: [PATCH 36/36] test: add `testAddTileWithMask` and fix caching to pass --- sources/zarr/large_image_source_zarr/__init__.py | 8 ++++++++ test/test_sink.py | 14 ++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/sources/zarr/large_image_source_zarr/__init__.py b/sources/zarr/large_image_source_zarr/__init__.py index 2b14a4806..620bc682c 100644 --- a/sources/zarr/large_image_source_zarr/__init__.py +++ b/sources/zarr/large_image_source_zarr/__init__.py @@ -379,6 +379,13 @@ def getNativeMagnification(self): 'mm_y': mm_y, } + def getState(self): + # Use the _cacheValue to avoid caching the source and tiles if we are + # creating something new. + if not hasattr(self, '_cacheValue'): + return super().getState() + return super().getState() + ',%s' % (self._cacheValue, ) + def getMetadata(self): """ Return a dictionary of metadata containing levels, sizeX, sizeY, @@ -610,6 +617,7 @@ def addTile(self, tile, x=0, y=0, mask=None, axes=None, **kwargs): for axis, length in new_dims.items() if axis in axes[:-3] ]) + self._cacheValue = str(uuid.uuid4()) self._levels = None self.levels = int(max(1, math.ceil(math.log(max( self.sizeX / self.tileWidth, self.sizeY / self.tileHeight)) / math.log(2)) + 1)) diff --git a/test/test_sink.py b/test/test_sink.py index 8a0396c7b..f6bccf26c 100644 --- a/test/test_sink.py +++ b/test/test_sink.py @@ -51,6 +51,20 @@ def testBasicAddTile(): assert metadata.get('dtype') == 'float64' +def testAddTileWithMask(): + sink = large_image_source_zarr.new() + tile0 = np.random.random((10, 10)) + sink.addTile(tile0, 0, 0) + orig = sink.getRegion(format='numpy')[0] + tile1 = np.random.random((10, 10)) + sink.addTile(tile1, 0, 0, mask=np.random.random((10, 10)) > 0.5) + cur = sink.getRegion(format='numpy')[0] + assert (tile0 == orig[:, :, 0]).all() + assert not (tile1 == orig[:, :, 0]).all() + assert not (tile0 == cur[:, :, 0]).all() + assert not (tile1 == cur[:, :, 0]).all() + + def testExtraAxis(): sink = large_image_source_zarr.new() sink.addTile(np.random.random((256, 256)), 0, 0, z=1)