From 1adba5ca74233f9dd87c3b492877022245bc083d Mon Sep 17 00:00:00 2001 From: David Manthey Date: Fri, 24 Jul 2020 08:31:37 -0400 Subject: [PATCH] Speed up scanning tiff files. This especially speeds up scanning OME Tiff files that we can't ultimately read and have a lot of images. --- .../tiff/large_image_source_tiff/__init__.py | 90 ++++++++++++------- .../large_image_source_tiff/tiff_reader.py | 16 ++-- test/test_cached_tiles.py | 10 +-- 3 files changed, 71 insertions(+), 45 deletions(-) diff --git a/sources/tiff/large_image_source_tiff/__init__.py b/sources/tiff/large_image_source_tiff/__init__.py index ed571de38..a2ddb9b90 100644 --- a/sources/tiff/large_image_source_tiff/__init__.py +++ b/sources/tiff/large_image_source_tiff/__init__.py @@ -83,41 +83,12 @@ def __init__(self, path, **kwargs): super(TiffFileTileSource, self).__init__(path, **kwargs) largeImagePath = self._getLargeImagePath() - lastException = None - # Associated images are smallish TIFF images that have an image - # description and are not tiled. They have their own TIFF directory. - # Individual TIFF images can also have images embedded into their - # directory as tags (this is a vendor-specific method of adding more - # images into a file) -- those are stored in the individual - # directories' _embeddedImages field. - self._associatedImages = {} + try: + alldir = self._scanDirectories() + except (ValidationTiffException, TiffException) as exc: + alldir = [] + lastException = exc - # Query all know directories in the tif file. Only keep track of - # directories that contain tiled images. 
- alldir = [] - for directoryNum in itertools.count(): # pragma: no branch - try: - td = TiledTiffDirectory(largeImagePath, directoryNum) - except ValidationTiffException as exc: - lastException = exc - self._addAssociatedImage(largeImagePath, directoryNum) - continue - except TiffException as exc: - if not lastException: - lastException = exc - break - if not td.tileWidth or not td.tileHeight: - continue - # Calculate the tile level, where 0 is a single tile, 1 is up to a - # set of 2x2 tiles, 2 is 4x4, etc. - level = int(math.ceil(math.log(max( - float(td.imageWidth) / td.tileWidth, - float(td.imageHeight) / td.tileHeight)) / math.log(2))) - if level < 0: - continue - # Store information for sorting with the directory. - alldir.append((level > 0, td.tileWidth * td.tileHeight, level, - td.imageWidth * td.imageHeight, directoryNum, td)) # If there are no tiled images, raise an exception. if not len(alldir): msg = "File %s didn't meet requirements for tile source: %s" % ( @@ -162,6 +133,57 @@ def __init__(self, path, **kwargs): self.sizeX = highest.imageWidth self.sizeY = highest.imageHeight + def _scanDirectories(self): + largeImagePath = self._getLargeImagePath() + lastException = None + # Associated images are smallish TIFF images that have an image + # description and are not tiled. They have their own TIFF directory. + # Individual TIFF images can also have images embedded into their + # directory as tags (this is a vendor-specific method of adding more + # images into a file) -- those are stored in the individual + # directories' _embeddedImages field. + self._associatedImages = {} + + dir = None + # Query all known directories in the tif file. Only keep track of + # directories that contain tiled images. 
+ alldir = [] + associatedDirs = [] + for directoryNum in itertools.count(): # pragma: no branch + try: + if dir is None: + dir = TiledTiffDirectory(largeImagePath, directoryNum, validate=False) + else: + dir._setDirectory(directoryNum) + dir._loadMetadata() + dir._validate() + except ValidationTiffException as exc: + lastException = exc + associatedDirs.append(directoryNum) + continue + except TiffException as exc: + if not lastException: + lastException = exc + break + if not dir.tileWidth or not dir.tileHeight: + continue + # Calculate the tile level, where 0 is a single tile, 1 is up to a + # set of 2x2 tiles, 2 is 4x4, etc. + level = int(math.ceil(math.log(max( + float(dir.imageWidth) / dir.tileWidth, + float(dir.imageHeight) / dir.tileHeight)) / math.log(2))) + if level < 0: + continue + td, dir = dir, None + # Store information for sorting with the directory. + alldir.append((level > 0, td.tileWidth * td.tileHeight, level, + td.imageWidth * td.imageHeight, directoryNum, td)) + if not alldir and lastException: + raise lastException + for directoryNum in associatedDirs: + self._addAssociatedImage(largeImagePath, directoryNum) + return alldir + def _addAssociatedImage(self, largeImagePath, directoryNum, mustBeTiled=False, topImage=None): """ Check if the specified TIFF directory contains an image with a sensible diff --git a/sources/tiff/large_image_source_tiff/tiff_reader.py b/sources/tiff/large_image_source_tiff/tiff_reader.py index 9ae6fc701..f11231571 100644 --- a/sources/tiff/large_image_source_tiff/tiff_reader.py +++ b/sources/tiff/large_image_source_tiff/tiff_reader.py @@ -104,7 +104,7 @@ class TiledTiffDirectory(object): 'IsMSB2LSB', 'NumberOfStrips', ] - def __init__(self, filePath, directoryNum, mustBeTiled=True, subDirectoryNum=0): + def __init__(self, filePath, directoryNum, mustBeTiled=True, subDirectoryNum=0, validate=True): """ Create a new reader for a tiled image file directory in a TIFF file. 
@@ -115,14 +115,15 @@ def __init__(self, filePath, directoryNum, mustBeTiled=True, subDirectoryNum=0): :type directoryNum: int :param mustBeTiled: if True, only tiled images validate. If False, only non-tiled images validate. None validates both. + :type mustBeTiled: bool :param subDirectoryNum: if set, the number of the TIFF subdirectory. + :type subDirectoryNum: int + :param validate: if False, don't validate that images can be read. + :type validate: bool :raises: InvalidOperationTiffException or IOTiffException or ValidationTiffException """ - # TODO how many to keep in the cache - # create local cache to store Jpeg tables and - # getTileByteCountsType - + # create local cache to store Jpeg tables and getTileByteCountsType self.cache = LRUCache(10) self._mustBeTiled = mustBeTiled @@ -134,7 +135,8 @@ def __init__(self, filePath, directoryNum, mustBeTiled=True, subDirectoryNum=0): config.getConfig('logger').debug( 'TiffDirectory %d:%d Information %r', directoryNum, subDirectoryNum, self._tiffInfo) try: - self._validate() + if validate: + self._validate() except ValidationTiffException: self._close() raise @@ -174,7 +176,9 @@ def _open(self, filePath, directoryNum, subDirectoryNum=0): hasattr(self._tiffFile, func.lower())): setattr(self._tiffFile, func, getattr( self._tiffFile, func.lower())) + self._setDirectory(directoryNum, subDirectoryNum) + def _setDirectory(self, directoryNum, subDirectoryNum=0): self._directoryNum = directoryNum if self._tiffFile.SetDirectory(self._directoryNum) != 1: self._tiffFile.close() diff --git a/test/test_cached_tiles.py b/test/test_cached_tiles.py index d094d527c..87ad6652f 100644 --- a/test/test_cached_tiles.py +++ b/test/test_cached_tiles.py @@ -132,20 +132,20 @@ def countInit(*args, **kwargs): self.delCount = 0 source = large_image.getTileSource(imagePath) assert source is not None - assert self.initCount == 14 - assert self.delCount < 14 + assert self.initCount == 12 + assert self.delCount < 12 # Create another source; we 
shouldn't init it again, as it should be # cached. source = large_image.getTileSource(imagePath) assert source is not None - assert self.initCount == 14 - assert self.delCount < 14 + assert self.initCount == 12 + assert self.delCount < 12 source = None # Clear the cache to free references and force garbage collection cachesClear() gc.collect(2) cachesClear() - assert self.delCount == 14 + assert self.delCount == 12 class TestMemcachedCache(LargeImageCachedTilesTest):