Skip to content

Commit

Permalink
Speed up scanning tiff files.
Browse files Browse the repository at this point in the history
This especially speeds up scanning OME Tiff files that we can't
ultimately read and have a lot of images.
  • Loading branch information
manthey committed Jul 24, 2020
1 parent 1769dd7 commit 31bfb29
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 45 deletions.
90 changes: 56 additions & 34 deletions sources/tiff/large_image_source_tiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,41 +83,12 @@ def __init__(self, path, **kwargs):
super(TiffFileTileSource, self).__init__(path, **kwargs)

largeImagePath = self._getLargeImagePath()
lastException = None
# Associated images are smallish TIFF images that have an image
# description and are not tiled. They have their own TIFF directory.
# Individual TIFF images can also have images embedded into their
# directory as tags (this is a vendor-specific method of adding more
# images into a file) -- those are stored in the individual
# directories' _embeddedImages field.
self._associatedImages = {}
try:
alldir = self._scanDirectories()
except (ValidationTiffException, TiffException) as exc:
alldir = []
lastException = exc

# Query all known directories in the tif file. Only keep track of
# directories that contain tiled images.
alldir = []
for directoryNum in itertools.count(): # pragma: no branch
try:
td = TiledTiffDirectory(largeImagePath, directoryNum)
except ValidationTiffException as exc:
lastException = exc
self._addAssociatedImage(largeImagePath, directoryNum)
continue
except TiffException as exc:
if not lastException:
lastException = exc
break
if not td.tileWidth or not td.tileHeight:
continue
# Calculate the tile level, where 0 is a single tile, 1 is up to a
# set of 2x2 tiles, 2 is 4x4, etc.
level = int(math.ceil(math.log(max(
float(td.imageWidth) / td.tileWidth,
float(td.imageHeight) / td.tileHeight)) / math.log(2)))
if level < 0:
continue
# Store information for sorting with the directory.
alldir.append((level > 0, td.tileWidth * td.tileHeight, level,
td.imageWidth * td.imageHeight, directoryNum, td))
# If there are no tiled images, raise an exception.
if not len(alldir):
msg = "File %s didn't meet requirements for tile source: %s" % (
Expand Down Expand Up @@ -162,6 +133,57 @@ def __init__(self, path, **kwargs):
self.sizeX = highest.imageWidth
self.sizeY = highest.imageHeight

def _scanDirectories(self):
    """
    Walk the directories of the TIFF file and collect the tiled ones.

    Directories are probed in increasing order until a TiffException is
    raised.  A directory that fails validation is remembered and, once the
    scan has finished, passed to _addAssociatedImage.  One
    TiledTiffDirectory is reused from directory to directory until it is
    accepted as a tiled image; it is then kept in the result and a new one
    is created for the next directory.

    :returns: a list with one tuple per accepted directory, of the form
        (level > 0, tile area, level, image area, directory number,
        TiledTiffDirectory).  The leading values exist for sorting.
    :raises: if no directory was accepted, the recorded exception: the
        most recent ValidationTiffException, or, when there was none, the
        TiffException that ended the scan.
    """
    imagePath = self._getLargeImagePath()
    recordedError = None
    # Associated images are smallish TIFF images that have an image
    # description and are not tiled.  They have their own TIFF directory.
    # Individual TIFF images can also have images embedded into their
    # directory as tags (this is a vendor-specific method of adding more
    # images into a file) -- those are stored in the individual
    # directories' _embeddedImages field.
    self._associatedImages = {}

    # Query all known directories in the tif file.  Only keep track of
    # directories that contain tiled images.
    tiledDirs = []
    deferredAssociated = []
    reader = None
    for directoryNum in itertools.count():  # pragma: no branch
        try:
            if reader is not None:
                # Re-point the existing reader rather than reopening the
                # file for every directory.
                reader._setDirectory(directoryNum)
                reader._loadMetadata()
            else:
                reader = TiledTiffDirectory(imagePath, directoryNum, validate=False)
            reader._validate()
        except ValidationTiffException as exc:
            recordedError = exc
            deferredAssociated.append(directoryNum)
            continue
        except TiffException as exc:
            # Keep an earlier exception if there is one; either way the
            # scan stops here.
            recordedError = recordedError or exc
            break
        if not (reader.tileWidth and reader.tileHeight):
            continue
        # Calculate the tile level, where 0 is a single tile, 1 is up to a
        # set of 2x2 tiles, 2 is 4x4, etc.
        widthRatio = float(reader.imageWidth) / reader.tileWidth
        heightRatio = float(reader.imageHeight) / reader.tileHeight
        level = int(math.ceil(math.log(max(widthRatio, heightRatio)) / math.log(2)))
        if level < 0:
            continue
        # This reader is being kept, so the next directory needs a new one.
        td, reader = reader, None
        # Store information for sorting with the directory.
        tiledDirs.append((
            level > 0,
            td.tileWidth * td.tileHeight,
            level,
            td.imageWidth * td.imageHeight,
            directoryNum,
            td,
        ))
    if recordedError and not tiledDirs:
        raise recordedError
    # Associated images are only added after the scan has succeeded.
    for directoryNum in deferredAssociated:
        self._addAssociatedImage(imagePath, directoryNum)
    return tiledDirs

def _addAssociatedImage(self, largeImagePath, directoryNum, mustBeTiled=False, topImage=None):
"""
Check if the specified TIFF directory contains an image with a sensible
Expand Down
16 changes: 10 additions & 6 deletions sources/tiff/large_image_source_tiff/tiff_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ class TiledTiffDirectory(object):
'IsMSB2LSB', 'NumberOfStrips',
]

def __init__(self, filePath, directoryNum, mustBeTiled=True, subDirectoryNum=0):
def __init__(self, filePath, directoryNum, mustBeTiled=True, subDirectoryNum=0, validate=True):
"""
Create a new reader for a tiled image file directory in a TIFF file.
Expand All @@ -115,14 +115,15 @@ def __init__(self, filePath, directoryNum, mustBeTiled=True, subDirectoryNum=0):
:type directoryNum: int
:param mustBeTiled: if True, only tiled images validate. If False,
only non-tiled images validate. None validates both.
:type mustBeTiled: bool
:param subDirectoryNum: if set, the number of the TIFF subdirectory.
:type subDirectoryNum: int
:param validate: if False, don't validate that images can be read.
:type validate: bool
:raises: InvalidOperationTiffException or IOTiffException or
ValidationTiffException
"""
# TODO how many to keep in the cache
# create local cache to store Jpeg tables and
# getTileByteCountsType

# create local cache to store Jpeg tables and getTileByteCountsType
self.cache = LRUCache(10)
self._mustBeTiled = mustBeTiled

Expand All @@ -134,7 +135,8 @@ def __init__(self, filePath, directoryNum, mustBeTiled=True, subDirectoryNum=0):
config.getConfig('logger').debug(
'TiffDirectory %d:%d Information %r', directoryNum, subDirectoryNum, self._tiffInfo)
try:
self._validate()
if validate:
self._validate()
except ValidationTiffException:
self._close()
raise
Expand Down Expand Up @@ -174,7 +176,9 @@ def _open(self, filePath, directoryNum, subDirectoryNum=0):
hasattr(self._tiffFile, func.lower())):
setattr(self._tiffFile, func, getattr(
self._tiffFile, func.lower()))
self._setDirectory(directoryNum, subDirectoryNum)

def _setDirectory(self, directoryNum, subDirectoryNum=0):
self._directoryNum = directoryNum
if self._tiffFile.SetDirectory(self._directoryNum) != 1:
self._tiffFile.close()
Expand Down
10 changes: 5 additions & 5 deletions test/test_cached_tiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,20 +132,20 @@ def countInit(*args, **kwargs):
self.delCount = 0
source = large_image.getTileSource(imagePath)
assert source is not None
assert self.initCount == 14
assert self.delCount < 14
assert self.initCount == 12
assert self.delCount < 12
# Create another source; we shouldn't init it again, as it should be
# cached.
source = large_image.getTileSource(imagePath)
assert source is not None
assert self.initCount == 14
assert self.delCount < 14
assert self.initCount == 12
assert self.delCount < 12
source = None
# Clear the cache to free references and force garbage collection
cachesClear()
gc.collect(2)
cachesClear()
assert self.delCount == 14
assert self.delCount == 12


class TestMemcachedCache(LargeImageCachedTilesTest):
Expand Down

0 comments on commit 31bfb29

Please sign in to comment.