Skip to content

Commit

Permalink
Convert to and read multiframe files.
Browse files Browse the repository at this point in the history
This uses tifftools to parse some files.
  • Loading branch information
manthey committed Feb 16, 2021
1 parent 3c31cdc commit 6f2bb16
Show file tree
Hide file tree
Showing 7 changed files with 422 additions and 26 deletions.
172 changes: 163 additions & 9 deletions sources/tiff/large_image_source_tiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@
import base64
import io
import itertools
import json
import math
import numpy
import PIL.Image
from pkg_resources import DistributionNotFound, get_distribution
import tifftools

from large_image import config
from large_image.cache_util import LruCacheMetaclass, methodcache
Expand Down Expand Up @@ -68,6 +70,8 @@ class TiffFileTileSource(FileTileSource, metaclass=LruCacheMetaclass):
# _maxSkippedLevels, such large gaps are composited in stages.
_maxSkippedLevels = 3

_maxAssociatedImageSize = 8192

def __init__(self, path, **kwargs):
"""
Initialize the tile class. See the base class for other available
Expand All @@ -79,6 +83,13 @@ def __init__(self, path, **kwargs):

largeImagePath = self._getLargeImagePath()
self._largeImagePath = largeImagePath

try:
self._initWithTiffTools()
return
except Exception as exc:
config.getConfig('logger').debug('Cannot read with tifftools route; %r', exc)

try:
alldir = self._scanDirectories()
except (ValidationTiffException, TiffException) as exc:
Expand Down Expand Up @@ -180,6 +191,127 @@ def _scanDirectories(self):
self._addAssociatedImage(largeImagePath, directoryNum)
return alldir

def _levelFromIfd(self, ifd, baseifd):
    """
    Determine which pyramid level an ifd corresponds to, using the ifd
    itself and the full-resolution ifd of frame 0 as a reference.

    The ifd must agree with the base ifd on a fixed set of format tags, and
    its size must be reachable from the full-resolution size by repeated
    halving (rounding down, rounding up, or rounding up to a whole number
    of tiles).

    :param ifd: an ifd record returned from tifftools.
    :param baseifd: the ifd record of the full-resolution frame 0.
    :returns: the level, where self.levels - 1 is full resolution and 0 is
        the lowest resolution.
    :raises TileSourceException: if the format tags differ from the base
        ifd or the size does not correspond to any level.
    """
    tags = ifd['tags']
    baseTags = baseifd['tags']
    target = (
        tags[tifftools.Tag.ImageWidth.value]['data'][0],
        tags[tifftools.Tag.ImageLength.value]['data'][0],
    )
    tileWidth = baseTags[tifftools.Tag.TileWidth.value]['data'][0]
    tileHeight = baseTags[tifftools.Tag.TileLength.value]['data'][0]
    mustMatch = {
        tifftools.Tag.SamplesPerPixel.value,
        tifftools.Tag.BitsPerSample.value,
        tifftools.Tag.PlanarConfig.value,
        tifftools.Tag.Photometric.value,
        tifftools.Tag.Orientation.value,
        tifftools.Tag.Compression.value,
        tifftools.Tag.TileWidth.value,
        tifftools.Tag.TileLength.value,
    }
    for tag in mustMatch:
        present = tag in tags
        # A tag must be present in both ifds or in neither; when present
        # in both, the stored data has to be identical.
        if present != (tag in baseTags) or (
                present and tags[tag]['data'] != baseTags[tag]['data']):
            raise TileSourceException('IFD does not match first IFD.')
    candidates = [(self.sizeX, self.sizeY)]
    level = self.levels - 1
    while level >= 0:
        if target in candidates:
            return level
        # Build the list of plausible sizes for the next lower level from
        # every size that was plausible at this level.
        halved = []
        for w, h in candidates:
            halfW = w / 2
            halfH = h / 2
            wFloor = int(math.floor(halfW))
            hFloor = int(math.floor(halfH))
            wCeil = int(math.ceil(halfW))
            hCeil = int(math.ceil(halfH))
            wTiled = int(math.floor((halfW + tileWidth - 1) / tileWidth)) * tileWidth
            hTiled = int(math.floor((halfH + tileHeight - 1) / tileHeight)) * tileHeight
            options = (
                (wFloor, hFloor),
                (wFloor, hCeil),
                (wCeil, hFloor),
                (wCeil, hCeil),
                (wTiled, hTiled),
            )
            for option in options:
                if option not in halved:
                    halved.append(option)
        candidates = halved
        level -= 1
    raise TileSourceException('IFD size is not a power of two smaller than first IFD.')

def _initWithTiffTools(self):
    """
    Use tifftools to read all of the tiff directory information.  Check if
    the zeroth directory can be validated as a tiled directory.  If so,
    then check if the remaining directories are either tiled in descending
    size or have subifds with tiles in descending sizes.  All primary tiled
    directories are the same size and format; all non-tiled directories are
    treated as associated images.

    On success this sets tileWidth, tileHeight, sizeX, sizeY, levels,
    _associatedImages, _frames, and _tiffDirectories on the instance.

    :returns: True once the source has been configured.
    :raises TileSourceException: if tiled directories or subifds do not
        fit the expected frame / level layout.
    """
    dir0 = TiledTiffDirectory(self._largeImagePath, 0)
    self.tileWidth = dir0.tileWidth
    self.tileHeight = dir0.tileHeight
    self.sizeX = dir0.imageWidth
    self.sizeY = dir0.imageHeight
    # When the image is no larger than half a tile in both dimensions the
    # logarithm is <= -1, which would yield zero or a negative number of
    # levels; there is always at least one level.
    self.levels = max(1, int(math.ceil(math.log(max(
        dir0.imageWidth / dir0.tileWidth,
        dir0.imageHeight / dir0.tileHeight)) / math.log(2))) + 1)
    info = tifftools.read_tiff(self._largeImagePath)
    baseifd = info['ifds'][0]
    fullRes = self.levels - 1
    frames = []
    associated = []  # for now, a list of directory numbers
    for idx, ifd in enumerate(info['ifds']):
        # if not tiled, add to associated images
        if tifftools.Tag.TileWidth.value not in ifd['tags']:
            associated.append(idx)
            continue
        level = self._levelFromIfd(ifd, baseifd)
        if level == fullRes:
            # the same resolution as the main image starts a new frame
            frames.append({'dirs': [None] * self.levels})
            frames[-1]['dirs'][-1] = (idx, 0)
            try:
                frameMetadata = json.loads(
                    ifd['tags'][tifftools.Tag.ImageDescription.value]['data'])
                for key in {'channels', 'frame'}:
                    if key in frameMetadata:
                        frames[-1][key] = frameMetadata[key]
            except Exception:
                # The description is optional and need not be JSON; frame
                # metadata is best effort, so ignore anything unparsable.
                pass
        else:
            # otherwise, add to the first frame missing that level
            target = None
            if level < fullRes:
                target = next(
                    (frame for frame in frames if frame['dirs'][level] is None),
                    None)
            if target is None:
                raise TileSourceException('Tile layers are in a surprising order')
            target['dirs'][level] = (idx, 0)
        # if there are sub ifds, add them to the most recent frame
        if tifftools.Tag.SubIFD.value in ifd['tags']:
            subifdList = ifd['tags'][tifftools.Tag.SubIFD.value]['ifds']
            for subidx, subifds in enumerate(subifdList):
                if len(subifds) != 1:
                    raise TileSourceException(
                        'When stored in subifds, each subifd should be a single ifd.')
                level = self._levelFromIfd(subifds[0], baseifd)
                if level < fullRes and frames[-1]['dirs'][level] is None:
                    # sub directory numbers are 1-based; 0 is the main ifd
                    frames[-1]['dirs'][level] = (idx, subidx + 1)
                else:
                    raise TileSourceException('Tile layers are in a surprising order')
    self._associatedImages = {}
    for dirNum in associated:
        self._addAssociatedImage(self._largeImagePath, dirNum)
    self._frames = frames
    # The directories of frame 0 are opened up front; levels with no
    # stored directory are left as None.
    firstDirs = frames[0]['dirs']
    self._tiffDirectories = [
        TiledTiffDirectory(
            self._largeImagePath,
            firstDirs[level][0],
            subDirectoryNum=firstDirs[level][1])
        if firstDirs[level] is not None else None
        for level in range(fullRes)]
    self._tiffDirectories.append(dir0)
    return True

def _addAssociatedImage(self, largeImagePath, directoryNum, mustBeTiled=False, topImage=None):
"""
Check if the specified TIFF directory contains an image with a sensible
Expand Down Expand Up @@ -209,8 +341,8 @@ def _addAssociatedImage(self, largeImagePath, directoryNum, mustBeTiled=False, t
# a reasonable length, alphanumeric characters, and the
# image isn't too large.
if (id.isalnum() and len(id) > 3 and len(id) <= 20 and
associated._pixelInfo['width'] <= 8192 and
associated._pixelInfo['height'] <= 8192):
associated._pixelInfo['width'] <= self._maxAssociatedImageSize and
associated._pixelInfo['height'] <= self._maxAssociatedImageSize):
image = associated._tiffFile.read_image()
# Optrascan scanners store xml image descriptions in a "tiled
# image". Check if this is the case, and, if so, parse such
Expand Down Expand Up @@ -297,10 +429,22 @@ def _xmlToMetadata(self, xml):
if key not in {'PIM_DP_IMAGE_DATA', }:
values[attr['Name'] + '|' + key] = subvalue
except Exception:
config.getConfig('logger').exception('Here')
return xml
return values

def getMetadata(self):
    """
    Return the metadata dictionary from the base class.  If per-frame
    records are available on this source, a 'frames' list is added and the
    frame information helper is applied using the channels of the first
    frame.

    :returns: metadata dictionary.
    """
    metadata = super().getMetadata()
    if not hasattr(self, '_frames'):
        return metadata
    metadata['frames'] = [entry.get('frame', {}) for entry in self._frames]
    channels = self._frames[0].get('channels', None)
    self._addMetadataFrameInformation(metadata, channels)
    return metadata

def getInternalMetadata(self, **kwargs):
"""
Return additional known metadata about the tile source. Data returned
Expand Down Expand Up @@ -335,10 +479,18 @@ def getInternalMetadata(self, **kwargs):
@methodcache()
def getTile(self, x, y, z, pilImageAllowed=False, numpyAllowed=False,
sparseFallback=False, **kwargs):
self._xyzInRange(x, y, z)
frame = int(kwargs.get('frame') or 0)
self._xyzInRange(x, y, z, frame, len(self._frames) if hasattr(self, '_frames') else None)
if frame > 0:
if self._frames[frame]['dirs'][z] is not None:
dir = self._getDirFromCache(*self._frames[frame]['dirs'][z])
else:
dir = None
else:
dir = self._tiffDirectories[z]
try:
allowStyle = True
if self._tiffDirectories[z] is None:
if dir is None:
try:
tile = self.getTileFromEmptyDirectory(x, y, z, **kwargs)
except Exception:
Expand All @@ -349,16 +501,14 @@ def getTile(self, x, y, z, pilImageAllowed=False, numpyAllowed=False,
allowStyle = False
format = TILE_FORMAT_PIL
else:
tile = self._tiffDirectories[z].getTile(x, y)
tile = dir.getTile(x, y)
format = 'JPEG'
if isinstance(tile, PIL.Image.Image):
format = TILE_FORMAT_PIL
if isinstance(tile, numpy.ndarray):
format = TILE_FORMAT_NUMPY
return self._outputTile(tile, format, x, y, z, pilImageAllowed,
numpyAllowed, applyStyle=allowStyle, **kwargs)
except IndexError:
raise TileSourceException('z layer does not exist')
except InvalidOperationTiffException as e:
raise TileSourceException(e.args[0])
except IOTiffException as e:
Expand Down Expand Up @@ -417,7 +567,11 @@ def getTileFromEmptyDirectory(self, x, y, z, **kwargs):
"""
basez = z
scale = 1
while self._tiffDirectories[z] is None:
dirlist = self._tiffDirectories
frame = int(kwargs.get('frame') or 0)
if frame > 0:
dirlist = self._frames[frame]['dirs']
while dirlist[z] is None:
scale *= 2
z += 1
while z - basez > self._maxSkippedLevels:
Expand Down
8 changes: 8 additions & 0 deletions sources/tiff/large_image_source_tiff/tiff_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import ctypes
import io
import json
import math
import numpy
import os
Expand Down Expand Up @@ -818,6 +819,13 @@ def parse_image_description(self, meta=None): # noqa
return
if not isinstance(meta, str):
meta = meta.decode('utf8', 'ignore')
try:
parsed = json.loads(meta)
if isinstance(parsed, dict):
self._description_record = parsed
return True
except Exception:
pass
try:
xml = ElementTree.fromstring(meta)
except Exception:
Expand Down
10 changes: 5 additions & 5 deletions test/test_cached_tiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,20 +130,20 @@ def countInit(*args, **kwargs):
self.delCount = 0
source = large_image.getTileSource(imagePath)
assert source is not None
assert self.initCount == 12
assert self.delCount < 12
assert self.initCount == 11
assert self.delCount < 11
# Create another source; we shouldn't init it again, as it should be
# cached.
source = large_image.getTileSource(imagePath)
assert source is not None
assert self.initCount == 12
assert self.delCount < 12
assert self.initCount == 11
assert self.delCount < 11
source = None
# Clear the cache to free references and force garbage collection
cachesClear()
gc.collect(2)
cachesClear()
assert self.delCount == 12
assert self.delCount == 11


class TestMemcachedCache(LargeImageCachedTilesTest):
Expand Down
39 changes: 38 additions & 1 deletion test/test_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,8 @@ def testConvertOMETif(tmpdir):
# Note: change this when we convert multi-frame files differently
large_image_converter.convert(imagePath, outputPath)
info = tifftools.read_tiff(outputPath)
assert len(info['ifds']) == 5
assert len(info['ifds']) == 3
assert len(info['ifds'][0]['tags'][tifftools.Tag.SubIFD.value]['ifds']) == 4


def testConvertTiffFloatPixels(tmpdir):
Expand Down Expand Up @@ -164,6 +165,32 @@ def testConvertFromLargeImage(tmpdir):
assert metadata['levels'] == 6


def testConvertFromMultiframeImage(tmpdir):
    """
    Convert the multiframe OME sample with default options and check the
    levels, frame count, and number of top-level ifds of the result.
    """
    srcPath = utilities.externaldata('data/sample.ome.tif.sha512')
    destPath = os.path.join(tmpdir, 'out.tiff')
    large_image_converter.convert(srcPath, destPath)
    converted = large_image_source_tiff.TiffFileTileSource(destPath)
    meta = converted.getMetadata()
    assert meta['levels'] == 5
    assert len(meta['frames']) == 3
    ifds = tifftools.read_tiff(destPath)['ifds']
    assert len(ifds) == 3


def testConvertFromMultiframeImageNoSubIFDS(tmpdir):
    """
    Convert the multiframe OME sample with subifds disabled and check the
    levels, frame count, and number of top-level ifds of the result.
    """
    srcPath = utilities.externaldata('data/sample.ome.tif.sha512')
    destPath = os.path.join(tmpdir, 'out.tiff')
    large_image_converter.convert(srcPath, destPath, subifds=False)
    converted = large_image_source_tiff.TiffFileTileSource(destPath)
    meta = converted.getMetadata()
    assert meta['levels'] == 5
    assert len(meta['frames']) == 3
    ifds = tifftools.read_tiff(destPath)['ifds']
    assert len(ifds) == 15


# Test main program

def testConverterMain(tmpdir):
testDir = os.path.dirname(os.path.realpath(__file__))
imagePath = os.path.join(testDir, 'test_files', 'yb10kx5k.png')
Expand Down Expand Up @@ -208,3 +235,13 @@ def testConverterMainFullStats(tmpdir):
info = tifftools.read_tiff(outputPath)
desc = json.loads(info['ifds'][0]['tags'][tifftools.Tag.ImageDescription.value]['data'])
assert 'psnr' in desc['large_image_converter']['conversion_stats']


def testConverterMainFullStatsWithWebp(tmpdir):
    """
    Run the converter main program with webp compression and full
    statistics, and check the psnr recorded in the image description.
    """
    srcPath = utilities.externaldata('data/d042-353.crop.small.float32.tif.sha512')
    destPath = os.path.join(tmpdir, 'out.tiff')
    main.main([srcPath, destPath, '--compression', 'webp', '--full-stats'])
    firstIfd = tifftools.read_tiff(destPath)['ifds'][0]
    desc = json.loads(firstIfd['tags'][tifftools.Tag.ImageDescription.value]['data'])
    stats = desc['large_image_converter']['conversion_stats']
    assert 'psnr' in stats
    assert stats['psnr'] < 60
36 changes: 36 additions & 0 deletions test/test_source_tiff.py
Original file line number Diff line number Diff line change
Expand Up @@ -659,3 +659,39 @@ def testFromTiffRGBJPEG():
source = large_image_source_tiff.TiffFileTileSource(imagePath)
tile = source.getSingleTile()
assert list(tile['tile'][0, 0]) == [243, 243, 243]


def testTilesFromMultiFrameTiff():
    """
    Open the multiframe OME sample directly with the tiff source and check
    its metadata, its tiles, and the first pixel of a single tile.
    """
    samplePath = utilities.externaldata('data/sample.ome.tif.sha512')
    source = large_image_source_tiff.TiffFileTileSource(samplePath)
    tileMetadata = source.getMetadata()

    expected = (
        ('tileWidth', 1024),
        ('tileHeight', 1024),
        ('sizeX', 2106),
        ('sizeY', 2016),
        ('levels', 3),
    )
    for key, value in expected:
        assert tileMetadata[key] == value
    frames = tileMetadata['frames']
    assert len(frames) == 3
    assert frames[1]['Frame'] == 1
    utilities.checkTilesZXY(source, tileMetadata)

    firstPixel = source.getSingleTile()['tile'][0, 0]
    assert list(firstPixel) == [7710]


def testTilesFromMultiFrameTiffWithSubIFD():
    """
    Open the sub-ifd variant of the multiframe OME sample with frame=1 and
    check its metadata, its tiles, and the first pixel of a single tile.
    """
    samplePath = utilities.externaldata('data/sample.subifd.ome.tif.sha512')
    source = large_image_source_tiff.TiffFileTileSource(samplePath, frame=1)
    tileMetadata = source.getMetadata()

    expected = (
        ('tileWidth', 256),
        ('tileHeight', 256),
        ('sizeX', 2106),
        ('sizeY', 2016),
        ('levels', 5),
    )
    for key, value in expected:
        assert tileMetadata[key] == value
    frames = tileMetadata['frames']
    assert len(frames) == 3
    assert frames[1]['Frame'] == 1
    utilities.checkTilesZXY(source, tileMetadata)

    firstPixel = source.getSingleTile()['tile'][0, 0]
    assert list(firstPixel) == [7710]
Loading

0 comments on commit 6f2bb16

Please sign in to comment.