From 0e359a222254f562baf4c84d0a8be33f7d279037 Mon Sep 17 00:00:00 2001 From: David Manthey Date: Tue, 1 Oct 2019 15:47:29 -0400 Subject: [PATCH] Add an openjpeg source using the glymur library. --- .circleci/release_pypi.sh | 7 +- large_image/tilesource/__init__.py | 5 +- large_image/tilesource/base.py | 35 ++- requirements-dev.txt | 1 + setup.py | 1 + .../large_image_source_openjpeg/__init__.py | 215 ++++++++++++++++++ .../girder_source.py | 30 +++ sources/openjpeg/setup.py | 62 +++++ .../large_image_source_tiff/tiff_reader.py | 35 +-- test/data/sample_image.jp2.sha512 | 1 + test/test_source_openjpeg.py | 19 ++ 11 files changed, 372 insertions(+), 39 deletions(-) create mode 100644 sources/openjpeg/large_image_source_openjpeg/__init__.py create mode 100644 sources/openjpeg/large_image_source_openjpeg/girder_source.py create mode 100644 sources/openjpeg/setup.py create mode 100644 test/data/sample_image.jp2.sha512 create mode 100644 test/test_source_openjpeg.py diff --git a/.circleci/release_pypi.sh b/.circleci/release_pypi.sh index 58fd07c66..837c7cbb4 100755 --- a/.circleci/release_pypi.sh +++ b/.circleci/release_pypi.sh @@ -21,10 +21,13 @@ twine upload --verbose dist/* cd "$ROOTPATH/sources/mapnik" python setup.py sdist twine upload --verbose dist/* -cd "$ROOTPATH/sources/openslide" +cd "$ROOTPATH/sources/ometiff" python setup.py sdist twine upload --verbose dist/* -cd "$ROOTPATH/sources/ometiff" +cd "$ROOTPATH/sources/openjpeg" +python setup.py sdist +twine upload --verbose dist/* +cd "$ROOTPATH/sources/openslide" python setup.py sdist twine upload --verbose dist/* cd "$ROOTPATH/sources/pil" diff --git a/large_image/tilesource/__init__.py b/large_image/tilesource/__init__.py index a1f7401e2..bbc5ba42e 100644 --- a/large_image/tilesource/__init__.py +++ b/large_image/tilesource/__init__.py @@ -4,7 +4,8 @@ from pkg_resources import iter_entry_points from .base import TileSource, FileTileSource, TileOutputMimeTypes, \ - TILE_FORMAT_IMAGE, TILE_FORMAT_PIL, 
TILE_FORMAT_NUMPY, nearPowerOfTwo + TILE_FORMAT_IMAGE, TILE_FORMAT_PIL, TILE_FORMAT_NUMPY, nearPowerOfTwo, \ + etreeToDict from ..exceptions import TileGeneralException, TileSourceException, TileSourceAssetstoreException from .. import config from ..constants import SourcePriority @@ -82,5 +83,5 @@ def getTileSource(*args, **kwargs): 'TileSource', 'FileTileSource', 'exceptions', 'TileGeneralException', 'TileSourceException', 'TileSourceAssetstoreException', 'TileOutputMimeTypes', 'TILE_FORMAT_IMAGE', 'TILE_FORMAT_PIL', 'TILE_FORMAT_NUMPY', - 'AvailableTileSources', 'getTileSource', 'nearPowerOfTwo', + 'AvailableTileSources', 'getTileSource', 'nearPowerOfTwo', 'etreeToDict', ] diff --git a/large_image/tilesource/base.py b/large_image/tilesource/base.py index ea84fcbe4..6e5057fa3 100644 --- a/large_image/tilesource/base.py +++ b/large_image/tilesource/base.py @@ -7,6 +7,7 @@ import PIL.ImageColor import PIL.ImageDraw import six +from collections import defaultdict from six import BytesIO from ..cache_util import getTileCache, strhash, methodcache @@ -95,6 +96,37 @@ def _letterboxImage(image, width, height, fill): return result +def etreeToDict(t): + """ + Convert an xml etree to a nested dictionary without schema names in the + keys. + + @param t: an etree. + @returns: a python dictionary with the results. 
+ """ + # Remove schema + tag = t.tag.split('}', 1)[1] if t.tag.startswith('{') else t.tag + d = {tag: {}} + children = list(t) + if children: + entries = defaultdict(list) + for entry in map(etreeToDict, children): + for k, v in six.iteritems(entry): + entries[k].append(v) + d = {tag: {k: v[0] if len(v) == 1 else v + for k, v in six.iteritems(entries)}} + + if t.attrib: + d[tag].update({(k.split('}', 1)[1] if k.startswith('{') else k): v + for k, v in six.iteritems(t.attrib)}) + text = (t.text or '').strip() + if text and len(d[tag]): + d[tag]['text'] = text + elif text: + d[tag] = text + return d + + def nearPowerOfTwo(val1, val2, tolerance=0.02): """ Check if two values are different by nearly a power of two. @@ -979,8 +1011,7 @@ def _pilFormatMatches(self, image, match=True, **kwargs): # compatibility could be an issue. return False - def _outputTile(self, tile, tileEncoding, x, y, z, pilImageAllowed=False, - **kwargs): + def _outputTile(self, tile, tileEncoding, x, y, z, pilImageAllowed=False, **kwargs): """ Convert a tile from a PIL image or image in memory to the desired encoding. 
diff --git a/requirements-dev.txt b/requirements-dev.txt index 568e549d7..97b18a7d2 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -3,6 +3,7 @@ girder>=3.0.3 girder-jobs>=3.0.3 -e sources/dummy -e sources/mapnik +-e sources/openjpeg -e sources/openslide -e sources/pil -e sources/test diff --git a/setup.py b/setup.py index 9157fb2d7..e119a9c27 100644 --- a/setup.py +++ b/setup.py @@ -16,6 +16,7 @@ 'dummy': ['large-image-source-dummy'], 'mapnik': ['large-image-source-mapnik'], 'ometiff': ['large-image-source-ometiff'], + 'openjpeg': ['large-image-source-openjpeg'], 'openslide': ['large-image-source-openslide'], 'pil': ['large-image-source-pil'], 'tiff': ['large-image-source-tiff'], diff --git a/sources/openjpeg/large_image_source_openjpeg/__init__.py b/sources/openjpeg/large_image_source_openjpeg/__init__.py new file mode 100644 index 000000000..93d393911 --- /dev/null +++ b/sources/openjpeg/large_image_source_openjpeg/__init__.py @@ -0,0 +1,215 @@ +# -*- coding: utf-8 -*- + +############################################################################## +# Copyright Kitware Inc. +# +# Licensed under the Apache License, Version 2.0 ( the "License" ); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+############################################################################## + +import glymur +import math +import PIL.Image +import six +import threading +import warnings + +from six import BytesIO +from xml.etree import cElementTree + +from pkg_resources import DistributionNotFound, get_distribution + +from large_image.cache_util import LruCacheMetaclass, methodcache +from large_image.constants import SourcePriority, TILE_FORMAT_PIL +from large_image.exceptions import TileSourceException +from large_image.tilesource import FileTileSource, etreeToDict + + +try: + __version__ = get_distribution(__name__).version +except DistributionNotFound: + # package is not installed + pass + + +warnings.filterwarnings('ignore', category=UserWarning, module='glymur') + + +@six.add_metaclass(LruCacheMetaclass) +class OpenjpegFileTileSource(FileTileSource): + """ + Provides tile access to JPEG 2000 files and other files the openjpeg + library can read. + """ + + cacheName = 'tilesource' + name = 'openjpegfile' + extensions = { + None: SourcePriority.MEDIUM, + 'jp2': SourcePriority.PREFERRED, + 'jpf': SourcePriority.PREFERRED, + 'j2k': SourcePriority.PREFERRED, + 'jpx': SourcePriority.PREFERRED, + } + mimeTypes = { + None: SourcePriority.FALLBACK, + 'image/jp2': SourcePriority.PREFERRED, + 'image/jpx': SourcePriority.PREFERRED, + } + + _boxToTag = { + # In the few samples I've seen, both of these appear to be macro images + b'mig ': 'macro', + b'mag ': 'label', + # This contains a largish image + # b'psi ': 'other', + } + _xmlTag = b'mxl ' + + def __init__(self, path, **kwargs): + """ + Initialize the tile class. See the base class for other available + parameters. + + :param path: a filesystem path for the tile source.
+ """ + super(OpenjpegFileTileSource, self).__init__(path, **kwargs) + + largeImagePath = self._getLargeImagePath() + + self._largeImagePath = largeImagePath + self._pixelInfo = {} + self._openjpegLock = threading.RLock() + try: + self._openjpeg = glymur.Jp2k(largeImagePath) + except glymur.jp2box.InvalidJp2kError: + raise TileSourceException('File cannot be opened via Glymur and OpenJPEG.') + try: + self.sizeY, self.sizeX = self._openjpeg.shape[:2] + except IndexError: + raise TileSourceException('File cannot be opened via Glymur and OpenJPEG.') + self.levels = self._openjpeg.codestream.segment[2].num_res + 1 + self.tileWidth = self.tileHeight = 2 ** int(math.ceil(max( + math.log(float(self.sizeX)) / math.log(2) - self.levels + 1, + math.log(float(self.sizeY)) / math.log(2) - self.levels + 1))) + # read associated images and metadata from boxes + self._associatedImages = {} + for box in self._openjpeg.box: + if box.box_id == self._xmlTag or box.box_id in self._boxToTag: + data = self._readbox(box) + if data is None: + continue + if box.box_id == self._xmlTag: + self._parseMetadataXml(data) + continue + try: + self._associatedImages[self._boxToTag[box.box_id]] = PIL.Image.open( + BytesIO(data)) + except Exception: + pass + if box.box_id == 'jp2c': + for segment in box.codestream.segment: + if segment.marker_id == 'CME' and hasattr(segment, 'ccme'): + self._parseMetadataXml(segment.ccme) + + def getNativeMagnification(self): + """ + Get the magnification at a particular level. + + :return: magnification, width of a pixel in mm, height of a pixel in mm. 
+ """ + mm_x = self._pixelInfo.get('mm_x') + mm_y = self._pixelInfo.get('mm_y') + # Estimate the magnification if we don't have a direct value + mag = self._pixelInfo.get('magnification') or 0.01 / mm_x if mm_x else None + return { + 'magnification': mag, + 'mm_x': mm_x, + 'mm_y': mm_y, + } + + def _parseMetadataXml(self, meta): + if not isinstance(meta, six.string_types): + meta = meta.decode('utf8', 'ignore') + try: + xml = cElementTree.fromstring(meta) + except Exception: + return + self._description_xml = etreeToDict(xml) + xml = self._description_xml + try: + # Optrascan metadata + scanDetails = xml.get('ScanInfo', xml.get('EncodeInfo'))['ScanDetails'] + mag = float(scanDetails['Magnification']) + # In microns; convert to mm + scale = float(scanDetails['PixelResolution']) * 1e-3 + self._pixelInfo = { + 'magnification': mag, + 'mm_x': scale, + 'mm_y': scale, + } + except Exception: + pass + + def _getAssociatedImage(self, imageKey): + """ + Get an associated image in PIL format. + + :param imageKey: the key of the associated image. + :return: the image in PIL format or None. + """ + return self._associatedImages.get(imageKey) + + def getAssociatedImagesList(self): + """ + Return a list of associated images. + + :return: the list of image keys. 
+ """ + return list(self._associatedImages.keys()) + + def _readbox(self, box): + if box.length > 16 * 1024 * 1024: + return + try: + fp = open(self._largeImagePath, 'rb') + headerLength = 16 + fp.seek(box.offset + headerLength) + data = fp.read(box.length - headerLength) + return data + except Exception: + pass + + @methodcache() + def getTile(self, x, y, z, pilImageAllowed=False, **kwargs): + if z < 0 or z >= self.levels: + raise TileSourceException('z layer does not exist') + step = 2 ** (self.levels - 1 - z) + x0 = x * step * self.tileWidth + x1 = min((x + 1) * step * self.tileWidth, self.sizeX) + y0 = y * step * self.tileHeight + y1 = min((y + 1) * step * self.tileHeight, self.sizeY) + if x < 0 or x0 >= self.sizeX: + raise TileSourceException('x is outside layer') + if y < 0 or y0 >= self.sizeY: + raise TileSourceException('y is outside layer') + with self._openjpegLock: + tile = self._openjpeg[y0:y1:step, x0:x1:step] + mode = 'L' + if len(tile.shape) == 3: + mode = ['L', 'LA', 'RGB', 'RGBA'][tile.shape[2] - 1] + tile = PIL.Image.frombytes(mode, (tile.shape[1], tile.shape[0]), tile) + if tile.size != (self.tileWidth, self.tileHeight): + wrap = PIL.Image.new(mode, (self.tileWidth, self.tileHeight)) + wrap.paste(tile, (0, 0)) + tile = wrap + return self._outputTile(tile, TILE_FORMAT_PIL, x, y, z, pilImageAllowed, **kwargs) diff --git a/sources/openjpeg/large_image_source_openjpeg/girder_source.py b/sources/openjpeg/large_image_source_openjpeg/girder_source.py new file mode 100644 index 000000000..7d79e4cdb --- /dev/null +++ b/sources/openjpeg/large_image_source_openjpeg/girder_source.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- + +############################################################################## +# Copyright Kitware Inc. +# +# Licensed under the Apache License, Version 2.0 ( the "License" ); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################## + +from girder_large_image.girder_tilesource import GirderTileSource +from . import OpenjpegFileTileSource + + +class OpenjpegGirderTileSource(OpenjpegFileTileSource, GirderTileSource): + """ + Provides tile access to Girder items with a JPEG 2000 file or other files + that the openjpeg library can read. + """ + + cacheName = 'tilesource' + name = 'openjpeg' diff --git a/sources/openjpeg/setup.py b/sources/openjpeg/setup.py new file mode 100644 index 000000000..a1fb16e27 --- /dev/null +++ b/sources/openjpeg/setup.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import os +from setuptools import setup, find_packages + + +def prerelease_local_scheme(version): + """ + Return local scheme version unless building on master in CircleCI. + + This function returns the local scheme version number + (e.g. 0.0.0.dev+g) unless building on CircleCI for a + pre-release in which case it ignores the hash and produces a + PEP440 compliant pre-release version number (e.g. 0.0.0.dev).
+ """ + from setuptools_scm.version import get_local_node_and_date + + if os.getenv('CIRCLE_BRANCH') in ('master', ): + return '' + else: + return get_local_node_and_date(version) + + +setup( + name='large-image-source-openjpeg', + use_scm_version={'root': '../..', 'local_scheme': prerelease_local_scheme}, + setup_requires=['setuptools-scm'], + description='An Openjpeg tilesource for large_image', + long_description='See the large-image package for more details.', + author='Kitware, Inc.', + author_email='kitware@kitware.com', + classifiers=[ + 'Development Status :: 5 - Production/Stable', + 'License :: OSI Approved :: Apache Software License', + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7' + ], + install_requires=[ + 'large-image>=1.0.0', + 'glymur>=0.8.18', + ], + extras_require={ + 'girder': 'girder-large-image>=1.0.0', + }, + license='Apache Software License 2.0', + keywords='large_image, tile source', + packages=find_packages(exclude=['test', 'test.*']), + url='https://github.com/girder/large_image', + entry_points={ + 'large_image.source': [ + 'openjpeg = large_image_source_openjpeg:OpenjpegFileTileSource' + ], + 'girder_large_image.source': [ + 'openjpeg = large_image_source_openjpeg.girder_source:OpenjpegGirderTileSource' + ] + }, +) diff --git a/sources/tiff/large_image_source_tiff/tiff_reader.py b/sources/tiff/large_image_source_tiff/tiff_reader.py index 121ac2886..f87821f20 100644 --- a/sources/tiff/large_image_source_tiff/tiff_reader.py +++ b/sources/tiff/large_image_source_tiff/tiff_reader.py @@ -21,12 +21,12 @@ import os import six -from collections import defaultdict from functools import partial from xml.etree import cElementTree -from large_image.cache_util import LRUCache, strhash, methodcache from large_image import config +from 
large_image.cache_util import LRUCache, strhash, methodcache +from large_image.tilesource import etreeToDict try: from libtiff import libtiff_ctypes @@ -46,37 +46,6 @@ libtiff_ctypes.suppress_warnings() -def etreeToDict(t): - """ - Convert an xml etree to a nested dictionary without schema names in the - keys. - - @param t: an etree. - @returns: a python dictionary with the results. - """ - # Remove schema - tag = t.tag.split('}', 1)[1] if t.tag.startswith('{') else t.tag - d = {tag: {}} - children = list(t) - if children: - entries = defaultdict(list) - for entry in map(etreeToDict, children): - for k, v in six.iteritems(entry): - entries[k].append(v) - d = {tag: {k: v[0] if len(v) == 1 else v - for k, v in six.iteritems(entries)}} - - if t.attrib: - d[tag].update({(k.split('}', 1)[1] if k.startswith('{') else k): v - for k, v in six.iteritems(t.attrib)}) - text = (t.text or '').strip() - if text and len(d[tag]): - d[tag]['text'] = text - elif text: - d[tag] = text - return d - - def patchLibtiff(): libtiff_ctypes.libtiff.TIFFFieldWithTag.restype = \ ctypes.POINTER(libtiff_ctypes.TIFFFieldInfo) diff --git a/test/data/sample_image.jp2.sha512 b/test/data/sample_image.jp2.sha512 new file mode 100644 index 000000000..973a1f556 --- /dev/null +++ b/test/data/sample_image.jp2.sha512 @@ -0,0 +1 @@ +82f1dc64435ab959532ea845c93c28a1e05ed85999300bccf0e7196c91652d014d2a571c324d83279da4cabcd42cf4ed6d732e304ffa71e8b9f7ae3a1390f4c5 diff --git a/test/test_source_openjpeg.py b/test/test_source_openjpeg.py new file mode 100644 index 000000000..599e43322 --- /dev/null +++ b/test/test_source_openjpeg.py @@ -0,0 +1,19 @@ +# -*- coding: utf-8 -*- + +import large_image_source_openjpeg + +from . 
import utilities + + +def testTilesFromOpenJPEG(): + imagePath = utilities.externaldata('data/sample_image.jp2.sha512') + source = large_image_source_openjpeg.OpenjpegFileTileSource(imagePath) + tileMetadata = source.getMetadata() + + assert tileMetadata['tileWidth'] == 256 + assert tileMetadata['tileHeight'] == 256 + assert tileMetadata['sizeX'] == 4500 + assert tileMetadata['sizeY'] == 5800 + assert tileMetadata['levels'] == 6 + assert tileMetadata['magnification'] == 40 + utilities.checkTilesZXY(source, tileMetadata)