From ee855a6ac263e4b99902b74c29000414a01a58a3 Mon Sep 17 00:00:00 2001 From: David Manthey Date: Mon, 15 Jun 2020 10:21:16 -0400 Subject: [PATCH] Refactor image conversion task. This splits the conversion process so that it is separate from the Girder worker task. The new packages are now in the utilities directory. It adds explicit support for processing geotiffs. It adds support for reprocessing files that could already be read by large_image, converting them into a format that is preferred for reading tiles. There are now options for lossless or lossy compression, plus more options for compression techniques. By default, if a file is not known to be compressed in a lossy manner, the resulting converted file will be lossless. The converter doesn't use subprocess (it uses the Python interfaces for vips and GDAL); this keeps the memory footprint smaller. This is part of a series of PRs refactoring conversion. Future PRs will add support for multi-frame images, for converting files that can be read with large_image but not by vips or GDAL, and for adding jp2k tile support. 
--- .circleci/make_wheels.sh | 4 +- .circleci/release_pypi.sh | 5 +- .dockerignore | 2 +- CHANGELOG.md | 6 +- README.rst | 4 +- docs/make_docs.sh | 3 +- docs/source/index.rst | 1 + girder/girder_large_image/__init__.py | 2 + .../girder_large_image/models/image_item.py | 22 +- girder/girder_large_image/rest/tiles.py | 17 +- girder/test_girder/test_tiles_rest.py | 27 +- requirements-dev.txt | 9 +- requirements-worker.txt | 25 + setup.cfg | 4 +- .../large_image_source_tiff/tiff_reader.py | 35 +- tasks/large_image_tasks/tasks.py | 51 -- .../d042-353.crop.small.float32.tif.sha512 | 1 + test/test_converter.py | 156 ++++++ utilities/converter/README.rst | 77 +++ .../large_image_converter/__init__.py | 498 ++++++++++++++++++ .../large_image_converter/__main__.py | 86 +++ utilities/converter/setup.py | 65 +++ {tasks => utilities/tasks}/README.rst | 0 .../tasks}/large_image_tasks/__init__.py | 0 utilities/tasks/large_image_tasks/tasks.py | 71 +++ {tasks => utilities/tasks}/setup.py | 8 +- 26 files changed, 1088 insertions(+), 91 deletions(-) create mode 100644 requirements-worker.txt delete mode 100644 tasks/large_image_tasks/tasks.py create mode 100755 test/data/d042-353.crop.small.float32.tif.sha512 create mode 100644 test/test_converter.py create mode 100644 utilities/converter/README.rst create mode 100644 utilities/converter/large_image_converter/__init__.py create mode 100644 utilities/converter/large_image_converter/__main__.py create mode 100644 utilities/converter/setup.py rename {tasks => utilities/tasks}/README.rst (100%) rename {tasks => utilities/tasks}/large_image_tasks/__init__.py (100%) create mode 100644 utilities/tasks/large_image_tasks/tasks.py rename {tasks => utilities/tasks}/setup.py (91%) diff --git a/.circleci/make_wheels.sh b/.circleci/make_wheels.sh index 7e4b062e8..de5c39d5f 100755 --- a/.circleci/make_wheels.sh +++ b/.circleci/make_wheels.sh @@ -16,7 +16,9 @@ cd "$ROOTPATH/girder" pip wheel . 
--no-deps -w ~/wheels && rm -rf build cd "$ROOTPATH/girder_annotation" pip wheel . --no-deps -w ~/wheels && rm -rf build -cd "$ROOTPATH/tasks" +cd "$ROOTPATH/utilities/converter" +pip wheel . --no-deps -w ~/wheels && rm -rf build +cd "$ROOTPATH/utilities/tasks" pip wheel . --no-deps -w ~/wheels && rm -rf build cd "$ROOTPATH/sources/bioformats" pip wheel . --no-deps -w ~/wheels && rm -rf build diff --git a/.circleci/release_pypi.sh b/.circleci/release_pypi.sh index 63b991811..b672ace82 100755 --- a/.circleci/release_pypi.sh +++ b/.circleci/release_pypi.sh @@ -12,7 +12,10 @@ twine upload --verbose dist/* cd "$ROOTPATH/girder_annotation" python setup.py sdist twine upload --verbose dist/* -cd "$ROOTPATH/tasks" +cd "$ROOTPATH/utilities/converter" +python setup.py sdist +twine upload --verbose dist/* +cd "$ROOTPATH/utilities/tasks" python setup.py sdist twine upload --verbose dist/* cd "$ROOTPATH/sources/bioformats" diff --git a/.dockerignore b/.dockerignore index c3476c86e..ba3d153be 100644 --- a/.dockerignore +++ b/.dockerignore @@ -5,5 +5,5 @@ girder girder_annotation large_image sources -tasks +utilities examples diff --git a/CHANGELOG.md b/CHANGELOG.md index c54f3ed6f..b7eb3bd90 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ ## Unreleased +### Changes +- The image conversion task has been split into two packages, large_image_converter and large_image_tasks. The tasks module is used with Girder and Girder Worker for converting images and depends on the converter package. The converter package can be used as a stand-alone command line tool (#518) + ### Features - Added a `canRead` method to the core module (#512) @@ -11,10 +14,9 @@ - The openjpeg tile source can decode with parallelism (#511) - Geospatial tile sources are preferred for geospatial files (#512) - Support decoding JP2k compressed tiles in the tiff tile source (#514) ->>>>>>> For the tiff tile source, allow decoding jp2k tiles. 
### Bug Fixes -- Harden updates of the item view after making a large image (#508) +- Harden updates of the item view after making a large image (#508, #515) - Tiles in an unexpected color mode weren't consistently adjusted (#510) ## Version 1.3.2 diff --git a/README.rst b/README.rst index 6ee9ab2ef..b9d111c9d 100644 --- a/README.rst +++ b/README.rst @@ -20,12 +20,14 @@ Large Image consists of several Python modules designed to work together. These - ``large-image``: The core module. You can specify extras_require of the name of any tile source included with this repository, ``sources`` for all of the tile sources in the repository, ``memcached`` for using memcached for tile caching, or ``all`` for all of the tile sources and memcached. +- ``large-image-converter``: A utility for using pyvips and other libraries to convert images into pyramidal tiff files that can be read efficiently by large_image. + - ``girder-large-image``: Large Image as a Girder_ 3.x plugin. You can specify extras_require of ``tasks`` to install a Girder Worker task that can convert otherwise unreadable images to pyramidal tiff files. - ``girder-large-image-annotation``: Annotations for large images as a Girder_ 3.x plugin. -- ``large-image-tasks``: A utility for using pyvips to convert images into pyramidal tiff files that can be read efficiently by large_image. This can be used by itself or with Girder Worker. +- ``large-image-tasks``: A utility for running the converter via Girder Worker. 
- Tile sources: diff --git a/docs/make_docs.sh b/docs/make_docs.sh index a3666ff8d..6296681e5 100755 --- a/docs/make_docs.sh +++ b/docs/make_docs.sh @@ -21,7 +21,8 @@ sphinx-apidoc -f -o source/large_image_source_openslide ../sources/openslide/lar sphinx-apidoc -f -o source/large_image_source_pil ../sources/pil/large_image_source_pil sphinx-apidoc -f -o source/large_image_source_test ../sources/test/large_image_source_test sphinx-apidoc -f -o source/large_image_source_tiff ../sources/tiff/large_image_source_tiff -sphinx-apidoc -f -o source/large_image_tasks ../tasks/large_image_tasks +sphinx-apidoc -f -o source/large_image_converter ../utilities/converter/large_image_converter +sphinx-apidoc -f -o source/large_image_tasks ../utilities/tasks/large_image_tasks sphinx-apidoc -f -o source/girder_large_image ../girder/girder_large_image sphinx-apidoc -f -o source/girder_large_image_annotation ../girder_annotation/girder_large_image_annotation diff --git a/docs/source/index.rst b/docs/source/index.rst index 53ac130cb..8fd619598 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -26,6 +26,7 @@ large_image also works as a Girder plugin with optional annotation support. 
large_image_source_pil/modules large_image_source_test/modules large_image_source_tiff/modules + large_image_converter/modules large_image_tasks/modules girder_large_image/modules girder_large_image_annotation/modules diff --git a/girder/girder_large_image/__init__.py b/girder/girder_large_image/__init__.py index 917bfe0a2..fa8e01e19 100644 --- a/girder/girder_large_image/__init__.py +++ b/girder/girder_large_image/__init__.py @@ -77,6 +77,8 @@ def _postUpload(event): del item['largeImage']['expected'] item['largeImage']['fileId'] = fileObj['_id'] item['largeImage']['sourceName'] = 'tiff' + if fileObj['name'].endswith('.geo.tiff'): + item['largeImage']['sourceName'] = 'gdal' Item().save(item) diff --git a/girder/girder_large_image/models/image_item.py b/girder/girder_large_image/models/image_item.py index b6fcd0923..744533bb4 100644 --- a/girder/girder_large_image/models/image_item.py +++ b/girder/girder_large_image/models/image_item.py @@ -17,10 +17,8 @@ ############################################################################# import json -import os import pymongo import six -import time from girder import logger from girder.constants import SortDir @@ -54,11 +52,10 @@ def initialize(self): ], {})]) def createImageItem(self, item, fileObj, user=None, token=None, - createJob=True, notify=False): + createJob=True, notify=False, **kwargs): # Using setdefault ensures that 'largeImage' is in the item if 'fileId' in item.setdefault('largeImage', {}): - # TODO: automatically delete the existing large file - raise TileGeneralException('Item already has a largeImage set.') + raise TileGeneralException('Item already has largeImage set.') if fileObj['itemId'] != item['_id']: raise TileGeneralException('The provided file must be in the ' 'provided item.') @@ -75,13 +72,13 @@ def createImageItem(self, item, fileObj, user=None, token=None, sourceName = girder_tilesource.getGirderTileSourceName(item, fileObj) if sourceName: item['largeImage']['sourceName'] = sourceName - if 
not sourceName: + if not sourceName or createJob == 'always': if not createJob: raise TileGeneralException( 'A job must be used to generate a largeImage.') # No source was successful del item['largeImage']['fileId'] - job = self._createLargeImageJob(item, fileObj, user, token) + job = self._createLargeImageJob(item, fileObj, user, token, **kwargs) item['largeImage']['expected'] = True item['largeImage']['notify'] = notify item['largeImage']['originalId'] = fileObj['_id'] @@ -89,16 +86,12 @@ def createImageItem(self, item, fileObj, user=None, token=None, self.save(item) return job - def _createLargeImageJob(self, item, fileObj, user, token): + def _createLargeImageJob(self, item, fileObj, user, token, **kwargs): import large_image_tasks.tasks from girder_worker_utils.transforms.girder_io import GirderUploadToItem from girder_worker_utils.transforms.contrib.girder_io import GirderFileIdAllowDirect from girder_worker_utils.transforms.common import TemporaryDirectory - outputName = os.path.splitext(fileObj['name'])[0] + '.tiff' - if outputName == fileObj['name']: - outputName = (os.path.splitext(fileObj['name'])[0] + '.' + - time.strftime('%Y%m%d-%H%M%S') + '.tiff') try: localPath = File().getLocalFilePath(fileObj) except (FilePathException, AttributeError): @@ -111,11 +104,12 @@ def _createLargeImageJob(self, item, fileObj, user, token): 'task': 'createImageItem', }}, inputFile=GirderFileIdAllowDirect(str(fileObj['_id']), fileObj['name'], localPath), - outputName=outputName, + inputName=fileObj['name'], outputDir=TemporaryDirectory(), girder_result_hooks=[ GirderUploadToItem(str(item['_id']), False), - ] + ], + **kwargs, ) return job.job diff --git a/girder/girder_large_image/rest/tiles.py b/girder/girder_large_image/rest/tiles.py index c671fbc39..4de8fb156 100644 --- a/girder/girder_large_image/rest/tiles.py +++ b/girder/girder_large_image/rest/tiles.py @@ -143,9 +143,22 @@ def __init__(self, apiRoot): .param('fileId', 'The ID of the source file containing the image. 
' 'Required if there is more than one file in the item.', required=False) + .param('force', 'Always use a job to create the large image.', + dataType='boolean', default=False, required=False) .param('notify', 'If a job is required to create the large image, ' 'a nofication can be sent when it is complete.', dataType='boolean', default=True, required=False) + .param('tileSize', 'Tile size', dataType='int', default=256, + required=False) + .param('compression', 'Internal compression format', required=False, + enum=['none', 'jpeg', 'deflate', 'lzw', 'zstd', 'packbits', 'webp']) + .param('quality', 'JPEG compression quality where 0 is small and 100 ' + 'is highest quality', dataType='int', default=90, + required=False) + .param('level', 'Compression level for deflate (zip) or zstd.', + dataType='int', required=False) + .param('predictor', 'Predictor for deflate (zip) or lzw.', + required=False, enum=['none', 'horizontal', 'float', 'yes']) ) @access.user @loadmodel(model='item', map={'itemId': 'item'}, level=AccessType.WRITE) @@ -164,6 +177,7 @@ def createTiles(self, item, params): try: return self.imageItemModel.createImageItem( item, largeImageFile, user, token, + createJob='always' if self.boolParam('force', params, default=False) else True, notify=self.boolParam('notify', params, default=True)) except TileGeneralException as e: raise RestException(e.args[0]) @@ -671,7 +685,8 @@ def getTilesThumbnail(self, item, params): enum=['0', '1', '2'], dataType='int', default='0') .param('tiffCompression', 'Compression method when storing a TIFF ' 'image', required=False, - enum=['raw', 'tiff_lzw', 'jpeg', 'tiff_adobe_deflate']) + enum=['none', 'raw', 'lzw', 'tiff_lzw', 'jpeg', 'deflate', + 'tiff_adobe_deflate']) .param('style', 'JSON-encoded style string', required=False) .param('resample', 'If false, an existing level of the image is used ' 'for the histogram. 
If true, the internal values are ' diff --git a/girder/test_girder/test_tiles_rest.py b/girder/test_girder/test_tiles_rest.py index 992c82002..99d494770 100644 --- a/girder/test_girder/test_tiles_rest.py +++ b/girder/test_girder/test_tiles_rest.py @@ -138,7 +138,7 @@ def _createTestTiles(server, admin, params=None, info=None, error=None): return infoDict -def _postTileViaHttp(server, admin, itemId, fileId, jobAction=None): +def _postTileViaHttp(server, admin, itemId, fileId, jobAction=None, data=None): """ When we know we need to process a job, we have to use an actual http request rather than the normal simulated request to cherrypy. This is @@ -148,6 +148,9 @@ def _postTileViaHttp(server, admin, itemId, fileId, jobAction=None): :param itemId: the id of the item with the file to process. :param fileId: the id of the file that should be processed. :param jobAction: if 'delete', delete the job immediately. + :param data: if not None, pass this as the data to the POST request. If + specified, fileId is ignored (pass as part of the data dictionary if + it is required). :returns: metadata from the tile if the conversion was successful, False if it converted but didn't result in useable tiles, and None if it failed. 
@@ -158,14 +161,14 @@ def _postTileViaHttp(server, admin, itemId, fileId, jobAction=None): } req = requests.post('http://127.0.0.1:%d/api/v1/item/%s/tiles' % ( server.boundPort, itemId), headers=headers, - data={'fileId': fileId}) + data={'fileId': fileId} if data is None else data) assert req.status_code == 200 # If we ask to create the item again right away, we should be told that # either there is already a job running or the item has already been # added req = requests.post('http://127.0.0.1:%d/api/v1/item/%s/tiles' % ( server.boundPort, itemId), headers=headers, - data={'fileId': fileId}) + data={'fileId': fileId} if data is None else data) assert req.status_code == 400 assert ('Item already has' in req.json()['message'] or 'Item is scheduled' in req.json()['message']) @@ -1159,3 +1162,21 @@ def testTilesFromMultipleDotName(boundServer, admin, fsAssetstore, girderWorker) assert tileMetadata['mm_x'] is None assert tileMetadata['mm_y'] is None _testTilesZXY(boundServer, admin, itemId, tileMetadata) + + +@pytest.mark.usefixtures('unbindLargeImage') # noqa +@pytest.mark.usefixtures('girderWorker') # noqa +@pytest.mark.plugin('large_image') +def testTilesForcedConversion(boundServer, admin, fsAssetstore, girderWorker): # noqa + file = utilities.uploadExternalFile( + 'data/landcover_sample_1000.tif.sha512', admin, fsAssetstore) + itemId = str(file['itemId']) + fileId = str(file['_id']) + # We should already have tile information. 
Ask to delete it so we can + # force convert it + boundServer.request(path='/item/%s/tiles' % itemId, method='DELETE', user=admin) + # Ask to do a forced conversion + tileMetadata = _postTileViaHttp(boundServer, admin, itemId, None, data={'force': True}) + assert tileMetadata['levels'] == 3 + item = Item().load(itemId, force=True) + assert item['largeImage']['fileId'] != fileId diff --git a/requirements-dev.txt b/requirements-dev.txt index 106828f0c..348f362a4 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -15,11 +15,16 @@ girder-jobs>=3.0.3 -e sources/ometiff # must be after source/gdal -e sources/mapnik -# Get both the girder and worker dependencies so tasks can be used stand-alone --e tasks[girder,worker] +# Don't specify extras for the converter; they are already present above +-e utilities/converter +# Girder and worker dependencies are already installed above +-e utilities/tasks -e girder/. -e girder_annotation/. +# Extras from main setup.py +pylibmc>=1.5.1 + # External dependencies pip>=9 tox diff --git a/requirements-worker.txt b/requirements-worker.txt new file mode 100644 index 000000000..12b6e6337 --- /dev/null +++ b/requirements-worker.txt @@ -0,0 +1,25 @@ +-e sources/bioformats +-e sources/dummy +-e sources/gdal +-e sources/nd2 +-e sources/openjpeg +-e sources/openslide +-e sources/pil +-e sources/test +-e sources/tiff +# must be after sources/tiff +-e sources/ometiff +# must be after source/gdal +-e sources/mapnik +# Don't specify extras for the converter; they are already present above +-e utilities/converter +# Worker dependencies are already installed above +-e utilities/tasks + +# Extras from main setup.py +pylibmc>=1.5.1 + +# External dependencies +pip>=9 + + diff --git a/setup.cfg b/setup.cfg index 3f653465b..30504fd52 100644 --- a/setup.cfg +++ b/setup.cfg @@ -29,7 +29,7 @@ source = ../girder/girder_large_image ../girder_annotation/girder_large_image_annotation ../sources/ - ../tasks/ + ../utilities/ ../examples/ 
../build/tox/*/lib/*/site-packages/large_image/ @@ -42,7 +42,7 @@ include = girder/girder_large_image/* girder_annotation/girder_large_image_annotation/* sources/* - tasks/* + utilities/* examples/* build/tox/*/lib/*/site-packages/*large_image*/* parallel = True diff --git a/sources/tiff/large_image_source_tiff/tiff_reader.py b/sources/tiff/large_image_source_tiff/tiff_reader.py index af5a49fc4..0f5de341e 100644 --- a/sources/tiff/large_image_source_tiff/tiff_reader.py +++ b/sources/tiff/large_image_source_tiff/tiff_reader.py @@ -222,13 +222,15 @@ def _validate(self): # noqa raise ValidationTiffException( 'Only RGB and greyscale TIFF files are supported') - if self._tiffInfo.get('bitspersample') not in (8, 16): + if self._tiffInfo.get('bitspersample') not in (8, 16, 32, 64): raise ValidationTiffException( 'Only 8 and 16 bits-per-sample TIFF files are supported') if self._tiffInfo.get('sampleformat') not in { None, # default is still SAMPLEFORMAT_UINT - libtiff_ctypes.SAMPLEFORMAT_UINT}: + libtiff_ctypes.SAMPLEFORMAT_UINT, + libtiff_ctypes.SAMPLEFORMAT_INT, + libtiff_ctypes.SAMPLEFORMAT_IEEEFP}: raise ValidationTiffException( 'Only unsigned int sampled TIFF files are supported') @@ -615,10 +617,27 @@ def _getUncompressedTile(self, tileNum): libtiff_ctypes.ORIENTATION_RIGHTBOT, libtiff_ctypes.ORIENTATION_LEFTBOT}: tw, th = th, tw - image = numpy.ctypeslib.as_array( - ctypes.cast(imageBuffer, ctypes.POINTER( - ctypes.c_uint16 if self._tiffInfo.get('bitspersample') == 16 else ctypes.c_uint8)), - (th, tw, self._tiffInfo.get('samplesperpixel'))) + format = ( + self._tiffInfo.get('bitspersample'), + self._tiffInfo.get('sampleformat') if self._tiffInfo.get( + 'sampleformat') is not None else libtiff_ctypes.SAMPLEFORMAT_UINT) + formattbl = { + (8, libtiff_ctypes.SAMPLEFORMAT_UINT): numpy.uint8, + (8, libtiff_ctypes.SAMPLEFORMAT_INT): numpy.int8, + (16, libtiff_ctypes.SAMPLEFORMAT_UINT): numpy.uint16, + (16, libtiff_ctypes.SAMPLEFORMAT_INT): numpy.int16, + (16, 
libtiff_ctypes.SAMPLEFORMAT_IEEEFP): numpy.float16, + (32, libtiff_ctypes.SAMPLEFORMAT_UINT): numpy.uint32, + (32, libtiff_ctypes.SAMPLEFORMAT_INT): numpy.int32, + (32, libtiff_ctypes.SAMPLEFORMAT_IEEEFP): numpy.float32, + (64, libtiff_ctypes.SAMPLEFORMAT_UINT): numpy.uint64, + (64, libtiff_ctypes.SAMPLEFORMAT_INT): numpy.int64, + (64, libtiff_ctypes.SAMPLEFORMAT_IEEEFP): numpy.float64, + } + image = numpy.ctypeslib.as_array(ctypes.cast( + imageBuffer, ctypes.POINTER(ctypes.c_uint8)), (tileSize, )).view( + formattbl[format]).reshape( + (th, tw, self._tiffInfo.get('samplesperpixel'))) if (self._tiffInfo.get('samplesperpixel') == 3 and self._tiffInfo.get('photometric') == libtiff_ctypes.PHOTOMETRIC_YCBCR): if self._tiffInfo.get('bitspersample') == 16: @@ -766,7 +785,9 @@ def getTile(self, x, y): if (not self._tiffInfo.get('istiled') or self._tiffInfo.get('compression') not in ( libtiff_ctypes.COMPRESSION_JPEG, 33003, 33005, 34712) or - self._tiffInfo.get('bitspersample') != 8): + self._tiffInfo.get('bitspersample') != 8 or + self._tiffInfo.get('sampleformat') not in { + None, libtiff_ctypes.SAMPLEFORMAT_UINT}): return self._getUncompressedTile(tileNum) imageBuffer = six.BytesIO() diff --git a/tasks/large_image_tasks/tasks.py b/tasks/large_image_tasks/tasks.py deleted file mode 100644 index 859b19202..000000000 --- a/tasks/large_image_tasks/tasks.py +++ /dev/null @@ -1,51 +0,0 @@ -import os -import time - -from girder_worker.app import app -from girder_worker.utils import girder_job - - -@girder_job(title='Create a pyramidal tiff using vips', type='large_image_tiff') -@app.task(bind=True) -def create_tiff(self, inputFile, outputName=None, outputDir=None, quality=90, tileSize=256): - # Because of its use of gobject, pyvips should be invoked without concurrency - os.environ['VIPS_CONCURRENCY'] = '1' - import pyvips - - inputPath = os.path.abspath(os.path.expanduser(inputFile)) - inputName = os.path.basename(inputPath) - if not outputName: - outputName = 
(os.path.splitext(inputName)[0] + '.' + - time.strftime('%Y%m%d-%H%M%S') + '.tiff') - renameOutput = outputName - if not outputName.endswith('.tiff'): - outputName += '.tiff' - if not outputDir: - outputDir = os.path.dirname(inputPath) - outputPath = os.path.join(outputDir, outputName) - # This is equivalent to a vips command line of - # vips tiffsave - # followed by the convert params in the form of --[=] where no - # value needs to be specified if they are True. - convertParams = { - 'compression': 'jpeg', - 'Q': quality, - 'tile': True, - 'tile_width': tileSize, - 'tile_height': tileSize, - 'pyramid': True, - 'bigtiff': True - } - print('Input: %s\nOutput: %s\nOptions: %r' % (inputPath, outputPath, convertParams)) - pyvips.Image.new_from_file(inputPath).write_to_file(outputPath, **convertParams) - # vips always seems to raise its own exception, so this may be needless - if not os.path.exists(outputPath): - raise Exception('VIPS command failed to produce output') - if renameOutput != outputName: - import shutil - - renamePath = os.path.join(outputDir, renameOutput) - shutil.move(outputPath, renamePath) - outputPath = renamePath - print('Created a file of size %d' % os.path.getsize(outputPath)) - return outputPath diff --git a/test/data/d042-353.crop.small.float32.tif.sha512 b/test/data/d042-353.crop.small.float32.tif.sha512 new file mode 100755 index 000000000..f75d9c4d0 --- /dev/null +++ b/test/data/d042-353.crop.small.float32.tif.sha512 @@ -0,0 +1 @@ +ae05dbe6f3330c912893b203b55db27b0fdf3222a0e7f626d372c09668334494d07dc1d35533670cfac51b588d2292eeee7431317741fdb4cbb281c28a289115 diff --git a/test/test_converter.py b/test/test_converter.py new file mode 100644 index 000000000..418a1fd9c --- /dev/null +++ b/test/test_converter.py @@ -0,0 +1,156 @@ +# -*- coding: utf-8 -*- + +import os +import pytest +import shutil +import tifftools + +import large_image_converter +import large_image_converter.__main__ as main + + +from . 
import utilities + + +def testIsGeospatial(): + testDir = os.path.dirname(os.path.realpath(__file__)) + imagePath = os.path.join(testDir, 'test_files', 'rgb_geotiff.tiff') + assert large_image_converter.is_geospatial(imagePath) is True + + imagePath = utilities.externaldata( + 'data/sample_svs_image.TCGA-DU-6399-01A-01-TS1.e8eb65de-d63e-42db-' + 'af6f-14fefbbdf7bd.svs.sha512') + assert large_image_converter.is_geospatial(imagePath) is False + + testDir = os.path.dirname(os.path.realpath(__file__)) + imagePath = os.path.join(testDir, 'test_files', 'yb10kx5k.png') + assert large_image_converter.is_geospatial(imagePath) is False + + +def testIsVips(): + imagePath = utilities.externaldata( + 'data/sample_svs_image.TCGA-DU-6399-01A-01-TS1.e8eb65de-d63e-42db-' + 'af6f-14fefbbdf7bd.svs.sha512') + assert large_image_converter.is_vips(imagePath) is True + + imagePath = utilities.externaldata('data/HENormalN801.czi.sha512') + assert large_image_converter.is_vips(imagePath) is False + + +@pytest.mark.parametrize('convert_args,taglist', [ + ({}, { + tifftools.Tag.Compression.value: tifftools.constants.Compression.LZW.value, + tifftools.Tag.TileWidth.value: 256 + }), + ({'compression': 'jpeg'}, { + tifftools.Tag.Compression.value: tifftools.constants.Compression.JPEG.value + }), + ({'compression': 'deflate'}, { + tifftools.Tag.Compression.value: tifftools.constants.Compression.AdobeDeflate.value + }), + ({'compression': 'lzw'}, { + tifftools.Tag.Compression.value: tifftools.constants.Compression.LZW.value + }), + ({'compression': 'packbits'}, { + tifftools.Tag.Compression.value: tifftools.constants.Compression.Packbits.value + }), + ({'compression': 'zstd'}, { + tifftools.Tag.Compression.value: tifftools.constants.Compression.ZSTD.value + }), + ({'compression': 'jpeg', 'quality': 50}, { + tifftools.Tag.Compression.value: tifftools.constants.Compression.JPEG.value + }), + ({'compression': 'deflate', 'level': 2}, { + tifftools.Tag.Compression.value: 
tifftools.constants.Compression.AdobeDeflate.value + }), + ({'compression': 'lzw', 'predictor': 'yes'}, { + tifftools.Tag.Compression.value: tifftools.constants.Compression.LZW.value + }), + ({'tileSize': 512}, { + tifftools.Tag.TileWidth.value: 512 + }), +]) +def testConvert(tmpdir, convert_args, taglist): + testDir = os.path.dirname(os.path.realpath(__file__)) + imagePath = os.path.join(testDir, 'test_files', 'yb10kx5k.png') + outputPath = os.path.join(tmpdir, 'out.tiff') + large_image_converter.convert(imagePath, outputPath, **convert_args) + info = tifftools.read_tiff(outputPath) + for key, value in taglist.items(): + assert info['ifds'][0]['tags'][key]['data'][0] == value + + +def testConvertGeospatial(tmpdir): + testDir = os.path.dirname(os.path.realpath(__file__)) + imagePath = os.path.join(testDir, 'test_files', 'rgb_geotiff.tiff') + inputPath = os.path.join(tmpdir, 'in.geo.tiff') + shutil.copy(imagePath, inputPath) + outputPath = large_image_converter.convert(inputPath, level=5) + assert 'geo.tiff' in outputPath + assert outputPath != inputPath + info = tifftools.read_tiff(outputPath) + assert tifftools.Tag.ModelTiepointTag.value in info['ifds'][0]['tags'] + + +def testConvertPTIF(tmpdir): + imagePath = utilities.externaldata('data/sample_image.ptif.sha512') + outputPath = os.path.join(tmpdir, 'out.tiff') + large_image_converter.convert(imagePath, outputPath, compression='jpeg', quality=50) + info = tifftools.read_tiff(outputPath) + assert len(info['ifds']) == 11 + + +def testConvertOverwrite(tmpdir): + testDir = os.path.dirname(os.path.realpath(__file__)) + imagePath = os.path.join(testDir, 'test_files', 'yb10kx5k.png') + outputPath = os.path.join(tmpdir, 'out.tiff') + open(outputPath, 'w').write('placeholder') + with pytest.raises(Exception): + large_image_converter.convert(imagePath, outputPath) + large_image_converter.convert(imagePath, outputPath, overwrite=True) + assert os.path.getsize(outputPath) > 100 + + +def testConvertOMETif(tmpdir): + 
imagePath = utilities.externaldata('data/sample.ome.tif.sha512') + outputPath = os.path.join(tmpdir, 'out.tiff') + # Note: change this when we convert multi-frame files differently + large_image_converter.convert(imagePath, outputPath) + info = tifftools.read_tiff(outputPath) + assert len(info['ifds']) == 5 + + +def testConvertTiffFloatPixels(tmpdir): + imagePath = utilities.externaldata('data/d042-353.crop.small.float32.tif.sha512') + outputPath = os.path.join(tmpdir, 'out.tiff') + large_image_converter.convert(imagePath, outputPath) + info = tifftools.read_tiff(outputPath) + assert (info['ifds'][0]['tags'][tifftools.Tag.SampleFormat.value]['data'][0] == + tifftools.constants.SampleFormat.uint.value) + + +def testConverterMain(tmpdir): + testDir = os.path.dirname(os.path.realpath(__file__)) + imagePath = os.path.join(testDir, 'test_files', 'yb10kx5k.png') + outputPath = os.path.join(tmpdir, 'out.tiff') + main.main([imagePath, outputPath]) + assert os.path.getsize(outputPath) > 100 + + outputPath2 = os.path.join(tmpdir, 'out2.tiff') + main.main([imagePath, outputPath2, '--compression', 'zip']) + assert os.path.getsize(outputPath2) > 100 + assert os.path.getsize(outputPath2) < os.path.getsize(outputPath) + + +def testConverterMainNonFile(tmpdir): + outputPath = os.path.join(tmpdir, 'out.tiff') + assert main.main(['not a file', outputPath]) == 1 + + +def testConverterMainNonImageFile(tmpdir): + testDir = os.path.dirname(os.path.realpath(__file__)) + imagePath = os.path.join(testDir, 'test_files', 'notanimage.txt') + outputPath = os.path.join(tmpdir, 'out.tiff') + with pytest.raises(Exception): + main.main([imagePath, outputPath]) + assert not os.path.exists(outputPath) diff --git a/utilities/converter/README.rst b/utilities/converter/README.rst new file mode 100644 index 000000000..659131257 --- /dev/null +++ b/utilities/converter/README.rst @@ -0,0 +1,77 @@ +********************* +Large Image Converter +********************* + +Convert a variety of images into the 
most efficient format for Large Image. + +Geospatial files are converted into cloud-optimized geotiff_ via gdal_translate. +Single-image non-geospatial files are converted into pyramidal tiff files via pyvips. +Multi-image tiff files are converted into tiff files with multiple pyramidal tiff images and have a custom image description to store frame details. + +Some files can be read via the various tile sources in large_image without conversion but are inefficient (for example, uncompressed data in nd2 files). Converting these files will result in more efficient data access. + +Installation +============ + +To install via pip with custom-built wheels: + +``pip install large-image-converter[sources] --find-links https://girder.github.io/large_image_wheels`` + +The ``[sources]`` extra requirement is optional. When specified, all of the default large-image tile sources are installed for additional metadata extraction and format support. + +Requirements +------------ + +If the custom-built wheels do not cover your platform, or you want to use different versions of tools, you can install the prerequisites manually. For full functionality, the following packages and libraries are needed: + +- GDAL 3.1.0 or greater, including the command-line tools and the python library +- libtiff, including the command-line tools +- libvips + +Additionally, the various tile sources for large_image can be used to read input files to extract and preserve metadata and to read files that can't be read via libvips or GDAL. The requirements of those sources need to be installed. + +Usage +===== + +Command Line +------------ + +In the simplest use, an image can be converted via: + +``large_image_converter `` + +An output image will be generated in the same directory as the source image. 
+ +The full list of options can be obtained via: + +``large_image_converter --help`` + +From Python +----------- + +The convert function contains all of the main functionality:: + + from large_image_converter import convert + + convert() + + # See the options + print(convert.__doc__) + +From Girder +----------- + +The converter is installed by default when ``girder-large-image`` is installed. It relies on Girder Worker to actually run the conversion. + +The conversion task can be reached via the user interface on the item details pages, via the ``createImageItem`` method on the ``ImageItem`` model, or via the ``POST`` ``item/{itemId}/tiles`` endpoint. + +Limitations and Future Development +================================== + +There are some limitations that may be improved with additional development. + +- For some multi-image files, such as OME Tiff files that cannot be read by an existing large_image tile source, the specific channel, z-value, or time step is not converted to readily useable metadata. + +- Whether the original file is stored in a lossy or lossless format is not always determined. If unknown, the output defaults to lossless, which may be needlessly large. + +.. 
_geotiff: https://gdal.org/drivers/raster/cog.html diff --git a/utilities/converter/large_image_converter/__init__.py b/utilities/converter/large_image_converter/__init__.py new file mode 100644 index 000000000..f3dbcb03e --- /dev/null +++ b/utilities/converter/large_image_converter/__init__.py @@ -0,0 +1,498 @@ +import datetime +import fractions +import json +import logging +import os +from pkg_resources import DistributionNotFound, get_distribution +from tempfile import TemporaryDirectory +import time + +import tifftools + +try: + __version__ = get_distribution(__name__).version +except DistributionNotFound: + # package is not installed + pass + + +logger = logging.getLogger('large-image-converter') + + +def _data_from_large_image(path, outputPath): + """ + Check if the input file can be read by installed large_image tile sources. + If so, return the metadata, internal metadata, and extract each associated + image. + + :param path: path of the file. + :param outputPath: the name of a temporary output file. + :returns: a dictionary of metadata, internal_metadata, and images. images + is a dictionary of keys and paths. Returns None if the path is not + readable by large_image. 
+ """ + try: + import large_image + except ImportError: + return + + _import_pyvips() + try: + ts = large_image.getTileSource(path) + except Exception: + return + results = { + 'metadata': ts.getMetadata(), + 'internal_metadata': ts.getInternalMetadata(), + 'images': {}, + 'tilesource': ts, + } + for key in ts.getAssociatedImagesList(): + try: + img, mime = ts.getAssociatedImage(key) + except Exception: + continue + savePath = outputPath + '-%s-%s.tiff' % (key, time.strftime('%Y%m%d-%H%M%S')) + # TODO: allow specifying quality separately from main image quality + _convert_via_vips(img, savePath, outputPath, mime=mime, forTiled=False) + results['images'][key] = savePath + return results + + +def _generate_geotiff(inputPath, outputPath, **kwargs): + """ + Take a source input file, readable by gdal, and output a cloud-optimized + geotiff file. See https://gdal.org/drivers/raster/cog.html. + + :params inputPath: the path to the input file or base file of a set. + :params outputPath: the path of the output file. + Optional parameters that can be specified in kwargs: + :params tileSize: the horizontal and vertical tile size. + :param compression: one of 'jpeg', 'deflate' (zip), 'lzw', or 'zstd'. + :params quality: a jpeg quality passed to vips. 0 is small, 100 is high + quality. 90 or above is recommended. + :param level: compression level for zstd, 1-22 (default is 10). + :param predictor: one of 'none', 'horizontal', 'float', or 'yes' used for + lzw and deflate. 
+ """ + from osgeo import gdal + from osgeo import gdalconst + + options = { + 'tileSize': 256, + 'compression': 'lzw', + 'quality': 90, + 'predictor': 'yes', + } + predictor = { + 'none': 'NO', + 'horizontal': 'STANDARD', + 'float': 'FLOATING_POINT', + 'yes': 'YES', + } + options.update({k: v for k, v in kwargs.items() if v not in (None, '')}) + cmdopt = ['-of', 'COG', '-co', 'BIGTIFF=YES'] + cmdopt += ['-co', 'BLOCKSIZE=%d' % options['tileSize']] + cmdopt += ['-co', 'COMPRESS=%s' % options['compression'].upper()] + cmdopt += ['-co', 'QUALITY=%s' % options['quality']] + cmdopt += ['-co', 'PREDICTOR=%s' % predictor[options['predictor']]] + if 'level' in options: + cmdopt += ['-co', 'LEVEL=%s' % options['level']] + cmd = ['gdal_translate', inputPath, outputPath] + cmdopt + logger.info('Convert to geotiff: %r' % (cmd)) + # subprocess.check_call(cmd) + ds = gdal.Open(inputPath, gdalconst.GA_ReadOnly) + gdal.Translate(outputPath, ds, options=cmdopt) + + +def _generate_tiff(inputPath, outputPath, tempPath, lidata, **kwargs): + """ + Take a source input file, readable by vips, and output a pyramidal tiff + file. + + :params inputPath: the path to the input file or base file of a set. + :params outputPath: the path of the output file. + :params tempPath: a temporary file in a temporary directory. + :params lidata: data from a large_image tilesource including associated + images. + Optional parameters that can be specified in kwargs: + :params tileSize: the horizontal and vertical tile size. + :param compression: one of 'jpeg', 'deflate' (zip), 'lzw', 'packbits', or + 'zstd'. + :params quality: a jpeg quality passed to vips. 0 is small, 100 is high + quality. 90 or above is recommended. + :param level: compression level for zstd, 1-22 (default is 10). + :param predictor: one of 'none', 'horizontal', or 'float' used for lzw and + deflate. 
+ """ + _import_pyvips() + subOutputPath = tempPath + '-%s.tiff' % (time.strftime('%Y%m%d-%H%M%S')) + _convert_via_vips(inputPath, subOutputPath, tempPath, **kwargs) + _output_tiff([subOutputPath], outputPath, lidata) + + +def _convert_via_vips(inputPathOrBuffer, outputPath, tempPath, forTiled=True, + status=None, **kwargs): + # This is equivalent to a vips command line of + # vips tiffsave + # followed by the convert params in the form of --[=] where no + # value needs to be specified if they are True. + convertParams = _vips_parameters(forTiled, **kwargs) + status = (', ' + status) if status else '' + if type(inputPathOrBuffer) == bytes: + source = 'buffer' + image = pyvips.Image.new_from_buffer(inputPathOrBuffer, '') + else: + source = inputPathOrBuffer + image = pyvips.Image.new_from_file(inputPathOrBuffer) + logger.info('Input: %s, Output: %s, Options: %r%s' % ( + source, outputPath, convertParams, status)) + image = image.autorot() + image = _vips_cast(image) + # TODO: revisit the TMPDIR override; this is not thread safe + oldtmpdir = os.environ.get('TMPDIR') + os.environ['TMPDIR'] = os.path.dirname(tempPath) + try: + image.write_to_file(outputPath, **convertParams) + finally: + if oldtmpdir is not None: + os.environ['TMPDIR'] = oldtmpdir + else: + del os.environ['TMPDIR'] + + +def _output_tiff(inputs, outputPath, lidata, extraImages=None): + """ + Given a list of input tiffs and data as parsed by _data_from_large_image, + generate an output tiff file with the associated images, correct scale, and + other metadata. + + :param inputs: a list of pyramidal input files. + :param outputPath: the final destination. + :param lidata: large_image data including metadata and associated images. + :param extraImages: an optional dictionary of keys and paths to add as + extra associated images. 
+ """ + logger.debug('Reading %s' % inputs[0]) + info = tifftools.read_tiff(inputs[0]) + imgDesc = info['ifds'][0]['tags'].get(tifftools.Tag.ImageDescription.value) + description = _make_li_description( + len(info['ifds']), len(inputs), lidata, + (len(extraImages) if extraImages else 0) + (len(lidata['images']) if lidata else 0), + imgDesc['data'] if imgDesc else None) + info['ifds'][0]['tags'][tifftools.Tag.ImageDescription.value] = { + 'data': description, + 'datatype': tifftools.Datatype.ASCII, + } + if lidata: + _set_resolution(info['ifds'], lidata['metadata']) + assocList = [] + if lidata: + assocList += list(lidata['images'].items()) + if extraImages: + assocList += list(extraImages.items()) + for key, assocPath in assocList: + logger.debug('Reading %s' % assocPath) + assocInfo = tifftools.read_tiff(assocPath) + assocInfo['ifds'][0]['tags'][tifftools.Tag.ImageDescription.value] = { + 'data': key, + 'datatype': tifftools.Datatype.ASCII, + } + info['ifds'] += assocInfo['ifds'] + logger.debug('Writing %s' % outputPath) + tifftools.write_tiff(info, outputPath, bigEndian=False, bigtiff=False, allowExisting=True) + + +def _set_resolution(ifds, metadata): + """ + Given metadata with a scale in mm_x and/or mm_y, set the resolution for + each ifd, assuming that each one is half the scale of the previous one. + + :param ifds: a list of ifds from a single pyramid. The resolution may be + set on each one. + :param metadata: metadata with a scale specified by mm_x and/or mm_y. 
+ """ + if metadata.get('mm_x') or metadata.get('mm_y'): + for idx, ifd in enumerate(ifds): + ifd['tags'][tifftools.Tag.ResolutionUnit.value] = { + 'data': [tifftools.constants.ResolutionUnit.Centimeter], + 'datatype': tifftools.Datatype.SHORT, + } + for mkey, tkey in (('mm_x', 'XResolution'), ('mm_y', 'YResolution')): + if metadata[mkey]: + val = fractions.Fraction( + 10.0 / (metadata[mkey] * 2 ** idx)).limit_denominator() + if val.numerator >= 2**32 or val.denominator >= 2**32: + origval = val + denom = 1000000 + while val.numerator >= 2**32 or val.denominator >= 2**32 and denom > 1: + denom = int(denom / 10) + val = origval.limit_denominator(denom) + if val.numerator >= 2**32 or val.denominator >= 2**32: + continue + ifd['tags'][tifftools.Tag[tkey].value] = { + 'data': [val.numerator, val.denominator], + 'datatype': tifftools.Datatype.RATIONAL, + } + + +def _import_pyvips(): + """ + Import pyvips on demand. + """ + global pyvips + + # Because of its use of gobject, pyvips should be invoked without + # concurrency + # os.environ['VIPS_CONCURRENCY'] = '1' + import pyvips + + +def _is_eightbit(path, tiffinfo=None): + """ + Check if a path has an unsigned 8-bit per sample data size. If any known + channel is otherwise or this is unknown, this returns False. + + :params path: The path to the file + :params tiffinfo: data extracted from tifftools.read_tiff(path). + :returns: True if known to be 8 bits per sample. + """ + if not tiffinfo: + return False + try: + if not all(val == tifftools.constants.SampleFormat.uint for val in + tiffinfo['ifds'][0]['tags'][tifftools.Tag.SampleFormat.value]['data']): + return False + if tifftools.Tag.BitsPerSample.value in tiffinfo['ifds'][0]['tags'] and not all( + val == 8 for val in + tiffinfo['ifds'][0]['tags'][tifftools.Tag.BitsPerSample.value]['data']): + return False + except Exception: + return False + return True + + +def _is_lossy(path, tiffinfo=None): + """ + Check if a path uses lossy compression. 
def json_serial(obj):
    """
    JSON serialization fallback for values json cannot encode natively.

    Dates and datetimes become ISO 8601 strings; anything else is coerced
    with str().
    """
    is_date = isinstance(obj, (datetime.datetime, datetime.date))
    return obj.isoformat() if is_date else str(obj)
+ + :param image: a vips image + :returns: a vips image + """ + if image.format in {pyvips.BandFormat.UCHAR, pyvips.BandFormat.USHORT}: + return image + target = pyvips.BandFormat.UCHAR if image.format in { + pyvips.BandFormat.CHAR} else pyvips.BandFormat.USHORT + logger.debug('Casting image from %r to %r' % (image.format, target)) + image = image.cast(target) + # TODO: verify that this doesn't need any scaling + return image + + +def _vips_parameters(forTiled=True, **kwargs): + """ + Return a dictionary of vips conversion parameters. + + :param forTiled: True if this is for a tiled image. False for an + associated image. + Optional parameters that can be specified in kwargs: + :params tileSize: the horizontal and vertical tile size. + :param compression: one of 'jpeg', 'deflate' (zip), 'lzw', 'packbits', + 'zstd', or 'none'. + :param quality: a jpeg quality passed to vips. 0 is small, 100 is high + quality. 90 or above is recommended. + :param level: compression level for zstd, 1-22 (default is 10). + :param predictor: one of 'none', 'horizontal', or 'float' used for lzw and + deflate. + :returns: a dictionary of parameters. 
+ """ + if not forTiled: + convertParams = { + 'compression': 'jpeg', + 'Q': 90, + 'predictor': 'horizontal', + 'tile': False, + } + if 'mime' in kwargs and kwargs.get('mime') != 'image/jpeg': + convertParams['compression'] = 'lzw' + return convertParams + convertParams = { + 'tile': True, + 'tile_width': 256, + 'tile_height': 256, + 'pyramid': True, + 'bigtiff': True, + 'compression': 'jpeg', + 'Q': 90, + 'predictor': 'horizontal', + } + for vkey, kwkey in { + 'tile_width': 'tileSize', + 'tile_height': 'tileSize', + 'compression': 'compression', + 'Q': 'quality', + 'level': 'level', + 'predictor': 'predictor', + }.items(): + if kwkey in kwargs and kwargs[kwkey] not in {None, ''}: + convertParams[vkey] = kwargs[kwkey] + if convertParams['predictor'] == 'yes': + convertParams['predictor'] = 'horizontal' + if convertParams['compression'] == 'jpeg': + convertParams['rgbjpeg'] = True + return convertParams + + +def convert(inputPath, outputPath=None, **kwargs): + """ + Take a source input file and output a pyramidal tiff file. + + :params inputPath: the path to the input file or base file of a set. + :params outputPath: the path of the output file. + Optional parameters that can be specified in kwargs: + :params tileSize: the horizontal and vertical tile size. + :param compression: one of 'jpeg', 'deflate' (zip), 'lzw', 'packbits', + 'zstd', or 'none'. + :params quality: a jpeg quality passed to vips. 0 is small, 100 is high + quality. 90 or above is recommended. + :param level: compression level for zstd, 1-22 (default is 10). + :param predictor: one of 'none', 'horizontal', or 'float' used for lzw and + deflate. + Additional optional parameters: + :param geospatial: if not None, a boolean indicating if this file is + geospatial. If not specified or None, this will be checked. 
def is_geospatial(path):
    """
    Check if a path is likely to be a geospatial file.

    :params path: The path to the file
    :returns: True if geospatial.
    """
    try:
        from osgeo import gdal
        from osgeo import gdalconst
    except ImportError:
        logger.warning('Cannot import GDAL.')
        return False
    gdal.UseExceptions()
    try:
        ds = gdal.Open(path, gdalconst.GA_ReadOnly)
    except Exception:
        # With UseExceptions on, unreadable files raise; treat as
        # non-geospatial.
        return False
    try:
        # A projection, or a driver that is inherently geospatial, marks
        # the file as geospatial.
        return bool(ds and (
            ds.GetProjection() or
            ds.GetDriver().ShortName in {'NITF', 'netCDF'}))
    finally:
        # Drop the reference so gdal closes the dataset promptly.
        ds = None
def main(args=None):
    """
    Parse command line arguments and convert a single image.

    :param args: the command line arguments; defaults to sys.argv[1:].  The
        default is resolved at call time rather than import time (a
        def-time default of sys.argv[1:] would freeze the arguments when
        the module is first imported).
    :returns: 0 on success, 1 on failure.
    """
    if args is None:
        args = sys.argv[1:]
    parser = get_parser()
    opts = parser.parse_args(args=args)
    logger = logging.getLogger('large-image-converter')
    if not len(logger.handlers):
        logger.addHandler(logging.StreamHandler(sys.stderr))
    # Each -v lowers and each -s raises the log threshold by one level.
    logger.setLevel(max(1, logging.WARNING - (opts.verbose - opts.silent) * 10))
    try:
        import large_image

        li_logger = large_image.config.getConfig('logger')
        li_logger.setLevel(max(1, logging.CRITICAL - (opts.verbose - opts.silent) * 10))
    except ImportError:
        # large_image is optional; conversion still works without it.
        pass
    logger.debug('Command line options: %r' % opts)
    if not os.path.isfile(opts.source):
        logger.error('Source is not a file (%s)', opts.source)
        return 1
    # 'zip' is accepted as a synonym for deflate compression.
    if opts.compression == 'zip':
        opts.compression = 'deflate'
    converterOpts = {
        k: v for k, v in vars(opts).items()
        if k not in {'source', 'dest', 'verbose', 'silent'} and v is not None}
    start_time = time.time()
    dest = large_image_converter.convert(opts.source, opts.dest, **converterOpts)
    end_time = time.time()
    if not os.path.isfile(dest):
        logger.error('Failed to generate file')
        return 1
    logger.info('Created %s, %d bytes, %3.1f s',
                dest, os.path.getsize(dest), end_time - start_time)
    # Explicit success code; sys.exit(0) signals success to the shell.
    return 0
__name__ == '__main__': + sys.exit(main()) diff --git a/utilities/converter/setup.py b/utilities/converter/setup.py new file mode 100644 index 000000000..5c96f985d --- /dev/null +++ b/utilities/converter/setup.py @@ -0,0 +1,65 @@ +import os +from setuptools import setup, find_packages + +with open('README.rst', 'r') as fh: + long_desc = fh.read() + + +def prerelease_local_scheme(version): + """ + Return local scheme version unless building on master in CircleCI. + + This function returns the local scheme version number + (e.g. 0.0.0.dev+g) unless building on CircleCI for a + pre-release in which case it ignores the hash and produces a + PEP440 compliant pre-release version number (e.g. 0.0.0.dev). + """ + from setuptools_scm.version import get_local_node_and_date + + if os.getenv('CIRCLE_BRANCH') in ('master', ): + return '' + else: + return get_local_node_and_date(version) + + +setup( + name='large-image-converter', + use_scm_version={'root': '../..', 'local_scheme': prerelease_local_scheme}, + setup_requires=['setuptools-scm'], + description='Converter for Large Image.', + long_description=long_desc, + author='Kitware Inc', + author_email='kitware@kitware.com', + license='Apache Software License 2.0', + classifiers=[ + 'Development Status :: 5 - Production/Stable', + 'License :: OSI Approved :: Apache Software License', + 'Topic :: Scientific/Engineering', + 'Intended Audience :: Science/Research', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + ], + install_requires=[ + 'gdal', + 'numpy', + 'psutil', + 'pyvips', + 'tifftools', + ], + extras_require={ + 'jp2k': [ + 'glymur', + ], + 'sources': [ + 'large_image[sources]', + ], + }, + packages=find_packages(), + entry_points={ + 'console_scripts': ['large_image_converter = large_image_converter.__main__:main'] + }, + python_requires='>=3.6', +) diff 
@girder_job(title='Create a pyramidal tiff using vips', type='large_image_tiff')
@app.task(bind=True)
def create_tiff(self, inputFile, outputName=None, outputDir=None, quality=90,
                tileSize=256, **kwargs):
    """
    Take a source input file, readable by vips or gdal, and output a
    pyramidal tiff file (a cloud-optimized geotiff for geospatial sources).

    :params inputFile: the path to the input file or base file of a set.
    :params outputName: the name of the output file.  If None, the name is
        based on the input name and current date and time.  May be a full
        path.
    :params outputDir: the location to store the output.  If unspecified, the
        inputFile's directory is used.  If the outputName is a fully qualified
        path, this is ignored.
    :params quality: a jpeg quality passed to vips.  0 is small, 100 is high
        quality.  90 or above is recommended.
    :params tileSize: the horizontal and vertical tile size.
    Optional parameters that can be specified in kwargs:
    :param compression: one of 'jpeg', 'deflate' (zip), 'lzw', 'packbits', or
        'zstd'.
    :param level: compression level for zstd, 1-22 (default is 10).
    :param predictor: one of 'none', 'horizontal', or 'float' used for lzw and
        deflate.
    :param inputName: if no output name is specified, and this is specified,
        this is used as the basis of the output name instead of extracting the
        name from the inputFile path.
    :returns: the path of the created file.
    """
    import large_image_converter

    logger = logging.getLogger('large-image-converter')
    if not len(logger.handlers):
        logger.addHandler(logging.StreamHandler(sys.stdout))
    logger.setLevel(logging.INFO)

    inputPath = os.path.abspath(os.path.expanduser(inputFile))
    geospatial = large_image_converter.is_geospatial(inputPath)
    # inputName is consumed here; pop it so it is not forwarded into
    # convert() along with the remaining kwargs.
    inputName = kwargs.pop('inputName', os.path.basename(inputPath))
    suffix = '.tiff' if not geospatial else '.geo.tiff'
    if not outputName:
        outputName = os.path.splitext(inputName)[0] + suffix
        # Avoid 'name.geo.geo.tiff' style doubling for geospatial inputs.
        if outputName.endswith('.geo' + suffix):
            outputName = outputName[:len(outputName) - len(suffix) - 4] + suffix
        if outputName == inputName:
            # Never silently overwrite the input; add a timestamp instead.
            outputName = (os.path.splitext(inputName)[0] + '.' +
                          time.strftime('%Y%m%d-%H%M%S') + suffix)
    renameOutput = outputName
    # Convert with the canonical suffix and rename afterwards if the caller
    # asked for a different extension.
    if not outputName.endswith(suffix):
        outputName += suffix
    if not outputDir:
        outputDir = os.path.dirname(inputPath)
    outputPath = os.path.join(outputDir, outputName)
    large_image_converter.convert(
        inputPath, outputPath, quality=quality, tileSize=tileSize, **kwargs)
    if not os.path.exists(outputPath):
        raise Exception('Conversion command failed to produce output')
    if renameOutput != outputName:
        renamePath = os.path.join(outputDir, renameOutput)
        shutil.move(outputPath, renamePath)
        outputPath = renamePath
    logger.info('Created a file of size %d' % os.path.getsize(outputPath))
    return outputPath
use_scm_version={'root': '../..', 'local_scheme': prerelease_local_scheme}, setup_requires=['setuptools-scm'], description='Girder Worker tasks for Large Image.', long_description=long_desc, @@ -32,7 +32,7 @@ def prerelease_local_scheme(version): author_email='kitware@kitware.com', license='Apache Software License 2.0', classifiers=[ - 'Development Status :: 3 - Alpha', + 'Development Status :: 5 - Production/Stable', 'License :: OSI Approved :: Apache Software License', 'Topic :: Scientific/Engineering', 'Intended Audience :: Science/Research', @@ -54,8 +54,8 @@ def prerelease_local_scheme(version): ], 'worker': [ # Dependencies required on the consumer (Girder Worker) side. - 'pyvips', - ] + 'large-image-converter[sources]', + ], }, python_requires='>=3.6', entry_points={