From 917ebdba27d264c6a9c989670b6a9a1e9ee96f4c Mon Sep 17 00:00:00 2001 From: David Manthey Date: Thu, 8 Jun 2023 08:41:52 -0400 Subject: [PATCH] Improve dicom multi level detection When there are multiple files in the same directory, we check if they can be used together as different levels of the same dicom. This was done by extension, but some dicoms have the name structure of DCM_ followed by digits, without an extension. --- CHANGELOG.md | 5 ++++- large_image/tilesource/__init__.py | 8 +++++++- large_image/tilesource/base.py | 20 ++++++++++++------- .../large_image_source_dicom/__init__.py | 5 +++++ 4 files changed, 29 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f9fd3959c..6dda7f396 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,13 +2,16 @@ ## 1.22.3 +### Improvements +- Better DICOM multi-level detection ([#1196](../../pull/1196)) + ### Changes - Change how extensions and fallback priorities interact ([#1192](../../pull/1192)) - Refactor reading the .large_image_config.yaml file on the girder client ([#1193](../../pull/1193)) - Refactor of the which-folders-have-annotations pipeline ([#1194](../../pull/1194)) ### Bug Fixes -- Fix an issue converting multiframe files that vips reads as single frame ([#1195](../../pull/1195)) +- Fix an issue converting multiframe files that vips reads as single frame ([#1195](../../pull/1195)) ## 1.22.2 diff --git a/large_image/tilesource/__init__.py b/large_image/tilesource/__init__.py index a0d54a9ae..ad3a823b7 100644 --- a/large_image/tilesource/__init__.py +++ b/large_image/tilesource/__init__.py @@ -1,4 +1,5 @@ import os +import re import uuid try: @@ -77,7 +78,8 @@ def getSortedSourceList(availableSources, pathOrUri, mimeType=None, *args, **kwa """ uriWithoutProtocol = str(pathOrUri).split('://', 1)[-1] isLargeImageUri = str(pathOrUri).startswith('large_image://') - extensions = [ext.lower() for ext in os.path.basename(uriWithoutProtocol).split('.')[1:]] + baseName = os.path.basename(uriWithoutProtocol) + 
extensions = [ext.lower() for ext in baseName.split('.')[1:]] properties = { '_geospatial_source': isGeospatial(pathOrUri), } @@ -90,6 +92,10 @@ def getSortedSourceList(availableSources, pathOrUri, mimeType=None, *args, **kwa mimeType in availableSources[sourceName].mimeTypes): fallback = False priority = min(priority, availableSources[sourceName].mimeTypes[mimeType]) + for regex in getattr(availableSources[sourceName], 'nameMatches', {}): + if re.match(regex, baseName): + fallback = False + priority = min(priority, availableSources[sourceName].nameMatches[regex]) for ext in extensions: if ext in sourceExtensions: fallback = False diff --git a/large_image/tilesource/base.py b/large_image/tilesource/base.py index 81ae459f1..9421f8e97 100644 --- a/large_image/tilesource/base.py +++ b/large_image/tilesource/base.py @@ -30,23 +30,29 @@ class TileSource: - #: Name of the tile source + # Name of the tile source name = None - #: A dictionary of known file extensions and the ``SourcePriority`` given - #: to each. It must contain a None key with a priority for the tile source - #: when the extension does not match. + # A dictionary of known file extensions and the ``SourcePriority`` given + # to each. It must contain a None key with a priority for the tile source + # when the extension does not match. extensions = { None: SourcePriority.FALLBACK } - #: A dictionary of common mime-types handled by the source and the - #: ``SourcePriority`` given to each. This are used in place of or in - #: additional to extensions. + # A dictionary of common mime-types handled by the source and the + # ``SourcePriority`` given to each. This are used in place of or in + # additional to extensions. mimeTypes = { None: SourcePriority.FALLBACK } + # A dictionary with regex strings as the keys and the ``SourcePriority`` + # given to names that match that expression. This is used in addition to + # extensions and mimeTypes, with the highest priority match taken. 
+ nameMatches = { + } + geospatial = False def __init__(self, encoding='JPEG', jpegQuality=95, jpegSubsampling=0, diff --git a/sources/dicom/large_image_source_dicom/__init__.py b/sources/dicom/large_image_source_dicom/__init__.py index a2c236efd..d4d9ed40d 100644 --- a/sources/dicom/large_image_source_dicom/__init__.py +++ b/sources/dicom/large_image_source_dicom/__init__.py @@ -92,6 +92,9 @@ class DICOMFileTileSource(FileTileSource, metaclass=LruCacheMetaclass): None: SourcePriority.FALLBACK, 'application/dicom': SourcePriority.PREFERRED, } + nameMatches = { + r'DCM_\d+$': SourcePriority.MEDIUM, + } _minTileSize = 64 _maxTileSize = 4096 @@ -156,6 +159,8 @@ def _pathMightBeDicom(self, path): return True if re.match(r'^([1-9][0-9]*|0)(\.([1-9][0-9]*|0))+$', path) and len(path) <= 64: return True + if re.match(r'^DCM_\d+$', path): + return True return False def getNativeMagnification(self):