Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add the ability to compute additional columns for plottable data #1626

Merged
merged 1 commit into from
Sep 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

## 1.29.8

### Improvements

- Add the option to compute additional columns for plottable data ([#1626](../../pull/1626))

### Bug Fixes

- Fix scaling small images in the multi source with bicubic smoothing ([#1627](../../pull/1627))
Expand Down
14 changes: 12 additions & 2 deletions girder_annotation/girder_large_image_annotation/rest/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -661,19 +661,29 @@ def getItemPlottableElements(self, item, annotations, adjacentItems, sources=Non
.param('sources', 'An optional comma separated list that can contain '
'folder, item, annotation, annotationelement, datafile.',
required=False)
.jsonParam(
'compute', 'A dictionary with keys "columns": a list of columns '
'to include in the computation; if unspecified or an empty list, '
'no computation is done, "function": a string with the name of '
'the function, such as umap, "params": additional parameters to '
'pass to the function. If none of the requiredKeys are '
'compute.(x|y|z), the computation will not be performed. Only '
'rows which have all selected columns present will be included in '
'the computation.',
paramType='formData', requireObject=True, required=False)
.errorResponse('ID was invalid.')
.errorResponse('Read access was denied for the item.', 403),
)
@access.public(cookie=True, scope=TokenScope.DATA_READ)
def getItemPlottableData(
self, item, keys, adjacentItems, annotations, requiredKeys, sources=None):
self, item, keys, adjacentItems, annotations, requiredKeys, sources=None, compute=None):
user = self.getCurrentUser()
if adjacentItems != '__all__':
adjacentItems = str(adjacentItems).lower() == 'true'
sources = sources or None
data = utils.PlottableItemData(
user, item, annotations=annotations, adjacentItems=adjacentItems,
sources=sources)
sources=sources, compute=compute)
return data.data(keys, requiredKeys)

def getFolderAnnotations(self, id, recurse, user, limit=False, offset=False, sort=False,
Expand Down
118 changes: 111 additions & 7 deletions girder_annotation/girder_large_image_annotation/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
'application/x-xls': 'read_excel',
}
scanDatafileRecords = 50
scanAnnotationElements = 5000


@functools.lru_cache(maxsize=100)
Expand Down Expand Up @@ -393,7 +394,8 @@
maxDistinct = 20
allowedTypes = (str, bool, int, float)

def __init__(self, user, item, annotations=None, adjacentItems=False, sources=None):
def __init__(self, user, item, annotations=None, adjacentItems=False,
sources=None, compute=None):
"""
Get plottable data associated with an item.

Expand All @@ -408,15 +410,32 @@
:param sources: None for all, or a string with a comma-separated list
or a list of strings; when a list, the options are folder, item,
annotation, datafile.
:param compute: None for none, or a dictionary with keys "columns": a
list of columns to include in the computation; if unspecified or an
empty list, no computation is done, "function": a string with the
name of the function, such as umap, "params": additional parameters
to pass to the function. If none of the requiredKeys are
compute.(x|y|z), the computation will not be performed. Only rows
which have all selected columns present will be included in the
computation.
"""
self.user = user
self._columns = None
self._datacolumns = None
self._data = None
self._compute = None
try:
if len(compute['columns']):
self._compute = {'function': 'umap', 'params': {

Check warning on line 429 in girder_annotation/girder_large_image_annotation/utils/__init__.py

View check run for this annotation

Codecov / codecov/patch

girder_annotation/girder_large_image_annotation/utils/__init__.py#L429

Added line #L429 was not covered by tests
'random_state': 1, 'n_jobs': 1}}
self._compute.update(compute)

Check warning on line 431 in girder_annotation/girder_large_image_annotation/utils/__init__.py

View check run for this annotation

Codecov / codecov/patch

girder_annotation/girder_large_image_annotation/utils/__init__.py#L431

Added line #L431 was not covered by tests
except Exception:
pass
if sources and not isinstance(sources, (list, tuple)):
sources = sources.split(',')
self._sources = tuple(sources) if sources else None
if self._sources and 'annotation' not in self._sources:
if (self._sources and 'annotation' not in self._sources and
'annotationelement' not in self._sources):
annotations = None
self._fullScan = adjacentItems == '__all__'
self._findItems(item, adjacentItems)
Expand Down Expand Up @@ -559,7 +578,11 @@
'bbox.y0': 'Bounding Box Low Y',
'bbox.x1': 'Bounding Box High X',
'bbox.y1': 'Bounding Box High Y',
'compute.x': 'Dimension Reduction X',
'compute.y': 'Dimension Reduction Y',
'compute.z': 'Dimension Reduction Z',
}
computeColumns = {'compute.x', 'compute.y', 'compute.z'}

def itemNameIDSelector(self, isName, selector):
"""
Expand Down Expand Up @@ -1068,6 +1091,78 @@
countsPerDataFile[dfidx] = count - startcount
return count

def _computeFunction(self, rows):
    """
    Run the configured compute function on the collected rows.

    On success, the result of the function is stored in ``self._computed``;
    the input rows are passed in the iteration order of ``rows``.

    :param rows: a dictionary of row key to a list of values, one value
        per selected compute column.
    :returns: True if the computation was performed, False if the
        configured function is not supported or the package it needs is
        not installed.
    """
    if self._compute['function'] != 'umap':
        # No other functions are implemented; report that nothing was
        # computed rather than implicitly returning None.
        return False
    try:
        # umap-learn is only installed with the optional "compute" extra,
        # so it is imported lazily and its absence is not fatal.
        import umap
    except ImportError:
        logger.warning('umap is not installed; skipping computation')
        return False
    logger.info(f'Calling umap on {len(rows)} rows')
    reducer = umap.UMAP(**self._compute['params'])
    self._computed = reducer.fit_transform(list(rows.values()))
    logger.info('Called umap')
    return True

Check warning on line 1102 in girder_annotation/girder_large_image_annotation/utils/__init__.py

View check run for this annotation

Codecov / codecov/patch

girder_annotation/girder_large_image_annotation/utils/__init__.py#L1098-L1102

Added lines #L1098 - L1102 were not covered by tests

def _getColumnsFromCompute(self, columns):  # noqa
    """
    Collect columns and data from compute actions.

    When no data columns have been requested yet, the compute.x, compute.y,
    and compute.z columns are registered as placeholders so that they are
    listed.  Otherwise, if a computation was specified and at least one
    compute column is required, the selected numeric columns are passed to
    the compute function and the results are stored in the required
    compute data columns.

    :param columns: the dictionary of column records, keyed by column key.
        This is modified in place.
    :returns: the number of rows that had values computed, or 0 if no
        computation was done.
    """

    def computeGetData(record):
        return {}

    def computeLength(record, data):
        return len(self._computed)

    def computeSelector(key):
        # The last character of the key (x, y, or z) selects the axis.
        axis = ord(key[-1:]) - ord('x')

        def computeSelectorAxis(record, data, row):
            return self._computed[row][axis]

        return computeSelectorAxis

    def ensureComputeColumn(key, count):
        # Register a compute column and set its summary values.
        self._ensureColumn(
            columns, key, self.commonColumns[key], 'compute',
            computeGetData, computeSelector(key), computeLength)
        columns[key]['count'] = count
        columns[key]['min'] = columns[key]['max'] = 0

    if not self._datacolumns:
        for key in self.computeColumns:
            ensureComputeColumn(key, 1)
        return 0
    if self._compute is None or not self._requiredColumns & self.computeColumns:
        return 0
    numeric = {
        key for key, col in columns.items()
        if col['type'] == 'number' and col.get('min') is not None
    }
    # Only numeric columns that have collected data are used as inputs;
    # a non-numeric column in the request would otherwise put non-numeric
    # values in the matrix passed to the compute function.
    cols = sorted(
        col for col in numeric.intersection(self._compute['columns'])
        if col in self._datacolumns)
    if not cols:
        return 0
    rows = {}
    for kidx, key in enumerate(cols):
        for row, value in self._datacolumns[key].items():
            if not kidx:
                rows[row] = [value]
            elif row in rows and len(rows[row]) == kidx:
                # Only extend rows that had a value in every prior column
                rows[row].append(value)
    # Keep only the rows that have a value for all selected columns
    rows = {k: row for k, row in rows.items() if len(row) == len(cols)}
    if not rows:
        return 0
    if not self._computeFunction(rows):
        return 0
    for key in self.computeColumns:
        if key in self._requiredColumns and key in self._datacolumns:
            ensureComputeColumn(key, len(rows))
            cidx = ord(key[-1:]) - ord('x')
            # The computed results are in the same order as the rows
            for ridx, row in enumerate(rows):
                self._datacolumns[key][row] = float(self._computed[ridx][cidx])
    return len(rows)

Check warning on line 1164 in girder_annotation/girder_large_image_annotation/utils/__init__.py

View check run for this annotation

Codecov / codecov/patch

girder_annotation/girder_large_image_annotation/utils/__init__.py#L1161-L1164

Added lines #L1161 - L1164 were not covered by tests

def _getColumns(self):
"""
Get a sorted list of plottable columns with some metadata for each.
Expand All @@ -1086,6 +1181,7 @@
count += self._collectColumns(columns, [item], 'item', first=False)
count += self._getColumnsFromAnnotations(columns)
count += self._getColumnsFromDataFiles(columns)
count += self._getColumnsFromCompute(columns)
for result in columns.values():
if len(result['distinct']) <= self.maxDistinct:
result['distinct'] = sorted(result['distinct'])
Expand All @@ -1095,7 +1191,9 @@
if result['type'] != 'number' or result['min'] is None:
result.pop('min', None)
result.pop('max', None)
prefixOrder = {'item': 0, 'annotation': 1, 'annotationelement': 2, 'data': 3, 'bbox': 4}
prefixOrder = {
'item': 0, 'annotation': 1, 'annotationelement': 2, 'data': 3,
'bbox': 4, 'compute': 5}
columns = sorted(columns.values(), key=lambda x: (
prefixOrder.get(x['key'].split('.', 1)[0], len(prefixOrder)), x['key']))
return columns
Expand Down Expand Up @@ -1168,7 +1266,7 @@
rows = [row for ridx, row in enumerate(rows) if rows[ridx] not in discard]
return data, rows

def data(self, columns, requiredColumns=None):
def data(self, columns, requiredColumns=None): # noqa
"""
Get plottable data.

Expand All @@ -1182,8 +1280,14 @@
columns = columns.split(',')
if not isinstance(requiredColumns, list):
requiredColumns = requiredColumns.split(',') if requiredColumns is not None else []
requiredColumns = set(requiredColumns)
specifiedReqColumns = set(requiredColumns)
self._requiredColumns = set(requiredColumns)
if self._compute:
if ('compute.z' in specifiedReqColumns and
self._compute['function'] == 'umap' and
'n_components' not in self._compute['params']):
self._compute['params']['n_components'] = 3
self._requiredColumns.update(self._compute['columns'])

Check warning on line 1290 in girder_annotation/girder_large_image_annotation/utils/__init__.py

View check run for this annotation

Codecov / codecov/patch

girder_annotation/girder_large_image_annotation/utils/__init__.py#L1289-L1290

Added lines #L1289 - L1290 were not covered by tests
with self._dataLock:
self._datacolumns = {c: {} for c in columns}
rows = set()
Expand All @@ -1201,7 +1305,7 @@
for cidx, col in enumerate(colsout):
colkey = col['key']
numrows = len(data)
if colkey in requiredColumns:
if colkey in specifiedReqColumns:
data = [row for row in data if row[cidx] is not None]
if len(data) < numrows:
logger.info(f'Reduced row count from {numrows} to {len(data)} '
Expand All @@ -1210,7 +1314,7 @@
for cidx, col in enumerate(colsout):
colkey = col['key']
numrows = len(data)
if colkey in self._requiredColumns and colkey not in requiredColumns:
if colkey in self._requiredColumns and colkey not in specifiedReqColumns:
subdata = [row for row in subdata if row[cidx] is not None]
if len(subdata) and len(subdata) < len(data):
logger.info(f'Reduced row count from {len(data)} to {len(subdata)} '
Expand Down
3 changes: 3 additions & 0 deletions girder_annotation/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ def prerelease_local_scheme(version):
'orjson',
],
extras_require={
'compute': [
'umap-learn',
],
'tasks': [
f'girder-large-image[tasks]{limit_version}',
],
Expand Down
20 changes: 10 additions & 10 deletions girder_annotation/test_annotation/test_annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -783,7 +783,7 @@ def testPlottableDataAccess(admin):
plottable = girder_large_image_annotation.utils.PlottableItemData(admin, item)
col = plottable.columns
# Also contains item id, name, and description
assert len(col) == 12
assert len(col) == 15

data = plottable.data([c['key'] for c in col])
assert len(data['columns']) == 12
Expand Down Expand Up @@ -937,71 +937,71 @@ def testPlottableDataMultipleItems(admin):
plottable = girder_large_image_annotation.utils.PlottableItemData(
admin, item1, sources='item')
col = plottable.columns
assert len(col) == 3
assert len(col) == 6
data = plottable.data([c['key'] for c in col])
assert len(data['columns']) == 3
assert len(data['data']) == 1

plottable = girder_large_image_annotation.utils.PlottableItemData(
admin, item1, sources='item', adjacentItems=True)
col = plottable.columns
assert len(col) == 3
assert len(col) == 6
data = plottable.data([c['key'] for c in col])
assert len(data['columns']) == 3
assert len(data['data']) == 2

plottable = girder_large_image_annotation.utils.PlottableItemData(
admin, item1, sources='item', adjacentItems='__all__')
col = plottable.columns
assert len(col) == 4
assert len(col) == 7
data = plottable.data([c['key'] for c in col])
assert len(data['columns']) == 4
assert len(data['data']) == 2

plottable = girder_large_image_annotation.utils.PlottableItemData(
admin, item1)
col = plottable.columns
assert len(col) == 4
assert len(col) == 7
data = plottable.data([c['key'] for c in col])
assert len(data['columns']) == 4
assert len(data['data']) == 3

plottable = girder_large_image_annotation.utils.PlottableItemData(
admin, item1, adjacentItems=True)
col = plottable.columns
assert len(col) == 4
assert len(col) == 7
data = plottable.data([c['key'] for c in col])
assert len(data['columns']) == 4
assert len(data['data']) == 4

plottable = girder_large_image_annotation.utils.PlottableItemData(
admin, item1, annotations=[str(annot1a['_id']), str(annot1c['_id'])])
col = plottable.columns
assert len(col) == 14
assert len(col) == 17
data = plottable.data([c['key'] for c in col])
assert len(data['columns']) == 14
assert len(data['data']) == 6

plottable = girder_large_image_annotation.utils.PlottableItemData(
admin, item1, annotations=[str(annot1a['_id']), str(annot1c['_id'])], adjacentItems=True)
col = plottable.columns
assert len(col) == 14
assert len(col) == 17
data = plottable.data([c['key'] for c in col])
assert len(data['columns']) == 14
assert len(data['data']) == 8

plottable = girder_large_image_annotation.utils.PlottableItemData(
admin, item1, annotations='__all__')
col = plottable.columns
assert len(col) == 14
assert len(col) == 17
data = plottable.data([c['key'] for c in col])
assert len(data['columns']) == 14
assert len(data['data']) == 8

plottable = girder_large_image_annotation.utils.PlottableItemData(
admin, item1, annotations='__all__', adjacentItems=True)
col = plottable.columns
assert len(col) == 14
assert len(col) == 17
data = plottable.data([c['key'] for c in col])
assert len(data['columns']) == 14
assert len(data['data']) == 12
2 changes: 1 addition & 1 deletion girder_annotation/test_annotation/test_annotations_rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -866,7 +866,7 @@ def testPlottableEndpoints(self, server, admin):
},
)
assert utilities.respStatus(resp) == 200
assert len(resp.json) == 2
assert len(resp.json) == 5

resp = server.request(
path=f'/annotation/item/{itemSrc["_id"]}/plot/list',
Expand Down
2 changes: 1 addition & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ girder-jobs>=3.0.3
# Girder and worker dependencies are already installed above
-e utilities/tasks[girder]
-e girder/.
-e girder_annotation/.
-e girder_annotation/.[compute]

# Extras from main setup.py
pylibmc>=1.5.1
Expand Down