Skip to content

Commit

Permalink
Add the ability to compute additional columns for plottable data
Browse files Browse the repository at this point in the history
  • Loading branch information
manthey committed Sep 6, 2024
1 parent 80970f0 commit d165889
Show file tree
Hide file tree
Showing 6 changed files with 137 additions and 20 deletions.
14 changes: 12 additions & 2 deletions girder_annotation/girder_large_image_annotation/rest/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -661,19 +661,29 @@ def getItemPlottableElements(self, item, annotations, adjacentItems, sources=Non
.param('sources', 'An optional comma separated list that can contain '
'folder, item, annotation, annotationelement, datafile.',
required=False)
.jsonParam(
'compute', 'A dictionary with keys "columns": a list of columns '
'to include in the computation; if unspecified or an empty list, '
'no computation is done, "function": a string with the name of '
'the function, such as umap, "params": additional parameters to '
'pass to the function. If none of the requiredKeys are '
'compute.(x|y|z), the computation will not be performed. Only '
'rows which have all selected columns present will be included in '
'the computation.',
paramType='formData', requireObject=True, required=False)
.errorResponse('ID was invalid.')
.errorResponse('Read access was denied for the item.', 403),
)
@access.public(cookie=True, scope=TokenScope.DATA_READ)
def getItemPlottableData(
self, item, keys, adjacentItems, annotations, requiredKeys, sources=None):
self, item, keys, adjacentItems, annotations, requiredKeys, sources=None, compute=None):
user = self.getCurrentUser()
if adjacentItems != '__all__':
adjacentItems = str(adjacentItems).lower() == 'true'
sources = sources or None
data = utils.PlottableItemData(
user, item, annotations=annotations, adjacentItems=adjacentItems,
sources=sources)
sources=sources, compute=compute)
return data.data(keys, requiredKeys)

def getFolderAnnotations(self, id, recurse, user, limit=False, offset=False, sort=False,
Expand Down
118 changes: 111 additions & 7 deletions girder_annotation/girder_large_image_annotation/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
'application/x-xls': 'read_excel',
}
scanDatafileRecords = 50
scanAnnotationElements = 5000


@functools.lru_cache(maxsize=100)
Expand Down Expand Up @@ -393,7 +394,8 @@ class PlottableItemData:
maxDistinct = 20
allowedTypes = (str, bool, int, float)

def __init__(self, user, item, annotations=None, adjacentItems=False, sources=None):
def __init__(self, user, item, annotations=None, adjacentItems=False,
sources=None, compute=None):
"""
Get plottable data associated with an item.
Expand All @@ -408,15 +410,32 @@ def __init__(self, user, item, annotations=None, adjacentItems=False, sources=No
:param sources: None for all, or a string with a comma-separated list
or a list of strings; when a list, the options are folder, item,
annotation, datafile.
:param compute: None for none, or a dictionary with keys "columns": a
list of columns to include in the computation; if unspecified or an
empty list, no computation is done, "function": a string with the
name of the function, such as umap, "params": additional parameters
to pass to the function. If none of the requiredKeys are
compute.(x|y|z), the computation will not be performed. Only rows
which have all selected columns present will be included in the
computation.
"""
self.user = user
self._columns = None
self._datacolumns = None
self._data = None
self._compute = None
try:
if len(compute['columns']):
self._compute = {'function': 'umap', 'params': {
'random_state': 1, 'n_jobs': 1}}
self._compute.update(compute)
except Exception:
pass
if sources and not isinstance(sources, (list, tuple)):
sources = sources.split(',')
self._sources = tuple(sources) if sources else None
if self._sources and 'annotation' not in self._sources:
if (self._sources and 'annotation' not in self._sources and
'annotationelement' not in self._sources):
annotations = None
self._fullScan = adjacentItems == '__all__'
self._findItems(item, adjacentItems)
Expand Down Expand Up @@ -559,7 +578,11 @@ def _findDataFiles(self): # noqa
'bbox.y0': 'Bounding Box Low Y',
'bbox.x1': 'Bounding Box High X',
'bbox.y1': 'Bounding Box High Y',
'compute.x': 'Dimension Reduction X',
'compute.y': 'Dimension Reduction Y',
'compute.z': 'Dimension Reduction Z',
}
computeColumns = {'compute.x', 'compute.y', 'compute.z'}

def itemNameIDSelector(self, isName, selector):
"""
Expand Down Expand Up @@ -1068,6 +1091,78 @@ def _getColumnsFromDataFiles(self, columns):
countsPerDataFile[dfidx] = count - startcount
return count

def _computeFunction(self, rows):
    """
    Run the configured compute function on the collected rows.

    :param rows: a dictionary whose values are the per-row lists of input
        values.  Only the values are passed to the function, in the
        dictionary's iteration order.
    :returns: True if the function ran and its output was stored in
        self._computed; False if the configured function is not supported.
    """
    if self._compute['function'] != 'umap':
        # Report the unsupported function explicitly rather than falling
        # off the end of the method; the caller treats a falsy result as
        # "nothing was computed".
        return False
    # Imported here so that umap is only needed when it is actually used.
    import umap

    logger.info(f'Calling umap on {len(rows)} rows')
    reducer = umap.UMAP(**self._compute['params'])
    self._computed = reducer.fit_transform(list(rows.values()))
    logger.info('Called umap')
    return True

def _getColumnsFromCompute(self, columns):  # noqa
    """
    Collect columns and data from compute actions.

    When self._datacolumns is unset or empty, this only registers the
    compute.(x|y|z) columns in the column dictionary.  Otherwise, if a
    computation was requested and at least one compute column is required,
    the selected numeric columns are passed to the compute function and the
    results are stored as the values of the required compute columns.

    :param columns: the dictionary of column records, keyed by column key.
        It is updated in place with the compute columns.
    :returns: the number of rows that received computed values; 0 if no
        computation was performed.
    """

    def computeGetData(record):
        return {}

    def computeLength(record, data):
        return len(self._computed)

    def computeSelector(key):
        # The final character of the key (x, y, or z) selects output axis
        # 0, 1, or 2.
        axis = ord(key[-1:]) - ord('x')

        def computeSelectorAxis(record, data, row):
            return self._computed[row][axis]

        return computeSelectorAxis

    if not self._datacolumns:
        # No data is being collected; just register the compute columns.
        for key in self.computeColumns:
            self._ensureColumn(
                columns, key, self.commonColumns[key], 'compute',
                computeGetData, computeSelector(key), computeLength)
            columns[key]['count'] = 1
            columns[key]['min'] = columns[key]['max'] = 0
        return 0
    if self._compute is None or not (self._requiredColumns & self.computeColumns):
        return 0
    selected = set(self._compute['columns'])
    numeric = {key for key, col in columns.items() if col['type'] == 'number'}
    # At least one selected numeric column must have a known range, or
    # there is nothing to compute from.
    if not any(columns[key].get('min') is not None for key in numeric & selected):
        return 0
    # Only numeric columns are used as inputs; a selected column of another
    # type must not be handed to the compute function.
    cols = sorted(key for key in numeric & selected if key in self._datacolumns)
    rows = {}
    for kidx, key in enumerate(cols):
        for row, value in self._datacolumns[key].items():
            if not kidx:
                rows[row] = [value]
            elif row in rows and len(rows[row]) == kidx:
                # Only extend rows that had a value in every earlier column
                rows[row].append(value)
    # Keep only the rows that have a value for every input column
    rows = {k: row for k, row in rows.items() if len(row) == len(cols)}
    if not rows:
        return 0
    if not self._computeFunction(rows):
        return 0
    for key in self.computeColumns:
        if key in self._requiredColumns and key in self._datacolumns:
            self._ensureColumn(
                columns, key, self.commonColumns[key], 'compute',
                computeGetData, computeSelector(key), computeLength)
            cidx = ord(key[-1:]) - ord('x')
            # The computed results are in the same order as the rows
            # dictionary, since its values were passed in iteration order.
            for ridx, row in enumerate(rows):
                self._datacolumns[key][row] = float(self._computed[ridx][cidx])
            columns[key]['count'] = len(rows)
            columns[key]['min'] = columns[key]['max'] = 0
    return len(rows)

def _getColumns(self):
"""
Get a sorted list of plottable columns with some metadata for each.
Expand All @@ -1086,6 +1181,7 @@ def _getColumns(self):
count += self._collectColumns(columns, [item], 'item', first=False)
count += self._getColumnsFromAnnotations(columns)
count += self._getColumnsFromDataFiles(columns)
count += self._getColumnsFromCompute(columns)
for result in columns.values():
if len(result['distinct']) <= self.maxDistinct:
result['distinct'] = sorted(result['distinct'])
Expand All @@ -1095,7 +1191,9 @@ def _getColumns(self):
if result['type'] != 'number' or result['min'] is None:
result.pop('min', None)
result.pop('max', None)
prefixOrder = {'item': 0, 'annotation': 1, 'annotationelement': 2, 'data': 3, 'bbox': 4}
prefixOrder = {
'item': 0, 'annotation': 1, 'annotationelement': 2, 'data': 3,
'bbox': 4, 'compute': 5}
columns = sorted(columns.values(), key=lambda x: (
prefixOrder.get(x['key'].split('.', 1)[0], len(prefixOrder)), x['key']))
return columns
Expand Down Expand Up @@ -1168,7 +1266,7 @@ def _collectData(self, rows, colsout):
rows = [row for ridx, row in enumerate(rows) if rows[ridx] not in discard]
return data, rows

def data(self, columns, requiredColumns=None):
def data(self, columns, requiredColumns=None): # noqa
"""
Get plottable data.
Expand All @@ -1182,8 +1280,14 @@ def data(self, columns, requiredColumns=None):
columns = columns.split(',')
if not isinstance(requiredColumns, list):
requiredColumns = requiredColumns.split(',') if requiredColumns is not None else []
requiredColumns = set(requiredColumns)
specifiedReqColumns = set(requiredColumns)
self._requiredColumns = set(requiredColumns)
if self._compute:
if ('compute.z' in specifiedReqColumns and
self._compute['function'] == 'umap' and
'n_components' not in self._compute['params']):
self._compute['params']['n_components'] = 3
self._requiredColumns.update(self._compute['columns'])
with self._dataLock:
self._datacolumns = {c: {} for c in columns}
rows = set()
Expand All @@ -1201,7 +1305,7 @@ def data(self, columns, requiredColumns=None):
for cidx, col in enumerate(colsout):
colkey = col['key']
numrows = len(data)
if colkey in requiredColumns:
if colkey in specifiedReqColumns:
data = [row for row in data if row[cidx] is not None]
if len(data) < numrows:
logger.info(f'Reduced row count from {numrows} to {len(data)} '
Expand All @@ -1210,7 +1314,7 @@ def data(self, columns, requiredColumns=None):
for cidx, col in enumerate(colsout):
colkey = col['key']
numrows = len(data)
if colkey in self._requiredColumns and colkey not in requiredColumns:
if colkey in self._requiredColumns and colkey not in specifiedReqColumns:
subdata = [row for row in subdata if row[cidx] is not None]
if len(subdata) and len(subdata) < len(data):
logger.info(f'Reduced row count from {len(data)} to {len(subdata)} '
Expand Down
3 changes: 3 additions & 0 deletions girder_annotation/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ def prerelease_local_scheme(version):
'orjson',
],
extras_require={
'compute': [
'umap-learn',
],
'tasks': [
f'girder-large-image[tasks]{limit_version}',
],
Expand Down
18 changes: 9 additions & 9 deletions girder_annotation/test_annotation/test_annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -937,71 +937,71 @@ def testPlottableDataMultipleItems(admin):
plottable = girder_large_image_annotation.utils.PlottableItemData(
admin, item1, sources='item')
col = plottable.columns
assert len(col) == 3
assert len(col) == 6
data = plottable.data([c['key'] for c in col])
assert len(data['columns']) == 3
assert len(data['data']) == 1

plottable = girder_large_image_annotation.utils.PlottableItemData(
admin, item1, sources='item', adjacentItems=True)
col = plottable.columns
assert len(col) == 3
assert len(col) == 6
data = plottable.data([c['key'] for c in col])
assert len(data['columns']) == 3
assert len(data['data']) == 2

plottable = girder_large_image_annotation.utils.PlottableItemData(
admin, item1, sources='item', adjacentItems='__all__')
col = plottable.columns
assert len(col) == 4
assert len(col) == 7
data = plottable.data([c['key'] for c in col])
assert len(data['columns']) == 4
assert len(data['data']) == 2

plottable = girder_large_image_annotation.utils.PlottableItemData(
admin, item1)
col = plottable.columns
assert len(col) == 4
assert len(col) == 7
data = plottable.data([c['key'] for c in col])
assert len(data['columns']) == 4
assert len(data['data']) == 3

plottable = girder_large_image_annotation.utils.PlottableItemData(
admin, item1, adjacentItems=True)
col = plottable.columns
assert len(col) == 4
assert len(col) == 7
data = plottable.data([c['key'] for c in col])
assert len(data['columns']) == 4
assert len(data['data']) == 4

plottable = girder_large_image_annotation.utils.PlottableItemData(
admin, item1, annotations=[str(annot1a['_id']), str(annot1c['_id'])])
col = plottable.columns
assert len(col) == 14
assert len(col) == 17
data = plottable.data([c['key'] for c in col])
assert len(data['columns']) == 14
assert len(data['data']) == 6

plottable = girder_large_image_annotation.utils.PlottableItemData(
admin, item1, annotations=[str(annot1a['_id']), str(annot1c['_id'])], adjacentItems=True)
col = plottable.columns
assert len(col) == 14
assert len(col) == 17
data = plottable.data([c['key'] for c in col])
assert len(data['columns']) == 14
assert len(data['data']) == 8

plottable = girder_large_image_annotation.utils.PlottableItemData(
admin, item1, annotations='__all__')
col = plottable.columns
assert len(col) == 14
assert len(col) == 17
data = plottable.data([c['key'] for c in col])
assert len(data['columns']) == 14
assert len(data['data']) == 8

plottable = girder_large_image_annotation.utils.PlottableItemData(
admin, item1, annotations='__all__', adjacentItems=True)
col = plottable.columns
assert len(col) == 14
assert len(col) == 17
data = plottable.data([c['key'] for c in col])
assert len(data['columns']) == 14
assert len(data['data']) == 12
2 changes: 1 addition & 1 deletion girder_annotation/test_annotation/test_annotations_rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -866,7 +866,7 @@ def testPlottableEndpoints(self, server, admin):
},
)
assert utilities.respStatus(resp) == 200
assert len(resp.json) == 2
assert len(resp.json) == 5

resp = server.request(
path=f'/annotation/item/{itemSrc["_id"]}/plot/list',
Expand Down
2 changes: 1 addition & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ girder-jobs>=3.0.3
# Girder and worker dependencies are already installed above
-e utilities/tasks[girder]
-e girder/.
-e girder_annotation/.
-e girder_annotation/.[compute]

# Extras from main setup.py
pylibmc>=1.5.1
Expand Down

0 comments on commit d165889

Please sign in to comment.