From 4044db52487bc8d462e48878512f7ec81dda9fa7 Mon Sep 17 00:00:00 2001 From: Mike Gevaert Date: Fri, 1 Mar 2024 11:11:41 +0100 Subject: [PATCH] Faster _compute_region_cell_counts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some timings: old: 391s new: 7.77 s ± 26.5 ms per loop So about 55 times faster maxium difference in cell_count: 9.313225746154785e-10 Note: `ValueToIndexVoxels` will at some point be included in voxcell Code used for testing: ``` from atlas_densities.densities.inhibitory_neuron_densities_optimization import _compute_region_cell_counts import voxcell from atlas_densities.densities import utils annotation = voxcell.VoxelData.load_nrrd('input-data/annotation_ccfv2_l23split_barrelsplit_validated.nrrd') density = voxcell.VoxelData.load_nrrd('input-data/neuron_density.nrrd') voxel_volume = annotation.voxel_volume / 1e9 rm =voxcell.RegionMap.load_json('input-data/hierarchy_ccfv2_l23split_barrelsplit.json') hierarchy_info = utils.get_hierarchy_info(rm, root='root') res = _compute_region_cell_counts(annotation.raw, density.raw, voxel_volume, hierarchy_info,) ``` --- ...nhibitory_neuron_densities_optimization.py | 85 ++++++++++++++++--- 1 file changed, 75 insertions(+), 10 deletions(-) diff --git a/atlas_densities/densities/inhibitory_neuron_densities_optimization.py b/atlas_densities/densities/inhibitory_neuron_densities_optimization.py index 675a657..21be29f 100644 --- a/atlas_densities/densities/inhibitory_neuron_densities_optimization.py +++ b/atlas_densities/densities/inhibitory_neuron_densities_optimization.py @@ -38,6 +38,7 @@ """ from __future__ import annotations +import itertools as it import logging import warnings from typing import Dict, List, Optional, Set, Tuple @@ -170,6 +171,66 @@ def _replace_inf_with_none(bounds: FloatArray) -> List[MinMaxPair]: return [(float(min_), None if np.isinf(max_) else float(max_)) for min_, max_ in bounds] +class ValueToIndexVoxels: + """Faster indexing of annotation style voxel volumes""" + + def __init__(self, values): + """Initialize. + + Args: + values(np.array): volume with each voxel marked with a value; usually to group regions + """ + self._order = "C" if values.flags["C_CONTIGUOUS"] else "F" + + values = values.ravel(order=self._order) + uniques, counts = np.unique(values, return_counts=True) + + offsets = np.empty(len(counts) + 1, dtype=np.uint64) + offsets[0] = 0 + offsets[1:] = np.cumsum(counts) + + self._indices = np.argsort(values, kind="stable") + self._offsets = offsets + self._mapping = {v: i for i, v in enumerate(uniques)} + + @property + def values(self): + """List of values that are found in the original volume.""" + return list(self._mapping) + + def value_to_1d_indices(self, value): + """Return the indices array indices corresponding to the 'value'. + + Note: These are 1D indices, so the assumption is they are applied to a volume + who has been ValueToIndexVoxels::ravel(volume) + """ + if value not in self._mapping: + return np.array([], dtype=np.uint64) + + group_index = self._mapping[value] + return self._indices[self._offsets[group_index] : self._offsets[group_index + 1]] + + def ravel(self, voxel_data): + """Ensures `voxel_data` matches the layout that the 1D indices can be used.""" + return voxel_data.ravel(order=self._order) + + def apply(self, values, funcs, voxel_data): + """For pairs of `values` and `funcs`, apply the func as if a mask was created from `value`. + + Args: + values(iterable of value): values to be found in original values array + funcs(iterable of funcs): if only a single function is provided, + it is used for all `values` + voxel_data(np.array): Array on which to apply function based on desired `values` + """ + flat_data = self.ravel(voxel_data) + if hasattr(funcs, "__call__"): + funcs = (funcs,) + for value, func in zip(values, it.cycle(funcs)): + idx = self.value_to_1d_indices(value) + yield func(flat_data[idx]) + + def _compute_region_cell_counts( annotation: AnnotationT, density: FloatArray, @@ -199,16 +260,20 @@ def _compute_region_cell_counts( ... ... The index is the sorted list of all region identifiers. """ - id_counts = [] - for id_ in tqdm(hierarchy_info.index): - mask = annotation == id_ - id_counts.append(np.sum(density[mask]) * voxel_volume) + vtiv = ValueToIndexVoxels(annotation) + density_copy = vtiv.ravel(density.copy()) + id_counts = [] + for id_ in hierarchy_info.index: + indices = vtiv.value_to_1d_indices(id_) + if len(indices): + id_counts.append(np.sum(density_copy[indices]) * voxel_volume) + else: + id_counts.append(0) result = pd.DataFrame( {"brain_region": hierarchy_info["brain_region"], "cell_count": id_counts}, index=hierarchy_info.index, ) - return result @@ -736,17 +801,17 @@ def create_inhibitory_neuron_densities( # pylint: disable=too-many-locals hierarchy_info = utils.get_hierarchy_info(RegionMap.from_dict(hierarchy), root=region_name) average_densities = _resize_average_densities(average_densities, hierarchy_info) - L.info("Initialization of the linear program: started") - region_counts, id_counts = _compute_initial_cell_counts( - annotation, voxel_volume, average_densities, hierarchy_info - ) - L.info("Retrieving overall neuron counts in atomic 3D regions ...") neuron_counts = _compute_region_cell_counts( annotation, neuron_density, voxel_volume, hierarchy_info ) assert np.all(neuron_counts["cell_count"] >= 0.0) + L.info("Initialization of the linear program: started") + region_counts, id_counts = _compute_initial_cell_counts( + annotation, voxel_volume, average_densities, hierarchy_info + ) + L.info("Setting the known values ...") x_result, deltas = set_known_values(region_counts, id_counts, hierarchy_info) # We assume that if the cell count of `cell_type` in `region_name` is known with certainty