From 4044db52487bc8d462e48878512f7ec81dda9fa7 Mon Sep 17 00:00:00 2001 From: Mike Gevaert Date: Fri, 1 Mar 2024 11:11:41 +0100 Subject: [PATCH 1/3] Faster _compute_region_cell_counts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some timings: old: 391s new: 7.77 s ± 26.5 ms per loop So about 55 times faster maxium difference in cell_count: 9.313225746154785e-10 Note: `ValueToIndexVoxels` will at some point be included in voxcell Code used for testing: ``` from atlas_densities.densities.inhibitory_neuron_densities_optimization import _compute_region_cell_counts import voxcell from atlas_densities.densities import utils annotation = voxcell.VoxelData.load_nrrd('input-data/annotation_ccfv2_l23split_barrelsplit_validated.nrrd') density = voxcell.VoxelData.load_nrrd('input-data/neuron_density.nrrd') voxel_volume = annotation.voxel_volume / 1e9 rm =voxcell.RegionMap.load_json('input-data/hierarchy_ccfv2_l23split_barrelsplit.json') hierarchy_info = utils.get_hierarchy_info(rm, root='root') res = _compute_region_cell_counts(annotation.raw, density.raw, voxel_volume, hierarchy_info,) ``` --- ...nhibitory_neuron_densities_optimization.py | 85 ++++++++++++++++--- 1 file changed, 75 insertions(+), 10 deletions(-) diff --git a/atlas_densities/densities/inhibitory_neuron_densities_optimization.py b/atlas_densities/densities/inhibitory_neuron_densities_optimization.py index 675a657..21be29f 100644 --- a/atlas_densities/densities/inhibitory_neuron_densities_optimization.py +++ b/atlas_densities/densities/inhibitory_neuron_densities_optimization.py @@ -38,6 +38,7 @@ """ from __future__ import annotations +import itertools as it import logging import warnings from typing import Dict, List, Optional, Set, Tuple @@ -170,6 +171,66 @@ def _replace_inf_with_none(bounds: FloatArray) -> List[MinMaxPair]: return [(float(min_), None if np.isinf(max_) else float(max_)) for min_, max_ in bounds] +class ValueToIndexVoxels: + """Faster indexing of annotation style voxel volumes""" + + def __init__(self, values): + """Initialize. + + Args: + values(np.array): volume with each voxel marked with a value; usually to group regions + """ + self._order = "C" if values.flags["C_CONTIGUOUS"] else "F" + + values = values.ravel(order=self._order) + uniques, counts = np.unique(values, return_counts=True) + + offsets = np.empty(len(counts) + 1, dtype=np.uint64) + offsets[0] = 0 + offsets[1:] = np.cumsum(counts) + + self._indices = np.argsort(values, kind="stable") + self._offsets = offsets + self._mapping = {v: i for i, v in enumerate(uniques)} + + @property + def values(self): + """List of values that are found in the original volume.""" + return list(self._mapping) + + def value_to_1d_indices(self, value): + """Return the indices array indices corresponding to the 'value'. + + Note: These are 1D indices, so the assumption is they are applied to a volume + who has been ValueToIndexVoxels::ravel(volume) + """ + if value not in self._mapping: + return np.array([], dtype=np.uint64) + + group_index = self._mapping[value] + return self._indices[self._offsets[group_index] : self._offsets[group_index + 1]] + + def ravel(self, voxel_data): + """Ensures `voxel_data` matches the layout that the 1D indices can be used.""" + return voxel_data.ravel(order=self._order) + + def apply(self, values, funcs, voxel_data): + """For pairs of `values` and `funcs`, apply the func as if a mask was created from `value`. + + Args: + values(iterable of value): values to be found in original values array + funcs(iterable of funcs): if only a single function is provided, + it is used for all `values` + voxel_data(np.array): Array on which to apply function based on desired `values` + """ + flat_data = self.ravel(voxel_data) + if hasattr(funcs, "__call__"): + funcs = (funcs,) + for value, func in zip(values, it.cycle(funcs)): + idx = self.value_to_1d_indices(value) + yield func(flat_data[idx]) + + def _compute_region_cell_counts( annotation: AnnotationT, density: FloatArray, @@ -199,16 +260,20 @@ def _compute_region_cell_counts( ... ... The index is the sorted list of all region identifiers. """ - id_counts = [] - for id_ in tqdm(hierarchy_info.index): - mask = annotation == id_ - id_counts.append(np.sum(density[mask]) * voxel_volume) + vtiv = ValueToIndexVoxels(annotation) + density_copy = vtiv.ravel(density.copy()) + id_counts = [] + for id_ in hierarchy_info.index: + indices = vtiv.value_to_1d_indices(id_) + if len(indices): + id_counts.append(np.sum(density_copy[indices]) * voxel_volume) + else: + id_counts.append(0) result = pd.DataFrame( {"brain_region": hierarchy_info["brain_region"], "cell_count": id_counts}, index=hierarchy_info.index, ) - return result @@ -736,17 +801,17 @@ def create_inhibitory_neuron_densities( # pylint: disable=too-many-locals hierarchy_info = utils.get_hierarchy_info(RegionMap.from_dict(hierarchy), root=region_name) average_densities = _resize_average_densities(average_densities, hierarchy_info) - L.info("Initialization of the linear program: started") - region_counts, id_counts = _compute_initial_cell_counts( - annotation, voxel_volume, average_densities, hierarchy_info - ) - L.info("Retrieving overall neuron counts in atomic 3D regions ...") neuron_counts = _compute_region_cell_counts( annotation, neuron_density, voxel_volume, hierarchy_info ) assert np.all(neuron_counts["cell_count"] >= 0.0) + L.info("Initialization of the linear program: started") + region_counts, id_counts = _compute_initial_cell_counts( + annotation, voxel_volume, average_densities, hierarchy_info + ) + L.info("Setting the known values ...") x_result, deltas = set_known_values(region_counts, id_counts, hierarchy_info) # We assume that if the cell count of `cell_type` in `region_name` is known with certainty From 7bbb61554a730e6a8a2a1d3ae8ccba457c2edb32 Mon Sep 17 00:00:00 2001 From: Mike Gevaert Date: Tue, 26 Mar 2024 10:32:16 +0100 Subject: [PATCH 2/3] switch to ValueToIndexVoxels in voxcell --- ...nhibitory_neuron_densities_optimization.py | 65 +------------------ setup.py | 2 +- 2 files changed, 3 insertions(+), 64 deletions(-) diff --git a/atlas_densities/densities/inhibitory_neuron_densities_optimization.py b/atlas_densities/densities/inhibitory_neuron_densities_optimization.py index 21be29f..35c7837 100644 --- a/atlas_densities/densities/inhibitory_neuron_densities_optimization.py +++ b/atlas_densities/densities/inhibitory_neuron_densities_optimization.py @@ -38,7 +38,6 @@ """ from __future__ import annotations -import itertools as it import logging import warnings from typing import Dict, List, Optional, Set, Tuple @@ -48,7 +47,7 @@ from atlas_commons.typing import AnnotationT, FloatArray from scipy.optimize import linprog from tqdm import tqdm -from voxcell import RegionMap +from voxcell import RegionMap, voxel_data from atlas_densities.densities import utils from atlas_densities.densities.inhibitory_neuron_densities_helper import ( @@ -171,66 +170,6 @@ def _replace_inf_with_none(bounds: FloatArray) -> List[MinMaxPair]: return [(float(min_), None if np.isinf(max_) else float(max_)) for min_, max_ in bounds] -class ValueToIndexVoxels: - """Faster indexing of annotation style voxel volumes""" - - def __init__(self, values): - """Initialize. - - Args: - values(np.array): volume with each voxel marked with a value; usually to group regions - """ - self._order = "C" if values.flags["C_CONTIGUOUS"] else "F" - - values = values.ravel(order=self._order) - uniques, counts = np.unique(values, return_counts=True) - - offsets = np.empty(len(counts) + 1, dtype=np.uint64) - offsets[0] = 0 - offsets[1:] = np.cumsum(counts) - - self._indices = np.argsort(values, kind="stable") - self._offsets = offsets - self._mapping = {v: i for i, v in enumerate(uniques)} - - @property - def values(self): - """List of values that are found in the original volume.""" - return list(self._mapping) - - def value_to_1d_indices(self, value): - """Return the indices array indices corresponding to the 'value'. - - Note: These are 1D indices, so the assumption is they are applied to a volume - who has been ValueToIndexVoxels::ravel(volume) - """ - if value not in self._mapping: - return np.array([], dtype=np.uint64) - - group_index = self._mapping[value] - return self._indices[self._offsets[group_index] : self._offsets[group_index + 1]] - - def ravel(self, voxel_data): - """Ensures `voxel_data` matches the layout that the 1D indices can be used.""" - return voxel_data.ravel(order=self._order) - - def apply(self, values, funcs, voxel_data): - """For pairs of `values` and `funcs`, apply the func as if a mask was created from `value`. - - Args: - values(iterable of value): values to be found in original values array - funcs(iterable of funcs): if only a single function is provided, - it is used for all `values` - voxel_data(np.array): Array on which to apply function based on desired `values` - """ - flat_data = self.ravel(voxel_data) - if hasattr(funcs, "__call__"): - funcs = (funcs,) - for value, func in zip(values, it.cycle(funcs)): - idx = self.value_to_1d_indices(value) - yield func(flat_data[idx]) - - def _compute_region_cell_counts( annotation: AnnotationT, density: FloatArray, @@ -260,7 +199,7 @@ def _compute_region_cell_counts( ... ... The index is the sorted list of all region identifiers. """ - vtiv = ValueToIndexVoxels(annotation) + vtiv = voxel_data.ValueToIndexVoxels(annotation) density_copy = vtiv.ravel(density.copy()) id_counts = [] diff --git a/setup.py b/setup.py index 9fbd57d..308d6ee 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,7 @@ # from the HiGHS library. We use the "highs" method in the densities module. "scipy>=1.6.0", "tqdm>=4.44.1", - "voxcell>=3.0.0", + "voxcell>=3.1.7", ], extras_require={ "tests": [ From db6d8a1e495d2a9f2b03f6d2822725b15f8404ea Mon Sep 17 00:00:00 2001 From: Mike Gevaert Date: Tue, 26 Mar 2024 11:10:58 +0100 Subject: [PATCH 3/3] review comments --- .../densities/inhibitory_neuron_densities_optimization.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/atlas_densities/densities/inhibitory_neuron_densities_optimization.py b/atlas_densities/densities/inhibitory_neuron_densities_optimization.py index 35c7837..0747918 100644 --- a/atlas_densities/densities/inhibitory_neuron_densities_optimization.py +++ b/atlas_densities/densities/inhibitory_neuron_densities_optimization.py @@ -200,15 +200,13 @@ def _compute_region_cell_counts( The index is the sorted list of all region identifiers. """ vtiv = voxel_data.ValueToIndexVoxels(annotation) - density_copy = vtiv.ravel(density.copy()) + density = vtiv.ravel(density) id_counts = [] for id_ in hierarchy_info.index: indices = vtiv.value_to_1d_indices(id_) - if len(indices): - id_counts.append(np.sum(density_copy[indices]) * voxel_volume) - else: - id_counts.append(0) + id_counts.append(np.sum(density[indices]) * voxel_volume) + result = pd.DataFrame( {"brain_region": hierarchy_info["brain_region"], "cell_count": id_counts}, index=hierarchy_info.index,