Faster _compute_region_cell_counts

Some timings: old: 391s new: 7.77 s ± 26.5 ms per loop So about 55 times faster maxium difference in cell_count: 9.313225746154785e-10 Note: `ValueToIndexVoxels` will at some point be included in voxcell Code used for testing: ``` from atlas_densities.densities.inhibitory_neuron_densities_optimization import _compute_region_cell_counts import voxcell from atlas_densities.densities import utils annotation = voxcell.VoxelData.load_nrrd('input-data/annotation_ccfv2_l23split_barrelsplit_validated.nrrd') density = voxcell.VoxelData.load_nrrd('input-data/neuron_density.nrrd') voxel_volume = annotation.voxel_volume / 1e9 rm =voxcell.RegionMap.load_json('input-data/hierarchy_ccfv2_l23split_barrelsplit.json') hierarchy_info = utils.get_hierarchy_info(rm, root='root') res = _compute_region_cell_counts(annotation.raw, density.raw, voxel_volume, hierarchy_info,) ```
BlueBrain · Mar 1, 2024 · 7affcf8 · 7affcf8
1 parent 08e2570
commit 7affcf8
Showing 1 changed file with 75 additions and 10 deletions.
diff --git a/atlas_densities/densities/inhibitory_neuron_densities_optimization.py b/atlas_densities/densities/inhibitory_neuron_densities_optimization.py
@@ -38,6 +38,7 @@
 """
 from __future__ import annotations
 
+import itertools as it
 import logging
 import warnings
 from typing import Dict, List, Optional, Set, Tuple
@@ -170,6 +171,66 @@ def _replace_inf_with_none(bounds: FloatArray) -> List[MinMaxPair]:
     return [(float(min_), None if np.isinf(max_) else float(max_)) for min_, max_ in bounds]
 
 
+class ValueToIndexVoxels:
+    """Faster indexing of annotation style voxel volumes"""
+
+    def __init__(self, values):
+        """Initialize.
+
+        Args:
+            values(np.array): volume with each voxel marked with a value; usually to group regions
+        """
+        self._order = "C" if values.flags["C_CONTIGUOUS"] else "F"
+
+        values = values.ravel(order=self._order)
+        uniques, counts = np.unique(values, return_counts=True)
+
+        offsets = np.empty(len(counts) + 1, dtype=np.uint64)
+        offsets[0] = 0
+        offsets[1:] = np.cumsum(counts)
+
+        self._indices = np.argsort(values, kind="stable")
+        self._offsets = offsets
+        self._mapping = {v: i for i, v in enumerate(uniques)}
+
+    @property
+    def values(self):
+        """List of values that are found in the original volume."""
+        return list(self._mapping)
+
+    def value_to_1d_indices(self, value):
+        """Return the indices array indices corresponding to the 'value'.
+
+        Note: These are 1D indices, so the assumption is they are applied to a volume
+        who has been ValueToIndexVoxels::ravel(volume)
+        """
+        if value not in self._mapping:
+            return np.array([], dtype=np.uint64)
+
+        group_index = self._mapping[value]
+        return self._indices[self._offsets[group_index] : self._offsets[group_index + 1]]
+
+    def ravel(self, voxel_data):
+        """Ensures `voxel_data` matches the layout that the 1D indices can be used."""
+        return voxel_data.ravel(order=self._order)
+
+    def apply(self, values, funcs, voxel_data):
+        """For pairs of `values` and `funcs`, apply the func as if a mask was created from `value`.
+
+        Args:
+            values(iterable of value): values to be found in original values array
+            funcs(iterable of funcs): if only a single function is provided,
+            it is used for all `values`
+            voxel_data(np.array): Array on which to apply function based on desired `values`
+        """
+        flat_data = self.ravel(voxel_data)
+        if hasattr(funcs, "__call__"):
+            funcs = (funcs,)
+        for value, func in zip(values, it.cycle(funcs)):
+            idx = self.value_to_1d_indices(value)
+            yield func(flat_data[idx])
+
+
 def _compute_region_cell_counts(
     annotation: AnnotationT,
     density: FloatArray,
@@ -199,16 +260,20 @@ def _compute_region_cell_counts(
              ...                             ...
         The index is the sorted list of all region identifiers.
     """
-    id_counts = []
-    for id_ in tqdm(hierarchy_info.index):
-        mask = annotation == id_
-        id_counts.append(np.sum(density[mask]) * voxel_volume)
+    vtiv = ValueToIndexVoxels(annotation)
+    density_copy = vtiv.ravel(density.copy())
 
+    id_counts = []
+    for id_ in hierarchy_info.index:
+        indices = vtiv.value_to_1d_indices(id_)
+        if len(indices):
+            id_counts.append(np.sum(density_copy[indices]) * voxel_volume)
+        else:
+            id_counts.append(0)
     result = pd.DataFrame(
         {"brain_region": hierarchy_info["brain_region"], "cell_count": id_counts},
         index=hierarchy_info.index,
     )
-
     return result
 
 
@@ -754,17 +819,17 @@ def create_inhibitory_neuron_densities(  # pylint: disable=too-many-locals
     hierarchy_info = utils.get_hierarchy_info(RegionMap.from_dict(hierarchy), root=region_name)
     average_densities = _resize_average_densities(average_densities, hierarchy_info)
 
-    L.info("Initialization of the linear program: started")
-    region_counts, id_counts = _compute_initial_cell_counts(
-        annotation, voxel_volume, average_densities, hierarchy_info
-    )
-
     L.info("Retrieving overall neuron counts in atomic 3D regions ...")
     neuron_counts = _compute_region_cell_counts(
         annotation, neuron_density, voxel_volume, hierarchy_info
     )
     assert np.all(neuron_counts["cell_count"] >= 0.0)
 
+    L.info("Initialization of the linear program: started")
+    region_counts, id_counts = _compute_initial_cell_counts(
+        annotation, voxel_volume, average_densities, hierarchy_info
+    )
+
     L.info("Setting the known values ...")
     x_result, deltas = set_known_values(region_counts, id_counts, hierarchy_info)
     # We assume that if the cell count of `cell_type` in `region_name` is known with certainty