From 416b05e2b2937f16d8863269a78cf3c46a3dae88 Mon Sep 17 00:00:00 2001
From: Dimitri RODARIE
Date: Wed, 5 Apr 2023 11:00:21 +0200
Subject: [PATCH] Density placement: Force scaling on voxels that are not full. Fix #27. (#28)

Density placement: Force scaling on voxels that are not full. Fix #27.

* Minor fixes in docstrings.
* Fix warnings from divide by zero. Add test to exit density placement.
* update to pandas 2.0
* Fix warnings from nan values.

---------

Co-authored-by: Mike Gevaert
---
 atlas_densities/app/cell_densities.py     | 14 +++++------
 atlas_densities/densities/cell_density.py |  5 +++-
 atlas_densities/densities/excel_reader.py | 10 +++-----
 .../excitatory_inhibitory_splitting.py    |  2 +-
 .../mtype_densities_from_composition.py   |  2 +-
 atlas_densities/densities/utils.py        | 25 +++++++++++--------
 6 files changed, 31 insertions(+), 27 deletions(-)

diff --git a/atlas_densities/app/cell_densities.py b/atlas_densities/app/cell_densities.py
index 4781296..df56b6e 100644
--- a/atlas_densities/app/cell_densities.py
+++ b/atlas_densities/app/cell_densities.py
@@ -155,7 +155,7 @@ def cell_density(annotation_path, hierarchy_path, nissl_path, output_path):
 
     \b
     - the Nissl stain intensity, which is supposed to represent the overall cell density, up to
-      to region-dependent constant scaling factors.
+      region-dependent constant scaling factors.
     - cell counts from the scientific literature, which are used to determine a local linear
       dependency factor for each region where a cell count is available.
     - the optional soma radii, used to operate a correction.
@@ -417,7 +417,7 @@ def inhibitory_and_excitatory_neuron_densities(
     Note: optimization is not fully implemented and the current process only returns a
     feasible point.
 
-    The ouput densities are saved in the specified output directory under the following
+    The output densities are saved in the specified output directory under the following
     names:
 
     \b
@@ -490,7 +490,7 @@ def compile_measurements(
       Sexual Dimorphism' by Kim et al., 2017.
       https://ars.els-cdn.com/content/image/1-s2.0-S0092867417310693-mmc3.xlsx
     - `atlas_densities/app/data/measurements/gaba_papers.xlsx`, a compilation of measurements
-      from the scientifc literature made by Rodarie Dimitri (BBP).
+      from the scientific literature made by Rodarie Dimitri (BBP).
 
     This command extracts measurements from the above two files and gathers them into a unique
     CSV file with the following columns:
@@ -531,7 +531,7 @@ def compile_measurements(
     See `atlas_densities/densities/excel_reader.py` for more information.
 
     Note: This function should be deprecated once its output has been stored permanently as the
-    the unique source of density-related measurements for the AIBS mouse brain. New measurements
+    unique source of density-related measurements for the AIBS mouse brain. New measurements
     should be added to the stored file (Nexus).
 
     """
@@ -619,7 +619,7 @@ def measurements_to_average_densities(
     If several combinations of measurements yield several average cell densities for the same
     region, then the output data frame records the average of these measurements.
 
-    The ouput average cell densities are saved in under the CSV format as a dataframe with the same
+    The output average cell densities are saved in under the CSV format as a dataframe with the same
     columns as the input data frame specified via `--measurements-path`.
     See :mod:`atlas_densities.app.densities.compile_measurements`.
 
@@ -744,7 +744,7 @@ def fit_average_densities(
     gad67+) in every homogenous region of type "inhibitory".
 
     Our linear fitting of density values relies on the assumption that the average cell density
-    (number of cells per mm^3) of a cell type T in a brain region R depends linearily on the
+    (number of cells per mm^3) of a cell type T in a brain region R depends linearly on the
     average intensity of a gene marker of T. The conversion factor is a constant which depends
     only on T and on which of the following three groups R belongs to:
 
@@ -894,7 +894,7 @@ def inhibitory_neuron_densities(
     (inhibitory neurons) and those of the inhibitory neuron subtypes PV+, SST+ and VIP+.
 
     The function modifies the estimates in `average_densities` (the "first estimates" of the paper)
-    to make them consistent accross cell types: the average density of GAD67+ cells in a leaf
+    to make them consistent across cell types: the average density of GAD67+ cells in a leaf
     region L should be at most the sum of the average densities of its subtypes under scrutiny
     (e.g. PV+, SST+ and VIP+) and should not exceed the neuron density of L.
 
diff --git a/atlas_densities/densities/cell_density.py b/atlas_densities/densities/cell_density.py
index bb60c48..d59d78a 100644
--- a/atlas_densities/densities/cell_density.py
+++ b/atlas_densities/densities/cell_density.py
@@ -107,7 +107,10 @@ def compute_cell_density(
     group_ids = get_group_ids(region_map)
     region_masks = get_region_masks(group_ids, annotation)
     fix_purkinje_layer_intensity(region_map, annotation, region_masks, nissl)
+    non_zero_nissl = nissl > 0
     for group, mask in region_masks.items():
-        nissl[mask] = nissl[mask] * (cell_counts()[group] / np.sum(nissl[mask]))
+        mask = np.logical_and(non_zero_nissl, mask)
+        if mask.any():
+            nissl[mask] = nissl[mask] * (cell_counts()[group] / np.sum(nissl[mask]))
 
     return nissl / voxel_volume
diff --git a/atlas_densities/densities/excel_reader.py b/atlas_densities/densities/excel_reader.py
index 2ff2345..93c0268 100644
--- a/atlas_densities/densities/excel_reader.py
+++ b/atlas_densities/densities/excel_reader.py
@@ -188,7 +188,7 @@ def _set_metadata_columns(
 
     In `dataframe`, the rows corresponding to the same article have no valid 'source title',
     'comment' nor 'specimen age' except the first one. This function makes sure every row is
-    set with approriate values in the aforementioned columns.
+    set with appropriate values in the aforementioned columns.
 
     Args:
         dataframe: DataFrame obtained when reading the worksheets 'GAD67 densities' or 'PV-SST-VIP'
@@ -352,8 +352,6 @@ def read_inhibitory_neuron_measurement_compilation(
     Read the neuron densities of the worksheet 'GAD67 densities' in gaba_papers.xlsx
 
     Args:
-        region_map: RegionMap object to navigate the brain regions hierarchy.
-            Assumed to be instantiated with AIBS 1.json.
         measurements_path: path to the file gaba_papers.xlsx
 
     Returns:
@@ -361,7 +359,7 @@ def read_inhibitory_neuron_measurement_compilation(
         this module.
     """
 
-    # Reading takes several seconds, possibly due to formulaes interpretation
+    # Reading takes several seconds, possibly due to formulas interpretation
     L.info("Loading excel worksheet ...")
     inhibitory_neurons_worksheet = pd.read_excel(
         str(measurements_path),
@@ -435,8 +433,6 @@ def read_pv_sst_vip_measurement_compilation(measurements_path: Union[str, "Path"
     Read the neuron densities of the worksheet 'PV-SST-VIP' in gaba_papers.xlsx
 
     Args:
-        region_map: RegionMap object to navigate the brain regions hierarchy.
-            Assumed to be instantiated with AIBS 1.json.
         measurements_path: path to the file gaba_papers.xlsx
 
     Returns:
@@ -484,7 +480,7 @@ def read_homogenous_neuron_type_regions(measurements_path: Union[str, "Path"]) -
 
     Returns:
         pd.DataFrame with two columns: 'brain_region' and 'cell_type'. A cell type value
-        is either 'ihibitory' or 'excitatory' and applies to every cell in the region.
+        is either 'inhibitory' or 'excitatory' and applies to every cell in the region.
     """
 
     homogenous_regions_worksheet = pd.read_excel(
diff --git a/atlas_densities/densities/excitatory_inhibitory_splitting.py b/atlas_densities/densities/excitatory_inhibitory_splitting.py
index 4474cfe..a10077f 100644
--- a/atlas_densities/densities/excitatory_inhibitory_splitting.py
+++ b/atlas_densities/densities/excitatory_inhibitory_splitting.py
@@ -35,7 +35,7 @@ def scale_excitatory_densities(output, brain_regions, mapping, layer_ids, densit
         L.info("Performing layer: %s", layer)
         idx = np.nonzero(np.isin(brain_regions.raw, layer_ids[layer]))
 
-        for mtype, scale in df.iteritems():
+        for mtype, scale in df.items():
             if scale == 0:
                 continue
 
diff --git a/atlas_densities/densities/mtype_densities_from_composition.py b/atlas_densities/densities/mtype_densities_from_composition.py
index f114773..a77819b 100644
--- a/atlas_densities/densities/mtype_densities_from_composition.py
+++ b/atlas_densities/densities/mtype_densities_from_composition.py
@@ -45,7 +45,7 @@ def create_from_composition(
 
     layer_masks = get_layer_masks(annotation.raw, region_map, metadata)
 
-    for layer, layer_data in excitatory_mtype_composition.groupby(["layer"]):
+    for layer, layer_data in excitatory_mtype_composition.groupby("layer"):
         layer_sum = layer_data["density"].sum()
 
diff --git a/atlas_densities/densities/utils.py b/atlas_densities/densities/utils.py
index 5c71464..aa1151c 100644
--- a/atlas_densities/densities/utils.py
+++ b/atlas_densities/densities/utils.py
@@ -158,7 +158,7 @@ def optimize_distance_to_line( # pylint: disable=too-many-arguments
     """
     Find inside a box the closest point to a line with prescribed coordinate sum.
 
-    This function aims at solving the following (convex quadratic) optmization problem:
+    This function aims at solving the following (convex quadratic) optimization problem:
 
     Given a sum S >= 0.0, a line D in the non-negative orthant of the Euclidean N-dimensional
     space and a box B in this orthant (an N-dimensional vector with non-negative coordinates),
@@ -176,26 +176,30 @@ def optimize_distance_to_line( # pylint: disable=too-many-arguments
     Args:
         line_direction_vector: N-dimensional float vector with non-negative coordinates.
         upper_bounds: N-dimensional float vector with non-negative coordinates. Defines the
-            the box constraining the optimization process.
-        sum_constraints: non-negative float number. The coordinate sum constraints imposed on
+            box constraining the optimization process.
+        sum_constraint: non-negative float number. The coordinate sum constraints imposed on
            the point P we are looking for.
         threshold: non-negative float value. If the coordinate sum of the current point is below
            `threshold`, the function returns the current point.
         max_iter: maximum number of iterations.
-        copy: If True, the function makes a copy of the input `line_direction_vector`. Otherwise
+        copy: If True, the function makes a copy of the input `line_direction_vector`. Otherwise,
            `line_direction_vector` is modified in-place and holds the optimal value.
 
     Returns:
        N-dimensional float vector with non-negative coordinates. The solution point of the
-       optimization problem, if it exists, up to inacurracy due to threshold size or early
-       termination of the algorithm. Otherwise a point on the boundary of the box B defined by
+       optimization problem, if it exists, up to inaccuracy due to threshold size or early
+       termination of the algorithm. Otherwise, a point on the boundary of the box B defined by
        `upper_bounds`.
     """
     diff = float("inf")
     iter_ = 0
     point = line_direction_vector.copy() if copy else line_direction_vector
-    while diff > threshold and iter_ < max_iter:
-        point *= sum_constraint / np.sum(point)
+    scalable_voxels = point != 0
+    while diff > threshold and iter_ < max_iter and scalable_voxels.any():
+        point[scalable_voxels] *= (sum_constraint - np.sum(point[~scalable_voxels])) / np.sum(
+            point[scalable_voxels]
+        )
         point = np.min([point, upper_bounds], axis=0)
+        scalable_voxels = np.logical_and(point != 0, point < upper_bounds)
         diff = np.abs(np.sum(point) - sum_constraint)
         iter_ += 1
@@ -385,7 +389,7 @@ def get_group_names(region_map: "RegionMap", cleanup_rest: bool = False) -> Dict
 
     Args:
         region_map: object to navigate the mouse brain regions hierarchy
-            (instantied from AIBS 1.json).
+            (instantiated from AIBS 1.json).
         cleanup_rest: (Optional) If True, the name of any ascendant region of the Cerebellum and
             Isocortex groups are removed from the names of the Rest group. This makes sure that the
             set of names of the Rest group is closed under taking descendants.
@@ -485,7 +489,8 @@ def get_hierarchy_info(
     ]
     region_names = [region_map.get(id_, attr="name") for id_ in region_ids]
     data_frame = pd.DataFrame(
-        {"brain_region": region_names, "descendant_id_set": descendant_id_sets}, index=region_ids
+        {"brain_region": region_names, "descendant_id_set": descendant_id_sets},
+        index=region_ids,
     )
 
     return data_frame
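
Note on the cell_density.py hunk: it restricts the per-group rescaling of the Nissl volume to voxels with a non-zero intensity, which avoids the divide-by-zero warnings mentioned in the commit message while leaving empty voxels untouched. Below is a minimal standalone sketch of that idea; the helper name rescale_group_intensities and the dict-based group_cell_counts argument are illustrative and are not part of the repository API.

    import numpy as np

    def rescale_group_intensities(nissl, region_masks, group_cell_counts):
        """Rescale intensities per region group so each group's total matches its
        cell count, touching only voxels with non-zero intensity (empty voxels
        cannot carry cells and would otherwise cause a division by zero)."""
        non_zero = nissl > 0
        for group, mask in region_masks.items():
            mask = np.logical_and(non_zero, mask)
            if mask.any():
                nissl[mask] *= group_cell_counts[group] / np.sum(nissl[mask])
        return nissl

    nissl = np.array([0.0, 2.0, 6.0, 0.0])
    masks = {
        "Cerebellum group": np.array([True, True, False, False]),
        "Rest": np.array([False, False, True, True]),
    }
    print(rescale_group_intensities(nissl, masks, {"Cerebellum group": 10.0, "Rest": 30.0}))
    # -> [ 0. 10. 30.  0.]  zero voxels stay at zero, each group sums to its cell count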
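
Note on the utils.py hunk: the loop in optimize_distance_to_line now rescales only the coordinates that are non-zero and not yet clipped at their upper bound, redistributing the remaining budget of the sum constraint over them. The sketch below isolates that loop under assumed names (rescale_to_sum, target_sum); it illustrates the technique and is not the repository function.

    import numpy as np

    def rescale_to_sum(point, upper_bounds, target_sum, threshold=1e-7, max_iter=100):
        """Move `point` towards coordinate sum `target_sum` inside the box
        `upper_bounds`, rescaling only coordinates that can still change."""
        point = np.asarray(point, dtype=float).copy()
        upper_bounds = np.asarray(upper_bounds, dtype=float)
        scalable = point != 0
        diff = np.inf
        iter_ = 0
        while diff > threshold and iter_ < max_iter and scalable.any():
            # Spread the remaining budget over the coordinates that are still free.
            remaining = target_sum - np.sum(point[~scalable])
            point[scalable] *= remaining / np.sum(point[scalable])
            # Clip to the box, then recompute which coordinates may still be scaled:
            # zeros cannot grow multiplicatively and clipped coordinates must stay put.
            point = np.minimum(point, upper_bounds)
            scalable = np.logical_and(point != 0, point < upper_bounds)
            diff = abs(np.sum(point) - target_sum)
            iter_ += 1
        return point

    # The second coordinate is capped at 1.0, so its surplus is redistributed to the
    # coordinates that are still free instead of stalling the iteration.
    print(rescale_to_sum([2.0, 3.0, 0.0, 5.0], [10.0, 1.0, 10.0, 10.0], target_sum=12.0))
    # -> approximately [3.143 1.    0.    7.857]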