Skip to content

Commit

Permalink
Fix fitting and optimization steps (#75)
Browse files Browse the repository at this point in the history
* Offset slices indexes to take into account marker dataset offset.

* Remove unnecessary check on inhibitory neuron density.
if a mother region count is given as certain, children regions can be given as uncertain.
This can happen when literature provides certain counts in mother region (e.g., Striatum is purely inhibitory).

* Limit fitting to regions groups.
* Update docs
  • Loading branch information
drodarie authored Mar 25, 2024
1 parent 2b89cfd commit 31a341d
Show file tree
Hide file tree
Showing 6 changed files with 140 additions and 97 deletions.
17 changes: 11 additions & 6 deletions atlas_densities/app/cell_densities.py
Original file line number Diff line number Diff line change
Expand Up @@ -905,13 +905,18 @@ def fit_average_densities(
for (name, stddev) in cell_density_stddev.items()
}

gene_marker_volumes = {
gene: {
gene_marker_volumes = {}
for gene, gene_data in gene_voxeldata.items():
loc_slices = slices[config["sectionDataSetID"][gene]]
gene_marker_volumes[gene] = {
"intensity": gene_data.raw,
"slices": slices[config["sectionDataSetID"][gene]], # list of integer slice indices
# list of integer slice indices
"slices": (
loc_slices - np.asarray(gene_data.offset / gene_data.voxel_dimensions, dtype=int)[0]
if loc_slices is not None
else None
),
}
for (gene, gene_data) in gene_voxeldata.items()
}

group_ids_config = utils.load_json(group_ids_config_path)

Expand All @@ -932,7 +937,7 @@ def fit_average_densities(
group_ids_config=group_ids_config,
)

# Turn index into column so as to ease off the save and load operations on csv files
# Turn index into column to ease off the save and load operations on csv files
fitted_densities_df["brain_region"] = fitted_densities_df.index

L.info("Saving fitted densities to file %s ...", fitted_densities_output_path)
Expand Down
150 changes: 107 additions & 43 deletions atlas_densities/densities/fitting.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
"""
from __future__ import annotations

import itertools as it
import logging
import warnings
from typing import TYPE_CHECKING, Dict, List, Optional, Union
Expand Down Expand Up @@ -135,6 +136,9 @@ def fill_in_homogenous_regions(
in number of neurons per mm^3.
densities: the data frame returned by
:fun:`atlas_densities.densities.fitting.create_dataframe_from_known_densities`.
hierarchy_info: data frame with columns "descendant_id_set" (set[int]) and "brain_region"
(str) and whose index is a list of region ids.
See :fun:`atlas_densities.densities.utils.get_hierarchy_info`.
cell_density_stddevs: (Optional) dict whose keys are brain regions names and whose values
are standard deviations of average cell densities of the corresponding regions.
Defaults to None, in which case the output standard deviation is set with the density
Expand Down Expand Up @@ -418,24 +422,32 @@ def compute_fitting_coefficients(
len(cell_types),
)
for region_name in tqdm(groups[group_name]):
for cell_type in cell_types:
intensity = average_intensities.at[region_name, cell_type[:-1]]
density = densities.at[region_name, cell_type]
if not (
np.isnan(density) or np.isnan(intensity) or intensity == 0.0 or density == 0.0
):
assert_msg = f"in region {region_name} for cell type {cell_type}"
assert intensity >= 0.0, "Negative average intensity " + assert_msg
assert density >= 0.0, "Negative density " + assert_msg
standard_deviation = densities.at[
region_name, cell_type + "_standard_deviation"
]
assert not np.isnan(standard_deviation), "NaN standard deviation " + assert_msg
assert standard_deviation >= 0.0, "Negative standard deviation " + assert_msg

clouds[group_name][cell_type]["xdata"].append(intensity)
clouds[group_name][cell_type]["ydata"].append(density)
clouds[group_name][cell_type]["sigma"].append(standard_deviation)
if region_name in average_intensities.index:
for cell_type in cell_types:
intensity = average_intensities.at[region_name, cell_type[:-1]]
density = densities.at[region_name, cell_type]
if not (
np.isnan(density)
or np.isnan(intensity)
or intensity == 0.0
or density == 0.0
):
assert_msg = f"in region {region_name} for cell type {cell_type}"
assert intensity >= 0.0, "Negative average intensity " + assert_msg
assert density >= 0.0, "Negative density " + assert_msg
standard_deviation = densities.at[
region_name, cell_type + "_standard_deviation"
]
assert not np.isnan(standard_deviation), (
"NaN standard deviation " + assert_msg
)
assert standard_deviation >= 0.0, (
"Negative standard deviation " + assert_msg
)

clouds[group_name][cell_type]["xdata"].append(intensity)
clouds[group_name][cell_type]["ydata"].append(density)
clouds[group_name][cell_type]["sigma"].append(standard_deviation)

L.info("Computing regression coefficients for %d cell types ...", len(cell_types))
for cell_type in tqdm(cell_types):
Expand Down Expand Up @@ -479,23 +491,51 @@ def fit_unknown_densities(
for group_name, region_names in groups.items():
for region_name in region_names:
for marker in average_intensities.columns:
intensity = average_intensities.at[region_name, marker]
cell_type = marker + "+"
if np.isnan(densities.at[region_name, cell_type]) and not np.isnan(intensity):
fitting = fitting_coefficients[group_name][cell_type]
if np.isnan(fitting["coefficient"]):
warnings.warn(
f"Interpolating density of region {region_name} for cell type "
f"{cell_type} using NaN fitting coefficient of region group "
f"{group_name}.",
AtlasDensitiesWarning,
)
fitted_value = fitting["coefficient"] * intensity
standard_deviation = fitted_value * fitting["standard_deviation"]
densities.at[region_name, cell_type] = fitted_value
densities.at[
region_name, cell_type + "_standard_deviation"
] = standard_deviation
if region_name in average_intensities.index:
_apply_fitting(
region_name,
marker,
group_name,
average_intensities,
densities,
fitting_coefficients,
)


def _apply_fitting(
region_name, marker, group_name, average_intensities, densities, fitting_coefficients
):
"""
Apply the fitting function to a given region and marker based on the corresponding marker
intensity.
Args:
region_name: region name
marker: ISH marker name
group_name: fitting group name
average_intensities: pandas data frame returned by
:fun:`atlas_densities.densities.fitting.compute_average_intensities`.
densities: data frame returned by
:fun:`atlas_densities.densities.fitting.create_dataframe_from_known_densities`.
fitting_coefficients: dict returned by
:fun:`atlas_densities.densities.fitting.compute_fitting_coefficients`.
"""

intensity = average_intensities.at[region_name, marker]
cell_type = marker + "+"
if np.isnan(densities.at[region_name, cell_type]) and not np.isnan(intensity):
fitting = fitting_coefficients[group_name][cell_type]
if np.isnan(fitting["coefficient"]):
warnings.warn(
f"Interpolating density of region {region_name} for cell type "
f"{cell_type} using NaN fitting coefficient of region group "
f"{group_name}.",
AtlasDensitiesWarning,
)
fitted_value = fitting["coefficient"] * intensity
standard_deviation = fitted_value * fitting["standard_deviation"]
densities.at[region_name, cell_type] = fitted_value
densities.at[region_name, cell_type + "_standard_deviation"] = standard_deviation


def _check_homogenous_regions_sanity(homogenous_regions: pd.DataFrame) -> None:
Expand Down Expand Up @@ -556,7 +596,7 @@ def _get_group_names(region_map: "RegionMap", group_ids_config: dict) -> dict[st

group_ids = utils.get_group_ids(region_map, group_ids_config)

# Make the Rest group stable under taking descendants
# Make the Rest group stable undertaking descendants
# The name of any ascendant region of the Cerebellum and
# Isocortex groups are removed from the names of the Rest group. This makes sure that
# the set of names of the Rest group is closed under taking descendants.
Expand All @@ -573,6 +613,20 @@ def _get_group_names(region_map: "RegionMap", group_ids_config: dict) -> dict[st
}


def _get_group_region_names(groups):
"""
Create a set of the union of AIBS region names that belong to the given groups.
Args:
groups: dictionary from group name to list of region name.
Returns:
A list of region names.
"""

return list(set(it.chain.from_iterable(groups.values())))


def linear_fitting( # pylint: disable=too-many-arguments
region_map: "RegionMap",
annotation: AnnotationT,
Expand Down Expand Up @@ -601,6 +655,9 @@ def linear_fitting( # pylint: disable=too-many-arguments
define the regions whose cell densities will be computed.
annotation: int array of shape (W, H, D) holding the annotation of the whole
brain model. (The integers W, H and D are the dimensions of the array).
neuron_density: non-negative float array of shape (W, H, D) where W, H and D are integer
dimensions. This array holds the volumetric neuron density of the brain model expressed
in number of neurons per mm^3.
gene_marker_volumes: dict of the form {
"gad67": {"intensity": <array>, "slices": <list>},
"pv": {"intensity": <array>, "slices": <list>},
Expand All @@ -616,7 +673,7 @@ def linear_fitting( # pylint: disable=too-many-arguments
homogenous_regions: data frame with two columns, brain_region and cell_type.
The brain_region column holds region names and the values of the cell_type column are
"inhibitory" or "excitatory" depending on whether every neuron of the region is
inhibitory or excitatory. Note that the "inhibitory" cell type will superseded by
inhibitory or excitatory. Note that the "inhibitory" cell type will supersede by
its synonym "gad67+" in the output data frame.
cell_density_stddevs: dict whose keys are brain regions names and whose values are
standard deviations of average cell densities of the corresponding regions.
Expand Down Expand Up @@ -667,16 +724,23 @@ def linear_fitting( # pylint: disable=too-many-arguments
)

L.info("Computing average intensities ...")
groups = _get_group_names(region_map, group_ids_config)
# Limit the fitting to the regions groups.
indexes = np.isin(
hierarchy_info["brain_region"].to_numpy(),
_get_group_region_names(groups),
invert=True,
)
average_intensities = compute_average_intensities(
annotation, gene_marker_volumes, hierarchy_info
annotation,
gene_marker_volumes,
hierarchy_info.drop(hierarchy_info.index[indexes]),
)

L.info("Getting group names ...")
# We want group region names to be stable under taking descendants
groups = _get_group_names(region_map, group_ids_config)

L.info("Computing fitting coefficients ...")
fitting_coefficients = compute_fitting_coefficients(groups, average_intensities, densities)
fitting_coefficients = compute_fitting_coefficients(
groups, average_intensities, densities.drop(densities.index[indexes])
)
L.info("Fitting unknown average densities ...")
fit_unknown_densities(groups, average_intensities, densities, fitting_coefficients)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
across the brain region hierarchy.
The file ``doc/source/bbpp82_628_linear_program.pdf`` serves as a documentation of the linear
program, see Section 2 in particular. The initalization of the linear program relies on average
density estimates, obtained obtained for instance via the ``fitting`` module. We refer to these
program, see Section 2 in particular. The initialization of the linear program relies on average
density estimates, obtained for instance via the ``fitting`` module. We refer to these
estimates as the initial estimates.
The output are volumetric density nrrd files, one for each of the aforementioned inhibitory
Expand Down Expand Up @@ -286,7 +286,7 @@ def _zero_descendants(id_, cell_type, x_result, deltas, hierarchy_info):
L.info("Setting known values ...")
# Set delta_{r, m} with `SKIP` if the corresponding cell count estimate is missing.
# This means that the region r won't impose any constraint wrt to the marker m in the linear
# program, i. e., the delta_{r, m} variable is omitted for such r and m.
# program, i.e., the delta_{r, m} variable is omitted for such r and m.
for id_, region_name in zip(hierarchy_info.index, region_counts.index):
for cell_type in cell_types:
# A region without cell count estimate for `cell_type` adds no constraint
Expand Down Expand Up @@ -353,7 +353,7 @@ def create_bounds(
bound.
- `x_map` is a dict mapping pairs (id_, cell_type) to indices of `bounds`.
The keys of this dict corresponds to cell count variables in the linear program.
- `deltas_map` is a dict mapping pairs (region_name, cell_type) to to indices of `bounds`.
- `deltas_map` is a dict mapping pairs (region_name, cell_type) to indices of `bounds`.
The keys of this dict corresponds to the slack "delta" variables which represent
distances of cell count variables to initial estimates.
"""
Expand Down Expand Up @@ -434,7 +434,7 @@ def check_constraint_3e(
constraint: FloatArray, b_value: float, region_name: str, id_: int
) -> bool:
"""
Check if `contraint` is a valid inequality constraint of type (3e).
Check if `constraint` is a valid inequality constraint of type (3e).
Returns:
True if the constraint is not void.
Expand Down Expand Up @@ -571,7 +571,7 @@ def _compute_initial_cell_counts(
:fun:`densities.inhibitory_densities_helper.average_densities_to_cell_counts`.
It holds region cell counts as well as associated standard deviations.
- `id_counts` has the return value format of
:fun:`densities.inhbitory_densities_helper.average_densities_to_cell_counts` except
:fun:`densities.inhibitory_densities_helper.average_densities_to_cell_counts` except
that its index is a list of unique integer region identifiers.
It holds the cell counts of the 3D regions labeled by a unique identifier, as well as
associated standard deviations (descendant subregions are excluded).
Expand All @@ -595,47 +595,29 @@ def _compute_initial_cell_counts(

def _check_variables_consistency(
x_result: pd.DataFrame,
deltas: pd.DataFrame,
cell_types: List[str],
neuron_counts: pd.DataFrame,
hierarchy_info: pd.DataFrame,
) -> None:
"""
Raises an error if a delta variable indexed by (region_name, cell_type) is set with a non-NaN
value whereas an x_result variable indexed by (desc_id, cell_type) is set with a NaN value for
some descendant id of region_name.
Raises also an error if a cell count value marked as final in `x_result` exceeds its
prescribed upper bound from `neuron_counts`.
Args:
x_result: see return value of :fun:`set_known_values`.
deltas: idem
cell_types: list of cell type names, e.g., ["pv+", "sst+", "vip+", "gad67+"].
hierarchy_info: data frame returned by
:func:`atlas_densities.densities.utils.get_hierarchy_info`.
neuron_counts: data frame with one column "cell_count" holding the neuron count each 3D
region labeled by an integer identifier in `neuron_count.index`.
Raises:
AtlasDensitiesError if on the the following assumptions is violated:
- if cell count estimate of a region is known with certainty for a given cell type,
then the cell count of every descendant region is also known with certainty.
- a cell count estimate which is given for certain does not
AtlasDensitiesError if a cell count estimate which is given for certain exceeds the neuron
count constraint.
"""
cell_count_tolerance = 1e-2 # absolute tolerance to rule out round-off errors
for region_name, id_, id_set in zip(
deltas.index, hierarchy_info.index, hierarchy_info["descendant_ids"]
):
for id_ in hierarchy_info.index:
for cell_type in cell_types:
if np.isfinite(deltas.loc[region_name, cell_type]):
for desc_id in id_set:
if np.isnan(x_result.loc[desc_id, cell_type]):
raise AtlasDensitiesError(
f"Cell count estimate of region named '{region_name}' for cell type "
f"{cell_type} was given for certain whereas the cell count of "
f"descendant id {desc_id} is not certain."
)
neuron_count = neuron_counts.loc[id_, "cell_count"]
if (
not np.isnan(x_result.loc[id_, cell_type])
Expand Down Expand Up @@ -770,7 +752,7 @@ def create_inhibitory_neuron_densities( # pylint: disable=too-many-locals
# We assume that if the cell count of `cell_type` in `region_name` is known with certainty
# then the same holds in every descendant subregion.
_check_variables_consistency(
x_result, deltas, get_cell_types(region_counts), neuron_counts, hierarchy_info
x_result, get_cell_types(region_counts), neuron_counts, hierarchy_info
)

L.info("Setting variable bounds and further inequality constraints ...")
Expand Down
2 changes: 1 addition & 1 deletion atlas_densities/densities/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,7 +412,7 @@ def get_group_ids(region_map: "RegionMap", config: dict, _skip_check=False) -> d
Args:
region_map: object to navigate the mouse brain regions hierarchy
(instantied from AIBS 1.json).
(instantiated from AIBS 1.json).
config: mapping of regions to their constituent ids
Returns:
A dictionary whose keys are region group names and whose values are
Expand Down
Loading

0 comments on commit 31a341d

Please sign in to comment.