diff --git a/atlas_densities/densities/fitting.py b/atlas_densities/densities/fitting.py index 7d2c440..6f9d558 100644 --- a/atlas_densities/densities/fitting.py +++ b/atlas_densities/densities/fitting.py @@ -21,21 +21,19 @@ import itertools as it import logging import warnings -from typing import TYPE_CHECKING, Dict, List, Optional, Union +from typing import Dict, List, Optional, Union import numpy as np import pandas as pd from atlas_commons.typing import AnnotationT, BoolArray, FloatArray from scipy.optimize import curve_fit from tqdm import tqdm +from voxcell import RegionMap, voxel_data from atlas_densities.densities import utils from atlas_densities.densities.measurement_to_density import remove_unknown_regions from atlas_densities.exceptions import AtlasDensitiesError, AtlasDensitiesWarning -if TYPE_CHECKING: # pragma: no cover - from voxcell import RegionMap - L = logging.getLogger(__name__) MarkerVolumes = Dict[str, Dict[str, Union[FloatArray, List[int]]]] @@ -214,10 +212,105 @@ def compute_average_intensity( return 0.0 +def _add_depths(region_map_df): + """Rdd a `depth` column to the `region_map_df` with how deep it is within the hierarchy.""" + region_map_df["depth"] = 0 + parent_ids = [ + -1, + ] + depth = 1 + while parent_ids: + children_ids = region_map_df[np.isin(region_map_df["parent_id"], list(parent_ids))].index + region_map_df.loc[children_ids, "depth"] = depth + parent_ids = set(children_ids) + depth += 1 + + +def _fill_densities(region_map, region_map_df, df): + """Fill all `voxel_count` and `density` for each region within `def`. + + This assumes the leaf nodes (ie: the deapest nodes in region_map) have had + their values filled in, and thus each of their parents can recursively be + filled in. + """ + order_idx = np.argsort(region_map_df.depth.to_numpy()) + + ids = region_map_df.index[order_idx[::-1]] + for id_ in ids: + if id_ not in region_map._children: # pylint: disable=protected-access + continue + + children = region_map._children[id_] # pylint: disable=protected-access + + if not children: + continue + + voxel_count = df.loc[children, "voxel_count"] + count = voxel_count.sum() + if count: + df.loc[id_, "voxel_count"] = count + df.loc[id_, "density"] = np.average(df.loc[children, "density"], weights=voxel_count) + + +def _apply_density_slices(gene_marker_volumes): + """For each marker in `gene_marker_volumes`, apply the slice mask to the indensities volume.""" + ret = {} + for marker, intensity in gene_marker_volumes.items(): + intensity, slices = intensity["intensity"], intensity["slices"] + + if slices is None: + mask = np.ones_like(intensity, dtype=bool) + else: + mask = np.zeros_like(intensity, dtype=bool) + slices_ = [slice_ for slice_ in slices if 0 <= slice_ < mask.shape[0]] + mask[slices_] = True + + ret[marker] = {"intensity": intensity, "slices": slices, "mask": mask} + + return ret + + +def _compute_per_marker_intensity(annotation, gene_marker_volumes): + """Compute the average intensity for `id_`, for all makers in `gene_marker_volumes`""" + vtiv = voxel_data.ValueToIndexVoxels(annotation) + + count_and_density = [] + for id_ in vtiv.values: + if id_ == 0: + continue + + voxel_ids = vtiv.value_to_1d_indices(id_) + + res = [] + for marker, intensity in gene_marker_volumes.items(): + mask_voxels = vtiv.ravel(intensity["mask"])[voxel_ids] + + count = mask_voxels.sum() + + if count <= 0: + continue + + mean_density = vtiv.ravel(intensity["intensity"])[voxel_ids][mask_voxels].sum() / count + + if mean_density == 0.0: + L.warning("Mean density for id=%s and marker=%s", id_, marker) + + count_and_density.append((marker.lower(), id_, count, mean_density)) + + res = ( + pd.DataFrame(count_and_density, columns=["marker", "id", "voxel_count", "density"]) + .set_index("id") + .pivot(columns="marker") + .swaplevel(axis=1) + ) + return res + + def compute_average_intensities( annotation: AnnotationT, gene_marker_volumes: MarkerVolumes, hierarchy_info: pd.DataFrame, + region_map, ) -> pd.DataFrame: """ Compute the average marker intensity of every region in `hierarchy_info` for every marker @@ -249,27 +342,27 @@ def compute_average_intensities( The index of the data frame is the list of regions `hierarchy_info["brain_region"]`. The column labels are the keys of `gene_marker_volumes` in lower case. """ - region_count = len(hierarchy_info["brain_region"]) - data = np.full((region_count, len(gene_marker_volumes)), np.nan) - hierarchy_info = hierarchy_info.set_index("brain_region") + gene_marker_volumes = _apply_density_slices(gene_marker_volumes) + + intensity = _compute_per_marker_intensity(annotation, gene_marker_volumes) + + region_map_df = region_map.as_dataframe() + _add_depths(region_map_df) + result = pd.DataFrame( - data=data, + data=np.nan, index=hierarchy_info.index, - columns=[marker_name.lower() for marker_name in gene_marker_volumes.keys()], + columns=[marker_name.lower() for marker_name in gene_marker_volumes], ) + result["brain_region"] = region_map_df.loc[result.index].name - L.info( - "Computing average intensities for %d markers in %d regions ...", - len(gene_marker_volumes), - region_count, - ) - for region_name in tqdm(result.index): - region_mask = np.isin(annotation, list(hierarchy_info.at[region_name, "descendant_ids"])) - for marker, intensity in gene_marker_volumes.items(): - result.at[region_name, marker.lower()] = compute_average_intensity( - intensity["intensity"], region_mask, intensity["slices"] - ) + for marker in gene_marker_volumes: + df = pd.DataFrame(data=0.0, index=result.index, columns=["voxel_count", "density"]) + df.update(intensity[marker.lower()]) + _fill_densities(region_map, region_map_df, df) + result[marker.lower()] = df["density"] + result = result.set_index("brain_region") return result @@ -580,7 +673,7 @@ def _check_average_densities_sanity(average_densities: pd.DataFrame) -> None: ) -def _get_group_names(region_map: "RegionMap", group_ids_config: dict) -> dict[str, set[str]]: +def _get_group_names(region_map: RegionMap, group_ids_config: dict) -> dict[str, set[str]]: """ Get AIBS names for regions in several region groups of interest. @@ -628,7 +721,7 @@ def _get_group_region_names(groups): def linear_fitting( # pylint: disable=too-many-arguments - region_map: "RegionMap", + region_map: RegionMap, annotation: AnnotationT, neuron_density: FloatArray, gene_marker_volumes: MarkerVolumes, @@ -735,6 +828,7 @@ def linear_fitting( # pylint: disable=too-many-arguments annotation, gene_marker_volumes, hierarchy_info.drop(hierarchy_info.index[indexes]), + region_map, ) L.info("Computing fitting coefficients ...") diff --git a/tests/app/test_mtype_densities.py b/tests/app/test_mtype_densities.py index 4f16ae0..1ca562f 100644 --- a/tests/app/test_mtype_densities.py +++ b/tests/app/test_mtype_densities.py @@ -87,54 +87,52 @@ def test_mtype_densities_from_profiles(tmp_path): assert "neuron density file" in str(result.exception) -def get_result_from_probablity_map_(runner, td): - return runner.invoke( - tested.app, - [ - # fmt: off - "--log-output-path", td, - "create-from-probability-map", - "--annotation-path", "annotation.nrrd", - "--hierarchy-path", "hierarchy.json", - "--probability-map", "probability_map01.csv", - "--probability-map", "probability_map02.csv", - "--marker", "pv", "pv.nrrd", - "--marker", "sst", "sst.nrrd", - "--marker", "vip", "vip.nrrd", - "--marker", "gad67", "gad67.nrrd", - "--marker", "approx_lamp5", "approx_lamp5.nrrd", - "--synapse-class", "EXC", - "--output-dir", "output_dir", - # fmt: on - ], - ) - - def test_mtype_densities_from_probability_map(tmp_path): data = create_from_probability_map_data() runner = CliRunner() with runner.isolated_filesystem(temp_dir=tmp_path) as td: - data["annotation"].save_nrrd("annotation.nrrd") - write_json("hierarchy.json", data["hierarchy"]) + td = Path(td) - data["probability_map01"].to_csv("probability_map01.csv", index=True) - data["probability_map02"].to_csv("probability_map02.csv", index=True) + data["annotation"].save_nrrd(td / "annotation.nrrd") + write_json("hierarchy.json", data["hierarchy"]) + data["probability_map01"].to_csv(td / "probability_map01.csv", index=True) + data["probability_map02"].to_csv(td / "probability_map02.csv", index=True) - for molecular_type, raw_data in data["molecular_type_densities"].items(): + for molecular_type, raw in data["molecular_type_densities"].items(): VoxelData( - raw_data, + raw, voxel_dimensions=data["annotation"].voxel_dimensions, - ).save_nrrd(f"{molecular_type}.nrrd") - - result = get_result_from_probablity_map_(runner, td) + ).save_nrrd(td / f"{molecular_type}.nrrd") + + result = runner.invoke( + tested.app, + [ + # fmt: off + "--log-output-path", str(td), + "create-from-probability-map", + "--annotation-path", "annotation.nrrd", + "--hierarchy-path", "hierarchy.json", + "--probability-map", "probability_map01.csv", + "--probability-map", "probability_map02.csv", + "--marker", "pv", "pv.nrrd", + "--marker", "sst", "sst.nrrd", + "--marker", "vip", "vip.nrrd", + "--marker", "gad67", "gad67.nrrd", + "--marker", "approx_lamp5", "approx_lamp5.nrrd", + "--synapse-class", "EXC", + "--output-dir", "output_dir", + # fmt: on + ], + ) assert result.exit_code == 0 - BPbAC = VoxelData.load_nrrd(str(Path("output_dir") / "BP|bAC_EXC_densities.nrrd")) + BPbAC = VoxelData.load_nrrd(Path("output_dir") / "BP|bAC_EXC_densities.nrrd") assert BPbAC.raw.dtype == float npt.assert_array_equal(BPbAC.voxel_dimensions, data["annotation"].voxel_dimensions) - with open(str(Path("output_dir") / "metadata.json"), "r") as file: + with open(Path("output_dir") / "metadata.json", "r") as file: metadata = json.load(file) + assert "BP" in metadata["density_files"] assert "bAC" in metadata["density_files"]["BP"] assert "EXC" == metadata["synapse_class"] diff --git a/tests/densities/test_fitting.py b/tests/densities/test_fitting.py index 51be985..b696b6f 100644 --- a/tests/densities/test_fitting.py +++ b/tests/densities/test_fitting.py @@ -50,7 +50,7 @@ def test_create_dataframe_from_known_densities(): @pytest.fixture -def hierarchy_info(): +def region_map(): hierarchy = { "id": 8, "name": "Basic cell groups and regions", @@ -104,7 +104,12 @@ def hierarchy_info(): ], } - return utils.get_hierarchy_info(RegionMap.from_dict(hierarchy)) + return RegionMap.from_dict(hierarchy) + + +@pytest.fixture +def hierarchy_info(region_map): + return utils.get_hierarchy_info(region_map) def test_fill_in_homogenous_regions(hierarchy_info): @@ -226,7 +231,7 @@ def test_compute_average_intensity(): assert actual == 0 -def test_compute_average_intensities(hierarchy_info): +def test_compute_average_intensities(region_map, hierarchy_info): annotation = np.array( [[[0, 976], [976, 936]], [[976, 936], [936, 936]]] # 976 = Lobule II, 936 = "Declive (VI)"" ) @@ -261,7 +266,9 @@ def test_compute_average_intensities(hierarchy_info): index=hierarchy_info["brain_region"], ) - actual = tested.compute_average_intensities(annotation, marker_volumes, hierarchy_info) + actual = tested.compute_average_intensities( + annotation, marker_volumes, hierarchy_info, region_map + ) pdt.assert_frame_equal(actual, expected)