diff --git a/README.rst b/README.rst old mode 100644 new mode 100755 index 7c544e6..f90efd2 --- a/README.rst +++ b/README.rst @@ -260,27 +260,32 @@ Fit transfer functions from mean region intensity to neuron density We fit here transfer functions that describe the relation between mean ISH expression in regions of the mouse brain and literature regional density estimates (see `Rodarie et al. (2022)`_ for more -details). This step leverages AIBS ISH marker datasets (in their expression form, see also -`fit_average_densities_ccfv2_config.yaml`_) and the previously computed -literature density values. -These transfer functions are used to obtain first estimates of neuron densities in regions not -covered by literature. +details). +This step leverages AIBS ISH marker datasets and the previously computed literature density values. +These transfer functions are used to obtain first estimates of neuron densities in regions not covered by literature. The result of the following command is a list of first density estimates for each neuron type and for each region of the annotation volume. +Note the usage of multiple ``--markers`` the format is ``marker name:marker id:path/to/marker.nrrd``. +The ``marker name`` is used for the output columns, the ``marker id`` is used to lookup which slices to use in the ``--realigned-slices-path``. .. code-block:: bash # make output folder mkdir -p data/ccfv2/first_estimates - atlas-densities cell-densities fit-average-densities \ - --hierarchy-path=data/1.json \ - --annotation-path=data/ccfv2/annotation_25.nrrd \ - --neuron-density-path=data/ccfv2/density_volumes/neuron_density.nrrd \ - --average-densities-path=data/ccfv2/measurements/lit_densities.csv \ - --homogenous-regions-path=data/ccfv2/measurements/homogeneous_regions.csv \ - --gene-config-path=atlas_densities/app/data/markers/fit_average_densities_ccfv2_config.yaml \ - --fitted-densities-output-path=data/ccfv2/first_estimates/first_estimates.csv \ + atlas-densities cell-densities fit-average-densities \ + --hierarchy-path=data/1.json \ + --annotation-path=data/ccfv2/annotation_25.nrrd \ + --neuron-density-path=data/ccfv2/density_volumes/neuron_density.nrrd \ + --average-densities-path=data/ccfv2/measurements/lit_densities.csv \ + --homogenous-regions-path=data/ccfv2/measurements/homogeneous_regions.csv \ + --marker=pv:868:data/ccfv2/marker_volumes/pvalb.nrrd \ + --marker=sst:1001:data/ccfv2/marker_volumes/SST.nrrd \ + --marker=vip:77371835:data/ccfv2/marker_volumes/VIP.nrrd \ + --marker=gad67:479:data/ccfv2/marker_volumes/gad1.nrrd \ + --realigned-slices-path=atlas_densities/app/data/markers/realigned_slices_ccfv2.json \ + --cell-density-standard-deviations=atlas_densities/app/data/measurements/std_cells.json \ + --fitted-densities-output-path=data/ccfv2/first_estimates/first_estimates.csv \ --fitting-maps-output-path=data/ccfv2/first_estimates/fitting.json diff --git a/atlas_densities/app/cell_densities.py b/atlas_densities/app/cell_densities.py index f60a87b..4d4cee9 100644 --- a/atlas_densities/app/cell_densities.py +++ b/atlas_densities/app/cell_densities.py @@ -47,7 +47,6 @@ import click import numpy as np import pandas as pd -import yaml # type: ignore from atlas_commons.app_utils import ( EXISTING_FILE_PATH, assert_properties, @@ -91,7 +90,6 @@ EXCITATORY_SPLIT_METADATA = DATA_PATH / "metadata" / "excitatory-inhibitory-splitting.json" HOMOGENOUS_REGIONS_PATH = DATA_PATH / "measurements" / "homogenous_regions.csv" HOMOGENOUS_REGIONS_REL_PATH = HOMOGENOUS_REGIONS_PATH.relative_to(AD_PATH) -MARKERS_README_REL_PATH = (DATA_PATH / "markers" / "README.rst").relative_to(AD_PATH) LINPROG_PATH = "doc/source/bbpp82_628_linprog.pdf" ALGORITHMS: Dict[str, Callable] = { @@ -759,17 +757,6 @@ def measurements_to_average_densities( "`glia-cell-densities`." ), ) -@click.option( - "--gene-config-path", - type=EXISTING_FILE_PATH, - required=True, - help=( - "Path to the gene markers configuration file. This yaml file contains the paths to the " - "gene marker volumes (nrrd files from AIBS) that will be used to estimate average cell " - "densities accross all AIBS brain regions: PV, SST, VIP and GAD67. See " - f"`{MARKERS_README_REL_PATH}`." - ), -) @click.option( "--average-densities-path", required=True, @@ -785,12 +772,30 @@ def measurements_to_average_densities( f"either all inhibitory or all excitatory. Defaults to `{HOMOGENOUS_REGIONS_REL_PATH}`.", default=HOMOGENOUS_REGIONS_PATH, ) +@click.option( + "--marker", + multiple=True, + type=str, + required=True, + help=("Marker information in the format `marker name`:`marker id`:`path/to/marker.nrrd`"), +) +@click.option( + "--realigned-slices-path", + type=EXISTING_FILE_PATH, + required=True, + help=("JSON file containing mapping of `marker id` to list of slices selected for that marker"), +) +@click.option( + "--cell-density-standard-deviations", + type=EXISTING_FILE_PATH, + required=True, + help=("Standard deviations for cells, in a CSV file"), +) @click.option( "--fitted-densities-output-path", required=True, help="Path where to write the data frame containing the average cell density of every region" - "found in the brain hierarchy (see --hierarchy-path option) for the cell types marked by the " - "gene markers listed in the gene configuration file (see --gene-config-path option). " + "found in the brain hierarchy (see --hierarchy-path option) for the marked cell types " "The output file is a CSV file whose first column is a list of region names. The other columns" " come in pairs for each cell type: ```` and ``_standard_deviation``." " Cell types are derived from marker names: `` = +``.", @@ -814,7 +819,9 @@ def fit_average_densities( annotation_path, region_name, neuron_density_path, - gene_config_path, + marker, + realigned_slices_path, + cell_density_standard_deviations, average_densities_path, homogenous_regions_path, fitted_densities_output_path, @@ -823,7 +830,6 @@ def fit_average_densities( ): # pylint: disable=too-many-arguments, too-many-locals """ Estimate average cell densities of brain regions in `hierarchy_path` for the cell types - marked by the markers listed in `gene_config_path`. We perform a linear fitting based on average cell densities inferred from the scientific literature (`average_densities_path`) to estimate average cell densities in regions where @@ -877,6 +883,7 @@ def fit_average_densities( L.info("Loading annotation ...") annotation = VoxelData.load_nrrd(annotation_path) + L.info("Loading neuron density ...") neuron_density = VoxelData.load_nrrd(neuron_density_path) if np.any(neuron_density.raw < 0.0): @@ -884,20 +891,26 @@ def fit_average_densities( L.info("Loading hierarchy ...") region_map = RegionMap.load_json(hierarchy_path) - L.info("Loading gene config ...") - with open(gene_config_path, "r", encoding="utf-8") as input_file: - config = yaml.load(input_file, Loader=yaml.FullLoader) - gene_voxeldata = { - gene: VoxelData.load_nrrd(path) for (gene, path) in config["inputGeneVolumePath"].items() - } - # Consistency check - voxel_data = [annotation, neuron_density] - voxel_data += list(gene_voxeldata.values()) - assert_properties(voxel_data) + slices = utils.load_json(realigned_slices_path) + + gene_marker_volumes = {} + for m in marker: + marker_name, marker_id, marker_path = m.split(":", 3) + gene_marker_volumes[marker_name] = { + "intensity": VoxelData.load_nrrd(marker_path), + "slices": slices[marker_id], # list of integer slice indices + } + + assert_properties( + [annotation, neuron_density] + + [intensity["intensity"] for intensity in gene_marker_volumes.values()] + ) + + for volume in gene_marker_volumes.values(): + volume["intensity"] = volume["intensity"].raw - slices = utils.load_json(config["realignedSlicesPath"]) - cell_density_stddev = utils.load_json(config["cellDensityStandardDeviationsPath"]) + cell_density_stddev = utils.load_json(cell_density_standard_deviations) cell_density_stddev = { # Use the AIBS name attribute as key (this is a unique identifier in 1.json) # (Ex: former key "|root|Basic cell groups and regions|Cerebrum" -> new key: "Cerebrum") @@ -905,14 +918,6 @@ def fit_average_densities( for (name, stddev) in cell_density_stddev.items() } - gene_marker_volumes = { - gene: { - "intensity": gene_data.raw, - "slices": slices[config["sectionDataSetID"][gene]], # list of integer slice indices - } - for (gene, gene_data) in gene_voxeldata.items() - } - group_ids_config = utils.load_json(group_ids_config_path) L.info("Loading average densities dataframe ...") @@ -968,7 +973,7 @@ def fit_average_densities( ) @click.option( "--algorithm", - type=click.Choice(list(ALGORITHMS.keys())), + type=click.Choice(list(ALGORITHMS)), required=False, default="linprog", help=f"Algorithm to be used. Defaults to 'linprog'. " diff --git a/atlas_densities/app/data/markers/README.rst b/atlas_densities/app/data/markers/README.rst index 700b719..a51a8da 100644 --- a/atlas_densities/app/data/markers/README.rst +++ b/atlas_densities/app/data/markers/README.rst @@ -52,28 +52,10 @@ reference volumes. Fitting of transfer functions from mean region intensity to neuron density >>>>>>>>>> -The option `--gene-config-path` of the CLI `atlas-densities cell-densities fit-average-densities` -expects a path to a yaml file of the following form: - -.. code:: yaml - - inputGeneVolumePath: - pv: "pv.nrrd" - sst: "sst.nrrd" - vip: "vip.nrrd" - gad67: "gad67.nrrd" - sectionDataSetID: - pv: 868 - sst: 1001 - vip: 77371835 - gad67: 479 - realignedSlicesPath: "realigned_slices_XXX.json" - cellDensityStandardDeviationsPath: "std_cells.json" - The sectionDataSetID values are AIBS dataset identifiers recorded in `realigned_slices_XXX.json`. An example of this configuration file (`fit_average_densities_ccfv2_config.yaml`) is provided for the CCFv2 reference volumes. .. _`Gene Search`: https://mouse.brain-map.org/ .. _`Rodarie et al. (2021)`: https://www.biorxiv.org/content/10.1101/2021.11.20.469384v2 -.. _`Eroe et al. (2018)`: https://www.frontiersin.org/articles/10.3389/fninf.2018.00084/full \ No newline at end of file +.. _`Eroe et al. (2018)`: https://www.frontiersin.org/articles/10.3389/fninf.2018.00084/full diff --git a/tests/app/test_cell_densities.py b/tests/app/test_cell_densities.py index df2d5dc..a3e8fcf 100644 --- a/tests/app/test_cell_densities.py +++ b/tests/app/test_cell_densities.py @@ -319,32 +319,23 @@ def test_measurements_to_average_densities(): assert np.all(actual["measurement_unit"] == "number of cells per mm^3") -def _get_fitting_result(runner): +@pytest.mark.filterwarnings("ignore::atlas_densities.exceptions.AtlasDensitiesWarning") +def test_fit_average_densities(): + # fmt: off args = [ "fit-average-densities", - "--hierarchy-path", - "hierarchy.json", - "--annotation-path", - "annotation.nrrd", - "--neuron-density-path", - "neuron_density.nrrd", - "--gene-config-path", - "gene_config.yaml", - "--average-densities-path", - "average_densities.csv", - "--homogenous-regions-path", - "homogenous_regions.csv", - "--fitted-densities-output-path", - "fitted_densities.csv", - "--fitting-maps-output-path", - "fitting_maps.json", + "--hierarchy-path", "hierarchy.json", + "--annotation-path", "annotation.nrrd", + "--neuron-density-path", "neuron_density.nrrd", + "--average-densities-path", "average_densities.csv", + "--homogenous-regions-path", "homogenous_regions.csv", + "--realigned-slices-path", "realigned_slices.json", + "--cell-density-standard-deviations", "std_cells.json", + "--fitted-densities-output-path", "fitted_densities.csv", + "--fitting-maps-output-path", "fitting_maps.json", ] + # fmt: on - return runner.invoke(tested.app, args) - - -@pytest.mark.filterwarnings("ignore::atlas_densities.exceptions.AtlasDensitiesWarning") -def test_fit_average_densities(): runner = CliRunner() with runner.isolated_filesystem(): input_ = get_fitting_input_data() @@ -363,23 +354,13 @@ def test_fit_average_densities(): with open("std_cells.json", "w", encoding="utf-8") as out: json.dump(input_["cell_density_stddevs"], out, indent=1, separators=(",", ": ")) - with open("gene_config.yaml", "w", encoding="utf-8") as out: - gene_config = { - "inputGeneVolumePath": {}, - "sectionDataSetID": {}, - "realignedSlicesPath": "realigned_slices.json", - "cellDensityStandardDeviationsPath": "std_cells.json", - } - for marker, intensity in input_["gene_marker_volumes"].items(): - VoxelData(intensity["intensity"], voxel_dimensions=[25.0] * 3).save_nrrd( - marker + ".nrrd" - ) - gene_config["inputGeneVolumePath"][marker] = marker + ".nrrd" - gene_config["sectionDataSetID"][marker] = input_["slice_map"][marker] - - yaml.dump(gene_config, out) - - result = _get_fitting_result(runner) + for marker, intensity in input_["gene_marker_volumes"].items(): + VoxelData(intensity["intensity"], voxel_dimensions=[25.0] * 3).save_nrrd( + marker + ".nrrd" + ) + args += [f'--marker={marker}:{input_["slice_map"][marker]}:{marker}.nrrd'] + + result = runner.invoke(tested.app, args) assert result.exit_code == 0 densities = pd.read_csv("fitted_densities.csv") @@ -404,7 +385,7 @@ def test_fit_average_densities(): VoxelData(input_["neuron_density"], voxel_dimensions=(10, 10, 10)).save_nrrd( "neuron_density.nrrd" ) - result = _get_fitting_result(runner) + result = runner.invoke(tested.app, args) assert "Negative density value" in str(result.exception)