diff --git a/notebooks/nexus_v1_neuroshapes.ipynb b/notebooks/nexus_v1_neuroshapes.ipynb new file mode 100644 index 00000000..b6ed2d3d --- /dev/null +++ b/notebooks/nexus_v1_neuroshapes.ipynb @@ -0,0 +1,715 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "jcyB6NFmQReL" + }, + "source": [ + "# Mapping of Allen Cell Types Database, MouseLight and NeuroMorpho.Org neuron morphologies to Neuroshapes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# TODO: Add PyShacl validation\n", + "\n", + "## Prerequisites\n", + "\n", + "This notebook assumes you've created a project within the [demo organization](https://sandbox.bluebrainnexus.io/web/demo/) of the sandbox deployment of Blue Brain Nexus." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 1: Configure the environment you will work in" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 1a: Configure the environment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install -U nexus-sdk\n", + "!pip install allensdk\n", + "!pip install rdflib\n", + "!pip install SPARQLWrapper" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "84rN5sccS3f6" + }, + "outputs": [], + "source": [ + "import nexussdk as nexus\n", + "import getpass\n", + "\n", + "from allensdk.core.cell_types_cache import CellTypesCache\n", + "from allensdk.api.queries.cell_types_api import CellTypesApi\n", + "from allensdk.core.cell_types_cache import ReporterStatus as RS\n", + "\n", + "import utils as ut" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# TODO: remove before publishing\n", + "# (note kept on its own line: a line magic receives trailing text as part of its argument)\n", + "%load_ext autoreload" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], +
"source": [ + "# TODO: remove before publishing\n", + "%autoreload 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# TODO: remove before publishing\n", + "%aimport utils\n", + "%aimport sparqlendpointhelper\n", + "%aimport dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "aIkRfrNhe5zD" + }, + "source": [ + "We will be working in the **sandbox** environment of Blue Brain Nexus." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "j-_eHVCne0wG" + }, + "outputs": [], + "source": [ + "DEPLOYMENT = \"https://sandbox.bluebrainnexus.io/v1\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "18aHuuRqfLIr" + }, + "source": [ + "Provide your **token** below. Your token can be obtained after log-in by clicking on *Copy token* in the top left corner of [Nexus Web](https://sandbox.bluebrainnexus.io/web/)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "5qj5EzmNfO6X" + }, + "outputs": [], + "source": [ + "TOKEN = getpass.getpass()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "l44V5xawf7Zl" + }, + "source": [ + "Configure your environment and token to be used for the nexus python SDK" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "MvnYi1Bcf749" + }, + "outputs": [], + "source": [ + "nexus.config.set_environment(DEPLOYMENT)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "nexus.config.set_token(TOKEN)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will be working in the **demo** organization of Blue Brain Nexus"
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "9mB2AXLET5AT" + }, + "outputs": [], + "source": [ + "ORGANIZATION = \"demo\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 1b: Create your project space in Blue Brain Nexus\n", + "\n", + "Projects belong to an organization and are used to:\n", + "\n", + "* Group and categorize sub-resources.\n", + "* Define settings that apply for operations on all sub-resources.\n", + "* Provide isolation from resources inside other projects. This behavior can be changed by defining so-called resolvers.\n", + "\n", + "Check out the documentation for more information:\n", + "\n", + "* https://bluebrainnexus.io/docs/api/index.html\n", + "* https://bluebrainnexus.io/docs/tutorial/getting-started/quick-start/index.html\n", + "\n", + "To create a project, set a project label and description below." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "PROJECT_LABEL = \"nmc\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "PROJECT_DESCRIPTION = \"NMC project\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# TODO: should there be API-mappings, base, vocab?"
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create the project in ORGANIZATION with the label and description set above.\n", + "# The SDK is imported as `nexus`, and the organization constant is ORGANIZATION.\n", + "response = nexus.projects.create(org_label=ORGANIZATION, project_label=PROJECT_LABEL, description=PROJECT_DESCRIPTION)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 1c: Load the schemas from Neuroshapes into your project space" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "9W5M5Ck9Tq7q" + }, + "source": [ + "## Step 2: Download neuron morphologies from public databases" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 2a: Download mouse and human neuron morphologies from the Allen Cell Types Database" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will be working with human and mouse neuron morphology data from the [Allen Cell Types Database](https://celltypes.brain-map.org/). The [AllenSDK](https://allensdk.readthedocs.io/en/latest/) can be used for data download." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Set the cell types cache for the Allen Cell Types Database: \"The CellTypesCache class provides a Python interface for downloading data in the Allen Cell Types Database into well known locations so that you don’t have to think about file names and directories.\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ctc = CellTypesCache(manifest_file=\"allen_cell_types_db/manifest.json\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Download neuron morphologies from human tissue:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Get all cells from the Allen Cell Types Database which are from human and have a reconstruction " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ +
"human_cells = ctc.get_cells(species=[CellTypesApi.HUMAN], require_reconstruction=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Total of human cells with reconstruction: %d\" % len(human_cells))\n", + "print(\"---\")\n", + "print(\"Metadata of an example cell (human):\")\n", + "ut.pretty_print(human_cells[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Collect the IDs of the human cells, capped at the first 20.\n", + "human_cellIDs = [cell[\"id\"] for cell in human_cells][:20]  # TODO: remove the limit of 20" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "human_reconstruction = [ctc.get_reconstruction(cell_id) for cell_id in human_cellIDs]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Download neuron morphologies from mouse tissue" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Get all cells from the Allen Cell Types Database which are from mouse and have a reconstruction " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mouse_cells = ctc.get_cells(species=[CellTypesApi.MOUSE], require_reconstruction=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Total of mouse cells with reconstruction: %d\" % len(mouse_cells))\n", + "print(\"---\")\n", + "print(\"Metadata of an example cell (mouse):\")\n", + "ut.pretty_print(mouse_cells[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will download the first twenty of those neuron morphology reconstructions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Collect the IDs of the mouse cells, capped at the first 20.\n", + "mouse_cellIDs = [cell[\"id\"] for cell in mouse_cells][:20]  # TODO: remove the limit of 20" + ] + }, + { + "cell_type": "code", +
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mouse_reconstruction = [ctc.get_reconstruction(i) for i in mouse_cellIDs]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 2a: Explore the data structure of the Allen Cell Types Database" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### The **cells.json** metadata file \n", + "\n", + "This file contains all the available metadata of all cells currently available through the Allen Cell Types Database. The structure presents one object per cell." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "allen_cell_types_meta = ut.get_json(\"allen_cell_types_db/cells.json\")\n", + "print(\"Metadata from the cells.json file of an example cell (human):\")\n", + "ut.pretty_print(allen_cell_types_meta[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### The metadata exposed through the **AllenSDK**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Metadata exposed through the allenSDK of an example cell (human):\")\n", + "ut.pretty_print(human_cells[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### The **folder structure** of downloaded data\n", + "\n", + "When downloading data from the Allen Cell Types Database through the AllenSDK, a folder per neuron morphology file is created. The folder name is composed as follows: **specimen_allenID**. 
This folder contains the neuron morphology reconstruction with the filename **reconstruction.swc**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 2b: Download neuron morphologies from the NeuroMorpho.Org database" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 2b: Explore the data structure of the neuron morphologies from NeuroMorpho.Org" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 2c: Access neuron morphologies downloaded from the MouseLight project" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 2c: Explore the data structure of the neuron morphologies from the MouseLight project" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 4: Store the downloaded neuron morphology reconstruction files from the three databases in Blue Brain Nexus" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Integrate the downloaded neuron morphology reconstruction files into Blue Brain Nexus" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "morph_files_meta = {}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Store the downloaded human neuron morphology reconstructions in Blue Brain Nexus:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Upload each human reconstruction file to the project and keep the file\n", + "# metadata from the response, keyed by cell ID, for the provenance step.\n", + "for cellID in human_cellIDs:\n", + "    file_path = f\"./allen_cell_types_db/specimen_{cellID}/reconstruction.swc\"\n", + "    response = nexus.files.create(org_label=ORGANIZATION, project_label=PROJECT_LABEL, filepath=file_path)\n", + "    morph_files_meta[cellID] = {\n", + "        \"file_name\": response[\"_filename\"],\n", + "        \"content_value\": response[\"_bytes\"],\n", + "        \"file_id\": response[\"@id\"],\n", + "        \"digest_value\": response[\"_digest\"][\"_value\"]}" + ] + }, + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "Store the downloaded mouse neuron morphology reconstructions in Blue Brain Nexus:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Upload each mouse reconstruction file to the project and keep the file\n", + "# metadata from the response, keyed by cell ID, for the provenance step.\n", + "for cellID in mouse_cellIDs:\n", + "    file_path = f\"./allen_cell_types_db/specimen_{cellID}/reconstruction.swc\"\n", + "    response = nexus.files.create(org_label=ORGANIZATION, project_label=PROJECT_LABEL, filepath=file_path)\n", + "    morph_files_meta[cellID] = {\n", + "        \"file_name\": response[\"_filename\"],\n", + "        \"content_value\": response[\"_bytes\"],\n", + "        \"file_id\": response[\"@id\"],\n", + "        \"digest_value\": response[\"_digest\"][\"_value\"]}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 5: Map the metadata to Neuroshapes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 6: Generate provenance-based data entities and store them in Blue Brain Nexus" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# TODO: Bring the relevant agents and protocols into the project space" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is the GRID identifier of the Allen Institute for Brain Science, which will be used to assign contribution:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ALLEN_GRID = \"https://www.grid.ac/institutes/grid.417881.3\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Generate the provenance entities for **Subject**, **PatchedCell** and **NeuronMorphology** for the human neuron morphology reconstructions and store them in Blue Brain Nexus:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# For each human cell: find its cells.json record and its uploaded-file metadata,\n", + "# then create the Subject, PatchedCell and NeuronMorphology resources.\n", + "# An HTTP error on one resource is printed and the remaining ones are still attempted.\n", + "for human_cellID in human_cellIDs:\n", + "    morph_meta = list(filter(lambda cell: 
cell['specimen__id'] == human_cellID, allen_cell_types_meta))[0]\n", + "    morph_file_meta = morph_files_meta[human_cellID]\n", + "\n", + "    try:\n", + "        subject = ut.subject(morph_meta)\n", + "        nexus.resources.create(org_label=ORGANIZATION, project_label=PROJECT_LABEL, data=subject)\n", + "        ut.pretty_print(subject)\n", + "    except nexus.HTTPError as e:\n", + "        nexus.tools.pretty_print(e.response.json())\n", + "\n", + "    try:\n", + "        patchedcell = ut.patchedcell(morph_meta, ALLEN_GRID)\n", + "        nexus.resources.create(org_label=ORGANIZATION, project_label=PROJECT_LABEL, data=patchedcell)\n", + "        ut.pretty_print(patchedcell)\n", + "    except nexus.HTTPError as e:\n", + "        nexus.tools.pretty_print(e.response.json())\n", + "\n", + "    try:\n", + "        neuronmorphology = ut.neuronmorphology(morph_meta, ALLEN_GRID, morph_file_meta)\n", + "        nexus.resources.create(org_label=ORGANIZATION, project_label=PROJECT_LABEL, data=neuronmorphology)\n", + "        ut.pretty_print(neuronmorphology)\n", + "    except nexus.HTTPError as e:\n", + "        nexus.tools.pretty_print(e.response.json())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Generate the provenance entities for **Subject**, **PatchedCell** and **NeuronMorphology** for the mouse neuron morphology reconstructions and store them in Blue Brain Nexus:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# For each mouse cell: find its cells.json record and its uploaded-file metadata,\n", + "# then create the Subject, PatchedCell and NeuronMorphology resources.\n", + "# An HTTP error on one resource is printed and the remaining ones are still attempted.\n", + "for mouse_cellID in mouse_cellIDs:\n", + "    morph_meta = list(filter(lambda cell: cell['specimen__id'] == mouse_cellID, allen_cell_types_meta))[0]\n", + "    morph_file_meta = morph_files_meta[mouse_cellID]\n", + "\n", + "    try:\n", + "        subject = ut.subject(morph_meta)\n", + "        nexus.resources.create(org_label=ORGANIZATION, project_label=PROJECT_LABEL, data=subject)\n", + "        ut.pretty_print(subject)\n", + "    except nexus.HTTPError as e:\n", + "        nexus.tools.pretty_print(e.response.json())\n", + "\n", + "    try:\n", + "        patchedcell = ut.patchedcell(morph_meta, ALLEN_GRID)\n", + 
"        nexus.resources.create(org_label=ORGANIZATION, project_label=PROJECT_LABEL, data=patchedcell)\n", + "        ut.pretty_print(patchedcell)\n", + "    except nexus.HTTPError as e:\n", + "        nexus.tools.pretty_print(e.response.json())\n", + "\n", + "    try:\n", + "        neuronmorphology = ut.neuronmorphology(morph_meta, ALLEN_GRID, morph_file_meta)\n", + "        nexus.resources.create(org_label=ORGANIZATION, project_label=PROJECT_LABEL, data=neuronmorphology)\n", + "        ut.pretty_print(neuronmorphology)\n", + "    except nexus.HTTPError as e:\n", + "        nexus.tools.pretty_print(e.response.json())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Check out [Nexus Web](https://sandbox.bluebrainnexus.io/web/demo/) to view and navigate your created resources." + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "Step by step jupyter notebook for bringing data to Nexus v1.ipynb", + "provenance": [], + "version": "0.3.2" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}