From 6c08e4fa362d09bab32b4f448b9d52bc32b8e4af Mon Sep 17 00:00:00 2001 From: Alex Wolf Date: Thu, 22 Aug 2024 15:59:35 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=92=84=20=20Prettify=20Vitessce=20guide?= =?UTF-8?q?=20(#23)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/vitessce.ipynb | 173 ++++++++++++++++++-------------------------- 1 file changed, 72 insertions(+), 101 deletions(-) diff --git a/docs/vitessce.ipynb b/docs/vitessce.ipynb index 4d78823..ff7ab52 100644 --- a/docs/vitessce.ipynb +++ b/docs/vitessce.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![](https://img.shields.io/badge/Source%20on%20GitHub-orange)](https://github.com/laminlabs/lamin-spatial/blob/main/docs/vitessce.ipynb)" + "[![](https://img.shields.io/badge/Source%20on%20GitHub-orange)](https://github.com/laminlabs/lamin-spatial/blob/main/docs/vitessce.ipynb) [![hub](https://img.shields.io/badge/View%20on%20LaminHub-mediumseagreen)](https://lamin.ai/laminlabs/lamindata/transform/hqtT4OTr5Tiq5zKv/URbaThYhljgXbZEzgj5x)" ] }, { @@ -20,126 +20,109 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "This tutorial has been adopted from the data preparation examples in [the Vitessce documention](https://vitessce.github.io/vitessce-python)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Setup\n", + "This tutorial has been adapted from the data preparation examples in [the Vitessce documentation](https://vitessce.github.io/vitessce-python).\n", "\n", - "Install dependencies:\n", - "\n", - "```python\n", - "pip install vitessce\n", - "pip install 'lamindb[jupyter,aws,bionty]'\n", - "```" + "It uses a [dataset](https://www.covid19cellatlas.org/index.healthy.html#habib17) from the COVID-19 Cell Atlas." 
] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [ + "hide-output" + ] + }, "outputs": [], "source": [ + "# !pip install vitessce\n", + "# !pip install 'lamindb[jupyter,aws,bionty]'\n", "!lamin load laminlabs/lamin-dev # <-- replace with your instance" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [ + "hide-output" + ] + }, "outputs": [], "source": [ "from urllib.request import urlretrieve\n", "from pathlib import Path\n", "from anndata import read_h5ad\n", - "from vitessce import (\n", - " VitessceConfig,\n", - " Component as cm,\n", - " AnnDataWrapper,\n", - ")\n", - "from vitessce.data_utils import (\n", - " to_uint8,\n", - " sort_var_axis,\n", - " optimize_adata,\n", - ")\n", - "import lamindb as ln" + "import vitessce as vit\n", + "from vitessce import data_utils as vitdu\n", + "import lamindb as ln\n", + "\n", + "# [optional] track the current notebook or script\n", + "ln.context.uid = \"BZhZQ6uIbkWv0000\"\n", + "ln.context.track()" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "# [optional] track the current notebook or script\n", - "ln.context.uid = \"BZhZQ6uIbkWv0000\"\n", - "ln.context.track()" + "## Save your dataset" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Pre-process the dataset\n", - "\n", - "For this example, we use a dataset from the COVID-19 Cell Atlas https://www.covid19cellatlas.org/index.healthy.html#habib17." + "Convert the dataset to `.zarr` format." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { - "tags": [] + "tags": [ + "hide-output" + ] }, "outputs": [], "source": [ - "# From https://github.com/vitessce/vitessce-python/blob/main/demos/habib-2017/src/convert_to_zarr.py\n", + "# from https://github.com/vitessce/vitessce-python/blob/main/demos/habib-2017/src/convert_to_zarr.py\n", "def convert_h5ad_to_zarr(input_path, output_path):\n", " adata = read_h5ad(input_path)\n", " adata = adata[:, adata.var[\"highly_variable\"]].copy()\n", - " leaf_list = sort_var_axis(adata.X, adata.var.index.values)\n", + " leaf_list = vitdu.sort_var_axis(adata.X, adata.var.index.values)\n", " adata = adata[:, leaf_list].copy()\n", - " adata.layers[\"X_uint8\"] = to_uint8(adata.X, norm_along=\"var\")\n", - " adata = optimize_adata(\n", + " adata.layers[\"X_uint8\"] = vitdu.to_uint8(adata.X, norm_along=\"var\")\n", + " adata = vitdu.optimize_adata(\n", " adata, obs_cols=[\"CellType\"], obsm_keys=[\"X_umap\"], layer_keys=[\"X_uint8\"]\n", " )\n", - " adata.write_zarr(output_path)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [ - "hide-output" - ] - }, - "outputs": [], - "source": [ - "adata_filepath = \"./habib17.processed.h5ad\"\n", - "if not Path(adata_filepath).exists():\n", + " adata.write_zarr(output_path)\n", + "\n", + "\n", + "h5ad_filepath = \"./habib17.processed.h5ad\"\n", + "if not Path(h5ad_filepath).exists():\n", " urlretrieve(\n", - " \"https://covid19.cog.sanger.ac.uk/habib17.processed.h5ad\", adata_filepath\n", + " \"https://covid19.cog.sanger.ac.uk/habib17.processed.h5ad\", h5ad_filepath\n", " )\n", "zarr_filepath = \"./hhabib_2017_nature_methods.anndata.zarr\"\n", - "\n", - "convert_h5ad_to_zarr(adata_filepath, zarr_filepath)" + "convert_h5ad_to_zarr(h5ad_filepath, zarr_filepath)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Save dataset" + "Save a `.zarr` version of the dataset to lamindb." 
] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [ + "hide-output" + ] + }, "outputs": [], "source": [ "zarr_artifact = ln.Artifact(\n", @@ -153,25 +136,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Create a `VitessceConfig` object" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - ":::{note}\n", - "\n", - "Here is [a note](https://lamin.ai/laminlabs/lamindata/transform/WDjio16cQsdW5zKv) on folder upload speed and we why chose to not use the `.export(to=\"s3\")` functionality of Vitessce.\n", - "\n", - ":::" + "## Save a `VitessceConfig` object" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Set up the configuration by adding the views and datasets of interest." + "You can create a dashboard for one or several datasets by using Vitessce's component API." ] }, { @@ -184,12 +156,12 @@ }, "outputs": [], "source": [ - "vc = VitessceConfig(\n", + "vc = vit.VitessceConfig(\n", " schema_version=\"1.0.15\",\n", " description=zarr_artifact.description,\n", ")\n", "dataset = vc.add_dataset(name=\"Habib 2017\").add_object(\n", - " AnnDataWrapper(\n", + " vit.AnnDataWrapper(\n", " adata_url=zarr_artifact.path.to_url(),\n", " obs_feature_matrix_path=\"layers/X_uint8\",\n", " obs_embedding_paths=[\"obsm/X_umap\"],\n", @@ -198,40 +170,41 @@ " obs_set_names=[\"Cell Type\"],\n", " )\n", ")\n", - "obs_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)\n", - "obs_sets_sizes = vc.add_view(cm.OBS_SET_SIZES, dataset=dataset)\n", - "scatterplot = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping=\"UMAP\")\n", - "heatmap = vc.add_view(cm.HEATMAP, dataset=dataset)\n", - "genes = vc.add_view(cm.FEATURE_LIST, dataset=dataset)\n", - "vc.layout(((scatterplot | obs_sets) / heatmap) | (obs_sets_sizes / genes))\n", - "\n", - "# inspect the config\n", - "vc.to_dict()" + "obs_sets = vc.add_view(vit.Component.OBS_SETS, dataset=dataset)\n", + "obs_sets_sizes = 
vc.add_view(vit.Component.OBS_SET_SIZES, dataset=dataset)\n", + "scatterplot = vc.add_view(vit.Component.SCATTERPLOT, dataset=dataset, mapping=\"UMAP\")\n", + "heatmap = vc.add_view(vit.Component.HEATMAP, dataset=dataset)\n", + "genes = vc.add_view(vit.Component.FEATURE_LIST, dataset=dataset)\n", + "vc.layout(((scatterplot | obs_sets) / heatmap) | (obs_sets_sizes / genes))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Save `VitessceConfig` object" + "Save the `VitessceConfig` object." ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [ + "hide-output" + ] + }, "outputs": [], "source": [ - "from lamindb.integrations import save_vitessce_config" + "vc_artifact = ln.integrations.save_vitessce_config(\n", + " vc, description=\"View Habib17 in Vitessce\"\n", + ")" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "vc_artifact = save_vitessce_config(vc, description=\"View Habib17 in Vitessce\")" + "You can now see the Vitessce button show up on your dataset as in this [example dataset](https://lamin.ai/laminlabs/lamindata/artifact/HXJ4DDAw8012jVKwoxgd)." 
] }, { @@ -249,21 +222,19 @@ "metadata": {}, "outputs": [], "source": [ - "# [optional] save run report to share notebook with collaborators\n", - "# ln.finish()" + "# [optional] finish run context and auto-save the notebook\n", + "# ln.context.finish()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "See how a Vitessce UI button looks like:\n", - "\n", - "[![hub](https://img.shields.io/badge/View%20in%20LaminHub-mediumseagreen)](https://lamin.ai/laminlabs/lamindata/transform/hqtT4OTr5Tiq5zKv/URbaThYhljgXbZEzgj5x)\n", + ":::{note}\n", "\n", - "\n", + "Here is [a note](https://lamin.ai/laminlabs/lamindata/transform/WDjio16cQsdW5zKv) on folder upload speed and why we chose not to use the `.export(to=\"s3\")` functionality of Vitessce.\n", "\n", - "Click the Vitessce button to visualize [this config file](https://lamin.ai/laminlabs/lamindata/artifact/HXJ4DDAw8012jVKwoxgd)." + ":::" ] }, { @@ -298,7 +269,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.1.-1" + "version": "3.10.13" } }, "nbformat": 4,