diff --git a/notebooks/cellphonedb.ipynb b/notebooks/cellphonedb.ipynb deleted file mode 100644 index 6da205f..0000000 --- a/notebooks/cellphonedb.ipynb +++ /dev/null @@ -1,471 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " ![CellphoneDB Logo](https://www.cellphonedb.org/images/cellphonedb_logo_33.png) | CellphoneDB is a publicly available repository of curated receptors, ligands and their interactions. ||\n", - " :------------- | :------------- | :-------------\n", - "\n", - "CellphoneDB includes subunit architecture for both ligands and receptors, representing heteromeric complexes accurately. This is crucial, as cell-cell communication relies on multi-subunit protein complexes that go beyond the binary representation used in most databases and studies.\n", - "\n", - "CellPhoneDB integrates existing datasets that pertain to cellular communication and new manually reviewed information. CellPhoneDB utilises information from the following data bases: [UniProt](https://www.uniprot.org/), [Ensembl](https://www.ensembl.org/), [PDB](https://www.ebi.ac.uk/pdbe/), [the IMEx consortium](https://www.imexconsortium.org/) and [IUPHAR](https://www.guidetopharmacology.org/).\n", - "\n", - "CellPhoneDB can be used to search for a particular ligand/receptor or interrogate your own single-cell transcriptomics data.\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Install CellphoneDB package" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%capture\n", - "pip install --force-reinstall \"git+https://github.com/ventolab/CellphoneDB.git\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## List CellphoneDB data releases" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from IPython.display import HTML, display\n", - "from cellphonedb.utils import db_releases_utils\n", - "display(HTML(db_releases_utils.get_remote_database_versions_html()['db_releases_html_table']))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Set CellphoneDB version and local directories for the database and user data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "# The default version of CellphoneDB data is the latest one, but you can change it to a previous version \n", - "# at any point in this notebook (by re-setting the value of cpdb_version variable). \n", - "# Please note that the format of the database from version v4.1.0 is incompatible with that of previous \n", - "# versions, hence the lowest version number you may choose in this notebook is v4.1.0\n", - "cpdb_version = \"v5.0.0\"\n", - "# N.B. At the very least, please replace <your_user_id> with your user id\n", - "cpdb_dir = os.path.join(\"/Users/rp23/.cpdb/releases/\", cpdb_version)\n", - "# If you generated your own CellphoneDB database file, please replace the default path below to the your file's path\n", - "cpdb_file_path = os.path.join(cpdb_dir, \"cellphonedb.zip\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Download CellphoneDB database from https://github.com/ventolab/cellphonedb-data/" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from cellphonedb.utils import db_utils\n", - "target_dir = os.path.join(\"/Users/rp23/.cpdb/releases/v5.0.0\")\n", - "db_utils.create_db(target_dir)\n", - "# db_utils.download_database(cpdb_dir, cpdb_version)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Search CellphoneDB Interactions\n", - "#### Search CellphoneDB interactions by (a comma- or space-separated list of): \n", - "* Ensembl ID (e.g. ENSG00000165029), \n", - "* Gene name (e.g. ABCA1), \n", - "* UniProt ID (e.g. KLRG2_HUMAN), \n", - "* UniProt Accession (e.g. A4D1S0) or \n", - "* Complex name (e.g. 12oxoLeukotrieneB4_byPTGR1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from cellphonedb.utils import file_utils, search_utils\n", - "from IPython.display import HTML, display\n", - "# Search CellphoneDB interactions by (a comma- or space-separated list of):\n", - "# Ensembl ID (e.g. ENSG00000165029), Gene name (e.g. ABCA1), UniProt ID (e.g. KLRG2_HUMAN), \n", - "# UniProt Accession (e.g. A4D1S0) or Complex name (e.g. 12oxoLeukotrieneB4_byPTGR1)\n", - "(results, complex_name2proteins_text, protein2Info, complex2Info, resource2Complex2Acc, proteinAcc2Name) = \\\n", - " search_utils.search(query_str = 'D17S1718,ENSG00000134780,integrin_a10b1_complex', \n", - " cpdb_file_path = cpdb_file_path)\n", - "# Display results in a html table\n", - "# Note: Mouse over complex names to see constituent proteins\n", - "display(HTML(search_utils.get_html_table(results, complex_name2proteins_text,\n", - " protein2Info, complex2Info, resource2Complex2Acc, proteinAcc2Name)))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Estimate memory required for the analyses" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from cellphonedb.utils import file_utils\n", - "meta_file_path=\"/Users/rp23/.cpdb/tests/data/examples/endometrium_v1/meta.tsv\"\n", - "file_utils.estimate_memory_for_analyses(meta_file_path, subsampling=False, scoring=False, num_cores=4)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Run Basic Analysis" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Please populate the following variables before executing the analysis\n", - "meta_file_path = None\n", - "counts_file_path = None\n", - "output_path = None\n", - "output_path = \"/Users/rp23/.cpdb/user_files/out\"\n", - "counts_file_path=\"/Users/rp23/.cpdb/tests/data/gonads/counts_normloqTransformed.h5ad\"\n", - "meta_file_path=\"/Users/rp23/.cpdb/tests/data/gonads/meta.tsv\"\n", - "degs_file_path=\"/Users/rp23/.cpdb/tests/data/gonads/DEGs.tsv\"\n", - "microenvs_file_path = \"/Users/rp23/.cpdb/tests/data/gonads/microenvironment.tsv\"\n", - "active_tfs_file_path=\"/Users/rp23/.cpdb/tests/data/gonads/active_TFs.tsv\"\n", - "cpdb_file_path = os.path.join(cpdb_dir, \"cellphonedb.zip\")\n", - "# Execute basic analysis\n", - "from cellphonedb.src.core.methods import cpdb_analysis_method\n", - "res = cpdb_analysis_method.call(\n", - " cpdb_file_path = cpdb_file_path, \n", - " meta_file_path = meta_file_path, \n", - " counts_file_path = counts_file_path,\n", - " counts_data = 'hgnc_symbol',\n", - " output_path = output_path,\n", - " microenvs_file_path = microenvs_file_path,\n", - " separator = \"|\",\n", - " threshold = 0.1,\n", - " result_precision = 3,\n", - " debug = False,\n", - " output_suffix = None,\n", - " score_interactions = True,\n", - " threads = 4)\n", - "\n", - "# print(res['means_result'].info)\n", - "# print(res['deconvoluted'].info)\n", - "# print(res['deconvoluted_percents'].info)\n", - "# print(res['interaction_scores'].info)\n", - "\n", - "if 'interaction_scores' in res:\n", - " example_table = res['interaction_scores'][['id_cp_interaction','partner_a','partner_b','Lymphoid|SOX9_prolif', \n", - " 'SOX9_prolif|Lymphoid']].sort_values('Lymphoid|SOX9_prolif', ascending = False)\n", - " example_table" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Run Statistical Analysis" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Please populate the following variables before executing the analysis\n", - "meta_file_path = None\n", - "counts_file_path = None\n", - "output_path = None\n", - "output_path = \"/Users/rp23/.cpdb/user_files/out\"\n", - "counts_file_path=\"/Users/rp23/.cpdb/tests/data/gonads/counts_normloqTransformed.h5ad\"\n", - "meta_file_path=\"/Users/rp23/.cpdb/tests/data/gonads/meta.tsv\"\n", - "degs_file_path=\"/Users/rp23/.cpdb/tests/data/gonads/DEGs.tsv\"\n", - "microenvs_file_path = \"/Users/rp23/.cpdb/tests/data/gonads/microenvironment.tsv\"\n", - "active_tfs_file_path=\"/Users/rp23/.cpdb/tests/data/gonads/active_TFs.tsv\"\n", - "cpdb_file_path = os.path.join(cpdb_dir, \"cellphonedb.zip\")\n", - "# Execute statistical analysis\n", - "from cellphonedb.src.core.methods import cpdb_statistical_analysis_method\n", - "res = \\\n", - " cpdb_statistical_analysis_method.call(\n", - " cpdb_file_path = cpdb_file_path, \n", - " meta_file_path = meta_file_path, \n", - " counts_file_path = counts_file_path,\n", - " counts_data = 'hgnc_symbol',\n", - " output_path = output_path,\n", - " microenvs_file_path = microenvs_file_path,\n", - " active_tfs_file_path = active_tfs_file_path,\n", - " iterations = 1000,\n", - " threshold = 0.1,\n", - " threads = 4,\n", - " debug_seed = -1,\n", - " result_precision = 3,\n", - " pvalue = 0.05,\n", - " subsampling = False,\n", - " subsampling_log = False,\n", - " subsampling_num_pc = 100,\n", - " subsampling_num_cells = None,\n", - " separator = '|',\n", - " debug = False,\n", - " output_suffix = None,\n", - " score_interactions = True)\n", - "# print(res['deconvoluted'].info)\n", - "# print(res['deconvoluted_percents'].info)\n", - "# print(res['means'].info)\n", - "# print(res['pvalues'].info)\n", - "# print(res['significant_means'].info)\n", - "# print(res['interaction_scores'].info)\n", - "# print(res['CellSign_active_interactions'].info)\n", - "# print(res['CellSign_active_interactions_deconvoluted'].info)\n", - "if 'interaction_scores' in res:\n", - " example_table = res['interaction_scores'][['id_cp_interaction','partner_a','partner_b','Lymphoid|SOX9_prolif', \n", - " 'SOX9_prolif|Lymphoid']].sort_values('Lymphoid|SOX9_prolif', ascending = False)\n", - " example_table" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Run Differential Analysis" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Please populate the following variables before executing the analysis\n", - "meta_file_path = None\n", - "counts_file_path = None\n", - "degs_file_path = None\n", - "output_path = None\n", - "# output_path = \"/Users/rp23/.cpdb/user_files/out\"\n", - "# counts_file_path=\"/Users/rp23/.cpdb/tests/data/examples/endometrium_v1/counts.h5ad\"\n", - "# meta_file_path=\"/Users/rp23/.cpdb/tests/data/examples/endometrium_v1/meta.tsv\"\n", - "# degs_file_path=\"/Users/rp23/.cpdb/tests/data/examples/endometrium_v1/degs_in_epithelials.tsv\"\n", - "# microenvs_file_path = \"/Users/rp23/.cpdb/tests/data/examples/endometrium_v1/microenviroments.tsv\"\n", - "# cpdb_file_path = os.path.join(cpdb_dir, \"cellphonedb.zip\")\n", - "output_path = \"/Users/rp23/.cpdb/user_files/out\"\n", - "counts_file_path=\"/Users/rp23/.cpdb/tests/data/gonads/counts_normloqTransformed.h5ad\"\n", - "meta_file_path=\"/Users/rp23/.cpdb/tests/data/gonads/meta.tsv\"\n", - "degs_file_path=\"/Users/rp23/.cpdb/tests/data/gonads/DEGs.tsv\"\n", - "microenvs_file_path = \"/Users/rp23/.cpdb/tests/data/gonads/microenvironment.tsv\"\n", - "active_tfs_file_path=\"/Users/rp23/.cpdb/tests/data/gonads/active_TFs.tsv\"\n", - "cpdb_file_path = os.path.join(cpdb_dir, \"cellphonedb.zip\")\n", - "# Execute differential analysis\n", - "from cellphonedb.src.core.methods import cpdb_degs_analysis_method\n", - "res = \\\n", - " cpdb_degs_analysis_method.call(\n", - " cpdb_file_path = cpdb_file_path, \n", - " meta_file_path = meta_file_path, \n", - " counts_file_path = counts_file_path,\n", - " degs_file_path = degs_file_path,\n", - " counts_data = 'hgnc_symbol',\n", - " microenvs_file_path = microenvs_file_path,\n", - " active_tfs_file_path = active_tfs_file_path,\n", - " threshold = 0.1,\n", - " result_precision = 3,\n", - " separator = '|',\n", - " debug = False,\n", - " output_path = output_path,\n", - " output_suffix = None,\n", - " score_interactions = True,\n", - " threads = 4)\n", - "# print(res['deconvoluted'].info)\n", - "# print(res['means'].info)\n", - "# print(res['relevant_interactions'].info)\n", - "# print(res['significant_means'].info)\n", - "# print(res['interaction_scores'].info)\n", - "# print(res['CellSign_active_interactions'].info)\n", - "# print(res['CellSign_active_interactions_deconvoluted'].info)\n", - "if 'interaction_scores' in res:\n", - " example_table = res['interaction_scores'][['id_cp_interaction','partner_a','partner_b','Lymphoid|SOX9_prolif', \n", - " 'SOX9_prolif|Lymphoid']].sort_values('Lymphoid|SOX9_prolif', ascending = False)\n", - " example_table" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Plot Statistical Analysis results" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "import warnings\n", - "warnings.filterwarnings('ignore')\n", - "from ktplotspy.plot import plot_cpdb, plot_cpdb_heatmap\n", - "from cellphonedb.utils import file_utils\n", - "import os\n", - "\n", - "meta_fp='test_meta.txt'\n", - "# counts_fn='test_counts.txt'\n", - "counts_fp='test.h5ad'\n", - "# Create AnnData object with obs set to a DataFrame containing data from meta_fn\n", - "adata = file_utils.get_counts_meta_adata(counts_fp, meta_fp)\n", - "\n", - "# Exmaple dot plot\n", - "g1 = plot_cpdb(\n", - " adata=adata,\n", - " cell_type1=\"Myeloid\",\n", - " # '.' means any cell type\n", - " cell_type2=\".\",\n", - " means=means,\n", - " pvals=pvalues,\n", - " celltype_key=\"cell_type\",\n", - " genes=[\"FN1\", \"integrin-a5b1-complex\",\"COLEC12\"],\n", - " title=\"Example dot plot\"\n", - " )\n", - "\n", - "# Example heatmap\n", - "g2 = plot_cpdb_heatmap(\n", - " adata=adata,\n", - " pvals=pvalues,\n", - " celltype_key=\"cell_type\",\n", - " log1p_transform=True,\n", - " title=\"Example heatmap\"\n", - " )\n", - "g1, g2\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Search for interactions in Statistical or Differential Analysis Results" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - "Search results of either statistical or DEG analysis for relevant interactions matching any of:\n", - " 1. A gene in genes\n", - " 2. A complex containing a gene in genes\n", - " 3. An interaction name in interactions (e.g. 12oxoLeukotrieneB4_byPTGR1)\n", - " where at least one pair of cell types containing one cell type from cell_types_1\n", - " and one cell type from cell_types_2 has a significant mean.\n", - " NB. If genes and interactions are empty, and cell_types_1 and cell_types_2 are both set to \"All\"\n", - " then all relevant interactions are returned.\n", - "\"\"\"\n", - "from IPython.display import HTML, display\n", - "from cellphonedb.utils import search_utils\n", - "\n", - "cell_types_1=None\n", - "cell_types_2=None\n", - "genes=None\n", - "interactions=None\n", - "classifications=None\n", - "\n", - "cell_types_1=['Somatic.granulosa']\n", - "cell_types_2=['Germ.oocyte', 'Germ.pre_oocyte']\n", - "#genes=['COL4A4']\n", - "interactions=['COL4A4_integrin_a11b1_complex']\n", - "#classifications=['Adhesion by Collagen/Integrin']\n", - "\n", - "separator=\"|\"\n", - "# Set long_format to True to transpose the results table (so that cell type pairs are shown in a single column)\n", - "long_format = False\n", - "\n", - "search_results = search_utils.search_analysis_results(\n", - " query_cell_types_1=cell_types_1,\n", - " query_cell_types_2=cell_types_2,\n", - " query_genes=genes,\n", - " query_interactions=interactions,\n", - " query_classifications=classifications,\n", - " query_minimum_score=100,\n", - " significant_means=res['significant_means'],\n", - " deconvoluted=res['deconvoluted'],\n", - " interaction_scores=res['interaction_scores'],\n", - " separator=separator,\n", - " long_format=long_format\n", - ")\n", - "display(HTML(search_results.to_html(index=False)))\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - }, - "vscode": { - "interpreter": { - "hash": "d4be38938b4384b4b72f771d57e174ff27b15af44a35842c7bbd2be6ebd16de6" - } - } - }, - "nbformat": 4, - "nbformat_minor": 4 -}