'.\n",
- " check_columns_against_constants(\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " identifier | \n",
- " identifier.source | \n",
- " target | \n",
- " target.source | \n",
- " OpenTargets_disease_compounds | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " UMLS_C0000786 | \n",
- " UMLS | \n",
- " EFO_1001255 | \n",
- " EFO | \n",
- " [{'chembl_id': 'CHEMBL1276308', 'drugbank_id':... | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " UMLS_C0000889 | \n",
- " UMLS | \n",
- " EFO_1000660 | \n",
- " EFO | \n",
- " [{'chembl_id': 'CHEMBL1431', 'drugbank_id': 'D... | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " UMLS_C0002940 | \n",
- " UMLS | \n",
- " EFO_0004264 | \n",
- " EFO | \n",
- " [{'chembl_id': 'CHEMBL1491', 'drugbank_id': 'D... | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " UMLS_C0002940 | \n",
- " UMLS | \n",
- " EFO_0009659 | \n",
- " EFO | \n",
- " [{'chembl_id': 'CHEMBL526', 'drugbank_id': 'DB... | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " UMLS_C0003873 | \n",
- " UMLS | \n",
- " EFO_0000685 | \n",
- " EFO | \n",
- " [{'chembl_id': 'CHEMBL2103743', 'drugbank_id':... | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " identifier identifier.source target target.source \\\n",
- "0 UMLS_C0000786 UMLS EFO_1001255 EFO \n",
- "1 UMLS_C0000889 UMLS EFO_1000660 EFO \n",
- "2 UMLS_C0002940 UMLS EFO_0004264 EFO \n",
- "3 UMLS_C0002940 UMLS EFO_0009659 EFO \n",
- "4 UMLS_C0003873 UMLS EFO_0000685 EFO \n",
- "\n",
- " OpenTargets_disease_compounds \n",
- "0 [{'chembl_id': 'CHEMBL1276308', 'drugbank_id':... \n",
- "1 [{'chembl_id': 'CHEMBL1431', 'drugbank_id': 'D... \n",
- "2 [{'chembl_id': 'CHEMBL1491', 'drugbank_id': 'D... \n",
- "3 [{'chembl_id': 'CHEMBL526', 'drugbank_id': 'DB... \n",
- "4 [{'chembl_id': 'CHEMBL2103743', 'drugbank_id':... "
- ]
- },
- "execution_count": 12,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"(\n",
" opentargets_disease_compound_df,\n",
@@ -2196,7 +2092,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 6,
"metadata": {},
"outputs": [
{
@@ -2288,7 +2184,7 @@
"4 [{'pathway_id': nan, 'pathway_label': nan, 'pa... "
]
},
- "execution_count": 14,
+ "execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -2300,6 +2196,36 @@
"minerva_df.head()"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'datasource': 'MINERVA',\n",
+ " 'metadata': {'source_version': '17.1.3'},\n",
+ " 'query': {'size': 6,\n",
+ " 'input_type': 'Ensembl',\n",
+ " 'MINERVA project': 'COVID19 Disease Map',\n",
+ " 'MINERVA project URL': 'https://covid19map.elixir-luxembourg.org/minerva/',\n",
+ " 'time': '0:00:47.612140',\n",
+ " 'date': '2024-09-10 15:23:35',\n",
+ " 'url': 'https://covid19map.elixir-luxembourg.org/minerva/',\n",
+ " 'number_of_added_nodes': 1,\n",
+ " 'number_of_added_edges': 1}}"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "minerva_metadata"
+ ]
+ },
{
"cell_type": "code",
"execution_count": 15,
@@ -4002,10 +3928,10 @@
}
],
"source": [
- "combined_df = generator.load_dataframe_from_pickle(\"combined_df.pkl\")\n",
- "opentargets_disease_compound_df = generator.load_dataframe_from_pickle(\n",
- " \"opentargets_disease_compound_df.pkl\"\n",
- ")\n",
+ "# combined_df = generator.load_dataframe_from_pickle(\"combined_df.pkl\")\n",
+ "# opentargets_disease_compound_df = generator.load_dataframe_from_pickle(\n",
+ "# \"opentargets_disease_compound_df.pkl\"\n",
+ "# )\n",
"\n",
"combined_df.head()"
]
@@ -4159,25 +4085,123 @@
"# plt.show()"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Cytoscape\n",
+ "Make sure that Cytoscape is open"
+ ]
+ },
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from pyBiodatafuse.graph import cytoscape\n",
+ "\n",
+ "cytoscape.load_graph(pygraph, network_name=\"PCS network\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Neo4j"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from pyBiodatafuse.graph import neo4j\n",
+ "\n",
+ "neo4j.save_graph_to_graphml(pygraph, \"pcs_networkx_graph.graphml\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Applying default style...\n",
- "Applying preferred layout\n"
- ]
- }
- ],
"source": [
- "from pyBiodatafuse.graph import cytoscape, neo4j\n",
+ "##### Steps to load the graph in Neo4j"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Add `.graphml` file in **import** subfolder of the DBMS folder\n",
+ "- Install apoc plugin\n",
+ "- Add `apoc.conf` file to **conf** subfolder of the DBMS folder\n",
+ "\n",
+ " `apoc.conf` file:\n",
+ " ```\n",
+ " apoc.trigger.enabled=true\n",
+ " apoc.import.file.enabled=true\n",
+ " apoc.export.file.enabled=true\n",
+ " apoc.import.file.use_neo4j_config=true\n",
+ " ```\n",
+ "\n",
+ "- Open Neo4j Browser\n",
+ "- (Optional, only run if you have imported a graph before) Remove all the nodes before importing `.graphml` file\n",
+ "\n",
+ " ```MATCH (n) DETACH DELETE n```\n",
+ "\n",
+ "- Import `.graphml` file\n",
+ "\n",
+ " ```call apoc.import.graphml('file:///pcs_networkx_graph.graphml',{readLabels:TRUE})```\n",
+ "\n",
+ "- Add indexes after importing the graph for improving the performance of queries\n",
+ "\n",
+ " ```create index Gene for (n:Gene) on (n.node_type)```\n",
+ "\n",
+ " ```create index Pathway for (n:Pathway) on (n.node_type)```\n",
+ "\n",
+ " ```create index `Biological Process` for (n:`Biological Process`) on (n.node_type)```\n",
+ "\n",
+ " ```create index `Molecular Function` for (n:`Molecular Function`) on (n.node_type)```\n",
+ "\n",
+ " ```create index `Cellular Component` for (n:`Cellular Component`) on (n.node_type)```\n",
+ "\n",
+ " ```create index Disease for (n:Disease) on (n.node_type)```\n",
+ "\n",
+ " ```create index Compound for (n:Compound) on (n.node_type)```\n",
+ "\n",
+ " ```create index `Side Effect` for (n:`Side Effect`) on (n.node_type)```\n",
+ " \n",
+ "\n",
+ "- Count the number of each node type\n",
+ " - total (```MATCH (n) RETURN count(n)```) \n",
+ " - Gene (```MATCH (n:Gene) RETURN count(n)```)\n",
+ " - Pathway (```MATCH (n:Pathway) RETURN count(n)```)\n",
+ " - WikiPathways (```MATCH (n:Pathway {source: \"WikiPathways\"}) RETURN count(n)```) \n",
+ " - OpenTargets, Reactome (```MATCH (n:Pathway {source: \"OpenTargets\"}) RETURN count(n)```) \n",
+ " - MINERVA (```MATCH (n:Pathway {source: \"MINERVA\"}) RETURN count(n)```) \n",
+ " - Biological Process (```MATCH (n:`Biological Process`) RETURN count(n)```) \n",
+ " - Molecular Function (```MATCH (n:`Molecular Function`) RETURN count(n)```) \n",
+ " - Cellular Component (```MATCH (n:`Cellular Component`) RETURN count(n)```) \n",
+ " - Disease (```MATCH (n:Disease) RETURN count(n)```) \n",
+ " - Compound (```MATCH (n:Compound) RETURN count(n)```)\n",
+ " - Side Effect (```MATCH (n:`Side Effect`) RETURN count(n)```) \n",
+ "- Count the number of each edge type\n",
+ " - total (```MATCH ()-[r]->() RETURN count(r)```) \n",
+ " - interacts_with (```MATCH ()-[r:interacts_with]->() RETURN count(r)```) \n",
+ " - part_of (```MATCH ()-[r:part_of]->() RETURN count(r)```) \n",
+ " - WikiPathways (```MATCH ()-[r:part_of {source: \"WikiPathways\"}]->() RETURN count(r)```) \n",
+ " - OpenTargets, Reactome (```MATCH ()-[r:part_of {source: \"OpenTargets\"}]->() RETURN count(r)```) \n",
+ " - MINERVA (```MATCH ()-[r:part_of {source: \"MINERVA\"}]->() RETURN count(r)```) \n",
+ " - activates (```MATCH ()-[r:activates]->() RETURN count(r)```) \n",
+ " - treats (```MATCH ()-[r:treats]->() RETURN count(r)```) \n",
+ " - has_side_effect (```MATCH ()-[r:has_side_effect]->() RETURN count(r)```) \n",
+ " - inhibits (```MATCH ()-[r:inhibits]->() RETURN count(r)```) \n",
+ " - associated_with (```MATCH ()-[r:associated_with]->() RETURN count(r)```) \n",
+ "\n",
+ "- Export the graph as a `.csv` file\n",
"\n",
- "neo4j.save_graph_to_graphml(pygraph, output_path=\"graph_to-test.graphml\")\n",
- "cytoscape.load_graph(pygraph, network_name=\"test_graph\")"
+ " ```call apoc.export.csv.all(\"pcs_networkx_graph.csv\",{})```"
]
}
],
diff --git a/examples/usecases/PCS/PCS_usecase.ipynb b/examples/usecases/PCS/PCS_usecase.ipynb
index 35ae98a2..cd68ffda 100644
--- a/examples/usecases/PCS/PCS_usecase.ipynb
+++ b/examples/usecases/PCS/PCS_usecase.ipynb
@@ -43,6 +43,7 @@
"\n",
"import matplotlib.pyplot as plt\n",
"import networkx as nx\n",
+ "import numpy as np\n",
"import pandas as pd\n",
"from dotenv import load_dotenv\n",
"\n",
@@ -513,6 +514,267 @@
"disgenet_df[DISGENET_DISEASE_COL][0]"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Add literature-based data\n",
+ "Genes found to be associated with Post-COVID-19"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Gene | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " CTLA4 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " PTPN22 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " KIT | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " KRAS | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " NF1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Gene\n",
+ "0 CTLA4\n",
+ "1 PTPN22\n",
+ "2 KIT\n",
+ "3 KRAS\n",
+ "4 NF1"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pcs_associated_genes = pd.read_excel(\n",
+ " os.path.join(os.getcwd(), \"examples\", \"usecases\", \"PCS\", \"pcs_associated_genes.xlsx\")\n",
+ ")\n",
+ "pcs_associated_genes.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Define the literature based info"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " identifier | \n",
+ " identifier.source | \n",
+ " target | \n",
+ " target.source | \n",
+ " DISGENET_diseases | \n",
+ " literature_based_info | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " A2ML1 | \n",
+ " HGNC | \n",
+ " 144568 | \n",
+ " NCBI Gene | \n",
+ " [{'disease_name': 'Noonan Syndrome', 'HPO': ''... | \n",
+ " [{'disease_name': nan, 'id': nan, 'source': nan}] | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " AAMDC | \n",
+ " HGNC | \n",
+ " 28971 | \n",
+ " NCBI Gene | \n",
+ " [{'disease_name': nan, 'HPO': nan, 'NCI': nan,... | \n",
+ " [{'disease_name': nan, 'id': nan, 'source': nan}] | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " ABCA1 | \n",
+ " HGNC | \n",
+ " 19 | \n",
+ " NCBI Gene | \n",
+ " [{'disease_name': 'Tangier Disease', 'HPO': ''... | \n",
+ " [{'disease_name': nan, 'id': nan, 'source': nan}] | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " ABCB1 | \n",
+ " HGNC | \n",
+ " 5243 | \n",
+ " NCBI Gene | \n",
+ " [{'disease_name': 'Epilepsy', 'HPO': 'HPO_HP:0... | \n",
+ " [{'disease_name': nan, 'id': nan, 'source': nan}] | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " ABCC6P1 | \n",
+ " HGNC | \n",
+ " 653190 | \n",
+ " NCBI Gene | \n",
+ " [{'disease_name': nan, 'HPO': nan, 'NCI': nan,... | \n",
+ " [{'disease_name': nan, 'id': nan, 'source': nan}] | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " identifier identifier.source target target.source \\\n",
+ "0 A2ML1 HGNC 144568 NCBI Gene \n",
+ "1 AAMDC HGNC 28971 NCBI Gene \n",
+ "2 ABCA1 HGNC 19 NCBI Gene \n",
+ "3 ABCB1 HGNC 5243 NCBI Gene \n",
+ "4 ABCC6P1 HGNC 653190 NCBI Gene \n",
+ "\n",
+ " DISGENET_diseases \\\n",
+ "0 [{'disease_name': 'Noonan Syndrome', 'HPO': ''... \n",
+ "1 [{'disease_name': nan, 'HPO': nan, 'NCI': nan,... \n",
+ "2 [{'disease_name': 'Tangier Disease', 'HPO': ''... \n",
+ "3 [{'disease_name': 'Epilepsy', 'HPO': 'HPO_HP:0... \n",
+ "4 [{'disease_name': nan, 'HPO': nan, 'NCI': nan,... \n",
+ "\n",
+ " literature_based_info \n",
+ "0 [{'disease_name': nan, 'id': nan, 'source': nan}] \n",
+ "1 [{'disease_name': nan, 'id': nan, 'source': nan}] \n",
+ "2 [{'disease_name': nan, 'id': nan, 'source': nan}] \n",
+ "3 [{'disease_name': nan, 'id': nan, 'source': nan}] \n",
+ "4 [{'disease_name': nan, 'id': nan, 'source': nan}] "
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from pyBiodatafuse.constants import LITERATURE_DISEASE_COL, LITERATURE_DISEASE_OUTPUT_DICT\n",
+ "\n",
+ "literature_disease_attrs = LITERATURE_DISEASE_OUTPUT_DICT.copy()\n",
+ "literature_disease_attrs[\"disease_name\"] = \"Post-COVID-19\"\n",
+ "literature_disease_attrs[\"id\"] = \"C00000\"\n",
+ "literature_disease_attrs[\"source\"] = \"PMID: 37675861\"\n",
+ "\n",
+ "\n",
+ "def get_literature_based_info(gene):\n",
+ "    \"\"\"Return the literature-based disease annotation for one gene as a list of dicts.\n",
+ "\n",
+ "    :param gene: gene symbol taken from the ``identifier`` column of ``disgenet_df``.\n",
+ "    :returns: a one-element list holding a copy of ``literature_disease_attrs`` when the gene is\n",
+ "        listed in ``pcs_associated_genes[\"Gene\"]``, otherwise a one-element list of NaN placeholders.\n",
+ "    \"\"\"\n",
+ "    if gene in pcs_associated_genes[\"Gene\"].values:\n",
+ "        # Wrap in a list so matched and unmatched rows have the same shape (list of dicts),\n",
+ "        # and copy so that rows do not all alias one mutable dict.\n",
+ "        return [literature_disease_attrs.copy()]\n",
+ "    return [{\"disease_name\": np.nan, \"id\": np.nan, \"source\": np.nan}]\n",
+ "\n",
+ "\n",
+ "disgenet_df[LITERATURE_DISEASE_COL] = disgenet_df[\"identifier\"].apply(get_literature_based_info)\n",
+ "\n",
+ "disgenet_df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "362 {'disease_name': 'Post-COVID-19', 'id': 'C0000...\n",
+ "Name: literature_based_info, dtype: object"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "disgenet_df[disgenet_df[\"identifier\"] == \"DMP1\"][LITERATURE_DISEASE_COL]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "29\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(pcs_associated_genes[\"Gene\"].isin(disgenet_df[\"identifier\"]).sum())"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -529,7 +791,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 12,
"metadata": {},
"outputs": [
{
@@ -608,7 +870,7 @@
"4 UMLS_C0010054 UMLS EFO_0001645 EFO"
]
},
- "execution_count": 8,
+ "execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
@@ -629,7 +891,7 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 13,
"metadata": {},
"outputs": [
{
@@ -721,7 +983,7 @@
"4 [{'chembl_id': 'CHEMBL628', 'drugbank_id': 'DB... "
]
},
- "execution_count": 9,
+ "execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
@@ -784,7 +1046,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 14,
"metadata": {},
"outputs": [
{
@@ -879,7 +1141,7 @@
" {'name': 'drug withdrawal syndrome neonatal'}]}]"
]
},
- "execution_count": 10,
+ "execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
@@ -897,7 +1159,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 15,
"metadata": {},
"outputs": [
{
@@ -989,7 +1251,7 @@
"4 [{'chembl_id': nan, 'drugbank_id': nan, 'compo... "
]
},
- "execution_count": 11,
+ "execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
@@ -1028,7 +1290,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 16,
"metadata": {},
"outputs": [
{
@@ -1073,7 +1335,7 @@
" 'adverse_effect': None}]"
]
},
- "execution_count": 12,
+ "execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
@@ -1091,7 +1353,7 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 17,
"metadata": {},
"outputs": [
{
@@ -1183,7 +1445,7 @@
"4 [{'pathway_id': nan, 'pathway_label': nan, 'pa... "
]
},
- "execution_count": 13,
+ "execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
@@ -1212,7 +1474,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 18,
"metadata": {},
"outputs": [
{
@@ -1223,7 +1485,7 @@
" 'pathway_gene_count': 45.0}]"
]
},
- "execution_count": 14,
+ "execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
@@ -1241,7 +1503,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 19,
"metadata": {},
"outputs": [
{
@@ -1333,7 +1595,7 @@
"4 [{'pathway_id': nan, 'pathway_label': nan, 'pa... "
]
},
- "execution_count": 15,
+ "execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
@@ -1365,7 +1627,7 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 20,
"metadata": {},
"outputs": [
{
@@ -1409,7 +1671,7 @@
" 'pathway_gene_count': 35.0}]"
]
},
- "execution_count": 16,
+ "execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
@@ -1427,7 +1689,7 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 21,
"metadata": {},
"outputs": [
{
@@ -1519,7 +1781,7 @@
"4 [{'pathway_label': nan, 'pathway_id': nan}] "
]
},
- "execution_count": 17,
+ "execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
@@ -1557,7 +1819,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 22,
"metadata": {},
"outputs": [
{
@@ -1572,7 +1834,7 @@
" {'pathway_label': 'HDL assembly', 'pathway_id': 'R-HSA-8963896'}]"
]
},
- "execution_count": 18,
+ "execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
@@ -1590,7 +1852,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 23,
"metadata": {},
"outputs": [
{
@@ -1682,7 +1944,7 @@
"4 [{'go_id': nan, 'go_name': nan, 'go_type': nan}] "
]
},
- "execution_count": 19,
+ "execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
@@ -1712,7 +1974,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 24,
"metadata": {},
"outputs": [
{
@@ -1731,7 +1993,7 @@
" 'go_type': 'F'}]"
]
},
- "execution_count": 20,
+ "execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
@@ -1749,7 +2011,7 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 25,
"metadata": {},
"outputs": [
{
@@ -1841,7 +2103,7 @@
"4 [{'stringdb_link_to': 'DEFB118', 'Ensembl': 'E... "
]
},
- "execution_count": 21,
+ "execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
@@ -1869,7 +2131,7 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": 26,
"metadata": {},
"outputs": [
{
@@ -1885,7 +2147,7 @@
" {'stringdb_link_to': 'RUNX2', 'Ensembl': 'ENSP00000360493', 'score': 0.713}]"
]
},
- "execution_count": 22,
+ "execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
@@ -1903,7 +2165,7 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
@@ -1936,7 +2198,7 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 28,
"metadata": {},
"outputs": [
{
@@ -1965,6 +2227,7 @@
" target | \n",
" target.source | \n",
" DISGENET_diseases | \n",
+ " literature_based_info | \n",
" OpenTargets_gene_compounds | \n",
" MINERVA | \n",
" WikiPathways | \n",
@@ -1981,6 +2244,7 @@
" ENSG00000152592 | \n",
" Ensembl | \n",
" [{'disease_name': 'Hypophosphatemic Rickets', ... | \n",
+ " {'disease_name': 'Post-COVID-19', 'id': 'C0000... | \n",
" [{'chembl_id': nan, 'drugbank_id': nan, 'compo... | \n",
" [{'pathway_id': nan, 'pathway_label': nan, 'pa... | \n",
" [{'pathway_id': 'WP3971', 'pathway_label': 'OS... | \n",
@@ -1995,6 +2259,7 @@
" ENSG00000175535 | \n",
" Ensembl | \n",
" [{'disease_name': 'Pancreatic Lipase Deficienc... | \n",
+ " [{'disease_name': nan, 'id': nan, 'source': nan}] | \n",
" [{'chembl_id': 'CHEMBL175247', 'drugbank_id': ... | \n",
" [{'pathway_id': nan, 'pathway_label': nan, 'pa... | \n",
" [{'pathway_id': nan, 'pathway_label': nan, 'pa... | \n",
@@ -2009,6 +2274,7 @@
" ENSG00000259435 | \n",
" Ensembl | \n",
" [{'disease_name': nan, 'HPO': nan, 'NCI': nan,... | \n",
+ " [{'disease_name': nan, 'id': nan, 'source': nan}] | \n",
" [{'chembl_id': nan, 'drugbank_id': nan, 'compo... | \n",
" [{'pathway_id': nan, 'pathway_label': nan, 'pa... | \n",
" [{'pathway_id': nan, 'pathway_label': nan, 'pa... | \n",
@@ -2023,6 +2289,7 @@
" ENSG00000268104 | \n",
" Ensembl | \n",
" [{'disease_name': 'Cystic Fibrosis', 'HPO': ''... | \n",
+ " [{'disease_name': nan, 'id': nan, 'source': nan}] | \n",
" [{'chembl_id': nan, 'drugbank_id': nan, 'compo... | \n",
" [{'pathway_id': nan, 'pathway_label': nan, 'pa... | \n",
" [{'pathway_id': 'WP2882', 'pathway_label': 'Nu... | \n",
@@ -2047,6 +2314,12 @@
"2 [{'disease_name': nan, 'HPO': nan, 'NCI': nan,... \n",
"3 [{'disease_name': 'Cystic Fibrosis', 'HPO': ''... \n",
"\n",
+ " literature_based_info \\\n",
+ "0 {'disease_name': 'Post-COVID-19', 'id': 'C0000... \n",
+ "1 [{'disease_name': nan, 'id': nan, 'source': nan}] \n",
+ "2 [{'disease_name': nan, 'id': nan, 'source': nan}] \n",
+ "3 [{'disease_name': nan, 'id': nan, 'source': nan}] \n",
+ "\n",
" OpenTargets_gene_compounds \\\n",
"0 [{'chembl_id': nan, 'drugbank_id': nan, 'compo... \n",
"1 [{'chembl_id': 'CHEMBL175247', 'drugbank_id': ... \n",
@@ -2084,7 +2357,7 @@
"3 [{'stringdb_link_to': 'SLC7A11', 'Ensembl': 'E... "
]
},
- "execution_count": 24,
+ "execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
@@ -2095,7 +2368,27 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'disease_name': 'Post-COVID-19', 'id': 'C00000', 'source': 'PMID: 37675861'}"
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "combined_df[LITERATURE_DISEASE_COL][0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
"metadata": {},
"outputs": [
{
@@ -2247,7 +2540,7 @@
" 'request_string': 'LOC729609\\tH\\nLOC105374060\\tH\\nDMP1\\tH\\nPNLIP\\tH\\nOR4N3P\\tH\\nSLC6A14\\tH\\nLOC101927239\\tH\\nDEFB105A\\tH\\nDEFB105B\\tH\\nGSTTP1\\tH\\nNEUROD1\\tH\\nRND1\\tH\\nVN1R10P\\tH\\nLOC440446\\tH\\nLOC152225\\tH\\nLOC101929341\\tH\\nPGLYRP3\\tH\\nLINC01533\\tH\\nLINC01090\\tH\\nSPEM1\\tH\\nC16orf82\\tH\\nMIR4432HG\\tH\\nLINC01169\\tH\\nFAM71A\\tH\\nRNASE10\\tH\\nKLF17\\tH\\nC9\\tH\\nARC\\tH\\nMYL10\\tH\\nGCM1\\tH\\nAIPL1\\tH\\nHSPA6\\tH\\nLOC101929124\\tH\\nC7orf65\\tH\\nSLC2A14\\tH\\nPNLIPRP2\\tH\\nNPAS4\\tH\\nLOC101060498\\tH\\nPROP1\\tH\\nELAVL3\\tH\\nLOC105747689\\tH\\nTNF\\tH\\nADAMTS4\\tH\\nPCDH10\\tH\\nLOC101927274\\tH\\nNR4A2\\tH\\nLOC102724612\\tH\\nCEACAM22P\\tH\\nSNAI1\\tH\\nSLC2A3\\tH\\nDLX3\\tH\\nID2\\tH\\nLOC151475\\tH\\nATF3\\tH\\nNKAIN4\\tH\\nASAP1-IT2\\tH\\nNOXRED1\\tH\\nDNM1P41\\tH\\nSLC7A11\\tH\\nC10orf82\\tH\\nULBP2\\tH\\nTPTE2P6\\tH\\nNR4A3\\tH\\nLOC399715\\tH\\nCNTN3\\tH\\nGEM\\tH\\nHSPA7\\tH\\nNCMAP\\tH\\nPNP\\tH\\nPLK2\\tH\\nATP2C2\\tH\\nTNFRSF10D\\tH\\nULBP3\\tH\\nHSPA5\\tH\\nEFHB\\tH\\nHSD17B13\\tH\\nWNK3\\tH\\nLINC01535\\tH\\nELL2\\tH\\nRND3\\tH\\nDUSP5\\tH\\nNRXN3\\tH\\nIPCEF1\\tH\\nZNF492\\tH\\nSDR16C5\\tH\\nCENPL\\tH\\nSOX11\\tH\\nMAFF\\tH\\nPRG4\\tH\\nPCDH17\\tH\\nCDKN1A\\tH\\nPELI1\\tH\\nTMEM169\\tH\\nTMEM236\\tH\\nEFNA5\\tH\\nGCH1\\tH\\nANGPTL4\\tH\\nMAP1LC3C\\tH\\nCHL1\\tH\\nMPZ\\tH\\nSERPINE1\\tH\\nSLC2A1\\tH\\nLRRC16A\\tH\\nFRZB\\tH\\nGLIS3\\tH\\nTIAM1\\tH\\nSRGAP1\\tH\\nSH2D4A\\tH\\nMYEF2\\tH\\nNT5E\\tH\\nVGLL3\\tH\\nPRTG\\tH\\nDPP4\\tH\\nKLF11\\tH\\nTAF13\\tH\\nSTRADB\\tH\\nPOMP\\tH\\nLAMTOR5\\tH\\nCCDC69\\tH\\nZNF32\\tH\\nIQSEC2\\tH\\nAPIP\\tH\\nGDF9\\tH\\nSCUBE2\\tH\\nC20orf24\\tH\\nZSWIM7\\tH\\nTIMM8B\\tH\\nLOC102724532\\tH\\nPRR16\\tH\\nAHRR\\tH\\nLEFTY2\\tH\\nIRX3\\tH\\nVMO1\\tH\\nPVALB\\tH\\nMT1DP\\tH\\nCALML5\\tH\\nLOC101929116\\tH\\nLOC101929694\\tH\\nLINC01205\\tH\\nLINC01241\\tH\\nTMPRSS11A\\tH\\nLOC101928942\\tH\\nLOC100507461\\tH\\nLINC01565\\tH\\nLOC101928358\\tH\\nSCGB1D4\\tH\\nTTR\\tH\\nLI
NC01284\\tH\\nSSX8\\tH\\nTMEM225\\tH\\nNCRNA00250\\tH\\nOR13D1\\tH\\nLINC01192\\tH\\nCALCB\\tH\\nLINC00411\\tH\\nLINC01227\\tH\\nMIR5689HG\\tH\\nLINC00615\\tH\\nGHSR\\tH\\nLOC105375556\\tH\\nCT45A5\\tH\\nLOC646029\\tH\\nZFP42\\tH\\nCT45A9\\tH\\nFLJ46066\\tH\\nCGA\\tH\\nLOC285692\\tH\\nLOC105369509\\tH\\nCLEC1B\\tH\\nHIST1H4A\\tH\\nDSCAM-IT1\\tH\\nCT45A2\\tH\\nCT45A8\\tH\\nLINC00928\\tH\\nBDKRB1\\tH\\nLOC105370586\\tH\\nTRIM51\\tH\\nLOC101927480\\tH\\nLINC01568\\tH\\nCASC17\\tH\\nLOC101929631\\tH\\nLINC01233\\tH\\nLOC101927948\\tH\\nOR13C5\\tH\\nSSX2\\tH\\nSSX2B\\tH\\nCACNA1C-IT3\\tH\\nLOC100500773\\tH\\nSPATA3\\tH\\nLOC101927374\\tH\\nFBXO47\\tH\\nLINC01493\\tH\\nLOC105369431\\tH\\nLOC105376468\\tH\\nOR5W2\\tH\\nREG4\\tH\\nCD5L\\tH\\nLINC01514\\tH\\nLOC105376331\\tH\\nLOC102723557\\tH\\nPISRT1\\tH\\nHIGD2B\\tH\\nPAGE1\\tH\\nMMP26\\tH\\nLOC101928602\\tH\\nLOC102723895\\tH\\nACTR3BP2\\tH\\nLOC101927363\\tH\\nHNRNPKP3\\tH\\nLOC101927188\\tH\\nDISC1-IT1\\tH\\nLOC102467222\\tH\\nFAM9B\\tH\\nGLOD5\\tH\\nC2orf48\\tH\\nLOC100288254\\tH\\nFRG2\\tH\\nGACAT3\\tH\\nFOXCUT\\tH\\nLOC101927357\\tH\\nLOC101929260\\tH\\nOR13C2\\tH\\nLOC101929754\\tH\\nLOC146513\\tH\\nOR2AT4\\tH\\nPBOV1\\tH\\nTFDP3\\tH\\nLOC101929420\\tH\\nHRAT17\\tH\\nOR6W1P\\tH\\nSSX9\\tH\\nSSX3\\tH\\nHMGA1P7\\tH\\nLINC00374\\tH\\nLINC01288\\tH\\nLINC00836\\tH\\nLINC01320\\tH\\nTRIM64\\tH\\nSDR16C6P\\tH\\nLOC729966\\tH\\nLOC105375014\\tH\\nLINC01441\\tH\\nSCNN1G\\tH\\nC7orf69\\tH\\nOPN1LW\\tH\\nKRTAP5-4\\tH\\nANKUB1\\tH\\nTMEM213\\tH\\nTFAP2D\\tH\\nDANT2\\tH\\nLOC101927419\\tH\\nTXNDC2\\tH\\nOR11A1\\tH\\nLINC01317\\tH\\nLOC101805491\\tH\\nLOC286083\\tH\\nLOC101929563\\tH\\nLINC01216\\tH\\nLINC01163\\tH\\nLOC101927166\\tH\\nPHOX2B\\tH\\nLOC102467081\\tH\\nCT45A6\\tH\\nSND1-IT1\\tH\\nSSX4B\\tH\\nSSX4\\tH\\nSULT1E1\\tH\\nNOL4\\tH\\nZNF716\\tH\\nSUMO1P1\\tH\\nLOC440896\\tH\\nG6PC\\tH\\nMIR31HG\\tH\\nLOC101929259\\tH\\nHTR3C\\tH\\nLOC730100\\tH\\nMAB21L3\\tH\\nIL6\\tH\\nMIP\\tH\\nTRIM64B\\tH\\nCNGB1\\tH\\nLINC01531\\tH\
\nFOXL2NB\\tH\\nCXCL8\\tH\\nSLC15A1\\tH\\nGABRB1\\tH\\nLINC00862\\tH\\nZPBP2\\tH\\nLOC101928992\\tH\\nDPPA4\\tH\\nPOU2F3\\tH\\nNUTM1\\tH\\nLOC105372440\\tH\\nSELE\\tH\\nGPR143\\tH\\nFSTL5\\tH\\nAXDND1\\tH\\nLINC01619\\tH\\nSAMD7\\tH\\nLOC100131257\\tH\\nABCC13\\tH\\nC17orf78\\tH\\nCRX\\tH\\nC12orf42\\tH\\nFOXG1\\tH\\nHTR3A\\tH\\nLOC644189\\tH\\nPNPLA1\\tH\\nLINC00880\\tH\\nTOP1P2\\tH\\nCAGE1\\tH\\nLINC00670\\tH\\nLOC101928231\\tH\\nFAM138C\\tH\\nRTP1\\tH\\nLOC101928617\\tH\\nSPAG11B\\tH\\nLOC101927691\\tH\\nSLC35G3\\tH\\nBCO1\\tH\\nSLC35G4\\tH\\nLINC00636\\tH\\nEPGN\\tH\\nPTGS2\\tH\\nPGC\\tH\\nLOC102724467\\tH\\nLOC101928103\\tH\\nTRPC5OS\\tH\\nLOC338694\\tH\\nLINC01036\\tH\\nDLX6\\tH\\nLINC00426\\tH\\nCXorf65\\tH\\nHP09025\\tH\\nLOC389273\\tH\\nDPCR1\\tH\\nC5orf60\\tH\\nPCSK1\\tH\\nLOC494141\\tH\\nGADD45B\\tH\\nC1orf87\\tH\\nANKS4B\\tH\\nJAKMIP2\\tH\\nLINC00266-3\\tH\\nDRAIC\\tH\\nTCAM1P\\tH\\nMIR202HG\\tH\\nSPRR2F\\tH\\nFAM138B\\tH\\nLINC00907\\tH\\nCCL19\\tH\\nASCL1\\tH\\nNUP210L\\tH\\nLINC01170\\tH\\nLINC00264\\tH\\nANKRD7\\tH\\nLOC102724601\\tH\\nSH2D6\\tH\\nFAM138F\\tH\\nFAM138A\\tH\\nGYPE\\tH\\nDDX4\\tH\\nIL5RA\\tH\\nTNFRSF9\\tH\\nLINC00368\\tH\\nLGSN\\tH\\nNEK5\\tH\\nLOC105374177\\tH\\nGLB1L3\\tH\\nLOC105379511\\tH\\nMT1A\\tH\\nFAM138E\\tH\\nTEKT3\\tH\\nSV2C\\tH\\nNR2E3\\tH\\nPLA2G10\\tH\\nLOC101927770\\tH\\nENO4\\tH\\nSBK2\\tH\\nA2ML1\\tH\\nLOC101927257\\tH\\nSPRY4-IT1\\tH\\nDNAH8\\tH\\nAK7\\tH\\nASXL3\\tH\\nTEX38\\tH\\nDNM1P35\\tH\\nCCL26\\tH\\nPPP3R2\\tH\\nCTSLP2\\tH\\nACBD7\\tH\\nSOX2-OT\\tH\\nSTC1\\tH\\nLOC284865\\tH\\nFDPSP2\\tH\\nMARVELD2\\tH\\nCDKL2\\tH\\nDCX\\tH\\nSHISA9\\tH\\nC4orf26\\tH\\nDNAH5\\tH\\nCD3G\\tH\\nTTC23L\\tH\\nPDE6A\\tH\\nAPOBEC3H\\tH\\nLINC00311\\tH\\nCXCL2\\tH\\nLINC00632\\tH\\nSALL4\\tH\\nLOC105372582\\tH\\nFAM106CP\\tH\\nRASD1\\tH\\nCACNA1F\\tH\\nELAVL2\\tH\\nKIAA0087\\tH\\nGIPR\\tH\\nCIDEA\\tH\\nBCL11B\\tH\\nTNFRSF11B\\tH\\nCA13\\tH\\nANKRD20A9P\\tH\\nFAM106B\\tH\\nSEMA3E\\tH\\nGPRC5A\\tH\\nLOC285819\\tH\\nLOC730101\\tH\\nIL1RL1
\\tH\\nRGS2\\tH\\nRYBP\\tH\\nC3orf52\\tH\\nHOOK1\\tH\\nPCDH9\\tH\\nCDH19\\tH\\nPGA4\\tH\\nSTARD4\\tH\\nCYP2B7P\\tH\\nTFPI2\\tH\\nPDK4\\tH\\nPGA5\\tH\\nKCNAB3\\tH\\nLINC00641\\tH\\nLOC102724571\\tH\\nSEZ6L\\tH\\nTNFSF9\\tH\\nZNF483\\tH\\nM1AP\\tH\\nFAAP24\\tH\\nKLHL15\\tH\\nCHD1\\tH\\nAP1S3\\tH\\nCDS1\\tH\\nCRTAC1\\tH\\nGYG2\\tH\\nGRHL1\\tH\\nFSIP1\\tH\\nSYT1\\tH\\nPLCXD3\\tH\\nLOC101928371\\tH\\nPEG10\\tH\\nMPZL3\\tH\\nZNF331\\tH\\nKCNQ1OT1\\tH\\nLOC388436\\tH\\nLOC79999\\tH\\nFAM106A\\tH\\nRPS6KA6\\tH\\nBCL2L15\\tH\\nTBX5\\tH\\nEMP1\\tH\\nPPP2R2B\\tH\\nTACR1\\tH\\nSLC7A10\\tH\\nELOVL6\\tH\\nATP1B3\\tH\\nSEMA4A\\tH\\nCEP152\\tH\\nLINC01296\\tH\\nNRXN1\\tH\\nADGRG2\\tH\\nCLDN1\\tH\\nZSWIM6\\tH\\nWNT3\\tH\\nCCDC170\\tH\\nTHBS1\\tH\\nSLC35F2\\tH\\nZC3H12B\\tH\\nPLIN1\\tH\\nLOC401052\\tH\\nCATSPERG\\tH\\nIFRD1\\tH\\nGAS2L3\\tH\\nAPOBEC3D\\tH\\nPOU2F2\\tH\\nERRFI1\\tH\\nARSJ\\tH\\nFOXC1\\tH\\nPRDM1\\tH\\nRASGRP1\\tH\\nKIAA1683\\tH\\nPRELP\\tH\\nTIPARP\\tH\\nZC3H12A\\tH\\nSGIP1\\tH\\nPDE8B\\tH\\nGFPT2\\tH\\nCABP4\\tH\\nRAD51B\\tH\\nMICB\\tH\\nEIF4A3\\tH\\nFAM72C\\tH\\nC7\\tH\\nQPCT\\tH\\nMAP3K8\\tH\\nTUFT1\\tH\\nDUXAP10\\tH\\nSHROOM3\\tH\\nZC3HAV1\\tH\\nS1PR2\\tH\\nFAM122C\\tH\\nHRH1\\tH\\nUGCG\\tH\\nSOX9\\tH\\nLYVE1\\tH\\nBCL2L11\\tH\\nEIF2AK3\\tH\\nC11orf63\\tH\\nSERPINB8\\tH\\nLEPR\\tH\\nCACNB2\\tH\\nCACNA2D4\\tH\\nNR2F1\\tH\\nCLCF1\\tH\\nPSD3\\tH\\nADNP2\\tH\\nDYNC2H1\\tH\\nOR2A20P\\tH\\nSYT17\\tH\\nVASH2\\tH\\nTMEM2\\tH\\nOR2A9P\\tH\\nUSP32P2\\tH\\nEDIL3\\tH\\nLOX\\tH\\nMXD1\\tH\\nNHSL1\\tH\\nDLC1\\tH\\nCYBB\\tH\\nETV5\\tH\\nCEP126\\tH\\nPTPRF\\tH\\nCOCH\\tH\\nSCRN1\\tH\\nPPM1D\\tH\\nLILRB4\\tH\\nMFSD4A\\tH\\nCCDC144B\\tH\\nPXDNL\\tH\\nAHR\\tH\\nTRIM14\\tH\\nFRMD4B\\tH\\nCD84\\tH\\nTIAM2\\tH\\nADAMTS5\\tH\\nXYLT1\\tH\\nMYOF\\tH\\nSLC7A1\\tH\\nSMG1P3\\tH\\nUGDH\\tH\\nPMP22\\tH\\nAMPH\\tH\\nNPIPB5\\tH\\nNT5DC3\\tH\\nUBE2D2\\tH\\nPIGX\\tH\\nTTC1\\tH\\nSRP14\\tH\\nGKAP1\\tH\\nFIBP\\tH\\nMED11\\tH\\nVTI1B\\tH\\nATPAF1\\tH\\nDNAJC19\\tH\\nMRPL24\\tH\\nTRIM16L\\tH\\nPOLR2F\
\tH\\nGCSH\\tH\\nTMEM147\\tH\\nLSM10\\tH\\nMRPL40\\tH\\nC11orf74\\tH\\nSERF2-C15ORF63\\tH\\nNDUFAF2\\tH\\nUBE3D\\tH\\nMALSU1\\tH\\nCOA4\\tH\\nELP6\\tH\\nMTX2\\tH\\nCMC4\\tH\\nMON1A\\tH\\nCABP7\\tH\\nMID1IP1\\tH\\nCOA6\\tH\\nKIF22\\tH\\nTSEN15\\tH\\nNDFIP2\\tH\\nHYPK\\tH\\nZCRB1\\tH\\nPARK7\\tH\\nCOX16\\tH\\nGTF3C6\\tH\\nMINOS1\\tH\\nMRPS15\\tH\\nSTOML2\\tH\\nKCNS3\\tH\\nCACNA2D3\\tH\\nCTNNBIP1\\tH\\nC7orf55\\tH\\nCOPS5\\tH\\nCHCHD5\\tH\\nYBX3P1\\tH\\nSPAG7\\tH\\nNDUFS3\\tH\\nTPI1\\tH\\nPET100\\tH\\nST3GAL2\\tH\\nMRPL21\\tH\\nTP53TG1\\tH\\nCDKN2AIPNL\\tH\\nOIP5\\tH\\nRPS20\\tH\\nATP5E\\tH\\nCBWD2\\tH\\nCDK5\\tH\\nTOMM5\\tH\\nPRR34\\tH\\nHINT1\\tH\\nBAD\\tH\\nATP5L\\tH\\nSFXN5\\tH\\nAAMDC\\tH\\nMRPL51\\tH\\nKIAA0930\\tH\\nVAMP5\\tH\\nSEPW1\\tH\\nNDUFA6\\tH\\nSLIRP\\tH\\nSHISA2\\tH\\nNUDT2\\tH\\nCOX5B\\tH\\nSNRPN\\tH\\nSNURF\\tH\\nAURKA\\tH\\nCBWD1\\tH\\nNDUFB2\\tH\\nNAA38\\tH\\nCKM\\tH\\nGPD1\\tH\\nRPS29\\tH\\nDHRS4L1\\tH\\nMRPL33\\tH\\nLOC100507291\\tH\\nATP23\\tH\\nUQCRQ\\tH\\nNDUFC2\\tH\\nBOLA3\\tH\\nTCEB2\\tH\\nCOX7A1\\tH\\nDHRS4\\tH\\nCOX6C\\tH\\nFHL2\\tH\\nSLN\\tH\\nNDUFA1\\tH\\nRPL21P28\\tH\\nRPL21\\tH\\nNDUFC2-KCTD14\\tH\\nATP5I\\tH\\nUQCC2\\tH\\nLOC101929231\\tH\\nDBNDD1\\tH\\nNDUFB9\\tH\\nLAMB3\\tH\\nCSF3R\\tH\\nUSMG5\\tH\\nDHRS4L2\\tH\\nSERPINA1\\tH\\nC1orf53\\tH\\nGLT1D1\\tH\\nGREM2\\tH\\nUQCRBP1\\tH\\nFAM24B\\tH\\nS100A8\\tH\\nCDH22\\tH\\nLEFTY1\\tH\\nC3orf14\\tH\\nLINC01291\\tH\\nTPI1P2\\tH\\nCHAF1B\\tH\\nCENPE\\tH\\nE2F2\\tH\\nOSMR\\tH\\nNDUFC1\\tH\\nGP9\\tH\\nCDON\\tH\\nPOU3F3\\tH\\nLINC01224\\tH\\nOR7G1\\tH\\nZNF735\\tH\\nRPL23AP53\\tH\\nSAMD12\\tH\\nPAMR1\\tH\\nHIST3H2A\\tH\\nLOC101927798\\tH\\nFMOD\\tH\\nOR8S1\\tH\\nKLHL11\\tH\\nLOC105375429\\tH\\nLINC01122\\tH\\nTMCO2\\tH\\nDNAH12\\tH\\nKLF4\\tH\\nCHRM4\\tH\\nLOC101928505\\tH\\nADAMTS1\\tH\\nBEX2\\tH\\nMCTP1\\tH\\nHSD3BP4\\tH\\nLINC01053\\tH\\nELK2AP\\tH\\nLOC105377458\\tH\\nFAM71E2\\tH\\nHAO1\\tH\\nCD68\\tH\\nLOC101928728\\tH\\nSYT15\\tH\\nBAGE\\tH\\nBPIFC\\tH\\nRAET1K\\tH\\nTMPRSS11BNL\\tH\\nTOMM
7\\tH\\nHESX1\\tH\\nLRRC72\\tH\\nTUSC5\\tH\\nMUC13\\tH\\nLOC101929227\\tH\\nEDA2R\\tH\\nTM2D1\\tH\\nBCAT1\\tH\\nF13B\\tH\\nLINC00958\\tH\\nRFX4\\tH\\nBRD2\\tH\\nSCN3B\\tH\\nNANOS1\\tH\\nLINC01252\\tH\\nPHLDA2\\tH\\nSNAI3\\tH\\nLOC100506274\\tH\\nLINC01021\\tH\\nCHI3L1\\tH\\nTIMM10\\tH\\nKRTAP5-2\\tH\\nLY6G6C\\tH\\nLOC101927476\\tH\\nZNF169\\tH\\nTINCR\\tH\\nUBL5\\tH\\nLINC01551\\tH\\nFIRRE\\tH\\nRPS28\\tH\\nCYP2G1P\\tH\\nCASC21\\tH\\nWDR76\\tH\\nAGBL4-IT1\\tH\\nLINC01483\\tH\\nYEATS4\\tH\\nNUGGC\\tH\\nAPOBEC1\\tH\\nZAN\\tH\\nCNNM1\\tH\\nTMC1\\tH\\nAPOPT1\\tH\\nNT5M\\tH\\nLINC00877\\tH\\nLOC100133050\\tH\\nMRPL53\\tH\\nCBWD3\\tH\\nJMJD1C\\tH\\nNDUFA11\\tH\\nPLA2G2A\\tH\\nARRDC5\\tH\\nENPP1\\tH\\nNDUFB1\\tH\\nTSHZ2\\tH\\nCRIP3\\tH\\nSMIM4\\tH\\nNANOG\\tH\\nFBXO36\\tH\\nDGCR6L\\tH\\nFAM138D\\tH\\nARAP2\\tH\\nBMP6\\tH\\nMRPL20\\tH\\nMRPS18C\\tH\\nTGIF2-C20orf24\\tH\\nTPM1\\tH\\nSCML4\\tH\\nHRASLS\\tH\\nLOC105379450\\tH\\nNHS\\tH\\nLINC00888\\tH\\nLUADT1\\tH\\nTHBS2\\tH\\nSFTPB\\tH\\nSCN8A\\tH\\nCBWD6\\tH\\nSLC24A4\\tH\\nSRPX2\\tH\\nLCE3D\\tH\\nLCN12\\tH\\nGATA2\\tH\\nLINC00578\\tH\\nLOC101928449\\tH\\nGYPC\\tH\\nPDCL2\\tH\\nCHCHD3\\tH\\nGHET1\\tH\\nLOC101927284\\tH\\nC19orf35\\tH\\nPARP11\\tH\\nLOC100268168\\tH\\nANKRD45\\tH\\nCT45A3\\tH\\nAZGP1\\tH\\nARPC2\\tH\\nLINC01516\\tH\\nPTGER3\\tH\\nUROS\\tH\\nLOC101928887\\tH\\nFCGR1CP\\tH\\nLOC105375396\\tH\\nLOC727924\\tH\\nST20-MTHFS\\tH\\nTNIP3\\tH\\nTDGF1P3\\tH\\nCCL28\\tH\\nGALNT15\\tH\\nNME9\\tH\\nRSPH14\\tH\\nLINC00608\\tH\\nPCDH8\\tH\\nSHISA4\\tH\\nLVCAT5\\tH\\nDCUN1D3\\tH\\nLOC401463\\tH\\nLOC105375483\\tH\\nMRPL15\\tH\\nHS3ST2\\tH\\nC1orf194\\tH\\nRAB3B\\tH\\nTMEM251\\tH\\nLINC00152\\tH\\nLINC00102\\tH\\nCORO2B\\tH\\nBSPRY\\tH\\nCCR7\\tH\\nGLI3\\tH\\nAPOL4\\tH\\nKERA\\tH\\nGAMT\\tH\\nRBP4\\tH\\nLMO1\\tH\\nSNHG12\\tH\\nLINC01410\\tH\\nZNF280C\\tH\\nCCDC144A\\tH\\nSNRNP27\\tH\\nNDUFA3\\tH\\nSKIDA1\\tH\\nFZD5\\tH\\nRUNDC3B\\tH\\nSHFM1\\tH\\nZMAT5\\tH\\nGGT7\\tH\\nTXLNG\\tH\\nSMG1P1\\tH\\nMMADHC\\tH\\nKPNA2\\tH\\nPAM16\
\tH\\nLOC101929697\\tH\\nCXCL13\\tH\\nIMPA2\\tH\\nPRKAG2\\tH\\nMEX3B\\tH\\nNCCRP1\\tH\\nMAFA\\tH\\nHIST1H3J\\tH\\nLDLR\\tH\\nKANK4\\tH\\nSHC4\\tH\\nMACROD1\\tH\\nTAC3\\tH\\nNKX2-5\\tH\\nCOX8A\\tH\\nCREB5\\tH\\nTIMM17B\\tH\\nCBWD5\\tH\\nMTFR2\\tH\\nGSTTP2\\tH\\nLINC01504\\tH\\nEMC4\\tH\\nLOC101928272\\tH\\nCWH43\\tH\\nAPOC4\\tH\\nCCND2\\tH\\nSDHAF4\\tH\\nC2orf91\\tH\\nMYCNOS\\tH\\nZNF80\\tH\\nSIK2\\tH\\nMRPL52\\tH\\nBAK1\\tH\\nEZH2\\tH\\nABCC6P1\\tH\\nHIST1H2BO\\tH\\nNRG1-IT1\\tH\\nWWC1\\tH\\nFAM183A\\tH\\nPABPC1L\\tH\\nTPTE\\tH\\nBRS3\\tH\\nPCDH19\\tH\\nAKR1D1\\tH\\nSLC4A8\\tH\\nLOC105377651\\tH\\nLDHA\\tH\\nRPGRIP1\\tH\\nPPP1R1B\\tH\\nATP5EP2\\tH\\nCACYBP\\tH\\nCHURC1-FNTB\\tH\\nBARX2\\tH\\nHELB\\tH\\nCTCFL\\tH\\nPTPN13\\tH\\nPGR\\tH\\nTMEM261\\tH\\nTRIM49B\\tH\\nMYLPF\\tH\\nLOC100131047\\tH\\nPAPPA\\tH\\nPGM2\\tH\\nMRC1\\tH\\nSNX29P2\\tH\\nLOC101929159\\tH\\nNAP1L3\\tH\\nHILPDA\\tH\\nEFNA2\\tH\\nTMEM35\\tH\\nLOC101243545\\tH\\nLOC101927829\\tH\\nHEPHL1\\tH\\nACER1\\tH\\nLYPD4\\tH\\nLOC101928510\\tH\\nLOC101929577\\tH\\nRELL1\\tH\\nSLC20A1\\tH\\nSSNA1\\tH\\nATP5G1\\tH\\nLRIT2\\tH\\nGDF6\\tH\\nNDUFA13\\tH\\nFAM227A\\tH\\nLOC101929431\\tH\\nGAPDH\\tH\\nSOAT1\\tH\\nPWRN2\\tH\\nLINC00173\\tH\\nFOXL2\\tH\\nUQCRHL\\tH\\nLINC00906\\tH\\nCA5A\\tH\\nAPOBEC2\\tH\\nCT45A1\\tH\\nPSMC3\\tH\\nPART1\\tH\\nLINC00305\\tH\\nLOC400655\\tH\\nSYT11\\tH\\nLINC01361\\tH\\nANGPTL7\\tH\\nMPC2\\tH\\nLGALS9B\\tH\\nLINC01276\\tH\\nRIPK2\\tH\\nHEPACAM\\tH\\nDKFZp779M0652\\tH\\nSOX4\\tH\\nSPATA21\\tH\\nEFCAB5\\tH\\nNDUFB5\\tH\\nTRAF3IP2\\tH\\nTRAPPC3\\tH\\nGADD45G\\tH\\nCXXC4\\tH\\nLINC00676\\tH\\nSOX1\\tH\\nC15orf61\\tH\\nHIST1H2BK\\tH\\nHIST1H2AC\\tH\\nLOC284950\\tH\\nTMEM266\\tH\\nMMP19\\tH\\nPLAUR\\tH\\nC20orf96\\tH\\nSLC9C2\\tH\\nLOC101060524\\tH\\nDRD5P2\\tH\\nMRPL11\\tH\\nAPOF\\tH\\nLRRC23\\tH\\nECT2L\\tH\\nNMNAT1\\tH\\nCCDC144CP\\tH\\nLOC101928539\\tH\\nRNLS\\tH\\nLOC105372179\\tH\\nMS4A10\\tH\\nTRAPPC2B\\tH\\nCHCHD2\\tH\\nLOC102724434\\tH\\nC7orf31\\tH\\nMIEN1\\tH\\nLOC100506444\\tH\\n
PPP1R36\\tH\\nCCL2\\tH\\nSLC19A3\\tH\\nENDOU\\tH\\nLOC440028\\tH\\nPSMB10\\tH\\nFAM72D\\tH\\nGNG4\\tH\\nFOXO1\\tH\\nATP6V0A4\\tH\\nSKA1\\tH\\nPPP1R15B\\tH\\nTRPM5\\tH\\nANKRD33B\\tH\\nC1orf210\\tH\\nLOC101927058\\tH\\nMCF2\\tH\\nGALNT16\\tH\\nFRMD5\\tH\\nPCK1\\tH\\nPALM2\\tH\\nFIS1\\tH\\nKIAA0040\\tH\\nCIB2\\tH\\nNHEG1\\tH\\nCLDN11\\tH\\nPTGER4\\tH\\nCD83\\tH\\nNENF\\tH\\nLOC101928107\\tH\\nGLB1L2\\tH\\nLOC100505918\\tH\\nC2orf66\\tH\\nS100P\\tH\\nMBD3L3\\tH\\nLOC729970\\tH\\nREPS2\\tH\\nSNRPD2\\tH\\nCYP27A1\\tH\\nCDC20B\\tH\\nTAT\\tH\\nMDH1\\tH\\nCOX4I1\\tH\\nNHLH1\\tH\\nTMIGD1\\tH\\nTSACC\\tH\\nLOC101927596\\tH\\nWBSCR17\\tH\\nCYP1A2\\tH\\nPLK4\\tH\\nPSMD14\\tH\\nLOC105373782\\tH\\nMRPS28\\tH\\nARMC9\\tH\\nLINC01213\\tH\\nTGFBR3\\tH\\nARMCX4\\tH\\nLINC00243\\tH\\nDSC2\\tH\\nLOC105371335\\tH\\nLOC101927780\\tH\\nCXADR\\tH\\nDSG2\\tH\\nLPAR4\\tH\\nDAW1\\tH\\nBTG1\\tH\\nGLRX3\\tH\\nDUXAP8\\tH\\nMRPL34\\tH\\nSAT1\\tH\\nDHRS7C\\tH\\nOLR1\\tH\\nTM4SF1\\tH\\nSEMA3D\\tH\\nLOC101927650\\tH\\nLINC00668\\tH\\nRGS4\\tH\\nLOC644838\\tH\\nUBB\\tH\\nLOC101928514\\tH\\nELF4\\tH\\nCH25H\\tH\\nNCOA7\\tH\\nLINC01387\\tH\\nMSR1\\tH\\nNUTF2\\tH\\nZNF367\\tH\\nTSPAN5\\tH\\nATP5O\\tH\\nNKAIN3\\tH\\nCD44\\tH\\nFASN\\tH\\nMYBPC2\\tH\\nZNF611\\tH\\nLOC100287036\\tH\\nMTSS1L\\tH\\nGABRG2\\tH\\nZNF829\\tH\\nLOC100271832\\tH\\nUQCRH\\tH\\nPIGH\\tH\\nPOM121L8P\\tH\\nCTH\\tH\\nAK1\\tH\\nSLC7A14\\tH\\nFGF21\\tH\\nPAIP1\\tH\\nUBA3\\tH\\nMAPKAP1\\tH\\nZIM3\\tH\\nILDR1\\tH\\nFAHD1\\tH\\nMELK\\tH\\nTRIM29\\tH\\nNTM-IT\\tH\\nTPH1\\tH\\nSMIM10L1\\tH\\nCRYGB\\tH\\nSNAP91\\tH\\nNEURL1\\tH\\nLOC101929504\\tH\\nLOC102724053\\tH\\nLINC01268\\tH\\nFAM171B\\tH\\nFOSL1\\tH\\nC10orf126\\tH\\nLOC286059\\tH\\nLOC100506747\\tH\\nCXCR2\\tH\\nLINC00294\\tH\\nPPP1R7\\tH\\nTMA7\\tH\\nERC2-IT1\\tH\\nANTXR1\\tH\\nPRKACG\\tH\\nPIGR\\tH\\nTF\\tH\\nNME2\\tH\\nINE1\\tH\\nLCE3B\\tH\\nIMMP1L\\tH\\nLOC101927142\\tH\\nDNAJB1\\tH\\nVSTM1\\tH\\nLOC105372626\\tH\\nEPHA7\\tH\\nGUCY2F\\tH\\nANXA1\\tH\\nLOC101928973\\tH\\nLOC1027234
27\\tH\\nCD109\\tH\\nIER3\\tH\\nOVOL1\\tH\\nLOC101927630\\tH\\nRGS14\\tH\\nLOC100289333\\tH\\nMRGPRE\\tH\\nTRPC1\\tH\\nPDZK1\\tH\\nLOC285889\\tH\\nLOC100130899\\tH\\nLOC642929\\tH\\nGYPB\\tH\\nSF3B5\\tH\\nCRAT8\\tH\\nRDH14\\tH\\nIRGC\\tH\\nIGF2BP1\\tH\\nSep-14\\tH\\nCTD-2201E9.1\\tH\\nLOC100506085\\tH\\nCDH16\\tH\\nUGT8\\tH\\nCCL11\\tH\\nULK4P2\\tH\\nULK4P1\\tH\\nNDUFB10\\tH\\nLOC101927526\\tH\\nLOC440910\\tH\\nTLR6\\tH\\nZNF724P\\tH\\nTBX18\\tH\\nISCA2\\tH\\nINSC\\tH\\nISY1\\tH\\nTGIF2\\tH\\nIKBKB\\tH\\nXCL1\\tH\\nMID1\\tH\\nLOC100996251\\tH\\nSLC38A1\\tH\\nLOC105375401\\tH\\nLOC388692\\tH\\nLINC00710\\tH\\nOAZ1\\tH\\nTHSD7A\\tH\\nMAP6D1\\tH\\nLOC102723727\\tH\\nSHH\\tH\\nLOC339666\\tH\\nGAB3\\tH\\nNSUN6\\tH\\nCGN\\tH\\nOR7E156P\\tH\\nNXF1\\tH\\nOLIG1\\tH\\nHCG2040054\\tH\\nC6orf203\\tH\\nLOC441454\\tH\\nTRPM3\\tH\\nCXCL1\\tH\\nCMC2\\tH\\nCYP27C1\\tH\\nCCL22\\tH\\nBAZ1A\\tH\\nBMS1P5\\tH\\nMS4A2\\tH\\nTCAF2\\tH\\nDCST2\\tH\\nCCEPR\\tH\\nDLEU7\\tH\\nSLC2A7\\tH\\nTEKT2\\tH\\nCRY1\\tH\\nLOC105370792\\tH\\nCT45A7\\tH\\nTPM2\\tH\\nNME1-NME2\\tH\\nCT45A10\\tH\\nSLC25A26\\tH\\nIER5L\\tH\\nLINC01111\\tH\\nLEP\\tH\\nFLVCR1\\tH\\nTES\\tH\\nPRELID3A\\tH\\nCLEC19A\\tH\\nITGAE\\tH\\nDNAJB13\\tH\\nABHD12B\\tH\\nNTRK3\\tH\\nBANCR\\tH\\nHTRA4\\tH\\nCYP2B6\\tH\\nSLC6A4\\tH\\nRPL37A\\tH\\nTRIM71\\tH\\nSNTN\\tH\\nSNHG6\\tH\\nLINC01563\\tH\\nRIMS2\\tH\\nDPM3\\tH\\nFAM46A\\tH\\nZBP1\\tH\\nSERF1B\\tH\\nSERF1A\\tH\\nPTGER4P2-CDK2AP2P2\\tH\\nGPBAR1\\tH\\nCYR61\\tH\\nMRPL37\\tH\\nBAGE3\\tH\\nBAGE2\\tH\\nELMO1\\tH\\nTROAP\\tH\\nTMEM217\\tH\\nTMPRSS11E\\tH\\nMYH1\\tH\\nLOC101929234\\tH\\nSARNP\\tH\\nCRAT37\\tH\\nBAGE5\\tH\\nBAGE4\\tH\\nLINC00844\\tH\\nSLX4IP\\tH\\nLOC101928008\\tH\\nB4GALT3\\tH\\nLINC01206\\tH\\nNDUFA7\\tH\\nCOX14\\tH\\nMORC1\\tH\\nARID5B\\tH\\nPNKD\\tH\\nBIRC3\\tH\\nBTBD6\\tH\\nLOC101928902\\tH\\nFAM71D\\tH\\nLINC01251\\tH\\nARL2\\tH\\nLINC01265\\tH\\nTMEM205\\tH\\nLOC101929125\\tH\\nHCG22\\tH\\nLOC102724708\\tH\\nPRKCG\\tH\\nLINC01481\\tH\\nZNF98\\tH\\nPSMA8\\tH\\nCD14\\tH\
\nPSMD4\\tH\\nAKR1C2\\tH\\nPSMB3\\tH\\nSMDT1\\tH\\nTCF7L1\\tH\\nMTCP1\\tH\\nHPSE\\tH\\nANGPTL5\\tH\\nFUNDC2P2\\tH\\nLINC00330\\tH\\nCACNG8\\tH\\nATRAID\\tH\\nPKHD1L1\\tH\\nHDAC11\\tH\\nC3orf18\\tH\\nSTX11\\tH\\nHIST2H2BA\\tH\\nTMTC4\\tH\\nLOC100506682\\tH\\nRPS14P3\\tH\\nELOVL7\\tH\\nTMEM156\\tH\\nBUB1B\\tH\\nLINC00477\\tH\\nMAP7D2\\tH\\nGPC6\\tH\\nPAQR5\\tH\\nPGAM2\\tH\\nPTS\\tH\\nS100A1\\tH\\nHEXIM2\\tH\\nOR4K2\\tH\\nS100G\\tH\\nATP5H\\tH\\nFKBP3\\tH\\nSCGB2A2\\tH\\nPLEKHH2\\tH\\nLOC102723322\\tH\\nACSM5\\tH\\nSFPQ\\tH\\nZNF358\\tH\\nGABRE\\tH\\nRRAGD\\tH\\nLMO7DN\\tH\\nNSMCE1\\tH\\nLINC00941\\tH\\nDAAM2\\tH\\nHPVC1\\tH\\nLINC00486\\tH\\nRPL26L1\\tH\\nLOC100287896\\tH\\nCASC6\\tH\\nREL\\tH\\nSPATA24\\tH\\nTMEM42\\tH\\nEFNB2\\tH\\nFNDC5\\tH\\nLKAAEAR1\\tH\\nCLDN4\\tH\\nTPTE2P1\\tH\\nSTEAP3\\tH\\nMLXIPL\\tH\\nCSF2\\tH\\nDYDC1\\tH\\nDPCD\\tH\\nABCB1\\tH\\nPRSS12\\tH\\nSDHB\\tH\\nTREML3P\\tH\\nLINC00911\\tH\\nFBXO25\\tH\\nLOC101928335\\tH\\nLNP1\\tH\\nLINC01138\\tH\\nLOC101928403\\tH\\nLOC101929565\\tH\\nCDCA8\\tH\\nLOC100505478\\tH\\nLY6K\\tH\\nINTS6L\\tH\\nBCAS1\\tH\\nLOC105376351\\tH\\nMRPL18\\tH\\nTRIM49\\tH\\nRUNX2\\tH\\nCITED2\\tH\\nLINC01436\\tH\\nABL2\\tH\\nUQCRFS1\\tH\\nOCLN\\tH\\nCCDC192\\tH\\nMERTK\\tH\\nSMKR1\\tH\\nCHCHD10\\tH\\nLOC100996634\\tH\\nTPI1P3\\tH\\nNTRK2\\tH\\nEMC6\\tH\\nLOC101928858\\tH\\nRARRES1\\tH\\nCLDN19\\tH\\nCLYBL\\tH\\nNDUFAF5\\tH\\nTIMM13\\tH\\nICAM1\\tH\\nRNF181\\tH\\nNCAPH\\tH\\nSAMM50\\tH\\nNDUFS2\\tH\\nPGA3\\tH\\nC4orf19\\tH\\nAIMP2\\tH\\nMARVELD3\\tH\\nLCE6A\\tH\\nRPS25\\tH\\nAP1B1P1\\tH\\nCOL12A1\\tH\\nATF4\\tH\\nGAP43\\tH\\nACKR2\\tH\\nSLMO2-ATP5E\\tH\\nARHGEF9-IT1\\tH\\nGTF3A\\tH\\nCDC26\\tH\\nTIMMDC1\\tH\\nLSM1\\tH\\nTRIM59\\tH\\nCDR2\\tH\\nCPT1A\\tH\\nGINS4\\tH\\nLOC102546299\\tH\\nTRH\\tH\\nLINC00942\\tH\\nARHGAP11A\\tH\\nNMBR\\tH\\nPRC1\\tH\\nSERF2\\tH\\nMC5R\\tH\\nCOX11\\tH\\nEFHC2\\tH\\nPLVAP\\tH\\nFCGR1A\\tH\\nGCG\\tH\\nOR2G3\\tH\\nSNAPIN\\tH\\nWBSCR28\\tH\\nPDCL3\\tH\\nFLJ40194\\tH\\nLOC407835\\tH\\nCT45A4\\tH\\nCCHCR1
\\tH\\nUCHL3\\tH\\nMEP1B\\tH\\nNPIPB6\\tH\\nLOC101926940\\tH\\nLINC00959\\tH\\nLINC01180\\tH\\nDNAJC5G\\tH\\nFZD10\\tH\\nNDUFB8\\tH\\nERCC1\\tH\\nLOC389641\\tH\\nRPS14\\tH\\nARPC5L\\tH\\nDOCK10\\tH\\nLOC101928809\\tH\\nPLEKHA5\\tH\\nLINC00449\\tH\\nTFAP2B\\tH\\nMIR503HG\\tH\\nXG\\tH\\nCXCL3\\tH\\nCSTL1\\tH\\nLOC101928161\\tH\\nCOX6B1\\tH\\nCA8\\tH\\nIL1R1\\tH\\nLINC00619\\tH\\nGAGE1\\tH\\nNDUFA4\\tH\\nLINC01549\\tH\\nCCL16\\tH\\nERN2\\tH\\nALLC\\tH\\nCCDC43\\tH\\nFAM81B\\tH\\nMT2A\\tH\\nS100B\\tH\\nZSCAN12\\tH\\nCABP5\\tH\\nVAV3\\tH\\nIKZF3\\tH\\nDEFB118\\tH\\nDGCR6\\tH\\nLOC105371795\\tH\\nSLC28A3\\tH\\nLOC100129518\\tH\\nZNF503\\tH\\nJTB\\tH\\nLY9\\tH\\nMGC27345\\tH\\nMX2\\tH\\nLOC400002\\tH\\nUGGT2\\tH\\nNDUFA2\\tH\\nMFAP5\\tH\\nITGAM\\tH\\nXKR4\\tH\\nLINC01030\\tH\\nEBAG9\\tH\\nMAGEB5\\tH\\nTMEM150A\\tH\\nLOC101927653\\tH\\nEMC7\\tH\\nSIK1\\tH\\nEMB\\tH\\nDUXA\\tH\\nMIR3663HG\\tH\\nSPATA42\\tH\\nTNFRSF12A\\tH\\nLOC100507195\\tH\\nFAM78A\\tH\\nTENM2\\tH\\nLOC102724428\\tH\\nTRABD2A\\tH\\nTPTE2P3\\tH\\nRASAL1\\tH\\nITPRIP\\tH\\nADGRG6\\tH\\nVSIG4\\tH\\nADRBK2\\tH\\nTRIM49C\\tH\\nHOXC5\\tH\\nCMAHP\\tH\\nRPSAP58\\tH\\nOR7G3\\tH\\nLOC100288069\\tH\\nKRT9\\tH\\nARL6IP1\\tH\\nLINC00635\\tH\\nGPC3\\tH\\nSNX21\\tH\\nRIN2\\tH\\nMYHAS\\tH\\nPOTEE\\tH\\nCLEC2A\\tH\\nATP1A3\\tH\\nLOC105371267\\tH\\nLINC00696\\tH\\nBEND2\\tH\\nSPECC1\\tH\\nECM1\\tH\\nTSPAN1\\tH\\nFAM86JP\\tH\\nP2RX7\\tH\\nTMEM106A\\tH\\nPTPRH\\tH\\nEIF3K\\tH\\nSYK\\tH\\nAGR3\\tH\\nLINC00396\\tH\\nMR1\\tH\\nSLC9A2\\tH\\nGSTZ1\\tH\\nDEFB1\\tH\\nLOC101928370\\tH\\nCALD1\\tH\\nLINC01351\\tH\\nBICD1\\tH\\nFAM231D\\tH\\nSFRP5\\tH\\nEFNA1\\tH\\nLOC101929054\\tH\\nMETTL21A\\tH\\nHOXB5\\tH\\nRYR2\\tH\\nTCEA3\\tH\\nGOLGA8F\\tH\\nARL6IP6\\tH\\nLOC105369891\\tH\\nFAM185A\\tH\\nCCDC124\\tH\\nLOC100499194\\tH\\nKDM6A\\tH\\nLONRF1\\tH\\nADRA2A\\tH\\nFAM210B\\tH\\nTRIM31\\tH\\nRAB39B\\tH\\nKIAA0513\\tH\\nIQUB\\tH\\nTLL1\\tH\\nLRRC15\\tH\\nLOC284294\\tH\\nNQO1\\tH\\nRMST\\tH\\nC12orf57\\tH\\nSIRT1\\tH\\nPDGFC\\tH\\nPPIAL4C\\t
H\\nPPIAL4A\\tH\\nC18orf61\\tH\\nLOC283194\\tH\\nRPS23\\tH\\nIFNLR1\\tH\\nGOLGA8G\\tH\\nLY6G6F\\tH\\nLINC00671\\tH\\nRPL23A\\tH\\nLOC101929726\\tH\\nOR10Q1\\tH\\nRNF7\\tH\\nSMCP\\tH\\nNCK2\\tH\\nRNF148\\tH\\nMIR17HG\\tH\\nLINC00479\\tH\\nLINC00551\\tH\\nSIRT4\\tH\\nHERC5\\tH\\nZNF738\\tH\\nLINC01209\\tH\\nTOB2P1\\tH\\nESPL1\\tH\\nLINC00116\\tH\\nHK1\\tH\\nLBP\\tH\\nLOC105369632\\tH\\nVIM\\tH\\nDSEL\\tH\\nPOTEJ\\tH\\nUSP44\\tH\\nLOC101927415\\tH\\nHSPH1\\tH\\nENPP7P13\\tH\\nTNFAIP3\\tH\\nBHLHE41\\tH\\nETV7\\tH\\nKCNQ4\\tH\\nLOC100287792\\tH\\nLOC101929511\\tH\\nMROH5\\tH\\nOAZ3\\tH\\nPPP1R15A\\tH\\nIDI2\\tH\\nCYB561A3\\tH\\nARMC4\\tH\\nBHMT2\\tH\\nNETO2\\tH\\nSUCNR1\\tH\\nSSU72\\tH\\nLOC399886\\tH\\nDISC1\\tH\\nSTAMBP\\tH\\nNLGN1\\tH\\nHAX1\\tH\\nTNRC18P1\\tH\\nAKR1B1\\tH\\nULK4P3\\tH\\nC1QTNF3\\tH\\nCT47A7\\tH\\nWBSCR22\\tH\\nHCAR1\\tH\\nRGL1\\tH\\nLINC01606\\tH\\nCLPS\\tH\\nDUPD1\\tH\\nSSX1\\tH\\nGSTK1\\tH\\nSPRY4\\tH\\nNUDCD2\\tH\\nRECK\\tH\\nNOL4L\\tH\\nPCBP4\\tH\\nCNTNAP2\\tH\\nKCNE1\\tH\\nLOC400541\\tH\\nLINC00261\\tH\\nC9orf173\\tH\\nMRPL48\\tH\\nPOM121L9P\\tH\\nMKRN2OS\\tH\\nRALY\\tH\\nESM1\\tH\\nEID1\\tH\\nNUDT6\\tH\\nHINT3\\tH\\nIPMK\\tH\\nC11orf98\\tH\\nCRLF1\\tH\\nCFL1P1\\tH\\nTMPRSS9\\tH\\nCHMP2A\\tH\\nOLFM1\\tH\\nZNF511\\tH\\nB3GNT7\\tH\\nSIK3\\tH\\nACER3\\tH\\nCIDEC\\tH\\nADGRD1\\tH\\nSPC25\\tH\\nLOC101926911\\tH\\nPELI3\\tH\\nEXT1\\tH\\nPCAT5\\tH\\nGDF15\\tH\\nMRPL47\\tH\\nPLSCR1\\tH\\nTOM1\\tH\\nC6\\tH\\nWDR87\\tH\\nFXYD5\\tH\\nCOBLL1\\tH\\nANGPT2\\tH\\nSRCIN1\\tH\\nSLC10A1\\tH\\nOAS1\\tH\\nMMP21\\tH\\nCOL19A1\\tH\\nGPR18\\tH\\nTMEM219\\tH\\nZNF296\\tH\\nUSP43\\tH\\nGOLGA2P9\\tH\\nRFX2\\tH\\nRAB27A\\tH\\nLOC102467217\\tH\\nMYH13\\tH\\nPHLPP2\\tH\\nLOC101928985\\tH\\nCDRT7\\tH\\nINTS6\\tH\\nHAS2\\tH\\nDZIP1\\tH\\nOR2V2\\tH\\nOR2H2\\tH\\nTSSC1\\tH\\nBOLA1\\tH\\nPABPC1P2\\tH\\nTMEM229A\\tH\\nATP8B1\\tH\\nLCNL1\\tH\\nDCDC5\\tH\\nSOD1\\tH\\nPAG1\\tH\\nCETN2\\tH\\nNCR1\\tH\\nTMEM100\\tH\\nURI1\\tH\\nTEKT4P2\\tH\\nPCAT1\\tH\\nSERTAD4\\tH\\nLINC00550\\tH\\nG
LB1L\\tH\\nUNG\\tH\\nAGMAT\\tH\\nLOC101928540\\tH\\nZNF681\\tH\\nLINC01456\\tH\\nFCGR2C\\tH\\nABCG2\\tH\\nANAPC11\\tH\\nLOC102800447\\tH\\nCYLC2\\tH\\nC6orf226\\tH\\nREM2\\tH\\nBMPR1B\\tH\\nBECN1\\tH\\nADM\\tH\\nPDPR\\tH\\nKDM8\\tH\\nHMBS\\tH\\nMYO1H\\tH\\nLINC00493\\tH\\nFGF14\\tH\\nEIF2AK1\\tH\\nLOC101928489\\tH\\nKCNK1\\tH\\nCKS2\\tH\\nLOC101928035\\tH\\nLINC01221\\tH\\nEREG\\tH\\nNDUFB11\\tH\\nNARF\\tH\\nZC3HC1\\tH\\nADGRE2\\tH\\nUFC1\\tH\\nHOMER1\\tH\\nHDDC2\\tH\\nHIST1H3A\\tH\\nTNNT3\\tH\\nZNF670-ZNF695\\tH\\nGSR\\tH\\nNDRG4\\tH\\nTERC\\tH\\nFANCB\\tH\\nFFAR4\\tH\\nMGAM2\\tH\\nLRRTM4\\tH\\nINHBA\\tH\\nLOC403312\\tH\\nKLLN\\tH\\nDZANK1\\tH\\nRGS9BP\\tH\\nRIIAD1\\tH\\nARL2-SNX15\\tH\\nPLAU\\tH\\nSPDYE8P\\tH\\nSLC25A19\\tH\\nBMS1P6\\tH\\nZFYVE19\\tH\\nCTAGE1\\tH\\nMTIF3\\tH\\nSPACA4\\tH\\nSIPA1L1\\tH\\nSLC2A10\\tH\\nPGK1\\tH\\nGIF\\tH\\nMYH8\\tH\\nLOC101928098\\tH\\nFRMD4A\\tH\\nLINC01397\\tH\\nLIPE\\tH\\nTRIM49D2\\tH\\nPGM1\\tH\\nHRH4\\tH\\nLOC646241\\tH\\nLOC101927587\\tH\\nCTD-2201I18.1\\tH\\nRAPGEF4\\tH\\nRUNX1\\tH\\nC5\\tH\\nTRIM49D1\\tH\\nLOC100508046\\tH\\nLOC101928885\\tH\\nUCHL1\\tH\\nR3HDM4\\tH\\nMAP9\\tH\\nMIF4GD\\tH\\nLOC100190986\\tH\\nCOQ2\\tH\\nKNTC1\\tH\\nSAXO1\\tH\\nLOC105369860\\tH\\nFPR1\\tH\\nGP6\\tH\\nEIF2S2\\tH\\nLINC00461\\tH\\nHIST1H2AH\\tH\\nDHRS7\\tH\\nCHST8\\tH\\nHAGH\\tH\\nC4orf3\\tH\\nNMUR2\\tH\\nAKR1C3\\tH\\nLRRC70\\tH\\nREXO2\\tH\\nPRH1-TAS2R14\\tH\\nSLC9A1\\tH\\nMNAT1\\tH\\nSLC37A4\\tH\\nMGC34796\\tH\\nHSPB9\\tH\\nCADM3\\tH\\nMYEOV2\\tH\\nKRTAP6-3\\tH\\nARNTL2\\tH\\nENPP2\\tH\\nCUBN\\tH\\nLOC339059\\tH\\nGSDMA\\tH\\nBTG3\\tH\\nSTBD1\\tH\\nNAV3\\tH\\nALDH1L2\\tH\\nZBTB21\\tH\\nSPATA5\\tH\\nMRPL57\\tH\\nCWC15\\tH\\nNOMO3\\tH\\nUBTD1\\tH\\nIFI30\\tH\\nFMNL2\\tH\\nPRMT3\\tH\\nLOC101927692\\tH\\nNTPCR\\tH\\nDHRS7B\\tH\\nTBCB\\tH\\nC3orf58\\tH\\nKRT222\\tH\\nWRB-SH3BGR\\tH\\nLOC101928580\\tH\\nRWDD1\\tH\\nNKIRAS1\\tH\\nABCA1\\tH\\nCASC20\\tH\\nRTN4IP1\\tH\\nSPATA6L\\tH\\nLUZP1\\tH\\nCARS2\\tH\\nC2orf61\\tH\\nLOC102467226\\tH\\nMIR3945HG\
\tH\\nFGF9\\tH\\nVRTN\\tH\\nPCDH18\\tH\\nPOLR3K\\tH\\nLINC00566\\tH\\nAOX1\\tH\\nPDLIM7\\tH\\nLOC102577426\\tH\\nUSE1\\tH\\nGINS2\\tH\\nRAPGEF2\\tH\\nLINC01492\\tH\\nTMEM70\\tH\\nCOX17\\tH\\nSRRM4\\tH\\nLOC101928295\\tH\\nISCA1\\tH\\nIL18R1\\tH\\nAPOC4-APOC2\\tH\\nMT1M\\tH\\nLMO2\\tH\\nSCN4B\\tH\\nRDH12\\tH\\nFEZF2\\tH\\nTMEM150B\\tH\\nCPS1\\tH\\nSLC35G2\\tH\\nTPM3\\tH\\nREG1A\\tH\\nLINC01133\\tH\\nAFAP1L2\\tH\\nPSENEN\\tH\\nFAM72A\\tH\\nLINC00467\\tH\\nHELLS\\tH\\nLINC00367\\tH\\nPLXNA4\\tH\\nC11orf73\\tH\\nKLF7\\tH\\nYBEY\\tH\\nOIT3\\tH\\nLOC101929681\\tH\\nPTPRD\\tH\\nLOC100422737\\tH\\nLINC01411\\tH\\nTSPAN17\\tH\\nUGT1A10\\tH\\nIFT22\\tH\\nRPS10P7\\tH\\nDBIL5P2\\tH\\nIFI44\\tH\\nBTK\\tH\\nMDP1\\tH\\nLOC284080\\tH\\nCYP2C18\\tH\\nFBXW12\\tH\\nCORO7-PAM16\\tH\\nTMEM14B\\tH\\nPOLQ\\tH\\nAFF4\\tH\\nLHFPL4\\tH\\nABTB2\\tH\\nNOMO1\\tH\\nFHDC1\\tH\\nTRIM38\\tH\\nCTSV\\tH\\nGATA3\\tH\\nLINCR-0002\\tH\\nCFAP20\\tH\\nNDUFB6\\tH\\nRASA4\\tH\\nLOC100288798\\tH\\nCFAP206\\tH\\nROR1\\tH\\nACOT13\\tH\\nLOC285626\\tH\\nBANF1\\tH\\nDCAF4L2\\tH\\nSH3BGR\\tH\\nOTOA\\tH\\nCD226\\tH\\nSLC29A4\\tH\\nRPL18\\tH\\nPRDX3\\tH\\nFGB\\tH\\nTEX14\\tH\\nFBN1\\tH\\nEPHA3\\tH\\n'}}]"
]
},
- "execution_count": 25,
+ "execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
@@ -2258,16 +2551,16 @@
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "(2421, 11)"
+ "(2421, 12)"
]
},
- "execution_count": 26,
+ "execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
@@ -2278,7 +2571,7 @@
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": 32,
"metadata": {},
"outputs": [
{
@@ -2307,6 +2600,7 @@
" target | \n",
" target.source | \n",
" DISGENET_diseases | \n",
+ " literature_based_info | \n",
" OpenTargets_gene_compounds | \n",
" MINERVA | \n",
" WikiPathways | \n",
@@ -2323,6 +2617,7 @@
" ENSG00000165672 | \n",
" Ensembl | \n",
" [{'disease_name': 'SPINOCEREBELLAR ATAXIA, AUT... | \n",
+ " [{'disease_name': nan, 'id': nan, 'source': nan}] | \n",
" [{'chembl_id': nan, 'drugbank_id': nan, 'compo... | \n",
" [{'pathway_id': 933.0, 'pathway_label': 'Elect... | \n",
" [{'pathway_id': nan, 'pathway_label': nan, 'pa... | \n",
@@ -2337,6 +2632,7 @@
" ENSG00000171564 | \n",
" Ensembl | \n",
" [{'disease_name': 'Cardiovascular Diseases', '... | \n",
+ " [{'disease_name': nan, 'id': nan, 'source': nan}] | \n",
" [{'chembl_id': 'CHEMBL2109072', 'drugbank_id':... | \n",
" [{'pathway_id': 951.0, 'pathway_label': 'Coagu... | \n",
" [{'pathway_id': 'WP5115', 'pathway_label': 'Ne... | \n",
@@ -2351,6 +2647,7 @@
" ENSG00000121101 | \n",
" Ensembl | \n",
" [{'disease_name': 'Non-obstructive azoospermia... | \n",
+ " [{'disease_name': nan, 'id': nan, 'source': nan}] | \n",
" [{'chembl_id': nan, 'drugbank_id': nan, 'compo... | \n",
" [{'pathway_id': nan, 'pathway_label': nan, 'pa... | \n",
" [{'pathway_id': nan, 'pathway_label': nan, 'pa... | \n",
@@ -2365,6 +2662,7 @@
" ENSG00000166147 | \n",
" Ensembl | \n",
" [{'disease_name': 'Marfan Syndrome', 'HPO': ''... | \n",
+ " [{'disease_name': nan, 'id': nan, 'source': nan}] | \n",
" [{'chembl_id': nan, 'drugbank_id': nan, 'compo... | \n",
" [{'pathway_id': 945.0, 'pathway_label': 'Nsp9 ... | \n",
" [{'pathway_id': 'WP3668', 'pathway_label': 'Hy... | \n",
@@ -2379,6 +2677,7 @@
" ENSG00000044524 | \n",
" Ensembl | \n",
" [{'disease_name': 'Adenocarcinoma of lung (dis... | \n",
+ " [{'disease_name': nan, 'id': nan, 'source': nan}] | \n",
" [{'chembl_id': 'CHEMBL24828', 'drugbank_id': '... | \n",
" [{'pathway_id': nan, 'pathway_label': nan, 'pa... | \n",
" [{'pathway_id': 'WP2882', 'pathway_label': 'Nu... | \n",
@@ -2405,6 +2704,13 @@
"2419 [{'disease_name': 'Marfan Syndrome', 'HPO': ''... \n",
"2420 [{'disease_name': 'Adenocarcinoma of lung (dis... \n",
"\n",
+ " literature_based_info \\\n",
+ "2416 [{'disease_name': nan, 'id': nan, 'source': nan}] \n",
+ "2417 [{'disease_name': nan, 'id': nan, 'source': nan}] \n",
+ "2418 [{'disease_name': nan, 'id': nan, 'source': nan}] \n",
+ "2419 [{'disease_name': nan, 'id': nan, 'source': nan}] \n",
+ "2420 [{'disease_name': nan, 'id': nan, 'source': nan}] \n",
+ "\n",
" OpenTargets_gene_compounds \\\n",
"2416 [{'chembl_id': nan, 'drugbank_id': nan, 'compo... \n",
"2417 [{'chembl_id': 'CHEMBL2109072', 'drugbank_id':... \n",
@@ -2448,7 +2754,7 @@
"2420 [{'stringdb_link_to': 'EFNA2', 'Ensembl': 'ENS... "
]
},
- "execution_count": 27,
+ "execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
@@ -2466,7 +2772,7 @@
},
{
"cell_type": "code",
- "execution_count": 28,
+ "execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
@@ -2487,21 +2793,21 @@
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
- "# pygraph = generator.networkx_graph(combined_df, opentargets_disease_compound_df)\n",
- "# with open(\n",
- "# os.path.join(os.getcwd(), \"examples\", \"usecases\", \"PCS\", \"pcs_networkx_graph.pkl\"), \"wb\"\n",
- "# ) as out:\n",
- "# pickle.dump(pygraph, out)\n",
- "\n",
+ "pygraph = generator.networkx_graph(combined_df, opentargets_disease_compound_df)\n",
"with open(\n",
- " os.path.join(os.getcwd(), \"examples\", \"usecases\", \"PCS\", \"pcs_networkx_graph.pkl\"),\n",
- " \"rb\",\n",
- ") as file:\n",
- " pygraph = pickle.load(file)"
+ " os.path.join(os.getcwd(), \"examples\", \"usecases\", \"PCS\", \"pcs_networkx_graph.pkl\"), \"wb\"\n",
+ ") as out:\n",
+ " pickle.dump(pygraph, out)\n",
+ "\n",
+ "# with open(\n",
+ "# os.path.join(os.getcwd(), \"examples\", \"usecases\", \"PCS\", \"pcs_networkx_graph.pkl\"),\n",
+ "# \"rb\",\n",
+ "# ) as file:\n",
+ "# pygraph = pickle.load(file)"
]
},
{
@@ -2551,7 +2857,7 @@
},
{
"cell_type": "code",
- "execution_count": 31,
+ "execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
@@ -2569,7 +2875,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "##### Steps to load in Neo4j"
+ "##### Steps to load the graph in Neo4j"
]
},
{
@@ -2592,32 +2898,37 @@
"- (Optionl, only run if you have imported a graph before) Remove all the nodes before importing `.graphml` file\n",
"\n",
" ```\n",
- " neo4j$ MATCH (n) DETACH DELETE n\n",
+ " MATCH (n) DETACH DELETE n\n",
" ```\n",
"\n",
"- Import `.graphml` file\n",
"\n",
" ```\n",
- " neo4j$ call apoc.import.graphml('file:///pcs_networkx_graph.graphml',{readLabels:TRUE})\n",
+ " call apoc.import.graphml('file:///pcs_networkx_graph.graphml',{readLabels:TRUE})\n",
" ```\n",
"\n",
"- Add indexes after importing the graph for improving the performance of queries\n",
"\n",
- " ```\n",
- " neo4j$ create index Gene for (n:Gene) on (n.node_type)\n",
- " neo4j$ create index Pathway for (n:Pathway) on (n.node_type)\n",
- " neo4j$ create index `Biological Process` for (n:`Biological Process`) on (n.node_type)\n",
- " neo4j$ create index `Molecular Function` for (n:`Molecular Function`) on (n.node_type)\n",
- " neo4j$ create index `Cellular Component` for (n:`Cellular Component`) on (n.node_type)\n",
- " neo4j$ create index Disease for (n:Disease) on (n.node_type)\n",
- " neo4j$ create index Compound for (n:Compound) on (n.node_type)\n",
- " neo4j$ create index `Side Effect` for (n:`Side Effect`) on (n.node_type)\n",
- " ```\n",
+ " ```create index Gene for (n:Gene) on (n.node_type)```\n",
+ "\n",
+ " ```create index Pathway for (n:Pathway) on (n.node_type)```\n",
+ "\n",
+ " ```create index `Biological Process` for (n:`Biological Process`) on (n.node_type)```\n",
+ "\n",
+ " ```create index `Molecular Function` for (n:`Molecular Function`) on (n.node_type)```\n",
+ "\n",
+ " ```create index `Cellular Component` for (n:`Cellular Component`) on (n.node_type)```\n",
+ "\n",
+ " ```create index Disease for (n:Disease) on (n.node_type)```\n",
+ "\n",
+ " ```create index Compound for (n:Compound) on (n.node_type)```\n",
+ "\n",
+ " ```create index `Side Effect` for (n:`Side Effect`) on (n.node_type)```\n",
"\n",
"- Count the number of each node type\n",
- " - total (```neo4j$ MATCH (n) RETURN count(n)```) = 19859\n",
- " - Gene (```neo4j$ MATCH (n:Gene) RETURN count(n)```) = 1667\n",
- " - Pathway (```neo4j$ MATCH (n:Pathway) RETURN count(n)```) = 1847\n",
+ " - total (```MATCH (n) RETURN count(n)```) = 19859\n",
+ " - Gene (```MATCH (n:Gene) RETURN count(n)```) = 1667\n",
+ " - Pathway (```MATCH (n:Pathway) RETURN count(n)```) = 1847\n",
" - WikiPathways (```MATCH (n:Pathway {source: \"WikiPathways\"}) RETURN count(n)```) = 678\n",
" - OpenTargets, Reactome (```MATCH (n:Pathway {source: \"OpenTargets\"}) RETURN count(n)```) = 1154\n",
" - MINERVA (```MATCH (n:Pathway {source: \"MINERVA\"}) RETURN count(n)```) = 15\n",
@@ -2642,9 +2953,7 @@
"\n",
"- Export the graph as a `.csv` file\n",
"\n",
- " ```\n",
- " neo4j$ call apoc.export.csv.all(\"pcs_networkx_graph.csv\",{})\n",
- " ```"
+ " ```call apoc.export.csv.all(\"pcs_networkx_graph.csv\",{})```"
]
},
{
diff --git a/src/pyBiodatafuse/constants.py b/src/pyBiodatafuse/constants.py
index 02cce29c..632270d6 100644
--- a/src/pyBiodatafuse/constants.py
+++ b/src/pyBiodatafuse/constants.py
@@ -97,6 +97,14 @@
}
DISGENET_DISEASE_COL = f"{DISGENET}_diseases"
+# literature based disease info
+LITERATURE_DISEASE_COL = "literature_based_info"
+LITERATURE_DISEASE_OUTPUT_DICT = {
+ "disease_name": str,
+ "id": str,
+ "source": str,
+}
+
# Open Targets - Disease
OPENTARGETS_DISEASE_OUTPUT_DICT = {
"disease_name": str,
@@ -271,29 +279,43 @@
"disease_umlscui": None,
"labels": DISEASE_NODE_LABELS,
}
-DISGENET_GENE_DISEASE_EDGE_LABEL = "associated_with"
+GENE_DISEASE_EDGE_LABEL = "associated_with"
DISGENET_EDGE_ATTRS = {
"source": DISGENET,
"score": None,
"ei": None,
"el": None,
- "label": DISGENET_GENE_DISEASE_EDGE_LABEL,
+ "label": GENE_DISEASE_EDGE_LABEL,
}
+# Literature
-# Open Targets - Disease
-OPENTARGETS_DISEASE_NODE_ATTRS = {
- "source": OPENTARGETS,
+LITERATURE_DISEASE_NODE_ATTRS = {
+ "source": None,
"name": None,
"id": None,
- "therapeutic_areas": None,
+ "UMLS": None,
"labels": DISEASE_NODE_LABELS,
}
-OPENTARGETS_GENE_DISEASE_EDGE_LABEL = "associated_with"
-OPENTARGETS_DISEASE_EDGE_ATTRS = {
- "source": OPENTARGETS,
- "label": OPENTARGETS_GENE_DISEASE_EDGE_LABEL,
+LITERATURE_DISEASE_EDGE_ATTRS = {
+ "source": None,
+ "label": GENE_DISEASE_EDGE_LABEL,
}
+# TODO: The Open Targets disease annotations are not curated yet; re-enable the attributes below once the Open Targets annotation improves.
+# Open Targets - Disease
+# OPENTARGETS_DISEASE_NODE_ATTRS = {
+# "source": OPENTARGETS,
+# "name": None,
+# "id": None,
+# "therapeutic_areas": None,
+# "labels": DISEASE_NODE_LABELS,
+# }
+# OPENTARGETS_DISEASE_EDGE_ATTRS = {
+# "source": OPENTARGETS,
+# "label": GENE_DISEASE_EDGE_LABEL,
+# }
+
+
# Pathway node
# MINERVA, WikiPathways, Open Targets - Reactome
PATHWAY_NODE_LABELS = "Pathway"
diff --git a/src/pyBiodatafuse/graph/generator.py b/src/pyBiodatafuse/graph/generator.py
index 5e9f2387..e6486cd4 100644
--- a/src/pyBiodatafuse/graph/generator.py
+++ b/src/pyBiodatafuse/graph/generator.py
@@ -22,7 +22,7 @@
DISGENET_DISEASE_COL,
DISGENET_DISEASE_NODE_ATTRS,
DISGENET_EDGE_ATTRS,
- DISGENET_GENE_DISEASE_EDGE_LABEL,
+ GENE_DISEASE_EDGE_LABEL,
GENE_GO_EDGE_ATTRS,
GENE_GO_EDGE_LABEL,
GENE_NODE_LABELS,
@@ -33,6 +33,9 @@
GO_MF_NODE_LABELS,
GO_NODE_ATTRS,
GO_NODE_MAIN_LABEL,
+ LITERATURE_DISEASE_COL,
+ LITERATURE_DISEASE_EDGE_ATTRS,
+ LITERATURE_DISEASE_NODE_ATTRS,
MINERVA,
MOLMEDB_COMPOUND_NODE_ATTRS,
MOLMEDB_PROTEIN_COMPOUND_COL,
@@ -187,7 +190,7 @@ def add_disgenet_gene_disease_subgraph(g, gene_node_label, annot_list):
g.add_node(annot_node_label, attr_dict=annot_node_attrs)
edge_attrs = DISGENET_EDGE_ATTRS.copy()
- edge_attrs["score"] = edge_attrs["score"]
+ edge_attrs["score"] = annot["score"]
if not pd.isna(annot["ei"]):
edge_attrs["ei"] = annot["ei"]
@@ -206,7 +209,48 @@ def add_disgenet_gene_disease_subgraph(g, gene_node_label, annot_list):
g.add_edge(
gene_node_label,
annot_node_label,
- label=DISGENET_GENE_DISEASE_EDGE_LABEL,
+ label=GENE_DISEASE_EDGE_LABEL,
+ attr_dict=edge_attrs,
+ )
+
+ return g
+
+
+def add_literature_gene_disease_subgraph(g, gene_node_label, annot_list):
+ """Construct part of the graph by linking the gene to diseases from literature.
+
+ :param g: the input graph to extend with new nodes and edges.
+ :param gene_node_label: the gene node to be linked to diseases.
+ :param annot_list: list of diseases from literature.
+ :returns: a NetworkX MultiDiGraph
+ """
+ for annot in annot_list:
+ if not pd.isna(annot["disease_name"]):
+ annot_node_label = annot[DISEASE_NODE_MAIN_LABEL]
+ annot_node_attrs = LITERATURE_DISEASE_NODE_ATTRS.copy()
+ annot_node_attrs["source"] = annot["source"]
+ annot_node_attrs["name"] = annot["disease_name"]
+ annot_node_attrs["id"] = annot["id"]
+ annot_node_attrs["UMLS"] = annot["id"]
+
+ g.add_node(annot_node_label, attr_dict=annot_node_attrs)
+
+ edge_attrs = LITERATURE_DISEASE_EDGE_ATTRS.copy()
+ edge_attrs["source"] = annot["source"]
+
+ edge_hash = hash(frozenset(edge_attrs.items()))
+ edge_attrs["edge_hash"] = edge_hash
+ edge_data = g.get_edge_data(gene_node_label, annot_node_label)
+ edge_data = {} if edge_data is None else edge_data
+ node_exists = [
+ x for x, y in edge_data.items() if y["attr_dict"]["edge_hash"] == edge_hash
+ ]
+
+ if len(node_exists) == 0:
+ g.add_edge(
+ gene_node_label,
+ annot_node_label,
+ label=GENE_DISEASE_EDGE_LABEL,
attr_dict=edge_attrs,
)
@@ -840,6 +884,7 @@ def networkx_graph(combined_df: pd.DataFrame, disease_compound=None):
func_dict = {
BGEE_GENE_EXPRESSION_LEVELS_COL: add_gene_bgee_subgraph,
DISGENET_DISEASE_COL: add_disgenet_gene_disease_subgraph,
+ LITERATURE_DISEASE_COL: add_literature_gene_disease_subgraph,
MINERVA: add_minerva_gene_pathway_subgraph,
WIKIPATHWAYS: add_wikipathways_gene_pathway_subgraph,
OPENTARGETS_REACTOME_COL: add_opentargets_gene_reactome_pathway_subgraph,