Skip to content

Commit

Permalink
Merge pull request #173 from BioDataFuse/example
Browse files Browse the repository at this point in the history
update use case
  • Loading branch information
tabbassidaloii authored Sep 18, 2024
2 parents 6192e4e + 1dd02cb commit 72a4e09
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 18 deletions.
34 changes: 32 additions & 2 deletions examples/gene_to_graph_workflow.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2196,7 +2196,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 6,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -2288,7 +2288,7 @@
"4 [{'pathway_id': nan, 'pathway_label': nan, 'pa... "
]
},
"execution_count": 14,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -2300,6 +2300,36 @@
"minerva_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'datasource': 'MINERVA',\n",
" 'metadata': {'source_version': '17.1.3'},\n",
" 'query': {'size': 6,\n",
" 'input_type': 'Ensembl',\n",
" 'MINERVA project': 'COVID19 Disease Map',\n",
" 'MINERVA project URL': 'https://covid19map.elixir-luxembourg.org/minerva/',\n",
" 'time': '0:00:47.612140',\n",
" 'date': '2024-09-10 15:23:35',\n",
" 'url': 'https://covid19map.elixir-luxembourg.org/minerva/',\n",
" 'number_of_added_nodes': 1,\n",
" 'number_of_added_edges': 1}}"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"minerva_metadata"
]
},
{
"cell_type": "code",
"execution_count": 15,
Expand Down
30 changes: 17 additions & 13 deletions examples/usecases/PCS/PCS_usecase.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -518,7 +518,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### Add litaliterature-based data\n",
"### Add literature-based data\n",
"Genes found to be associated with Post-COVID-19"
]
},
Expand Down Expand Up @@ -727,7 +727,7 @@
"\n",
"def get_literature_based_info(gene):\n",
" if gene in pcs_associated_genes[\"Gene\"].values:\n",
" return literature_disease_attrs\n",
" return [literature_disease_attrs]\n",
" else:\n",
" return [{\"disease_name\": np.nan, \"id\": np.nan, \"source\": np.nan}]\n",
"\n",
Expand All @@ -745,7 +745,7 @@
{
"data": {
"text/plain": [
"362 {'disease_name': 'Post-COVID-19', 'id': 'C0000...\n",
"362 [{'disease_name': 'Post-COVID-19', 'id': 'C000...\n",
"Name: literature_based_info, dtype: object"
]
},
Expand Down Expand Up @@ -2244,7 +2244,7 @@
" <td>ENSG00000152592</td>\n",
" <td>Ensembl</td>\n",
" <td>[{'disease_name': 'Hypophosphatemic Rickets', ...</td>\n",
" <td>{'disease_name': 'Post-COVID-19', 'id': 'C0000...</td>\n",
" <td>[{'disease_name': 'Post-COVID-19', 'id': 'C000...</td>\n",
" <td>[{'chembl_id': nan, 'drugbank_id': nan, 'compo...</td>\n",
" <td>[{'pathway_id': nan, 'pathway_label': nan, 'pa...</td>\n",
" <td>[{'pathway_id': 'WP3971', 'pathway_label': 'OS...</td>\n",
Expand Down Expand Up @@ -2315,7 +2315,7 @@
"3 [{'disease_name': 'Cystic Fibrosis', 'HPO': ''... \n",
"\n",
" literature_based_info \\\n",
"0 {'disease_name': 'Post-COVID-19', 'id': 'C0000... \n",
"0 [{'disease_name': 'Post-COVID-19', 'id': 'C000... \n",
"1 [{'disease_name': nan, 'id': nan, 'source': nan}] \n",
"2 [{'disease_name': nan, 'id': nan, 'source': nan}] \n",
"3 [{'disease_name': nan, 'id': nan, 'source': nan}] \n",
Expand Down Expand Up @@ -2374,7 +2374,7 @@
{
"data": {
"text/plain": [
"{'disease_name': 'Post-COVID-19', 'id': 'C00000', 'source': 'PMID: 37675861'}"
"[{'disease_name': 'Post-COVID-19', 'id': 'C00000', 'source': 'PMID: 37675861'}]"
]
},
"execution_count": 29,
Expand Down Expand Up @@ -2793,7 +2793,7 @@
},
{
"cell_type": "code",
"execution_count": 32,
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -2857,7 +2857,7 @@
},
{
"cell_type": "code",
"execution_count": 33,
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -2893,7 +2893,7 @@
" ```\n",
"- Add `apoc.conf` file to **conf** subfolder of the DBMS folder\n",
"- Open Neo4j Browser\n",
"- (Optionl, only run if you have imported a graph before) Remove all the nodes before importing `.graphml` file\n",
"- (Optional, only run if you have imported a graph before) Remove all the nodes before importing `.graphml` file\n",
"\n",
" ```\n",
" MATCH (n) DETACH DELETE n\n",
Expand All @@ -2919,7 +2919,7 @@
" ```\n",
"\n",
"- Count the number of each node type\n",
" - total (```MATCH (n) RETURN count(n)```) = 19859\n",
" - total (```MATCH (n) RETURN count(n)```) = 19860\n",
" - Gene (```MATCH (n:Gene) RETURN count(n)```) = 1667\n",
" - Pathway (```MATCH (n:Pathway) RETURN count(n)```) = 1847\n",
" - WikiPathways (```MATCH (n:Pathway {source: \"WikiPathways\"}) RETURN count(n)```) = 678\n",
Expand All @@ -2928,11 +2928,13 @@
" - Biological Process (```MATCH (n:`Biological Process`) RETURN count(n)```) = 4624\n",
" - Molecular Function (```MATCH (n:`Molecular Function`) RETURN count(n)```) = 1327\n",
" - Cellular Component (```MATCH (n:`Cellular Component`) RETURN count(n)```) = 736\n",
" - Disease (```MATCH (n:Disease) RETURN count(n)```) = 2913\n",
" - Disease (```MATCH (n:Disease) RETURN count(n)```) = 2914\n",
" - DISGENET (```MATCH (n:Disease {source: \"DISGENET\"}) RETURN count(n)```) = 2913\n",
" - Literature (```MATCH (n:Disease {source: \"PMID: 37675861\"}) RETURN count(n)```) = 1\n",
" - Compound (```MATCH (n:Compound) RETURN count(n)```) = 2244\n",
" - Side Effect (```MATCH (n:`Side Effect`) RETURN count(n)```) = 4501\n",
"- Count the number of each edge type\n",
" - total (```MATCH ()-[r]->() RETURN count(r)```) = 101630\n",
" - total (```MATCH ()-[r]->() RETURN count(r)```) = 101659\n",
" - interacts_with (```MATCH ()-[r:interacts_with]->() RETURN count(r)```) = 16844\n",
" - part_of (```MATCH ()-[r:part_of]->() RETURN count(r)```) = 30066 \n",
" - WikiPathways (```MATCH ()-[r:part_of {source: \"WikiPathways\"}]->() RETURN count(r)```) = 3174\n",
Expand All @@ -2942,7 +2944,9 @@
" - treats (```MATCH ()-[r:treats]->() RETURN count(r)```) = 8215\n",
" - has_side_effect (```MATCH ()-[r:has_side_effect]->() RETURN count(r)```) = 38328\n",
" - inhibits (```MATCH ()-[r:inhibits]->() RETURN count(r)```) = 71\n",
" - associated_with (```MATCH ()-[r:associated_with]->() RETURN count(r)```) = 7607\n",
" - associated_with (```MATCH ()-[r:associated_with]->() RETURN count(r)```) = 7636\n",
" - DISGENET (```MATCH ()-[r:associated_with {source: \"DISGENET\"}]->() RETURN count(r)```) = 7607\n",
" - Literature (```MATCH ()-[r:associated_with {source: \"PMID: 37675861\"}]->() RETURN count(r)```) = 29\n",
"\n",
"- Export the graph as a `.csv` file\n",
"\n",
Expand Down
4 changes: 2 additions & 2 deletions src/pyBiodatafuse/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,13 +287,13 @@
"el": None,
"label": GENE_DISEASE_EDGE_LABEL,
}
# Literature

# Literature
LITERATURE_NODE_MAIN_LABEL = "id"
LITERATURE_DISEASE_NODE_ATTRS = {
"source": None,
"name": None,
"id": None,
"UMLS": None,
"labels": DISEASE_NODE_LABELS,
}
LITERATURE_DISEASE_EDGE_ATTRS = {
Expand Down
3 changes: 2 additions & 1 deletion src/pyBiodatafuse/graph/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
LITERATURE_DISEASE_COL,
LITERATURE_DISEASE_EDGE_ATTRS,
LITERATURE_DISEASE_NODE_ATTRS,
LITERATURE_NODE_MAIN_LABEL,
MINERVA,
MOLMEDB_COMPOUND_NODE_ATTRS,
MOLMEDB_PROTEIN_COMPOUND_COL,
Expand Down Expand Up @@ -226,7 +227,7 @@ def add_literature_gene_disease_subgraph(g, gene_node_label, annot_list):
"""
for annot in annot_list:
if not pd.isna(annot["disease_name"]):
annot_node_label = annot[DISEASE_NODE_MAIN_LABEL]
annot_node_label = annot[LITERATURE_NODE_MAIN_LABEL]
annot_node_attrs = LITERATURE_DISEASE_NODE_ATTRS.copy()
annot_node_attrs["source"] = annot["source"]
annot_node_attrs["name"] = annot["disease_name"]
Expand Down

0 comments on commit 72a4e09

Please sign in to comment.