diff --git a/examples/gene_to_graph_workflow.ipynb b/examples/gene_to_graph_workflow.ipynb
index cfd93063..8c764c7c 100644
--- a/examples/gene_to_graph_workflow.ipynb
+++ b/examples/gene_to_graph_workflow.ipynb
@@ -2196,7 +2196,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 6,
"metadata": {},
"outputs": [
{
@@ -2288,7 +2288,7 @@
"4 [{'pathway_id': nan, 'pathway_label': nan, 'pa... "
]
},
- "execution_count": 14,
+ "execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -2300,6 +2300,36 @@
"minerva_df.head()"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'datasource': 'MINERVA',\n",
+ " 'metadata': {'source_version': '17.1.3'},\n",
+ " 'query': {'size': 6,\n",
+ " 'input_type': 'Ensembl',\n",
+ " 'MINERVA project': 'COVID19 Disease Map',\n",
+ " 'MINERVA project URL': 'https://covid19map.elixir-luxembourg.org/minerva/',\n",
+ " 'time': '0:00:47.612140',\n",
+ " 'date': '2024-09-10 15:23:35',\n",
+ " 'url': 'https://covid19map.elixir-luxembourg.org/minerva/',\n",
+ " 'number_of_added_nodes': 1,\n",
+ " 'number_of_added_edges': 1}}"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "minerva_metadata"
+ ]
+ },
{
"cell_type": "code",
"execution_count": 15,
diff --git a/examples/usecases/PCS/PCS_usecase.ipynb b/examples/usecases/PCS/PCS_usecase.ipynb
index f14fa427..73994dc8 100644
--- a/examples/usecases/PCS/PCS_usecase.ipynb
+++ b/examples/usecases/PCS/PCS_usecase.ipynb
@@ -518,7 +518,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "### Add litaliterature-based data\n",
+ "### Add literature-based data\n",
"Genes found to be associated with Post-COVID-19"
]
},
@@ -727,7 +727,7 @@
"\n",
"def get_literature_based_info(gene):\n",
" if gene in pcs_associated_genes[\"Gene\"].values:\n",
- " return literature_disease_attrs\n",
+ " return [literature_disease_attrs]\n",
" else:\n",
" return [{\"disease_name\": np.nan, \"id\": np.nan, \"source\": np.nan}]\n",
"\n",
@@ -745,7 +745,7 @@
{
"data": {
"text/plain": [
- "362 {'disease_name': 'Post-COVID-19', 'id': 'C0000...\n",
+ "362 [{'disease_name': 'Post-COVID-19', 'id': 'C000...\n",
"Name: literature_based_info, dtype: object"
]
},
@@ -2244,7 +2244,7 @@
"
ENSG00000152592 | \n",
" Ensembl | \n",
" [{'disease_name': 'Hypophosphatemic Rickets', ... | \n",
- " {'disease_name': 'Post-COVID-19', 'id': 'C0000... | \n",
+ " [{'disease_name': 'Post-COVID-19', 'id': 'C000... | \n",
" [{'chembl_id': nan, 'drugbank_id': nan, 'compo... | \n",
" [{'pathway_id': nan, 'pathway_label': nan, 'pa... | \n",
" [{'pathway_id': 'WP3971', 'pathway_label': 'OS... | \n",
@@ -2315,7 +2315,7 @@
"3 [{'disease_name': 'Cystic Fibrosis', 'HPO': ''... \n",
"\n",
" literature_based_info \\\n",
- "0 {'disease_name': 'Post-COVID-19', 'id': 'C0000... \n",
+ "0 [{'disease_name': 'Post-COVID-19', 'id': 'C000... \n",
"1 [{'disease_name': nan, 'id': nan, 'source': nan}] \n",
"2 [{'disease_name': nan, 'id': nan, 'source': nan}] \n",
"3 [{'disease_name': nan, 'id': nan, 'source': nan}] \n",
@@ -2374,7 +2374,7 @@
{
"data": {
"text/plain": [
- "{'disease_name': 'Post-COVID-19', 'id': 'C00000', 'source': 'PMID: 37675861'}"
+ "[{'disease_name': 'Post-COVID-19', 'id': 'C00000', 'source': 'PMID: 37675861'}]"
]
},
"execution_count": 29,
@@ -2793,7 +2793,7 @@
},
{
"cell_type": "code",
- "execution_count": 32,
+ "execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
@@ -2857,7 +2857,7 @@
},
{
"cell_type": "code",
- "execution_count": 33,
+ "execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
@@ -2893,7 +2893,7 @@
" ```\n",
"- Add `apoc.conf` file to **conf** subfolder of the DBMS folder\n",
"- Open Neo4j Browser\n",
- "- (Optionl, only run if you have imported a graph before) Remove all the nodes before importing `.graphml` file\n",
+ "- (Optional, only run if you have imported a graph before) Remove all the nodes before importing `.graphml` file\n",
"\n",
" ```\n",
" MATCH (n) DETACH DELETE n\n",
@@ -2919,7 +2919,7 @@
" ```\n",
"\n",
"- Count the number of each node type\n",
- " - total (```MATCH (n) RETURN count(n)```) = 19859\n",
+ " - total (```MATCH (n) RETURN count(n)```) = 19860\n",
" - Gene (```MATCH (n:Gene) RETURN count(n)```) = 1667\n",
" - Pathway (```MATCH (n:Pathway) RETURN count(n)```) = 1847\n",
" - WikiPathways (```MATCH (n:Pathway {source: \"WikiPathways\"}) RETURN count(n)```) = 678\n",
@@ -2928,11 +2928,13 @@
" - Biological Process (```MATCH (n:`Biological Process`) RETURN count(n)```) = 4624\n",
" - Molecular Function (```MATCH (n:`Molecular Function`) RETURN count(n)```) = 1327\n",
" - Cellular Component (```MATCH (n:`Cellular Component`) RETURN count(n)```) = 736\n",
- " - Disease (```MATCH (n:Disease) RETURN count(n)```) = 2913\n",
+ " - Disease (```MATCH (n:Disease) RETURN count(n)```) = 2914\n",
+ " - DISGENET (```MATCH (n:Disease {source: \"DISGENET\"}) RETURN count(n)```) = 2913\n",
+ " - Literature (```MATCH (n:Disease {source: \"PMID: 37675861\"}) RETURN count(n)```) = 1\n",
" - Compound (```MATCH (n:Compound) RETURN count(n)```) = 2244\n",
" - Side Effect (```MATCH (n:`Side Effect`) RETURN count(n)```) = 4501\n",
"- Count the number of each edge type\n",
- " - total (```MATCH ()-[r]->() RETURN count(r)```) = 101630\n",
+ " - total (```MATCH ()-[r]->() RETURN count(r)```) = 101659\n",
" - interacts_with (```MATCH ()-[r:interacts_with]->() RETURN count(r)```) = 16844\n",
" - part_of (```MATCH ()-[r:part_of]->() RETURN count(r)```) = 30066 \n",
" - WikiPathways (```MATCH ()-[r:part_of {source: \"WikiPathways\"}]->() RETURN count(r)```) = 3174\n",
@@ -2942,7 +2944,9 @@
" - treats (```MATCH ()-[r:treats]->() RETURN count(r)```) = 8215\n",
" - has_side_effect (```MATCH ()-[r:has_side_effect]->() RETURN count(r)```) = 38328\n",
" - inhibits (```MATCH ()-[r:inhibits]->() RETURN count(r)```) = 71\n",
- " - associated_with (```MATCH ()-[r:associated_with]->() RETURN count(r)```) = 7607\n",
+ " - associated_with (```MATCH ()-[r:associated_with]->() RETURN count(r)```) = 7636\n",
+ " - DISGENET (```MATCH ()-[r:associated_with {source: \"DISGENET\"}]->() RETURN count(r)```) = 7607\n",
+ " - Literature (```MATCH ()-[r:associated_with {source: \"PMID: 37675861\"}]->() RETURN count(r)```) = 29\n",
"\n",
"- Export the graph as a `.csv` file\n",
"\n",
diff --git a/src/pyBiodatafuse/constants.py b/src/pyBiodatafuse/constants.py
index 632270d6..a2e681b6 100644
--- a/src/pyBiodatafuse/constants.py
+++ b/src/pyBiodatafuse/constants.py
@@ -287,13 +287,13 @@
"el": None,
"label": GENE_DISEASE_EDGE_LABEL,
}
-# Literature
+# Literature
+LITERATURE_NODE_MAIN_LABEL = "id"
LITERATURE_DISEASE_NODE_ATTRS = {
"source": None,
"name": None,
"id": None,
- "UMLS": None,
"labels": DISEASE_NODE_LABELS,
}
LITERATURE_DISEASE_EDGE_ATTRS = {
diff --git a/src/pyBiodatafuse/graph/generator.py b/src/pyBiodatafuse/graph/generator.py
index e6486cd4..c332b15c 100644
--- a/src/pyBiodatafuse/graph/generator.py
+++ b/src/pyBiodatafuse/graph/generator.py
@@ -36,6 +36,7 @@
LITERATURE_DISEASE_COL,
LITERATURE_DISEASE_EDGE_ATTRS,
LITERATURE_DISEASE_NODE_ATTRS,
+ LITERATURE_NODE_MAIN_LABEL,
MINERVA,
MOLMEDB_COMPOUND_NODE_ATTRS,
MOLMEDB_PROTEIN_COMPOUND_COL,
@@ -226,7 +227,7 @@ def add_literature_gene_disease_subgraph(g, gene_node_label, annot_list):
"""
for annot in annot_list:
if not pd.isna(annot["disease_name"]):
- annot_node_label = annot[DISEASE_NODE_MAIN_LABEL]
+ annot_node_label = annot[LITERATURE_NODE_MAIN_LABEL]
annot_node_attrs = LITERATURE_DISEASE_NODE_ATTRS.copy()
annot_node_attrs["source"] = annot["source"]
annot_node_attrs["name"] = annot["disease_name"]