diff --git a/examples/gene_to_graph_workflow.ipynb b/examples/gene_to_graph_workflow.ipynb index cfd93063..8c764c7c 100644 --- a/examples/gene_to_graph_workflow.ipynb +++ b/examples/gene_to_graph_workflow.ipynb @@ -2196,7 +2196,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -2288,7 +2288,7 @@ "4 [{'pathway_id': nan, 'pathway_label': nan, 'pa... " ] }, - "execution_count": 14, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -2300,6 +2300,36 @@ "minerva_df.head()" ] }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'datasource': 'MINERVA',\n", + " 'metadata': {'source_version': '17.1.3'},\n", + " 'query': {'size': 6,\n", + " 'input_type': 'Ensembl',\n", + " 'MINERVA project': 'COVID19 Disease Map',\n", + " 'MINERVA project URL': 'https://covid19map.elixir-luxembourg.org/minerva/',\n", + " 'time': '0:00:47.612140',\n", + " 'date': '2024-09-10 15:23:35',\n", + " 'url': 'https://covid19map.elixir-luxembourg.org/minerva/',\n", + " 'number_of_added_nodes': 1,\n", + " 'number_of_added_edges': 1}}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "minerva_metadata" + ] + }, { "cell_type": "code", "execution_count": 15, diff --git a/examples/usecases/PCS/PCS_usecase.ipynb b/examples/usecases/PCS/PCS_usecase.ipynb index f14fa427..73994dc8 100644 --- a/examples/usecases/PCS/PCS_usecase.ipynb +++ b/examples/usecases/PCS/PCS_usecase.ipynb @@ -518,7 +518,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Add litaliterature-based data\n", + "### Add literature-based data\n", "Genes found to be associated with Post-COVID-19" ] }, @@ -727,7 +727,7 @@ "\n", "def get_literature_based_info(gene):\n", " if gene in pcs_associated_genes[\"Gene\"].values:\n", - " return literature_disease_attrs\n", + " return [literature_disease_attrs]\n", " 
else:\n", " return [{\"disease_name\": np.nan, \"id\": np.nan, \"source\": np.nan}]\n", "\n", @@ -745,7 +745,7 @@ { "data": { "text/plain": [ - "362 {'disease_name': 'Post-COVID-19', 'id': 'C0000...\n", + "362 [{'disease_name': 'Post-COVID-19', 'id': 'C000...\n", "Name: literature_based_info, dtype: object" ] }, @@ -2244,7 +2244,7 @@ " ENSG00000152592\n", " Ensembl\n", " [{'disease_name': 'Hypophosphatemic Rickets', ...\n", - " {'disease_name': 'Post-COVID-19', 'id': 'C0000...\n", + " [{'disease_name': 'Post-COVID-19', 'id': 'C000...\n", " [{'chembl_id': nan, 'drugbank_id': nan, 'compo...\n", " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", " [{'pathway_id': 'WP3971', 'pathway_label': 'OS...\n", @@ -2315,7 +2315,7 @@ "3 [{'disease_name': 'Cystic Fibrosis', 'HPO': ''... \n", "\n", " literature_based_info \\\n", - "0 {'disease_name': 'Post-COVID-19', 'id': 'C0000... \n", + "0 [{'disease_name': 'Post-COVID-19', 'id': 'C000... \n", "1 [{'disease_name': nan, 'id': nan, 'source': nan}] \n", "2 [{'disease_name': nan, 'id': nan, 'source': nan}] \n", "3 [{'disease_name': nan, 'id': nan, 'source': nan}] \n", @@ -2374,7 +2374,7 @@ { "data": { "text/plain": [ - "{'disease_name': 'Post-COVID-19', 'id': 'C00000', 'source': 'PMID: 37675861'}" + "[{'disease_name': 'Post-COVID-19', 'id': 'C00000', 'source': 'PMID: 37675861'}]" ] }, "execution_count": 29, @@ -2793,7 +2793,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 34, "metadata": {}, "outputs": [], "source": [ @@ -2857,7 +2857,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 35, "metadata": {}, "outputs": [], "source": [ @@ -2893,7 +2893,7 @@ " ```\n", "- Add `apoc.conf` file to **conf** subfolder of the DBMS folder\n", "- Open Neo4j Browser\n", - "- (Optionl, only run if you have imported a graph before) Remove all the nodes before importing `.graphml` file\n", + "- (Optional, only run if you have imported a graph before) Remove all the nodes before importing 
`.graphml` file\n", "\n", " ```\n", " MATCH (n) DETACH DELETE n\n", @@ -2919,7 +2919,7 @@ " ```\n", "\n", "- Count the number of each node type\n", - " - total (```MATCH (n) RETURN count(n)```) = 19859\n", + " - total (```MATCH (n) RETURN count(n)```) = 19860\n", " - Gene (```MATCH (n:Gene) RETURN count(n)```) = 1667\n", " - Pathway (```MATCH (n:Pathway) RETURN count(n)```) = 1847\n", " - WikiPathways (```MATCH (n:Pathway {source: \"WikiPathways\"}) RETURN count(n)```) = 678\n", @@ -2928,11 +2928,13 @@ " - Biological Process (```MATCH (n:`Biological Process`) RETURN count(n)```) = 4624\n", " - Molecular Function (```MATCH (n:`Molecular Function`) RETURN count(n)```) = 1327\n", " - Cellular Component (```MATCH (n:`Cellular Component`) RETURN count(n)```) = 736\n", - " - Disease (```MATCH (n:Disease) RETURN count(n)```) = 2913\n", + " - Disease (```MATCH (n:Disease) RETURN count(n)```) = 2914\n", + " - DISGENET (```MATCH (n:Disease {source: \"DISGENET\"}) RETURN count(n)```) = 2913\n", + " - Literature (```MATCH (n:Disease {source: \"PMID: 37675861\"}) RETURN count(n)```) = 1\n", " - Compound (```MATCH (n:Compound) RETURN count(n)```) = 2244\n", " - Side Effect (```MATCH (n:`Side Effect`) RETURN count(n)```) = 4501\n", "- Count the number of each edge type\n", - " - total (```MATCH ()-[r]->() RETURN count(r)```) = 101630\n", + " - total (```MATCH ()-[r]->() RETURN count(r)```) = 101659\n", " - interacts_with (```MATCH ()-[r:interacts_with]->() RETURN count(r)```) = 16844\n", " - part_of (```MATCH ()-[r:part_of]->() RETURN count(r)```) = 30066 \n", " - WikiPathways (```MATCH ()-[r:part_of {source: \"WikiPathways\"}]->() RETURN count(r)```) = 3174\n", @@ -2942,7 +2944,9 @@ " - treats (```MATCH ()-[r:treats]->() RETURN count(r)```) = 8215\n", " - has_side_effect (```MATCH ()-[r:has_side_effect]->() RETURN count(r)```) = 38328\n", " - inhibits (```MATCH ()-[r:inhibits]->() RETURN count(r)```) = 71\n", - " - associated_with (```MATCH ()-[r:associated_with]->() RETURN 
count(r)```) = 7607\n", + " - associated_with (```MATCH ()-[r:associated_with]->() RETURN count(r)```) = 7636\n", + " - DISGENET (```MATCH ()-[r:associated_with {source: \"DISGENET\"}]->() RETURN count(r)```) = 7607\n", + " - Literature (```MATCH ()-[r:associated_with {source: \"PMID: 37675861\"}]->() RETURN count(r)```) = 29\n", "\n", "- Export the graph as a `.csv` file\n", "\n", diff --git a/src/pyBiodatafuse/constants.py b/src/pyBiodatafuse/constants.py index 632270d6..a2e681b6 100644 --- a/src/pyBiodatafuse/constants.py +++ b/src/pyBiodatafuse/constants.py @@ -287,13 +287,13 @@ "el": None, "label": GENE_DISEASE_EDGE_LABEL, } -# Literature +# Literature +LITERATURE_NODE_MAIN_LABEL = "id" LITERATURE_DISEASE_NODE_ATTRS = { "source": None, "name": None, "id": None, - "UMLS": None, "labels": DISEASE_NODE_LABELS, } LITERATURE_DISEASE_EDGE_ATTRS = { diff --git a/src/pyBiodatafuse/graph/generator.py b/src/pyBiodatafuse/graph/generator.py index e6486cd4..c332b15c 100644 --- a/src/pyBiodatafuse/graph/generator.py +++ b/src/pyBiodatafuse/graph/generator.py @@ -36,6 +36,7 @@ LITERATURE_DISEASE_COL, LITERATURE_DISEASE_EDGE_ATTRS, LITERATURE_DISEASE_NODE_ATTRS, + LITERATURE_NODE_MAIN_LABEL, MINERVA, MOLMEDB_COMPOUND_NODE_ATTRS, MOLMEDB_PROTEIN_COMPOUND_COL, @@ -226,7 +227,7 @@ def add_literature_gene_disease_subgraph(g, gene_node_label, annot_list): """ for annot in annot_list: if not pd.isna(annot["disease_name"]): - annot_node_label = annot[DISEASE_NODE_MAIN_LABEL] + annot_node_label = annot[LITERATURE_NODE_MAIN_LABEL] annot_node_attrs = LITERATURE_DISEASE_NODE_ATTRS.copy() annot_node_attrs["source"] = annot["source"] annot_node_attrs["name"] = annot["disease_name"]