diff --git a/examples/gene_to_graph_workflow.ipynb b/examples/gene_to_graph_workflow.ipynb index b8b625ce..d295b201 100644 --- a/examples/gene_to_graph_workflow.ipynb +++ b/examples/gene_to_graph_workflow.ipynb @@ -11,7 +11,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -36,7 +36,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -83,7 +83,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -92,7 +92,7 @@ "6" ] }, - "execution_count": 3, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -145,6 +145,8 @@ "# AHR\"\"\"\n", "# genes_of_interest = \"CHRNG\"\n", "# genes_of_interest = \"HOXA10\"\n", + "# genes_of_interest = \"\"\"TRIM16L\n", + "# UBB\"\"\"\n", "\n", "gene_list = genes_of_interest.split(\"\\n\")\n", "len(gene_list)" @@ -152,7 +154,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -213,7 +215,7 @@ "4 SLC25A1" ] }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -317,13 +319,13 @@ } ], "source": [ - "bridgdb_df, bridgdb_metadata = id_mapper.bridgedb_xref(\n", + "bridgedb_df, bridgedb_metadata = id_mapper.bridgedb_xref(\n", " identifiers=data_input,\n", " input_species=\"Human\",\n", " input_datasource=\"HGNC\",\n", " output_datasource=\"All\",\n", ")\n", - "bridgdb_df.head()" + "bridgedb_df.head()" ] }, { @@ -433,7 +435,7 @@ } ], "source": [ - "bgee_df, bgee_metadata = bgee.get_gene_expression(bridgedb_df=bridgdb_df)\n", + "bgee_df, bgee_metadata = bgee.get_gene_expression(bridgedb_df=bridgedb_df)\n", "bgee_df.head()" ] }, @@ -729,7 +731,7 @@ ], "source": [ "disgenet_df, disgenet_metadata = disgenet.get_gene_disease(\n", - " api_key=disgenet_api_key, bridgedb_df=bridgdb_df\n", + " api_key=disgenet_api_key, 
bridgedb_df=bridgedb_df\n", ")\n", "disgenet_df.head()" ] @@ -1971,9 +1973,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "E:\\BioDataFuse\\pyBiodatafuse\\src\\pyBiodatafuse\\annotators\\opentargets.py:1067: UserWarning: Not all values in column 'adverse_effect_count' have the correct type ''.\n", + "E:\\BioDataFuse\\pyBiodatafuse\\src\\pyBiodatafuse\\annotators\\opentargets.py:1075: UserWarning: Not all values in column 'adverse_effect_count' have the correct type ''.\n", " check_columns_against_constants(\n", - "E:\\BioDataFuse\\pyBiodatafuse\\src\\pyBiodatafuse\\annotators\\opentargets.py:1067: UserWarning: Not all values in column 'adverse_effect' have the correct type ''.\n", + "E:\\BioDataFuse\\pyBiodatafuse\\src\\pyBiodatafuse\\annotators\\opentargets.py:1075: UserWarning: Not all values in column 'adverse_effect' have the correct type ''.\n", " check_columns_against_constants(\n" ] }, @@ -2293,7 +2295,7 @@ ], "source": [ "minerva_df, minerva_metadata = minerva.get_gene_minerva_pathways(\n", - " bridgdb_df, map_name=\"COVID19 Disease Map\"\n", + " bridgedb_df, map_name=\"COVID19 Disease Map\"\n", ")\n", "minerva_df.head()" ] @@ -2427,7 +2429,7 @@ } ], "source": [ - "wikipathways_df, wikipathways_metadata = wikipathways.get_gene_wikipathways(bridgedb_df=bridgdb_df)\n", + "wikipathways_df, wikipathways_metadata = wikipathways.get_gene_wikipathways(bridgedb_df=bridgedb_df)\n", "wikipathways_df.head()" ] }, @@ -2597,7 +2599,7 @@ ], "source": [ "opentargets_reactome_df, opentargets_reactome_metadata = opentargets.get_gene_reactome_pathways(\n", - " bridgedb_df=bridgdb_df\n", + " bridgedb_df=bridgedb_df\n", ")\n", "opentargets_reactome_df.head()" ] @@ -2736,7 +2738,9 @@ } ], "source": [ - "opentargets_go_df, opentargets_go_metadata = opentargets.get_gene_go_process(bridgedb_df=bridgdb_df)\n", + "opentargets_go_df, opentargets_go_metadata = opentargets.get_gene_go_process(\n", + " bridgedb_df=bridgedb_df\n", + ")\n", "opentargets_go_df.head()" ] }, @@ 
-2897,9 +2901,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "E:\\BioDataFuse\\pyBiodatafuse\\src\\pyBiodatafuse\\annotators\\opentargets.py:573: UserWarning: Not all values in column 'adverse_effect_count' have the correct type ''.\n", + "E:\\BioDataFuse\\pyBiodatafuse\\src\\pyBiodatafuse\\annotators\\opentargets.py:574: UserWarning: Not all values in column 'adverse_effect_count' have the correct type ''.\n", " check_columns_against_constants(\n", - "E:\\BioDataFuse\\pyBiodatafuse\\src\\pyBiodatafuse\\annotators\\opentargets.py:573: UserWarning: Not all values in column 'adverse_effect' have the correct type ''.\n", + "E:\\BioDataFuse\\pyBiodatafuse\\src\\pyBiodatafuse\\annotators\\opentargets.py:574: UserWarning: Not all values in column 'adverse_effect' have the correct type ''.\n", " check_columns_against_constants(\n" ] }, @@ -2999,7 +3003,7 @@ ], "source": [ "opentargets_compound_df, opentargets_compound_metadata = opentargets.get_gene_compound_interactions(\n", - " bridgedb_df=bridgdb_df\n", + " bridgedb_df=bridgedb_df\n", ")\n", "opentargets_compound_df.head()" ] @@ -3041,17 +3045,9 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 24, "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\Tooba\\AppData\\Local\\Temp\\ipykernel_14092\\803967956.py:1: UserWarning: MolMeDB endpoint is not available. 
Unable to retrieve data.\n", - " inhibitor_df, inhibitor_metadata = molmedb.get_gene_compound_inhibitor(bridgedb_df=bridgdb_df)\n" - ] - }, { "data": { "text/html": [ @@ -3073,34 +3069,150 @@ " \n", " \n", " \n", + " identifier\n", + " identifier.source\n", + " target\n", + " target.source\n", + " MolMeDB_transporter_inhibitor\n", " \n", " \n", " \n", + " \n", + " 0\n", + " AHR\n", + " HGNC\n", + " A0A024R9Z8\n", + " Uniprot-TrEMBL\n", + " [{'compound_name': nan, 'inchikey': nan, 'smil...\n", + " \n", + " \n", + " 1\n", + " AHR\n", + " HGNC\n", + " A0A2R8Y7G1\n", + " Uniprot-TrEMBL\n", + " [{'compound_name': nan, 'inchikey': nan, 'smil...\n", + " \n", + " \n", + " 2\n", + " AHR\n", + " HGNC\n", + " P35869\n", + " Uniprot-TrEMBL\n", + " [{'compound_name': nan, 'inchikey': nan, 'smil...\n", + " \n", + " \n", + " 3\n", + " CHRNG\n", + " HGNC\n", + " A0A6F7YAP6\n", + " Uniprot-TrEMBL\n", + " [{'compound_name': nan, 'inchikey': nan, 'smil...\n", + " \n", + " \n", + " 4\n", + " CHRNG\n", + " HGNC\n", + " P07510\n", + " Uniprot-TrEMBL\n", + " [{'compound_name': nan, 'inchikey': nan, 'smil...\n", + " \n", " \n", "\n", "" ], "text/plain": [ - "Empty DataFrame\n", - "Columns: []\n", - "Index: []" + " identifier identifier.source target target.source \\\n", + "0 AHR HGNC A0A024R9Z8 Uniprot-TrEMBL \n", + "1 AHR HGNC A0A2R8Y7G1 Uniprot-TrEMBL \n", + "2 AHR HGNC P35869 Uniprot-TrEMBL \n", + "3 CHRNG HGNC A0A6F7YAP6 Uniprot-TrEMBL \n", + "4 CHRNG HGNC P07510 Uniprot-TrEMBL \n", + "\n", + " MolMeDB_transporter_inhibitor \n", + "0 [{'compound_name': nan, 'inchikey': nan, 'smil... \n", + "1 [{'compound_name': nan, 'inchikey': nan, 'smil... \n", + "2 [{'compound_name': nan, 'inchikey': nan, 'smil... \n", + "3 [{'compound_name': nan, 'inchikey': nan, 'smil... \n", + "4 [{'compound_name': nan, 'inchikey': nan, 'smil... 
" ] }, - "execution_count": 25, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "inhibitor_df, inhibitor_metadata = molmedb.get_gene_compound_inhibitor(bridgedb_df=bridgdb_df)\n", + "inhibitor_df, inhibitor_metadata = molmedb.get_gene_compound_inhibitor(bridgedb_df=bridgedb_df)\n", "inhibitor_df.head()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[{'compound_name': 'Lidocaine', 'inchikey': 'NNJVILVZKWQKPM-UHFFFAOYSA-N', 'smiles': 'CCN(CC)CC(=O)Nc1c(C)cccc1C', 'compound_cid': '3676', 'molmedb_id': 'MM00092', 'source_pmid': '24440379', 'chebi_id': '6456', 'drugbank_id': 'DB00281', 'uniprot_trembl_id': 'P35499'}]\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
identifieridentifier.sourcetargettarget.sourceMolMeDB_transporter_inhibitor
47SCN4AHGNCP35499Uniprot-TrEMBL[{'compound_name': 'Lidocaine', 'inchikey': 'N...
\n", + "
" + ], + "text/plain": [ + " identifier identifier.source target target.source \\\n", + "47 SCN4A HGNC P35499 Uniprot-TrEMBL \n", + "\n", + " MolMeDB_transporter_inhibitor \n", + "47 [{'compound_name': 'Lidocaine', 'inchikey': 'N... " + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "print(inhibitor_df[MOLMEDB_PROTEIN_COMPOUND_COL][47])\n", "inhibitor_df[inhibitor_df[\"target\"] == \"P35499\"]" @@ -3115,17 +3227,9 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 26, "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\Tooba\\AppData\\Local\\Temp\\ipykernel_14092\\3568013887.py:1: UserWarning: PubChem endpoint is not available. Unable to retrieve data.\n", - " pubchem_assay_df, pubchem_assay_metadata = pubchem.get_protein_compound_screened(\n" - ] - }, { "data": { "text/html": [ @@ -3147,34 +3251,89 @@ " \n", " \n", " \n", + " identifier\n", + " identifier.source\n", + " target\n", + " target.source\n", + " PubChem_assays\n", " \n", " \n", " \n", + " \n", + " 0\n", + " AHR\n", + " HGNC\n", + " A0A024R9Z8\n", + " Uniprot-TrEMBL\n", + " [{'pubchem_assay_id': nan, 'assay_type': nan, ...\n", + " \n", + " \n", + " 1\n", + " AHR\n", + " HGNC\n", + " A0A2R8Y7G1\n", + " Uniprot-TrEMBL\n", + " [{'pubchem_assay_id': nan, 'assay_type': nan, ...\n", + " \n", + " \n", + " 2\n", + " AHR\n", + " HGNC\n", + " P35869\n", + " Uniprot-TrEMBL\n", + " [{'pubchem_assay_id': nan, 'assay_type': nan, ...\n", + " \n", + " \n", + " 3\n", + " CHRNG\n", + " HGNC\n", + " A0A6F7YAP6\n", + " Uniprot-TrEMBL\n", + " [{'pubchem_assay_id': nan, 'assay_type': nan, ...\n", + " \n", + " \n", + " 4\n", + " CHRNG\n", + " HGNC\n", + " P07510\n", + " Uniprot-TrEMBL\n", + " [{'pubchem_assay_id': nan, 'assay_type': nan, ...\n", + " \n", " \n", "\n", "" ], "text/plain": [ - "Empty DataFrame\n", - "Columns: []\n", - "Index: []" + " identifier identifier.source target 
target.source \\\n", + "0 AHR HGNC A0A024R9Z8 Uniprot-TrEMBL \n", + "1 AHR HGNC A0A2R8Y7G1 Uniprot-TrEMBL \n", + "2 AHR HGNC P35869 Uniprot-TrEMBL \n", + "3 CHRNG HGNC A0A6F7YAP6 Uniprot-TrEMBL \n", + "4 CHRNG HGNC P07510 Uniprot-TrEMBL \n", + "\n", + " PubChem_assays \n", + "0 [{'pubchem_assay_id': nan, 'assay_type': nan, ... \n", + "1 [{'pubchem_assay_id': nan, 'assay_type': nan, ... \n", + "2 [{'pubchem_assay_id': nan, 'assay_type': nan, ... \n", + "3 [{'pubchem_assay_id': nan, 'assay_type': nan, ... \n", + "4 [{'pubchem_assay_id': nan, 'assay_type': nan, ... " ] }, - "execution_count": 27, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pubchem_assay_df, pubchem_assay_metadata = pubchem.get_protein_compound_screened(\n", - " bridgedb_df=bridgdb_df\n", + " bridgedb_df=bridgedb_df\n", ")\n", "pubchem_assay_df.head()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "metadata": {}, "outputs": [ { @@ -3313,7 +3472,7 @@ } ], "source": [ - "ppi_df, ppi_metadata = stringdb.get_ppi(bridgedb_df=bridgdb_df)\n", + "ppi_df, ppi_metadata = stringdb.get_ppi(bridgedb_df=bridgedb_df)\n", "ppi_df.head()" ] }, @@ -3382,24 +3541,28 @@ " OpenTargets_reactome\n", " OpenTargets_go\n", " OpenTargets_gene_compounds\n", + " MolMeDB_transporter_inhibitor\n", + " PubChem_assays\n", " StringDB_ppi\n", " \n", " \n", " \n", " \n", " 0\n", - " AHR\n", + " CHRNG\n", " HGNC\n", - " ENSG00000106546\n", + " ENSG00000196811\n", " Ensembl\n", " [{'anatomical_entity_id': 'UBERON_0000178', 'a...\n", - " [{'disease_name': 'Mammary Neoplasms', 'HPO': ...\n", - " [{'pathway_id': 953.0, 'pathway_label': 'Kynur...\n", - " [{'pathway_id': 'WP4673', 'pathway_label': 'Ma...\n", - " [{'pathway_label': 'Endogenous sterols', 'path...\n", - " [{'go_id': 'GO:0005667', 'go_name': 'transcrip...\n", - " [{'chembl_id': 'CHEMBL259571', 'drugbank_id': ...\n", - " [{'stringdb_link_to': nan, 'Ensembl': nan, 'sc...\n", + " [{'disease_name': 
'Multiple pterygium syndrome...\n", + " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", + " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", + " [{'pathway_label': 'Highly sodium permeable po...\n", + " [{'go_id': 'GO:0015464', 'go_name': 'acetylcho...\n", + " [{'chembl_id': 'CHEMBL1200641', 'drugbank_id':...\n", + " [{'compound_name': nan, 'inchikey': nan, 'smil...\n", + " [{'pubchem_assay_id': nan, 'assay_type': nan, ...\n", + " [{'stringdb_link_to': 'SCN4A', 'Ensembl': 'ENS...\n", " \n", " \n", " 1\n", @@ -3414,52 +3577,60 @@ " [{'pathway_label': 'Highly sodium permeable po...\n", " [{'go_id': 'GO:0015464', 'go_name': 'acetylcho...\n", " [{'chembl_id': 'CHEMBL1200641', 'drugbank_id':...\n", + " [{'compound_name': nan, 'inchikey': nan, 'smil...\n", + " [{'pubchem_assay_id': nan, 'assay_type': nan, ...\n", " [{'stringdb_link_to': 'SCN4A', 'Ensembl': 'ENS...\n", " \n", " \n", " 2\n", - " DMD\n", + " CHRNG\n", " HGNC\n", - " ENSG00000198947\n", + " ENSG00000196811\n", " Ensembl\n", " [{'anatomical_entity_id': 'UBERON_0000178', 'a...\n", - " [{'disease_name': 'Muscular Dystrophy, Duchenn...\n", + " [{'disease_name': 'Multiple pterygium syndrome...\n", " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", - " [{'pathway_id': 'WP5356', 'pathway_label': 'Af...\n", - " [{'pathway_label': 'Striated Muscle Contractio...\n", - " [{'go_id': 'GO:0016010', 'go_name': 'dystrophi...\n", - " [{'chembl_id': 'CHEMBL2108278', 'drugbank_id':...\n", + " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", + " [{'pathway_label': 'Highly sodium permeable po...\n", + " [{'go_id': 'GO:0015464', 'go_name': 'acetylcho...\n", + " [{'chembl_id': 'CHEMBL1200641', 'drugbank_id':...\n", + " [{'compound_name': nan, 'inchikey': nan, 'smil...\n", + " [{'pubchem_assay_id': nan, 'assay_type': nan, ...\n", " [{'stringdb_link_to': 'SCN4A', 'Ensembl': 'ENS...\n", " \n", " \n", " 3\n", - " HTR3A\n", + " CHRNG\n", " HGNC\n", - " ENSG00000166736\n", + " ENSG00000196811\n", " Ensembl\n", " 
[{'anatomical_entity_id': 'UBERON_0000178', 'a...\n", - " [{'disease_name': 'Schizophrenia', 'HPO': 'HPO...\n", + " [{'disease_name': 'Multiple pterygium syndrome...\n", " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", - " [{'pathway_id': 'WP706', 'pathway_label': 'Sud...\n", - " [{'pathway_label': 'Neurotransmitter receptors...\n", - " [{'go_id': 'GO:1904602', 'go_name': 'serotonin...\n", - " [{'chembl_id': 'CHEMBL56564', 'drugbank_id': '...\n", - " [{'stringdb_link_to': nan, 'Ensembl': nan, 'sc...\n", + " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", + " [{'pathway_label': 'Highly sodium permeable po...\n", + " [{'go_id': 'GO:0015464', 'go_name': 'acetylcho...\n", + " [{'chembl_id': 'CHEMBL1200641', 'drugbank_id':...\n", + " [{'compound_name': nan, 'inchikey': nan, 'smil...\n", + " [{'pubchem_assay_id': nan, 'assay_type': nan, ...\n", + " [{'stringdb_link_to': 'SCN4A', 'Ensembl': 'ENS...\n", " \n", " \n", " 4\n", - " SCN4A\n", + " DMD\n", " HGNC\n", - " ENSG00000007314\n", + " ENSG00000198947\n", " Ensembl\n", " [{'anatomical_entity_id': 'UBERON_0000178', 'a...\n", - " [{'disease_name': 'Potassium aggravated myoton...\n", - " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", + " [{'disease_name': 'Muscular Dystrophy, Duchenn...\n", " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", - " [{'pathway_label': 'Phase 0 - rapid depolarisa...\n", - " [{'go_id': 'GO:0035725', 'go_name': 'sodium io...\n", - " [{'chembl_id': 'CHEMBL1077896', 'drugbank_id':...\n", - " [{'stringdb_link_to': 'DMD', 'Ensembl': 'ENSP0...\n", + " [{'pathway_id': 'WP5356', 'pathway_label': 'Af...\n", + " [{'pathway_label': 'Striated Muscle Contractio...\n", + " [{'go_id': 'GO:0016010', 'go_name': 'dystrophi...\n", + " [{'chembl_id': 'CHEMBL2108278', 'drugbank_id':...\n", + " [{'compound_name': nan, 'inchikey': nan, 'smil...\n", + " [{'pubchem_assay_id': nan, 'assay_type': nan, ...\n", + " [{'stringdb_link_to': 'SCN4A', 'Ensembl': 'ENS...\n", " \n", " \n", "\n", @@ -3467,11 
+3638,11 @@ ], "text/plain": [ " identifier identifier.source target target.source \\\n", - "0 AHR HGNC ENSG00000106546 Ensembl \n", + "0 CHRNG HGNC ENSG00000196811 Ensembl \n", "1 CHRNG HGNC ENSG00000196811 Ensembl \n", - "2 DMD HGNC ENSG00000198947 Ensembl \n", - "3 HTR3A HGNC ENSG00000166736 Ensembl \n", - "4 SCN4A HGNC ENSG00000007314 Ensembl \n", + "2 CHRNG HGNC ENSG00000196811 Ensembl \n", + "3 CHRNG HGNC ENSG00000196811 Ensembl \n", + "4 DMD HGNC ENSG00000198947 Ensembl \n", "\n", " Bgee_gene_expression_levels \\\n", "0 [{'anatomical_entity_id': 'UBERON_0000178', 'a... \n", @@ -3481,53 +3652,67 @@ "4 [{'anatomical_entity_id': 'UBERON_0000178', 'a... \n", "\n", " DISGENET_diseases \\\n", - "0 [{'disease_name': 'Mammary Neoplasms', 'HPO': ... \n", + "0 [{'disease_name': 'Multiple pterygium syndrome... \n", "1 [{'disease_name': 'Multiple pterygium syndrome... \n", - "2 [{'disease_name': 'Muscular Dystrophy, Duchenn... \n", - "3 [{'disease_name': 'Schizophrenia', 'HPO': 'HPO... \n", - "4 [{'disease_name': 'Potassium aggravated myoton... \n", + "2 [{'disease_name': 'Multiple pterygium syndrome... \n", + "3 [{'disease_name': 'Multiple pterygium syndrome... \n", + "4 [{'disease_name': 'Muscular Dystrophy, Duchenn... \n", "\n", " MINERVA \\\n", - "0 [{'pathway_id': 953.0, 'pathway_label': 'Kynur... \n", + "0 [{'pathway_id': nan, 'pathway_label': nan, 'pa... \n", "1 [{'pathway_id': nan, 'pathway_label': nan, 'pa... \n", "2 [{'pathway_id': nan, 'pathway_label': nan, 'pa... \n", "3 [{'pathway_id': nan, 'pathway_label': nan, 'pa... \n", "4 [{'pathway_id': nan, 'pathway_label': nan, 'pa... \n", "\n", " WikiPathways \\\n", - "0 [{'pathway_id': 'WP4673', 'pathway_label': 'Ma... \n", + "0 [{'pathway_id': nan, 'pathway_label': nan, 'pa... \n", "1 [{'pathway_id': nan, 'pathway_label': nan, 'pa... \n", - "2 [{'pathway_id': 'WP5356', 'pathway_label': 'Af... \n", - "3 [{'pathway_id': 'WP706', 'pathway_label': 'Sud... \n", - "4 [{'pathway_id': nan, 'pathway_label': nan, 'pa... 
\n", + "2 [{'pathway_id': nan, 'pathway_label': nan, 'pa... \n", + "3 [{'pathway_id': nan, 'pathway_label': nan, 'pa... \n", + "4 [{'pathway_id': 'WP5356', 'pathway_label': 'Af... \n", "\n", " OpenTargets_reactome \\\n", - "0 [{'pathway_label': 'Endogenous sterols', 'path... \n", + "0 [{'pathway_label': 'Highly sodium permeable po... \n", "1 [{'pathway_label': 'Highly sodium permeable po... \n", - "2 [{'pathway_label': 'Striated Muscle Contractio... \n", - "3 [{'pathway_label': 'Neurotransmitter receptors... \n", - "4 [{'pathway_label': 'Phase 0 - rapid depolarisa... \n", + "2 [{'pathway_label': 'Highly sodium permeable po... \n", + "3 [{'pathway_label': 'Highly sodium permeable po... \n", + "4 [{'pathway_label': 'Striated Muscle Contractio... \n", "\n", " OpenTargets_go \\\n", - "0 [{'go_id': 'GO:0005667', 'go_name': 'transcrip... \n", + "0 [{'go_id': 'GO:0015464', 'go_name': 'acetylcho... \n", "1 [{'go_id': 'GO:0015464', 'go_name': 'acetylcho... \n", - "2 [{'go_id': 'GO:0016010', 'go_name': 'dystrophi... \n", - "3 [{'go_id': 'GO:1904602', 'go_name': 'serotonin... \n", - "4 [{'go_id': 'GO:0035725', 'go_name': 'sodium io... \n", + "2 [{'go_id': 'GO:0015464', 'go_name': 'acetylcho... \n", + "3 [{'go_id': 'GO:0015464', 'go_name': 'acetylcho... \n", + "4 [{'go_id': 'GO:0016010', 'go_name': 'dystrophi... \n", "\n", " OpenTargets_gene_compounds \\\n", - "0 [{'chembl_id': 'CHEMBL259571', 'drugbank_id': ... \n", + "0 [{'chembl_id': 'CHEMBL1200641', 'drugbank_id':... \n", "1 [{'chembl_id': 'CHEMBL1200641', 'drugbank_id':... \n", - "2 [{'chembl_id': 'CHEMBL2108278', 'drugbank_id':... \n", - "3 [{'chembl_id': 'CHEMBL56564', 'drugbank_id': '... \n", - "4 [{'chembl_id': 'CHEMBL1077896', 'drugbank_id':... \n", + "2 [{'chembl_id': 'CHEMBL1200641', 'drugbank_id':... \n", + "3 [{'chembl_id': 'CHEMBL1200641', 'drugbank_id':... \n", + "4 [{'chembl_id': 'CHEMBL2108278', 'drugbank_id':... 
\n", + "\n", + " MolMeDB_transporter_inhibitor \\\n", + "0 [{'compound_name': nan, 'inchikey': nan, 'smil... \n", + "1 [{'compound_name': nan, 'inchikey': nan, 'smil... \n", + "2 [{'compound_name': nan, 'inchikey': nan, 'smil... \n", + "3 [{'compound_name': nan, 'inchikey': nan, 'smil... \n", + "4 [{'compound_name': nan, 'inchikey': nan, 'smil... \n", + "\n", + " PubChem_assays \\\n", + "0 [{'pubchem_assay_id': nan, 'assay_type': nan, ... \n", + "1 [{'pubchem_assay_id': nan, 'assay_type': nan, ... \n", + "2 [{'pubchem_assay_id': nan, 'assay_type': nan, ... \n", + "3 [{'pubchem_assay_id': nan, 'assay_type': nan, ... \n", + "4 [{'pubchem_assay_id': nan, 'assay_type': nan, ... \n", "\n", " StringDB_ppi \n", - "0 [{'stringdb_link_to': nan, 'Ensembl': nan, 'sc... \n", + "0 [{'stringdb_link_to': 'SCN4A', 'Ensembl': 'ENS... \n", "1 [{'stringdb_link_to': 'SCN4A', 'Ensembl': 'ENS... \n", "2 [{'stringdb_link_to': 'SCN4A', 'Ensembl': 'ENS... \n", - "3 [{'stringdb_link_to': nan, 'Ensembl': nan, 'sc... \n", - "4 [{'stringdb_link_to': 'DMD', 'Ensembl': 'ENSP0... " + "3 [{'stringdb_link_to': 'SCN4A', 'Ensembl': 'ENS... \n", + "4 [{'stringdb_link_to': 'SCN4A', 'Ensembl': 'ENS... 
" ] }, "execution_count": 30, @@ -3537,6 +3722,7 @@ ], "source": [ "combined_df = combine_sources(\n", + " bridgedb_df,\n", " [\n", " bgee_df,\n", " disgenet_df,\n", @@ -3548,23 +3734,23 @@ " inhibitor_df,\n", " pubchem_assay_df,\n", " ppi_df,\n", - " ]\n", + " ],\n", ")\n", "combined_df.head()" ] }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(6, 12)" + "(1622, 14)" ] }, - "execution_count": 33, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -3582,7 +3768,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 32, "metadata": {}, "outputs": [], "source": [ @@ -3601,7 +3787,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -3636,24 +3822,28 @@ " OpenTargets_reactome\n", " OpenTargets_go\n", " OpenTargets_gene_compounds\n", + " MolMeDB_transporter_inhibitor\n", + " PubChem_assays\n", " StringDB_ppi\n", " \n", " \n", " \n", " \n", " 0\n", - " AHR\n", + " CHRNG\n", " HGNC\n", - " ENSG00000106546\n", + " ENSG00000196811\n", " Ensembl\n", " [{'anatomical_entity_id': 'UBERON_0000178', 'a...\n", - " [{'disease_name': 'Mammary Neoplasms', 'HPO': ...\n", - " [{'pathway_id': 953.0, 'pathway_label': 'Kynur...\n", - " [{'pathway_id': 'WP4673', 'pathway_label': 'Ma...\n", - " [{'pathway_label': 'Endogenous sterols', 'path...\n", - " [{'go_id': 'GO:0005667', 'go_name': 'transcrip...\n", - " [{'chembl_id': 'CHEMBL259571', 'drugbank_id': ...\n", - " [{'stringdb_link_to': nan, 'Ensembl': nan, 'sc...\n", + " [{'disease_name': 'Multiple pterygium syndrome...\n", + " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", + " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", + " [{'pathway_label': 'Highly sodium permeable po...\n", + " [{'go_id': 'GO:0015464', 'go_name': 'acetylcho...\n", + " [{'chembl_id': 'CHEMBL1200641', 'drugbank_id':...\n", + " [{'compound_name': nan, 
'inchikey': nan, 'smil...\n", + " [{'pubchem_assay_id': nan, 'assay_type': nan, ...\n", + " [{'stringdb_link_to': 'SCN4A', 'Ensembl': 'ENS...\n", " \n", " \n", " 1\n", @@ -3668,52 +3858,60 @@ " [{'pathway_label': 'Highly sodium permeable po...\n", " [{'go_id': 'GO:0015464', 'go_name': 'acetylcho...\n", " [{'chembl_id': 'CHEMBL1200641', 'drugbank_id':...\n", + " [{'compound_name': nan, 'inchikey': nan, 'smil...\n", + " [{'pubchem_assay_id': nan, 'assay_type': nan, ...\n", " [{'stringdb_link_to': 'SCN4A', 'Ensembl': 'ENS...\n", " \n", " \n", " 2\n", - " DMD\n", + " CHRNG\n", " HGNC\n", - " ENSG00000198947\n", + " ENSG00000196811\n", " Ensembl\n", " [{'anatomical_entity_id': 'UBERON_0000178', 'a...\n", - " [{'disease_name': 'Muscular Dystrophy, Duchenn...\n", + " [{'disease_name': 'Multiple pterygium syndrome...\n", " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", - " [{'pathway_id': 'WP5356', 'pathway_label': 'Af...\n", - " [{'pathway_label': 'Striated Muscle Contractio...\n", - " [{'go_id': 'GO:0016010', 'go_name': 'dystrophi...\n", - " [{'chembl_id': 'CHEMBL2108278', 'drugbank_id':...\n", + " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", + " [{'pathway_label': 'Highly sodium permeable po...\n", + " [{'go_id': 'GO:0015464', 'go_name': 'acetylcho...\n", + " [{'chembl_id': 'CHEMBL1200641', 'drugbank_id':...\n", + " [{'compound_name': nan, 'inchikey': nan, 'smil...\n", + " [{'pubchem_assay_id': nan, 'assay_type': nan, ...\n", " [{'stringdb_link_to': 'SCN4A', 'Ensembl': 'ENS...\n", " \n", " \n", " 3\n", - " HTR3A\n", + " CHRNG\n", " HGNC\n", - " ENSG00000166736\n", + " ENSG00000196811\n", " Ensembl\n", " [{'anatomical_entity_id': 'UBERON_0000178', 'a...\n", - " [{'disease_name': 'Schizophrenia', 'HPO': 'HPO...\n", + " [{'disease_name': 'Multiple pterygium syndrome...\n", " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", - " [{'pathway_id': 'WP706', 'pathway_label': 'Sud...\n", - " [{'pathway_label': 'Neurotransmitter receptors...\n", - " [{'go_id': 
'GO:1904602', 'go_name': 'serotonin...\n", - " [{'chembl_id': 'CHEMBL56564', 'drugbank_id': '...\n", - " [{'stringdb_link_to': nan, 'Ensembl': nan, 'sc...\n", + " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", + " [{'pathway_label': 'Highly sodium permeable po...\n", + " [{'go_id': 'GO:0015464', 'go_name': 'acetylcho...\n", + " [{'chembl_id': 'CHEMBL1200641', 'drugbank_id':...\n", + " [{'compound_name': nan, 'inchikey': nan, 'smil...\n", + " [{'pubchem_assay_id': nan, 'assay_type': nan, ...\n", + " [{'stringdb_link_to': 'SCN4A', 'Ensembl': 'ENS...\n", " \n", " \n", " 4\n", - " SCN4A\n", + " DMD\n", " HGNC\n", - " ENSG00000007314\n", + " ENSG00000198947\n", " Ensembl\n", " [{'anatomical_entity_id': 'UBERON_0000178', 'a...\n", - " [{'disease_name': 'Potassium aggravated myoton...\n", - " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", + " [{'disease_name': 'Muscular Dystrophy, Duchenn...\n", " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", - " [{'pathway_label': 'Phase 0 - rapid depolarisa...\n", - " [{'go_id': 'GO:0035725', 'go_name': 'sodium io...\n", - " [{'chembl_id': 'CHEMBL1077896', 'drugbank_id':...\n", - " [{'stringdb_link_to': 'DMD', 'Ensembl': 'ENSP0...\n", + " [{'pathway_id': 'WP5356', 'pathway_label': 'Af...\n", + " [{'pathway_label': 'Striated Muscle Contractio...\n", + " [{'go_id': 'GO:0016010', 'go_name': 'dystrophi...\n", + " [{'chembl_id': 'CHEMBL2108278', 'drugbank_id':...\n", + " [{'compound_name': nan, 'inchikey': nan, 'smil...\n", + " [{'pubchem_assay_id': nan, 'assay_type': nan, ...\n", + " [{'stringdb_link_to': 'SCN4A', 'Ensembl': 'ENS...\n", " \n", " \n", "\n", @@ -3721,11 +3919,11 @@ ], "text/plain": [ " identifier identifier.source target target.source \\\n", - "0 AHR HGNC ENSG00000106546 Ensembl \n", + "0 CHRNG HGNC ENSG00000196811 Ensembl \n", "1 CHRNG HGNC ENSG00000196811 Ensembl \n", - "2 DMD HGNC ENSG00000198947 Ensembl \n", - "3 HTR3A HGNC ENSG00000166736 Ensembl \n", - "4 SCN4A HGNC ENSG00000007314 Ensembl \n", + 
"2 CHRNG HGNC ENSG00000196811 Ensembl \n", + "3 CHRNG HGNC ENSG00000196811 Ensembl \n", + "4 DMD HGNC ENSG00000198947 Ensembl \n", "\n", " Bgee_gene_expression_levels \\\n", "0 [{'anatomical_entity_id': 'UBERON_0000178', 'a... \n", @@ -3735,56 +3933,70 @@ "4 [{'anatomical_entity_id': 'UBERON_0000178', 'a... \n", "\n", " DISGENET_diseases \\\n", - "0 [{'disease_name': 'Mammary Neoplasms', 'HPO': ... \n", + "0 [{'disease_name': 'Multiple pterygium syndrome... \n", "1 [{'disease_name': 'Multiple pterygium syndrome... \n", - "2 [{'disease_name': 'Muscular Dystrophy, Duchenn... \n", - "3 [{'disease_name': 'Schizophrenia', 'HPO': 'HPO... \n", - "4 [{'disease_name': 'Potassium aggravated myoton... \n", + "2 [{'disease_name': 'Multiple pterygium syndrome... \n", + "3 [{'disease_name': 'Multiple pterygium syndrome... \n", + "4 [{'disease_name': 'Muscular Dystrophy, Duchenn... \n", "\n", " MINERVA \\\n", - "0 [{'pathway_id': 953.0, 'pathway_label': 'Kynur... \n", + "0 [{'pathway_id': nan, 'pathway_label': nan, 'pa... \n", "1 [{'pathway_id': nan, 'pathway_label': nan, 'pa... \n", "2 [{'pathway_id': nan, 'pathway_label': nan, 'pa... \n", "3 [{'pathway_id': nan, 'pathway_label': nan, 'pa... \n", "4 [{'pathway_id': nan, 'pathway_label': nan, 'pa... \n", "\n", " WikiPathways \\\n", - "0 [{'pathway_id': 'WP4673', 'pathway_label': 'Ma... \n", + "0 [{'pathway_id': nan, 'pathway_label': nan, 'pa... \n", "1 [{'pathway_id': nan, 'pathway_label': nan, 'pa... \n", - "2 [{'pathway_id': 'WP5356', 'pathway_label': 'Af... \n", - "3 [{'pathway_id': 'WP706', 'pathway_label': 'Sud... \n", - "4 [{'pathway_id': nan, 'pathway_label': nan, 'pa... \n", + "2 [{'pathway_id': nan, 'pathway_label': nan, 'pa... \n", + "3 [{'pathway_id': nan, 'pathway_label': nan, 'pa... \n", + "4 [{'pathway_id': 'WP5356', 'pathway_label': 'Af... \n", "\n", " OpenTargets_reactome \\\n", - "0 [{'pathway_label': 'Endogenous sterols', 'path... \n", + "0 [{'pathway_label': 'Highly sodium permeable po... 
\n", "1 [{'pathway_label': 'Highly sodium permeable po... \n", - "2 [{'pathway_label': 'Striated Muscle Contractio... \n", - "3 [{'pathway_label': 'Neurotransmitter receptors... \n", - "4 [{'pathway_label': 'Phase 0 - rapid depolarisa... \n", + "2 [{'pathway_label': 'Highly sodium permeable po... \n", + "3 [{'pathway_label': 'Highly sodium permeable po... \n", + "4 [{'pathway_label': 'Striated Muscle Contractio... \n", "\n", " OpenTargets_go \\\n", - "0 [{'go_id': 'GO:0005667', 'go_name': 'transcrip... \n", + "0 [{'go_id': 'GO:0015464', 'go_name': 'acetylcho... \n", "1 [{'go_id': 'GO:0015464', 'go_name': 'acetylcho... \n", - "2 [{'go_id': 'GO:0016010', 'go_name': 'dystrophi... \n", - "3 [{'go_id': 'GO:1904602', 'go_name': 'serotonin... \n", - "4 [{'go_id': 'GO:0035725', 'go_name': 'sodium io... \n", + "2 [{'go_id': 'GO:0015464', 'go_name': 'acetylcho... \n", + "3 [{'go_id': 'GO:0015464', 'go_name': 'acetylcho... \n", + "4 [{'go_id': 'GO:0016010', 'go_name': 'dystrophi... \n", "\n", " OpenTargets_gene_compounds \\\n", - "0 [{'chembl_id': 'CHEMBL259571', 'drugbank_id': ... \n", + "0 [{'chembl_id': 'CHEMBL1200641', 'drugbank_id':... \n", "1 [{'chembl_id': 'CHEMBL1200641', 'drugbank_id':... \n", - "2 [{'chembl_id': 'CHEMBL2108278', 'drugbank_id':... \n", - "3 [{'chembl_id': 'CHEMBL56564', 'drugbank_id': '... \n", - "4 [{'chembl_id': 'CHEMBL1077896', 'drugbank_id':... \n", + "2 [{'chembl_id': 'CHEMBL1200641', 'drugbank_id':... \n", + "3 [{'chembl_id': 'CHEMBL1200641', 'drugbank_id':... \n", + "4 [{'chembl_id': 'CHEMBL2108278', 'drugbank_id':... \n", + "\n", + " MolMeDB_transporter_inhibitor \\\n", + "0 [{'compound_name': nan, 'inchikey': nan, 'smil... \n", + "1 [{'compound_name': nan, 'inchikey': nan, 'smil... \n", + "2 [{'compound_name': nan, 'inchikey': nan, 'smil... \n", + "3 [{'compound_name': nan, 'inchikey': nan, 'smil... \n", + "4 [{'compound_name': nan, 'inchikey': nan, 'smil... 
\n", + "\n", + " PubChem_assays \\\n", + "0 [{'pubchem_assay_id': nan, 'assay_type': nan, ... \n", + "1 [{'pubchem_assay_id': nan, 'assay_type': nan, ... \n", + "2 [{'pubchem_assay_id': nan, 'assay_type': nan, ... \n", + "3 [{'pubchem_assay_id': nan, 'assay_type': nan, ... \n", + "4 [{'pubchem_assay_id': nan, 'assay_type': nan, ... \n", "\n", " StringDB_ppi \n", - "0 [{'stringdb_link_to': nan, 'Ensembl': nan, 'sc... \n", + "0 [{'stringdb_link_to': 'SCN4A', 'Ensembl': 'ENS... \n", "1 [{'stringdb_link_to': 'SCN4A', 'Ensembl': 'ENS... \n", "2 [{'stringdb_link_to': 'SCN4A', 'Ensembl': 'ENS... \n", - "3 [{'stringdb_link_to': nan, 'Ensembl': nan, 'sc... \n", - "4 [{'stringdb_link_to': 'DMD', 'Ensembl': 'ENSP0... " + "3 [{'stringdb_link_to': 'SCN4A', 'Ensembl': 'ENS... \n", + "4 [{'stringdb_link_to': 'SCN4A', 'Ensembl': 'ENS... " ] }, - "execution_count": 35, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -3800,7 +4012,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -3892,7 +4104,7 @@ "4 [{'chembl_id': 'CHEMBL2103743', 'drugbank_id':... 
" ] }, - "execution_count": 36, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -3903,7 +4115,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -3919,11 +4131,11 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ - "with open(\"networkx_graph.pkl\", \"wb\") as out:\n", + "with open(\"networkx_graph_test.pkl\", \"wb\") as out:\n", " pickle.dump(pygraph, out)" ] }, @@ -3949,7 +4161,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 10, "metadata": {}, "outputs": [ { diff --git a/examples/usecases/PCS/PCS_usecase.ipynb b/examples/usecases/PCS/PCS_usecase.ipynb index a99e2151..35ae98a2 100644 --- a/examples/usecases/PCS/PCS_usecase.ipynb +++ b/examples/usecases/PCS/PCS_usecase.ipynb @@ -4,24 +4,23 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Example: PCS use case\n", + "## Example: PCS use case\n", "\n", - "This notebook shows all the steps to generate PCS KG." + "This notebook shows all the steps to generate PCS KG and the downstream analysis." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Set up the environment" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Current directory: E:\\BioDataFuse\\pyBiodatafuse\n" - ] - } - ], + "outputs": [], "source": [ "new_path = \"E:\\BioDataFuse\\pyBiodatafuse\"\n", "\n", @@ -30,8 +29,7 @@ "os.chdir(new_path)\n", "\n", "# Set the current working directory\n", - "current_dir = os.getcwd()\n", - "print(\"Current directory:\", current_dir)" + "current_dir = os.getcwd()" ] }, { @@ -84,7 +82,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Number of genes: 2023\n" + "Total number of genes: 2023\n" ] }, { @@ -152,7 +150,7 @@ ], "source": [ "data_input = pd.read_csv(os.path.join(os.getcwd(), r\"examples\\usecases\\PCS\\PCS_gene_list.csv\"))\n", - "print(\"Number of genes:\", len(data_input))\n", + "print(\"Total number of genes:\", len(data_input.drop_duplicates()))\n", "data_input.head()" ] }, @@ -168,6 +166,13 @@ "execution_count": 4, "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of genes with mapping in BridgeDb: 1667\n" + ] + }, { "data": { "text/html": [ @@ -198,50 +203,50 @@ " \n", " \n", " 0\n", - " LOC729609\n", + " DMP1\n", " HGNC\n", - " \n", - " NaN\n", + " Q13316\n", + " Uniprot-TrEMBL\n", " \n", " \n", " 1\n", - " LOC105374060\n", + " DMP1\n", " HGNC\n", - " \n", - " NaN\n", + " HGNC:2932\n", + " HGNC Accession Number\n", " \n", " \n", " 2\n", " DMP1\n", " HGNC\n", - " XP_011530008\n", - " RefSeq\n", + " DMP1\n", + " HGNC\n", " \n", " \n", " 3\n", " DMP1\n", " HGNC\n", - " 2735121\n", - " Affy\n", + " ENSG00000152592\n", + " Ensembl\n", " \n", " \n", " 4\n", " DMP1\n", " HGNC\n", - " 2735120\n", - " Affy\n", + " 1758\n", + " NCBI Gene\n", " \n", " \n", "\n", "" ], "text/plain": [ - " identifier identifier.source target target.source\n", - "0 LOC729609 HGNC NaN\n", 
- "1 LOC105374060 HGNC NaN\n", - "2 DMP1 HGNC XP_011530008 RefSeq\n", - "3 DMP1 HGNC 2735121 Affy\n", - "4 DMP1 HGNC 2735120 Affy" + " identifier identifier.source target target.source\n", + "0 DMP1 HGNC Q13316 Uniprot-TrEMBL\n", + "1 DMP1 HGNC HGNC:2932 HGNC Accession Number\n", + "2 DMP1 HGNC DMP1 HGNC\n", + "3 DMP1 HGNC ENSG00000152592 Ensembl\n", + "4 DMP1 HGNC 1758 NCBI Gene" ] }, "execution_count": 4, @@ -250,46 +255,50 @@ } ], "source": [ - "# bridgdb_df, bridgdb_metadata = id_mapper.bridgedb_xref(\n", + "# bridgedb_df, bridgedb_metadata = id_mapper.bridgedb_xref(\n", "# identifiers=data_input,\n", "# input_species=\"Human\",\n", "# input_datasource=\"HGNC\",\n", "# output_datasource=\"All\",\n", "# )\n", - "# bridgdb_df.to_pickle(os.path.join(os.getcwd(), \"examples\", \"usecases\", \"PCS\", \"bridgdb_df.pkl\"))\n", - "# with open(os.path.join(os.getcwd(), \"examples\", \"usecases\", \"PCS\", \"bridgdb_metadata.pkl\"), \"wb\") as file:\n", - "# pickle.dump(bridgdb_metadata, file)\n", - "with open(os.path.join(os.getcwd(), \"examples\", \"usecases\", \"PCS\", \"bridgdb_df.pkl\"), \"rb\") as file:\n", - " bridgdb_df = pickle.load(file)\n", + "# bridgedb_df.to_pickle(os.path.join(os.getcwd(), \"examples\", \"usecases\", \"PCS\", \"bridgedb_df.pkl\"))\n", + "# with open(os.path.join(os.getcwd(), \"examples\", \"usecases\", \"PCS\", \"bridgedb_metadata.pkl\"), \"wb\") as file:\n", + "# pickle.dump(bridgedb_metadata, file)\n", + "with open(\n", + " os.path.join(os.getcwd(), \"examples\", \"usecases\", \"PCS\", \"bridgedb_df.pkl\"), \"rb\"\n", + ") as file:\n", + " bridgedb_df = pickle.load(file)\n", "with open(\n", - " os.path.join(os.getcwd(), \"examples\", \"usecases\", \"PCS\", \"bridgdb_metadata.pkl\"), \"rb\"\n", + " os.path.join(os.getcwd(), \"examples\", \"usecases\", \"PCS\", \"bridgedb_metadata.pkl\"), \"rb\"\n", ") as file:\n", - " bridgdb_metadata = pickle.load(file)\n", + " bridgedb_metadata = pickle.load(file)\n", "\n", - "bridgdb_df.head()" + 
"print(\"Number of genes with mapping in BridgeDb:\", len(bridgedb_df[\"identifier\"].unique()))\n", + "bridgedb_df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Disease annotatation from DisGeNet\n" + "### Gene to Disease annotatation from DisGeNet\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "ADD your DISGENET API KEY in the main folder\n", + "**ADD your DISGENET API KEY in the main folder**\n", "\n", - "**1)** Create a .env File and add DISGENET_API_KEY to it:\n", + " **1)** Create a ``.env`` file and add DISGENET_API_KEY to it:\n", "\n", - "DISGENET_API_KEY=\"your-API-key-value\"\n", + " DISGENET_API_KEY=\"your-API-key-value\"\n", "\n", - "**2)** Install *python-dotenv*:\n", - "```\n", - "pip install python-dotenv\n", - "```" + " **2)** Install *python-dotenv*:\n", + " \n", + " ```\n", + " pip install python-dotenv\n", + " ```" ] }, { @@ -405,7 +414,7 @@ ], "source": [ "# disgenet_df, disgenet_metadata = disgenet.get_gene_disease(\n", - "# api_key=disgenet_api_key, bridgedb_df=bridgdb_df\n", + "# api_key=disgenet_api_key, bridgedb_df=bridgedb_df\n", "# )\n", "# disgenet_df.to_pickle(os.path.join(os.getcwd(), \"examples\", \"usecases\", \"PCS\", \"datasources\", \"disgenet_df.pkl\"))\n", "# with open(os.path.join(os.getcwd(), \"examples\", \"usecases\", \"PCS\", \"datasources\", \"disgenet_metadata.pkl\"), \"wb\") as file:\n", @@ -508,7 +517,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Disease to compound annotation from OpenTargets" + "### Disease to Compound annotation from OpenTargets" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### Prepare the input to use DISGENET output as seed for OpenTargets\n" ] }, { @@ -598,11 +614,19 @@ } ], "source": [ - "# Prepare the input to use DISGENET output as seed for OpenTargets\n", "disease_mapping_df = create_harmonized_input_file(disgenet_df, DISGENET_DISEASE_COL, \"EFO\", \"UMLS\")\n", "disease_mapping_df.head()" ] }, + 
{ + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### Disease to Compound annotation\n", + "\n", + "TODO: to run again." + ] + }, { "cell_type": "code", "execution_count": 9, @@ -708,8 +732,27 @@ "# opentargets_disease_compound_metadata,\n", "# ) = opentargets.get_disease_compound_interactions(disease_mapping_df)\n", "\n", - "# opentargets_disease_compound_df.to_pickle(os.path.join(os.getcwd(), \"examples\", \"usecases\", \"PCS\", \"datasources\", \"opentargets_disease_compound_df.pkl\"))\n", - "# with open(os.path.join(os.getcwd(), \"examples\", \"usecases\", \"PCS\", \"datasources\", \"opentargets_disease_compound_metadata.pkl\"), \"wb\") as file:\n", + "# opentargets_disease_compound_df.to_pickle(\n", + "# os.path.join(\n", + "# os.getcwd(),\n", + "# \"examples\",\n", + "# \"usecases\",\n", + "# \"PCS\",\n", + "# \"datasources\",\n", + "# \"opentargets_disease_compound_df.pkl\",\n", + "# )\n", + "# )\n", + "# with open(\n", + "# os.path.join(\n", + "# os.getcwd(),\n", + "# \"examples\",\n", + "# \"usecases\",\n", + "# \"PCS\",\n", + "# \"datasources\",\n", + "# \"opentargets_disease_compound_metadata.pkl\",\n", + "# ),\n", + "# \"wb\",\n", + "# ) as file:\n", "# pickle.dump(opentargets_disease_compound_metadata, file)\n", "\n", "with open(\n", @@ -849,7 +892,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Compounds from OpenTarget" + "### Gene to Compound annotation from OpenTarget" ] }, { @@ -953,7 +996,7 @@ ], "source": [ "# opentargets_compound_df, opentargets_compound_metadata = opentargets.get_gene_compound_interactions(\n", - "# bridgedb_df=bridgdb_df\n", + "# bridgedb_df=bridgedb_df\n", "# )\n", "\n", "# opentargets_compound_df.to_pickle(os.path.join(os.getcwd(), \"examples\", \"usecases\", \"PCS\", \"datasources\", \"opentargets_compound_df.pkl\"))\n", @@ -1043,7 +1086,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Pathways from MINERVA" + "### Gene to Pathway annotation from MINERVA" ] }, { @@ -1147,7 
+1190,7 @@ ], "source": [ "# minerva_df, minerva_metadata = minerva.get_gene_minerva_pathways(\n", - "# bridgdb_df, map_name=\"COVID19 Disease Map\"\n", + "# bridgedb_df, map_name=\"COVID19 Disease Map\"\n", "# )\n", "# minerva_df.to_pickle(os.path.join(os.getcwd(), \"examples\", \"usecases\", \"PCS\", \"datasources\", \"minerva_df.pkl\"))\n", "# with open(\n", @@ -1193,7 +1236,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Pathways from WikiPathways" + "### Gene to Pathway annotation from WikiPathways" ] }, { @@ -1296,7 +1339,7 @@ } ], "source": [ - "# wikipathways_df, wikipathways_metadata = wikipathways.get_gene_wikipathways(bridgedb_df=bridgdb_df)\n", + "# wikipathways_df, wikipathways_metadata = wikipathways.get_gene_wikipathways(bridgedb_df=bridgedb_df)\n", "# wikipathways_df.to_pickle(\n", "# os.path.join(os.getcwd(), \"examples\", \"usecases\", \"PCS\", \"datasources\", \"wikipathways_df.pkl\")\n", "# )\n", @@ -1379,7 +1422,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Reactome pathways from OpenTargets" + "### Gene to Reactome Pathway from OpenTargets" ] }, { @@ -1483,7 +1526,7 @@ ], "source": [ "# opentargets_reactome_df, opentargets_reactome_metadata = opentargets.get_gene_reactome_pathways(\n", - "# bridgedb_df=bridgdb_df\n", + "# bridgedb_df=bridgedb_df\n", "# )\n", "# opentargets_reactome_df.to_pickle(os.path.join(os.getcwd(), \"examples\", \"usecases\", \"PCS\", \"datasources\", \"opentargets_reactome_df.pkl\"))\n", "# with open(os.path.join(os.getcwd(), \"examples\", \"usecases\", \"PCS\", \"datasources\", \"opentargets_reactome_metadata.pkl\"), \"wb\") as file:\n", @@ -1542,7 +1585,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Gene Ontology from OpenTargets" + "### Gene Ontology annotation from OpenTargets" ] }, { @@ -1645,7 +1688,7 @@ } ], "source": [ - "# opentargets_go_df, opentargets_go_metadata = opentargets.get_gene_go_process(bridgedb_df=bridgdb_df)\n", + "# opentargets_go_df, 
opentargets_go_metadata = opentargets.get_gene_go_process(bridgedb_df=bridgedb_df)\n", "# opentargets_go_df.to_pickle(os.path.join(os.getcwd(), \"examples\", \"usecases\", \"PCS\", \"datasources\", \"opentargets_go_df.pkl\"))\n", "# with open(os.path.join(os.getcwd(), \"examples\", \"usecases\", \"PCS\", \"datasources\", \"opentargets_go_metadata.pkl\"), \"wb\") as file:\n", "# pickle.dump(opentargets_go_metadata, file)\n", @@ -1701,7 +1744,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Protein-Protein interactions from STRING" + "### Protein-Protein interaction from STRING" ] }, { @@ -1804,7 +1847,7 @@ } ], "source": [ - "# string_ppi_df, string_ppi_metadata = stringdb.get_ppi(bridgedb_df=bridgdb_df)\n", + "# string_ppi_df, string_ppi_metadata = stringdb.get_ppi(bridgedb_df=bridgedb_df)\n", "# string_ppi_df.to_pickle(os.path.join(os.getcwd(), \"examples\", \"usecases\", \"PCS\", \"datasources\", \"string_ppi_df.pkl\"))\n", "# with open(os.path.join(os.getcwd(), \"examples\", \"usecases\", \"PCS\", \"datasources\", \"string_ppi_metadata.pkl\"), \"wb\") as file:\n", "# pickle.dump(string_ppi_metadata, file)\n", @@ -1865,19 +1908,19 @@ "outputs": [], "source": [ "combined_df = combine_sources(\n", + " bridgedb_df,\n", " [\n", " disgenet_df,\n", - " opentargets_disease_compound_df,\n", " opentargets_compound_df,\n", " minerva_df,\n", " wikipathways_df,\n", " opentargets_reactome_df,\n", " opentargets_go_df,\n", " string_ppi_df,\n", - " ]\n", + " ],\n", ")\n", "combined_metadata = create_or_append_to_metadata(\n", - " bridgdb_metadata,\n", + " bridgedb_metadata,\n", " [\n", " disgenet_metadata,\n", " opentargets_disease_compound_metadata,\n", @@ -1922,7 +1965,6 @@ " target\n", " target.source\n", " DISGENET_diseases\n", - " OpenTargets_disease_compounds\n", " OpenTargets_gene_compounds\n", " MINERVA\n", " WikiPathways\n", @@ -1934,62 +1976,58 @@ " \n", " \n", " 0\n", - " A2ML1\n", + " DMP1\n", " HGNC\n", - " 144568\n", - " NCBI Gene\n", - " 
[{'disease_name': 'Noonan Syndrome', 'HPO': ''...\n", - " [{'chembl_id': 'CHEMBL1276308', 'drugbank_id':...\n", + " ENSG00000152592\n", + " Ensembl\n", + " [{'disease_name': 'Hypophosphatemic Rickets', ...\n", " [{'chembl_id': nan, 'drugbank_id': nan, 'compo...\n", " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", - " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", - " [{'pathway_label': nan, 'pathway_id': nan}]\n", - " [{'go_id': 'GO:0052548', 'go_name': 'regulatio...\n", + " [{'pathway_id': 'WP3971', 'pathway_label': 'OS...\n", + " [{'pathway_label': 'ECM proteoglycans', 'pathw...\n", + " [{'go_id': 'GO:0005788', 'go_name': 'endoplasm...\n", " [{'stringdb_link_to': 'TNFRSF11B', 'Ensembl': ...\n", " \n", " \n", " 1\n", - " AAMDC\n", + " PNLIP\n", " HGNC\n", - " 28971\n", - " NCBI Gene\n", - " [{'disease_name': nan, 'HPO': nan, 'NCI': nan,...\n", - " [{'chembl_id': 'CHEMBL1431', 'drugbank_id': 'D...\n", - " [{'chembl_id': nan, 'drugbank_id': nan, 'compo...\n", + " ENSG00000175535\n", + " Ensembl\n", + " [{'disease_name': 'Pancreatic Lipase Deficienc...\n", + " [{'chembl_id': 'CHEMBL175247', 'drugbank_id': ...\n", " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", - " [{'pathway_label': nan, 'pathway_id': nan}]\n", - " [{'go_id': 'GO:0005737', 'go_name': 'cytoplasm...\n", + " [{'pathway_label': 'Retinoid metabolism and tr...\n", + " [{'go_id': 'GO:0004806', 'go_name': 'triglycer...\n", " [{'stringdb_link_to': 'LIPE', 'Ensembl': 'ENSP...\n", " \n", " \n", " 2\n", - " ABCA1\n", + " OR4N3P\n", " HGNC\n", - " 19\n", - " NCBI Gene\n", - " [{'disease_name': 'Tangier Disease', 'HPO': ''...\n", - " [{'chembl_id': 'CHEMBL306823', 'drugbank_id': ...\n", - " [{'chembl_id': 'CHEMBL608', 'drugbank_id': 'DB...\n", + " ENSG00000259435\n", + " Ensembl\n", + " [{'disease_name': nan, 'HPO': nan, 'NCI': nan,...\n", + " [{'chembl_id': nan, 'drugbank_id': nan, 'compo...\n", " [{'pathway_id': nan, 'pathway_label': 
nan, 'pa...\n", - " [{'pathway_id': 'WP4718', 'pathway_label': 'Ch...\n", - " [{'pathway_label': 'PPARA activates gene expre...\n", - " [{'go_id': 'GO:0005524', 'go_name': 'ATP bindi...\n", + " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", + " [{'pathway_label': nan, 'pathway_id': nan}]\n", + " [{'go_id': nan, 'go_name': nan, 'go_type': nan}]\n", " [{'stringdb_link_to': nan, 'Ensembl': nan, 'sc...\n", " \n", " \n", " 3\n", - " ABCB1\n", + " SLC6A14\n", " HGNC\n", - " 5243\n", - " NCBI Gene\n", - " [{'disease_name': 'Epilepsy', 'HPO': 'HPO_HP:0...\n", - " [{'chembl_id': 'CHEMBL704', 'drugbank_id': 'DB...\n", - " [{'chembl_id': 'CHEMBL1086218', 'drugbank_id':...\n", + " ENSG00000268104\n", + " Ensembl\n", + " [{'disease_name': 'Cystic Fibrosis', 'HPO': ''...\n", + " [{'chembl_id': nan, 'drugbank_id': nan, 'compo...\n", " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", - " [{'pathway_id': 'WP3672', 'pathway_label': 'ln...\n", - " [{'pathway_label': 'Abacavir transmembrane tra...\n", - " [{'go_id': 'GO:0008559', 'go_name': 'ABC-type ...\n", + " [{'pathway_id': 'WP2882', 'pathway_label': 'Nu...\n", + " [{'pathway_label': 'Amino acid transport acros...\n", + " [{'go_id': 'GO:0015657', 'go_name': 'branched-...\n", " [{'stringdb_link_to': 'SLC7A11', 'Ensembl': 'E...\n", " \n", " \n", @@ -1997,29 +2035,23 @@ "" ], "text/plain": [ - " identifier identifier.source target target.source \\\n", - "0 A2ML1 HGNC 144568 NCBI Gene \n", - "1 AAMDC HGNC 28971 NCBI Gene \n", - "2 ABCA1 HGNC 19 NCBI Gene \n", - "3 ABCB1 HGNC 5243 NCBI Gene \n", + " identifier identifier.source target target.source \\\n", + "0 DMP1 HGNC ENSG00000152592 Ensembl \n", + "1 PNLIP HGNC ENSG00000175535 Ensembl \n", + "2 OR4N3P HGNC ENSG00000259435 Ensembl \n", + "3 SLC6A14 HGNC ENSG00000268104 Ensembl \n", "\n", " DISGENET_diseases \\\n", - "0 [{'disease_name': 'Noonan Syndrome', 'HPO': ''... \n", - "1 [{'disease_name': nan, 'HPO': nan, 'NCI': nan,... 
\n", - "2 [{'disease_name': 'Tangier Disease', 'HPO': ''... \n", - "3 [{'disease_name': 'Epilepsy', 'HPO': 'HPO_HP:0... \n", - "\n", - " OpenTargets_disease_compounds \\\n", - "0 [{'chembl_id': 'CHEMBL1276308', 'drugbank_id':... \n", - "1 [{'chembl_id': 'CHEMBL1431', 'drugbank_id': 'D... \n", - "2 [{'chembl_id': 'CHEMBL306823', 'drugbank_id': ... \n", - "3 [{'chembl_id': 'CHEMBL704', 'drugbank_id': 'DB... \n", + "0 [{'disease_name': 'Hypophosphatemic Rickets', ... \n", + "1 [{'disease_name': 'Pancreatic Lipase Deficienc... \n", + "2 [{'disease_name': nan, 'HPO': nan, 'NCI': nan,... \n", + "3 [{'disease_name': 'Cystic Fibrosis', 'HPO': ''... \n", "\n", " OpenTargets_gene_compounds \\\n", "0 [{'chembl_id': nan, 'drugbank_id': nan, 'compo... \n", - "1 [{'chembl_id': nan, 'drugbank_id': nan, 'compo... \n", - "2 [{'chembl_id': 'CHEMBL608', 'drugbank_id': 'DB... \n", - "3 [{'chembl_id': 'CHEMBL1086218', 'drugbank_id':... \n", + "1 [{'chembl_id': 'CHEMBL175247', 'drugbank_id': ... \n", + "2 [{'chembl_id': nan, 'drugbank_id': nan, 'compo... \n", + "3 [{'chembl_id': nan, 'drugbank_id': nan, 'compo... \n", "\n", " MINERVA \\\n", "0 [{'pathway_id': nan, 'pathway_label': nan, 'pa... \n", @@ -2028,22 +2060,22 @@ "3 [{'pathway_id': nan, 'pathway_label': nan, 'pa... \n", "\n", " WikiPathways \\\n", - "0 [{'pathway_id': nan, 'pathway_label': nan, 'pa... \n", + "0 [{'pathway_id': 'WP3971', 'pathway_label': 'OS... \n", "1 [{'pathway_id': nan, 'pathway_label': nan, 'pa... \n", - "2 [{'pathway_id': 'WP4718', 'pathway_label': 'Ch... \n", - "3 [{'pathway_id': 'WP3672', 'pathway_label': 'ln... \n", + "2 [{'pathway_id': nan, 'pathway_label': nan, 'pa... \n", + "3 [{'pathway_id': 'WP2882', 'pathway_label': 'Nu... \n", "\n", " OpenTargets_reactome \\\n", - "0 [{'pathway_label': nan, 'pathway_id': nan}] \n", - "1 [{'pathway_label': nan, 'pathway_id': nan}] \n", - "2 [{'pathway_label': 'PPARA activates gene expre... \n", - "3 [{'pathway_label': 'Abacavir transmembrane tra... 
\n", + "0 [{'pathway_label': 'ECM proteoglycans', 'pathw... \n", + "1 [{'pathway_label': 'Retinoid metabolism and tr... \n", + "2 [{'pathway_label': nan, 'pathway_id': nan}] \n", + "3 [{'pathway_label': 'Amino acid transport acros... \n", "\n", " OpenTargets_go \\\n", - "0 [{'go_id': 'GO:0052548', 'go_name': 'regulatio... \n", - "1 [{'go_id': 'GO:0005737', 'go_name': 'cytoplasm... \n", - "2 [{'go_id': 'GO:0005524', 'go_name': 'ATP bindi... \n", - "3 [{'go_id': 'GO:0008559', 'go_name': 'ABC-type ... \n", + "0 [{'go_id': 'GO:0005788', 'go_name': 'endoplasm... \n", + "1 [{'go_id': 'GO:0004806', 'go_name': 'triglycer... \n", + "2 [{'go_id': nan, 'go_name': nan, 'go_type': nan}] \n", + "3 [{'go_id': 'GO:0015657', 'go_name': 'branched-... \n", "\n", " StringDB_ppi \n", "0 [{'stringdb_link_to': 'TNFRSF11B', 'Ensembl': ... \n", @@ -2073,8 +2105,8 @@ " 'metadata': {'lastUpdate': '10 Jul 2024', 'version': 'DISGENET v24.2'},\n", " 'query': {'size': 1590,\n", " 'input_type': 'NCBI Gene',\n", - " 'time': '0:30:50.957534',\n", - " 'date': '2024-08-27 10:06:00',\n", + " 'time': '0:31:18.977092',\n", + " 'date': '2024-09-11 14:58:51',\n", " 'url': 'https://api.disgenet.com/api/v1/gda/summary',\n", " 'number_of_added_nodes': 2913,\n", " 'number_of_added_edges': 7607}},\n", @@ -2097,18 +2129,18 @@ " 'data_version': {'dataVersion': {'year': '24', 'month': '06'}}},\n", " 'query': {'size': 1675,\n", " 'input_type': 'Ensembl',\n", - " 'time': '0:00:00.216112',\n", - " 'date': '2024-08-27 11:49:06',\n", + " 'time': '0:00:00.194775',\n", + " 'date': '2024-09-11 16:22:49',\n", " 'url': 'https://api.platform.opentargets.org/api/v4/graphql',\n", " 'number_of_added_nodes': 445,\n", " 'number_of_added_edges': 571}},\n", " {'datasource': 'MINERVA',\n", - " 'metadata': {'source_version': '17.1.0'},\n", + " 'metadata': {'source_version': '17.1.3'},\n", " 'query': {'size': 1675,\n", " 'input_type': 'Ensembl',\n", " 'MINERVA project': 'COVID19 Disease Map',\n", - " 'time': '0:00:47.537505',\n", - " 
'date': '2024-08-27 11:34:05',\n", + " 'time': '0:00:49.696535',\n", + " 'date': '2024-09-11 16:29:05',\n", " 'url': 'https://covid19map.elixir-luxembourg.org/minerva/',\n", " 'number_of_added_nodes': 15,\n", " 'number_of_added_edges': 108}},\n", @@ -2116,8 +2148,8 @@ " 'metadata': {'source_version': 'WikiPathways RDF 20240810'},\n", " 'query': {'size': 1590,\n", " 'input_type': 'NCBI Gene',\n", - " 'time': '0:00:49.183906',\n", - " 'date': '2024-08-27 11:36:18',\n", + " 'time': '0:00:50.003086',\n", + " 'date': '2024-09-11 16:30:54',\n", " 'url': 'https://sparql.wikipathways.org/sparql',\n", " 'number_of_added_nodes': 678,\n", " 'number_of_added_edges': 3199}},\n", @@ -2128,8 +2160,8 @@ " 'data_version': {'dataVersion': {'year': '24', 'month': '06'}}},\n", " 'query': {'size': 1675,\n", " 'input_type': 'Ensembl',\n", - " 'time': '0:00:00.163974',\n", - " 'date': '2024-08-27 11:39:23',\n", + " 'time': '0:00:00.178402',\n", + " 'date': '2024-09-11 16:33:35',\n", " 'url': 'https://api.platform.opentargets.org/api/v4/graphql',\n", " 'number_of_added_nodes': 1154,\n", " 'number_of_added_edges': 3349}},\n", @@ -2140,8 +2172,8 @@ " 'data_version': {'dataVersion': {'year': '24', 'month': '06'}}},\n", " 'query': {'size': 1675,\n", " 'input_type': 'Ensembl',\n", - " 'time': '0:00:19.645037',\n", - " 'date': '2024-08-27 11:40:10',\n", + " 'time': '0:00:01.304462',\n", + " 'date': '2024-09-11 16:34:03',\n", " 'url': 'https://api.platform.opentargets.org/api/v4/graphql',\n", " 'number_of_added_nodes': 6687,\n", " 'number_of_added_edges': 23435}},\n", @@ -2150,8 +2182,8 @@ " 'query': {'size': 1675,\n", " 'input_type': 'Ensembl',\n", " 'number_of_added_edges': 8422,\n", - " 'time': '0:00:03.534867',\n", - " 'date': '2024-08-27 11:40:33',\n", + " 'time': '0:00:04.448563',\n", + " 'date': '2024-09-11 16:35:05',\n", " 'url': 'https://string-db.org/api'}},\n", " {'datasource': 'BridgeDb',\n", " 'metadata': {'source_version': {'java.version': '11.0.16',\n", @@ -2209,8 +2241,8 @@ " 
'SCHEMAVERSION: 3']},\n", " 'query': {'size': 2023,\n", " 'input_type': 'HGNC',\n", - " 'time': '0:00:00.924162',\n", - " 'date': '2024-08-27 11:56:24',\n", + " 'time': '0:00:00.888076',\n", + " 'date': '2024-09-11 14:26:48',\n", " 'url': 'https://webservice.bridgedb.org/Human/xrefsBatch',\n", " 'request_string': 'LOC729609\\tH\\nLOC105374060\\tH\\nDMP1\\tH\\nPNLIP\\tH\\nOR4N3P\\tH\\nSLC6A14\\tH\\nLOC101927239\\tH\\nDEFB105A\\tH\\nDEFB105B\\tH\\nGSTTP1\\tH\\nNEUROD1\\tH\\nRND1\\tH\\nVN1R10P\\tH\\nLOC440446\\tH\\nLOC152225\\tH\\nLOC101929341\\tH\\nPGLYRP3\\tH\\nLINC01533\\tH\\nLINC01090\\tH\\nSPEM1\\tH\\nC16orf82\\tH\\nMIR4432HG\\tH\\nLINC01169\\tH\\nFAM71A\\tH\\nRNASE10\\tH\\nKLF17\\tH\\nC9\\tH\\nARC\\tH\\nMYL10\\tH\\nGCM1\\tH\\nAIPL1\\tH\\nHSPA6\\tH\\nLOC101929124\\tH\\nC7orf65\\tH\\nSLC2A14\\tH\\nPNLIPRP2\\tH\\nNPAS4\\tH\\nLOC101060498\\tH\\nPROP1\\tH\\nELAVL3\\tH\\nLOC105747689\\tH\\nTNF\\tH\\nADAMTS4\\tH\\nPCDH10\\tH\\nLOC101927274\\tH\\nNR4A2\\tH\\nLOC102724612\\tH\\nCEACAM22P\\tH\\nSNAI1\\tH\\nSLC2A3\\tH\\nDLX3\\tH\\nID2\\tH\\nLOC151475\\tH\\nATF3\\tH\\nNKAIN4\\tH\\nASAP1-IT2\\tH\\nNOXRED1\\tH\\nDNM1P41\\tH\\nSLC7A11\\tH\\nC10orf82\\tH\\nULBP2\\tH\\nTPTE2P6\\tH\\nNR4A3\\tH\\nLOC399715\\tH\\nCNTN3\\tH\\nGEM\\tH\\nHSPA7\\tH\\nNCMAP\\tH\\nPNP\\tH\\nPLK2\\tH\\nATP2C2\\tH\\nTNFRSF10D\\tH\\nULBP3\\tH\\nHSPA5\\tH\\nEFHB\\tH\\nHSD17B13\\tH\\nWNK3\\tH\\nLINC01535\\tH\\nELL2\\tH\\nRND3\\tH\\nDUSP5\\tH\\nNRXN3\\tH\\nIPCEF1\\tH\\nZNF492\\tH\\nSDR16C5\\tH\\nCENPL\\tH\\nSOX11\\tH\\nMAFF\\tH\\nPRG4\\tH\\nPCDH17\\tH\\nCDKN1A\\tH\\nPELI1\\tH\\nTMEM169\\tH\\nTMEM236\\tH\\nEFNA5\\tH\\nGCH1\\tH\\nANGPTL4\\tH\\nMAP1LC3C\\tH\\nCHL1\\tH\\nMPZ\\tH\\nSERPINE1\\tH\\nSLC2A1\\tH\\nLRRC16A\\tH\\nFRZB\\tH\\nGLIS3\\tH\\nTIAM1\\tH\\nSRGAP1\\tH\\nSH2D4A\\tH\\nMYEF2\\tH\\nNT5E\\tH\\nVGLL3\\tH\\nPRTG\\tH\\nDPP4\\tH\\nKLF11\\tH\\nTAF13\\tH\\nSTRADB\\tH\\nPOMP\\tH\\nLAMTOR5\\tH\\nCCDC69\\tH\\nZNF32\\tH\\nIQSEC2\\tH\\nAPIP\\tH\\nGDF9\\tH\\nSCUBE2\\tH\\nC20orf24\\tH\\nZSWIM7\\tH\\nTIMM8B\\tH\\nLOC1
02724532\\tH\\nPRR16\\tH\\nAHRR\\tH\\nLEFTY2\\tH\\nIRX3\\tH\\nVMO1\\tH\\nPVALB\\tH\\nMT1DP\\tH\\nCALML5\\tH\\nLOC101929116\\tH\\nLOC101929694\\tH\\nLINC01205\\tH\\nLINC01241\\tH\\nTMPRSS11A\\tH\\nLOC101928942\\tH\\nLOC100507461\\tH\\nLINC01565\\tH\\nLOC101928358\\tH\\nSCGB1D4\\tH\\nTTR\\tH\\nLINC01284\\tH\\nSSX8\\tH\\nTMEM225\\tH\\nNCRNA00250\\tH\\nOR13D1\\tH\\nLINC01192\\tH\\nCALCB\\tH\\nLINC00411\\tH\\nLINC01227\\tH\\nMIR5689HG\\tH\\nLINC00615\\tH\\nGHSR\\tH\\nLOC105375556\\tH\\nCT45A5\\tH\\nLOC646029\\tH\\nZFP42\\tH\\nCT45A9\\tH\\nFLJ46066\\tH\\nCGA\\tH\\nLOC285692\\tH\\nLOC105369509\\tH\\nCLEC1B\\tH\\nHIST1H4A\\tH\\nDSCAM-IT1\\tH\\nCT45A2\\tH\\nCT45A8\\tH\\nLINC00928\\tH\\nBDKRB1\\tH\\nLOC105370586\\tH\\nTRIM51\\tH\\nLOC101927480\\tH\\nLINC01568\\tH\\nCASC17\\tH\\nLOC101929631\\tH\\nLINC01233\\tH\\nLOC101927948\\tH\\nOR13C5\\tH\\nSSX2\\tH\\nSSX2B\\tH\\nCACNA1C-IT3\\tH\\nLOC100500773\\tH\\nSPATA3\\tH\\nLOC101927374\\tH\\nFBXO47\\tH\\nLINC01493\\tH\\nLOC105369431\\tH\\nLOC105376468\\tH\\nOR5W2\\tH\\nREG4\\tH\\nCD5L\\tH\\nLINC01514\\tH\\nLOC105376331\\tH\\nLOC102723557\\tH\\nPISRT1\\tH\\nHIGD2B\\tH\\nPAGE1\\tH\\nMMP26\\tH\\nLOC101928602\\tH\\nLOC102723895\\tH\\nACTR3BP2\\tH\\nLOC101927363\\tH\\nHNRNPKP3\\tH\\nLOC101927188\\tH\\nDISC1-IT1\\tH\\nLOC102467222\\tH\\nFAM9B\\tH\\nGLOD5\\tH\\nC2orf48\\tH\\nLOC100288254\\tH\\nFRG2\\tH\\nGACAT3\\tH\\nFOXCUT\\tH\\nLOC101927357\\tH\\nLOC101929260\\tH\\nOR13C2\\tH\\nLOC101929754\\tH\\nLOC146513\\tH\\nOR2AT4\\tH\\nPBOV1\\tH\\nTFDP3\\tH\\nLOC101929420\\tH\\nHRAT17\\tH\\nOR6W1P\\tH\\nSSX9\\tH\\nSSX3\\tH\\nHMGA1P7\\tH\\nLINC00374\\tH\\nLINC01288\\tH\\nLINC00836\\tH\\nLINC01320\\tH\\nTRIM64\\tH\\nSDR16C6P\\tH\\nLOC729966\\tH\\nLOC105375014\\tH\\nLINC01441\\tH\\nSCNN1G\\tH\\nC7orf69\\tH\\nOPN1LW\\tH\\nKRTAP5-4\\tH\\nANKUB1\\tH\\nTMEM213\\tH\\nTFAP2D\\tH\\nDANT2\\tH\\nLOC101927419\\tH\\nTXNDC2\\tH\\nOR11A1\\tH\\nLINC01317\\tH\\nLOC101805491\\tH\\nLOC286083\\tH\\nLOC101929563\\tH\\nLINC01216\\tH\\nLINC01163\\tH\\nLOC101927166\\tH\\nPH
OX2B\\tH\\nLOC102467081\\tH\\nCT45A6\\tH\\nSND1-IT1\\tH\\nSSX4B\\tH\\nSSX4\\tH\\nSULT1E1\\tH\\nNOL4\\tH\\nZNF716\\tH\\nSUMO1P1\\tH\\nLOC440896\\tH\\nG6PC\\tH\\nMIR31HG\\tH\\nLOC101929259\\tH\\nHTR3C\\tH\\nLOC730100\\tH\\nMAB21L3\\tH\\nIL6\\tH\\nMIP\\tH\\nTRIM64B\\tH\\nCNGB1\\tH\\nLINC01531\\tH\\nFOXL2NB\\tH\\nCXCL8\\tH\\nSLC15A1\\tH\\nGABRB1\\tH\\nLINC00862\\tH\\nZPBP2\\tH\\nLOC101928992\\tH\\nDPPA4\\tH\\nPOU2F3\\tH\\nNUTM1\\tH\\nLOC105372440\\tH\\nSELE\\tH\\nGPR143\\tH\\nFSTL5\\tH\\nAXDND1\\tH\\nLINC01619\\tH\\nSAMD7\\tH\\nLOC100131257\\tH\\nABCC13\\tH\\nC17orf78\\tH\\nCRX\\tH\\nC12orf42\\tH\\nFOXG1\\tH\\nHTR3A\\tH\\nLOC644189\\tH\\nPNPLA1\\tH\\nLINC00880\\tH\\nTOP1P2\\tH\\nCAGE1\\tH\\nLINC00670\\tH\\nLOC101928231\\tH\\nFAM138C\\tH\\nRTP1\\tH\\nLOC101928617\\tH\\nSPAG11B\\tH\\nLOC101927691\\tH\\nSLC35G3\\tH\\nBCO1\\tH\\nSLC35G4\\tH\\nLINC00636\\tH\\nEPGN\\tH\\nPTGS2\\tH\\nPGC\\tH\\nLOC102724467\\tH\\nLOC101928103\\tH\\nTRPC5OS\\tH\\nLOC338694\\tH\\nLINC01036\\tH\\nDLX6\\tH\\nLINC00426\\tH\\nCXorf65\\tH\\nHP09025\\tH\\nLOC389273\\tH\\nDPCR1\\tH\\nC5orf60\\tH\\nPCSK1\\tH\\nLOC494141\\tH\\nGADD45B\\tH\\nC1orf87\\tH\\nANKS4B\\tH\\nJAKMIP2\\tH\\nLINC00266-3\\tH\\nDRAIC\\tH\\nTCAM1P\\tH\\nMIR202HG\\tH\\nSPRR2F\\tH\\nFAM138B\\tH\\nLINC00907\\tH\\nCCL19\\tH\\nASCL1\\tH\\nNUP210L\\tH\\nLINC01170\\tH\\nLINC00264\\tH\\nANKRD7\\tH\\nLOC102724601\\tH\\nSH2D6\\tH\\nFAM138F\\tH\\nFAM138A\\tH\\nGYPE\\tH\\nDDX4\\tH\\nIL5RA\\tH\\nTNFRSF9\\tH\\nLINC00368\\tH\\nLGSN\\tH\\nNEK5\\tH\\nLOC105374177\\tH\\nGLB1L3\\tH\\nLOC105379511\\tH\\nMT1A\\tH\\nFAM138E\\tH\\nTEKT3\\tH\\nSV2C\\tH\\nNR2E3\\tH\\nPLA2G10\\tH\\nLOC101927770\\tH\\nENO4\\tH\\nSBK2\\tH\\nA2ML1\\tH\\nLOC101927257\\tH\\nSPRY4-IT1\\tH\\nDNAH8\\tH\\nAK7\\tH\\nASXL3\\tH\\nTEX38\\tH\\nDNM1P35\\tH\\nCCL26\\tH\\nPPP3R2\\tH\\nCTSLP2\\tH\\nACBD7\\tH\\nSOX2-OT\\tH\\nSTC1\\tH\\nLOC284865\\tH\\nFDPSP2\\tH\\nMARVELD2\\tH\\nCDKL2\\tH\\nDCX\\tH\\nSHISA9\\tH\\nC4orf26\\tH\\nDNAH5\\tH\\nCD3G\\tH\\nTTC23L\\tH\\nPDE6A\\tH\\nAPOBEC3H\\tH\\nLINC003
11\\tH\\nCXCL2\\tH\\nLINC00632\\tH\\nSALL4\\tH\\nLOC105372582\\tH\\nFAM106CP\\tH\\nRASD1\\tH\\nCACNA1F\\tH\\nELAVL2\\tH\\nKIAA0087\\tH\\nGIPR\\tH\\nCIDEA\\tH\\nBCL11B\\tH\\nTNFRSF11B\\tH\\nCA13\\tH\\nANKRD20A9P\\tH\\nFAM106B\\tH\\nSEMA3E\\tH\\nGPRC5A\\tH\\nLOC285819\\tH\\nLOC730101\\tH\\nIL1RL1\\tH\\nRGS2\\tH\\nRYBP\\tH\\nC3orf52\\tH\\nHOOK1\\tH\\nPCDH9\\tH\\nCDH19\\tH\\nPGA4\\tH\\nSTARD4\\tH\\nCYP2B7P\\tH\\nTFPI2\\tH\\nPDK4\\tH\\nPGA5\\tH\\nKCNAB3\\tH\\nLINC00641\\tH\\nLOC102724571\\tH\\nSEZ6L\\tH\\nTNFSF9\\tH\\nZNF483\\tH\\nM1AP\\tH\\nFAAP24\\tH\\nKLHL15\\tH\\nCHD1\\tH\\nAP1S3\\tH\\nCDS1\\tH\\nCRTAC1\\tH\\nGYG2\\tH\\nGRHL1\\tH\\nFSIP1\\tH\\nSYT1\\tH\\nPLCXD3\\tH\\nLOC101928371\\tH\\nPEG10\\tH\\nMPZL3\\tH\\nZNF331\\tH\\nKCNQ1OT1\\tH\\nLOC388436\\tH\\nLOC79999\\tH\\nFAM106A\\tH\\nRPS6KA6\\tH\\nBCL2L15\\tH\\nTBX5\\tH\\nEMP1\\tH\\nPPP2R2B\\tH\\nTACR1\\tH\\nSLC7A10\\tH\\nELOVL6\\tH\\nATP1B3\\tH\\nSEMA4A\\tH\\nCEP152\\tH\\nLINC01296\\tH\\nNRXN1\\tH\\nADGRG2\\tH\\nCLDN1\\tH\\nZSWIM6\\tH\\nWNT3\\tH\\nCCDC170\\tH\\nTHBS1\\tH\\nSLC35F2\\tH\\nZC3H12B\\tH\\nPLIN1\\tH\\nLOC401052\\tH\\nCATSPERG\\tH\\nIFRD1\\tH\\nGAS2L3\\tH\\nAPOBEC3D\\tH\\nPOU2F2\\tH\\nERRFI1\\tH\\nARSJ\\tH\\nFOXC1\\tH\\nPRDM1\\tH\\nRASGRP1\\tH\\nKIAA1683\\tH\\nPRELP\\tH\\nTIPARP\\tH\\nZC3H12A\\tH\\nSGIP1\\tH\\nPDE8B\\tH\\nGFPT2\\tH\\nCABP4\\tH\\nRAD51B\\tH\\nMICB\\tH\\nEIF4A3\\tH\\nFAM72C\\tH\\nC7\\tH\\nQPCT\\tH\\nMAP3K8\\tH\\nTUFT1\\tH\\nDUXAP10\\tH\\nSHROOM3\\tH\\nZC3HAV1\\tH\\nS1PR2\\tH\\nFAM122C\\tH\\nHRH1\\tH\\nUGCG\\tH\\nSOX9\\tH\\nLYVE1\\tH\\nBCL2L11\\tH\\nEIF2AK3\\tH\\nC11orf63\\tH\\nSERPINB8\\tH\\nLEPR\\tH\\nCACNB2\\tH\\nCACNA2D4\\tH\\nNR2F1\\tH\\nCLCF1\\tH\\nPSD3\\tH\\nADNP2\\tH\\nDYNC2H1\\tH\\nOR2A20P\\tH\\nSYT17\\tH\\nVASH2\\tH\\nTMEM2\\tH\\nOR2A9P\\tH\\nUSP32P2\\tH\\nEDIL3\\tH\\nLOX\\tH\\nMXD1\\tH\\nNHSL1\\tH\\nDLC1\\tH\\nCYBB\\tH\\nETV5\\tH\\nCEP126\\tH\\nPTPRF\\tH\\nCOCH\\tH\\nSCRN1\\tH\\nPPM1D\\tH\\nLILRB4\\tH\\nMFSD4A\\tH\\nCCDC144B\\tH\\nPXDNL\\tH\\nAHR\\tH\\nTRIM14\\tH\\nFRMD4B\\tH\\nCD84\\
tH\\nTIAM2\\tH\\nADAMTS5\\tH\\nXYLT1\\tH\\nMYOF\\tH\\nSLC7A1\\tH\\nSMG1P3\\tH\\nUGDH\\tH\\nPMP22\\tH\\nAMPH\\tH\\nNPIPB5\\tH\\nNT5DC3\\tH\\nUBE2D2\\tH\\nPIGX\\tH\\nTTC1\\tH\\nSRP14\\tH\\nGKAP1\\tH\\nFIBP\\tH\\nMED11\\tH\\nVTI1B\\tH\\nATPAF1\\tH\\nDNAJC19\\tH\\nMRPL24\\tH\\nTRIM16L\\tH\\nPOLR2F\\tH\\nGCSH\\tH\\nTMEM147\\tH\\nLSM10\\tH\\nMRPL40\\tH\\nC11orf74\\tH\\nSERF2-C15ORF63\\tH\\nNDUFAF2\\tH\\nUBE3D\\tH\\nMALSU1\\tH\\nCOA4\\tH\\nELP6\\tH\\nMTX2\\tH\\nCMC4\\tH\\nMON1A\\tH\\nCABP7\\tH\\nMID1IP1\\tH\\nCOA6\\tH\\nKIF22\\tH\\nTSEN15\\tH\\nNDFIP2\\tH\\nHYPK\\tH\\nZCRB1\\tH\\nPARK7\\tH\\nCOX16\\tH\\nGTF3C6\\tH\\nMINOS1\\tH\\nMRPS15\\tH\\nSTOML2\\tH\\nKCNS3\\tH\\nCACNA2D3\\tH\\nCTNNBIP1\\tH\\nC7orf55\\tH\\nCOPS5\\tH\\nCHCHD5\\tH\\nYBX3P1\\tH\\nSPAG7\\tH\\nNDUFS3\\tH\\nTPI1\\tH\\nPET100\\tH\\nST3GAL2\\tH\\nMRPL21\\tH\\nTP53TG1\\tH\\nCDKN2AIPNL\\tH\\nOIP5\\tH\\nRPS20\\tH\\nATP5E\\tH\\nCBWD2\\tH\\nCDK5\\tH\\nTOMM5\\tH\\nPRR34\\tH\\nHINT1\\tH\\nBAD\\tH\\nATP5L\\tH\\nSFXN5\\tH\\nAAMDC\\tH\\nMRPL51\\tH\\nKIAA0930\\tH\\nVAMP5\\tH\\nSEPW1\\tH\\nNDUFA6\\tH\\nSLIRP\\tH\\nSHISA2\\tH\\nNUDT2\\tH\\nCOX5B\\tH\\nSNRPN\\tH\\nSNURF\\tH\\nAURKA\\tH\\nCBWD1\\tH\\nNDUFB2\\tH\\nNAA38\\tH\\nCKM\\tH\\nGPD1\\tH\\nRPS29\\tH\\nDHRS4L1\\tH\\nMRPL33\\tH\\nLOC100507291\\tH\\nATP23\\tH\\nUQCRQ\\tH\\nNDUFC2\\tH\\nBOLA3\\tH\\nTCEB2\\tH\\nCOX7A1\\tH\\nDHRS4\\tH\\nCOX6C\\tH\\nFHL2\\tH\\nSLN\\tH\\nNDUFA1\\tH\\nRPL21P28\\tH\\nRPL21\\tH\\nNDUFC2-KCTD14\\tH\\nATP5I\\tH\\nUQCC2\\tH\\nLOC101929231\\tH\\nDBNDD1\\tH\\nNDUFB9\\tH\\nLAMB3\\tH\\nCSF3R\\tH\\nUSMG5\\tH\\nDHRS4L2\\tH\\nSERPINA1\\tH\\nC1orf53\\tH\\nGLT1D1\\tH\\nGREM2\\tH\\nUQCRBP1\\tH\\nFAM24B\\tH\\nS100A8\\tH\\nCDH22\\tH\\nLEFTY1\\tH\\nC3orf14\\tH\\nLINC01291\\tH\\nTPI1P2\\tH\\nCHAF1B\\tH\\nCENPE\\tH\\nE2F2\\tH\\nOSMR\\tH\\nNDUFC1\\tH\\nGP9\\tH\\nCDON\\tH\\nPOU3F3\\tH\\nLINC01224\\tH\\nOR7G1\\tH\\nZNF735\\tH\\nRPL23AP53\\tH\\nSAMD12\\tH\\nPAMR1\\tH\\nHIST3H2A\\tH\\nLOC101927798\\tH\\nFMOD\\tH\\nOR8S1\\tH\\nKLHL11\\tH\\nLOC105375429\\tH\\nLINC01122\\t
H\\nTMCO2\\tH\\nDNAH12\\tH\\nKLF4\\tH\\nCHRM4\\tH\\nLOC101928505\\tH\\nADAMTS1\\tH\\nBEX2\\tH\\nMCTP1\\tH\\nHSD3BP4\\tH\\nLINC01053\\tH\\nELK2AP\\tH\\nLOC105377458\\tH\\nFAM71E2\\tH\\nHAO1\\tH\\nCD68\\tH\\nLOC101928728\\tH\\nSYT15\\tH\\nBAGE\\tH\\nBPIFC\\tH\\nRAET1K\\tH\\nTMPRSS11BNL\\tH\\nTOMM7\\tH\\nHESX1\\tH\\nLRRC72\\tH\\nTUSC5\\tH\\nMUC13\\tH\\nLOC101929227\\tH\\nEDA2R\\tH\\nTM2D1\\tH\\nBCAT1\\tH\\nF13B\\tH\\nLINC00958\\tH\\nRFX4\\tH\\nBRD2\\tH\\nSCN3B\\tH\\nNANOS1\\tH\\nLINC01252\\tH\\nPHLDA2\\tH\\nSNAI3\\tH\\nLOC100506274\\tH\\nLINC01021\\tH\\nCHI3L1\\tH\\nTIMM10\\tH\\nKRTAP5-2\\tH\\nLY6G6C\\tH\\nLOC101927476\\tH\\nZNF169\\tH\\nTINCR\\tH\\nUBL5\\tH\\nLINC01551\\tH\\nFIRRE\\tH\\nRPS28\\tH\\nCYP2G1P\\tH\\nCASC21\\tH\\nWDR76\\tH\\nAGBL4-IT1\\tH\\nLINC01483\\tH\\nYEATS4\\tH\\nNUGGC\\tH\\nAPOBEC1\\tH\\nZAN\\tH\\nCNNM1\\tH\\nTMC1\\tH\\nAPOPT1\\tH\\nNT5M\\tH\\nLINC00877\\tH\\nLOC100133050\\tH\\nMRPL53\\tH\\nCBWD3\\tH\\nJMJD1C\\tH\\nNDUFA11\\tH\\nPLA2G2A\\tH\\nARRDC5\\tH\\nENPP1\\tH\\nNDUFB1\\tH\\nTSHZ2\\tH\\nCRIP3\\tH\\nSMIM4\\tH\\nNANOG\\tH\\nFBXO36\\tH\\nDGCR6L\\tH\\nFAM138D\\tH\\nARAP2\\tH\\nBMP6\\tH\\nMRPL20\\tH\\nMRPS18C\\tH\\nTGIF2-C20orf24\\tH\\nTPM1\\tH\\nSCML4\\tH\\nHRASLS\\tH\\nLOC105379450\\tH\\nNHS\\tH\\nLINC00888\\tH\\nLUADT1\\tH\\nTHBS2\\tH\\nSFTPB\\tH\\nSCN8A\\tH\\nCBWD6\\tH\\nSLC24A4\\tH\\nSRPX2\\tH\\nLCE3D\\tH\\nLCN12\\tH\\nGATA2\\tH\\nLINC00578\\tH\\nLOC101928449\\tH\\nGYPC\\tH\\nPDCL2\\tH\\nCHCHD3\\tH\\nGHET1\\tH\\nLOC101927284\\tH\\nC19orf35\\tH\\nPARP11\\tH\\nLOC100268168\\tH\\nANKRD45\\tH\\nCT45A3\\tH\\nAZGP1\\tH\\nARPC2\\tH\\nLINC01516\\tH\\nPTGER3\\tH\\nUROS\\tH\\nLOC101928887\\tH\\nFCGR1CP\\tH\\nLOC105375396\\tH\\nLOC727924\\tH\\nST20-MTHFS\\tH\\nTNIP3\\tH\\nTDGF1P3\\tH\\nCCL28\\tH\\nGALNT15\\tH\\nNME9\\tH\\nRSPH14\\tH\\nLINC00608\\tH\\nPCDH8\\tH\\nSHISA4\\tH\\nLVCAT5\\tH\\nDCUN1D3\\tH\\nLOC401463\\tH\\nLOC105375483\\tH\\nMRPL15\\tH\\nHS3ST2\\tH\\nC1orf194\\tH\\nRAB3B\\tH\\nTMEM251\\tH\\nLINC00152\\tH\\nLINC00102\\tH\\nCORO2B\\tH\\nBSPRY\\tH
\\nCCR7\\tH\\nGLI3\\tH\\nAPOL4\\tH\\nKERA\\tH\\nGAMT\\tH\\nRBP4\\tH\\nLMO1\\tH\\nSNHG12\\tH\\nLINC01410\\tH\\nZNF280C\\tH\\nCCDC144A\\tH\\nSNRNP27\\tH\\nNDUFA3\\tH\\nSKIDA1\\tH\\nFZD5\\tH\\nRUNDC3B\\tH\\nSHFM1\\tH\\nZMAT5\\tH\\nGGT7\\tH\\nTXLNG\\tH\\nSMG1P1\\tH\\nMMADHC\\tH\\nKPNA2\\tH\\nPAM16\\tH\\nLOC101929697\\tH\\nCXCL13\\tH\\nIMPA2\\tH\\nPRKAG2\\tH\\nMEX3B\\tH\\nNCCRP1\\tH\\nMAFA\\tH\\nHIST1H3J\\tH\\nLDLR\\tH\\nKANK4\\tH\\nSHC4\\tH\\nMACROD1\\tH\\nTAC3\\tH\\nNKX2-5\\tH\\nCOX8A\\tH\\nCREB5\\tH\\nTIMM17B\\tH\\nCBWD5\\tH\\nMTFR2\\tH\\nGSTTP2\\tH\\nLINC01504\\tH\\nEMC4\\tH\\nLOC101928272\\tH\\nCWH43\\tH\\nAPOC4\\tH\\nCCND2\\tH\\nSDHAF4\\tH\\nC2orf91\\tH\\nMYCNOS\\tH\\nZNF80\\tH\\nSIK2\\tH\\nMRPL52\\tH\\nBAK1\\tH\\nEZH2\\tH\\nABCC6P1\\tH\\nHIST1H2BO\\tH\\nNRG1-IT1\\tH\\nWWC1\\tH\\nFAM183A\\tH\\nPABPC1L\\tH\\nTPTE\\tH\\nBRS3\\tH\\nPCDH19\\tH\\nAKR1D1\\tH\\nSLC4A8\\tH\\nLOC105377651\\tH\\nLDHA\\tH\\nRPGRIP1\\tH\\nPPP1R1B\\tH\\nATP5EP2\\tH\\nCACYBP\\tH\\nCHURC1-FNTB\\tH\\nBARX2\\tH\\nHELB\\tH\\nCTCFL\\tH\\nPTPN13\\tH\\nPGR\\tH\\nTMEM261\\tH\\nTRIM49B\\tH\\nMYLPF\\tH\\nLOC100131047\\tH\\nPAPPA\\tH\\nPGM2\\tH\\nMRC1\\tH\\nSNX29P2\\tH\\nLOC101929159\\tH\\nNAP1L3\\tH\\nHILPDA\\tH\\nEFNA2\\tH\\nTMEM35\\tH\\nLOC101243545\\tH\\nLOC101927829\\tH\\nHEPHL1\\tH\\nACER1\\tH\\nLYPD4\\tH\\nLOC101928510\\tH\\nLOC101929577\\tH\\nRELL1\\tH\\nSLC20A1\\tH\\nSSNA1\\tH\\nATP5G1\\tH\\nLRIT2\\tH\\nGDF6\\tH\\nNDUFA13\\tH\\nFAM227A\\tH\\nLOC101929431\\tH\\nGAPDH\\tH\\nSOAT1\\tH\\nPWRN2\\tH\\nLINC00173\\tH\\nFOXL2\\tH\\nUQCRHL\\tH\\nLINC00906\\tH\\nCA5A\\tH\\nAPOBEC2\\tH\\nCT45A1\\tH\\nPSMC3\\tH\\nPART1\\tH\\nLINC00305\\tH\\nLOC400655\\tH\\nSYT11\\tH\\nLINC01361\\tH\\nANGPTL7\\tH\\nMPC2\\tH\\nLGALS9B\\tH\\nLINC01276\\tH\\nRIPK2\\tH\\nHEPACAM\\tH\\nDKFZp779M0652\\tH\\nSOX4\\tH\\nSPATA21\\tH\\nEFCAB5\\tH\\nNDUFB5\\tH\\nTRAF3IP2\\tH\\nTRAPPC3\\tH\\nGADD45G\\tH\\nCXXC4\\tH\\nLINC00676\\tH\\nSOX1\\tH\\nC15orf61\\tH\\nHIST1H2BK\\tH\\nHIST1H2AC\\tH\\nLOC284950\\tH\\nTMEM266\\tH\\nMMP19\\tH\\nPLAUR\\tH
\\nC20orf96\\tH\\nSLC9C2\\tH\\nLOC101060524\\tH\\nDRD5P2\\tH\\nMRPL11\\tH\\nAPOF\\tH\\nLRRC23\\tH\\nECT2L\\tH\\nNMNAT1\\tH\\nCCDC144CP\\tH\\nLOC101928539\\tH\\nRNLS\\tH\\nLOC105372179\\tH\\nMS4A10\\tH\\nTRAPPC2B\\tH\\nCHCHD2\\tH\\nLOC102724434\\tH\\nC7orf31\\tH\\nMIEN1\\tH\\nLOC100506444\\tH\\nPPP1R36\\tH\\nCCL2\\tH\\nSLC19A3\\tH\\nENDOU\\tH\\nLOC440028\\tH\\nPSMB10\\tH\\nFAM72D\\tH\\nGNG4\\tH\\nFOXO1\\tH\\nATP6V0A4\\tH\\nSKA1\\tH\\nPPP1R15B\\tH\\nTRPM5\\tH\\nANKRD33B\\tH\\nC1orf210\\tH\\nLOC101927058\\tH\\nMCF2\\tH\\nGALNT16\\tH\\nFRMD5\\tH\\nPCK1\\tH\\nPALM2\\tH\\nFIS1\\tH\\nKIAA0040\\tH\\nCIB2\\tH\\nNHEG1\\tH\\nCLDN11\\tH\\nPTGER4\\tH\\nCD83\\tH\\nNENF\\tH\\nLOC101928107\\tH\\nGLB1L2\\tH\\nLOC100505918\\tH\\nC2orf66\\tH\\nS100P\\tH\\nMBD3L3\\tH\\nLOC729970\\tH\\nREPS2\\tH\\nSNRPD2\\tH\\nCYP27A1\\tH\\nCDC20B\\tH\\nTAT\\tH\\nMDH1\\tH\\nCOX4I1\\tH\\nNHLH1\\tH\\nTMIGD1\\tH\\nTSACC\\tH\\nLOC101927596\\tH\\nWBSCR17\\tH\\nCYP1A2\\tH\\nPLK4\\tH\\nPSMD14\\tH\\nLOC105373782\\tH\\nMRPS28\\tH\\nARMC9\\tH\\nLINC01213\\tH\\nTGFBR3\\tH\\nARMCX4\\tH\\nLINC00243\\tH\\nDSC2\\tH\\nLOC105371335\\tH\\nLOC101927780\\tH\\nCXADR\\tH\\nDSG2\\tH\\nLPAR4\\tH\\nDAW1\\tH\\nBTG1\\tH\\nGLRX3\\tH\\nDUXAP8\\tH\\nMRPL34\\tH\\nSAT1\\tH\\nDHRS7C\\tH\\nOLR1\\tH\\nTM4SF1\\tH\\nSEMA3D\\tH\\nLOC101927650\\tH\\nLINC00668\\tH\\nRGS4\\tH\\nLOC644838\\tH\\nUBB\\tH\\nLOC101928514\\tH\\nELF4\\tH\\nCH25H\\tH\\nNCOA7\\tH\\nLINC01387\\tH\\nMSR1\\tH\\nNUTF2\\tH\\nZNF367\\tH\\nTSPAN5\\tH\\nATP5O\\tH\\nNKAIN3\\tH\\nCD44\\tH\\nFASN\\tH\\nMYBPC2\\tH\\nZNF611\\tH\\nLOC100287036\\tH\\nMTSS1L\\tH\\nGABRG2\\tH\\nZNF829\\tH\\nLOC100271832\\tH\\nUQCRH\\tH\\nPIGH\\tH\\nPOM121L8P\\tH\\nCTH\\tH\\nAK1\\tH\\nSLC7A14\\tH\\nFGF21\\tH\\nPAIP1\\tH\\nUBA3\\tH\\nMAPKAP1\\tH\\nZIM3\\tH\\nILDR1\\tH\\nFAHD1\\tH\\nMELK\\tH\\nTRIM29\\tH\\nNTM-IT\\tH\\nTPH1\\tH\\nSMIM10L1\\tH\\nCRYGB\\tH\\nSNAP91\\tH\\nNEURL1\\tH\\nLOC101929504\\tH\\nLOC102724053\\tH\\nLINC01268\\tH\\nFAM171B\\tH\\nFOSL1\\tH\\nC10orf126\\tH\\nLOC286059\\tH\\nLOC100506747\
\tH\\nCXCR2\\tH\\nLINC00294\\tH\\nPPP1R7\\tH\\nTMA7\\tH\\nERC2-IT1\\tH\\nANTXR1\\tH\\nPRKACG\\tH\\nPIGR\\tH\\nTF\\tH\\nNME2\\tH\\nINE1\\tH\\nLCE3B\\tH\\nIMMP1L\\tH\\nLOC101927142\\tH\\nDNAJB1\\tH\\nVSTM1\\tH\\nLOC105372626\\tH\\nEPHA7\\tH\\nGUCY2F\\tH\\nANXA1\\tH\\nLOC101928973\\tH\\nLOC102723427\\tH\\nCD109\\tH\\nIER3\\tH\\nOVOL1\\tH\\nLOC101927630\\tH\\nRGS14\\tH\\nLOC100289333\\tH\\nMRGPRE\\tH\\nTRPC1\\tH\\nPDZK1\\tH\\nLOC285889\\tH\\nLOC100130899\\tH\\nLOC642929\\tH\\nGYPB\\tH\\nSF3B5\\tH\\nCRAT8\\tH\\nRDH14\\tH\\nIRGC\\tH\\nIGF2BP1\\tH\\nSep-14\\tH\\nCTD-2201E9.1\\tH\\nLOC100506085\\tH\\nCDH16\\tH\\nUGT8\\tH\\nCCL11\\tH\\nULK4P2\\tH\\nULK4P1\\tH\\nNDUFB10\\tH\\nLOC101927526\\tH\\nLOC440910\\tH\\nTLR6\\tH\\nZNF724P\\tH\\nTBX18\\tH\\nISCA2\\tH\\nINSC\\tH\\nISY1\\tH\\nTGIF2\\tH\\nIKBKB\\tH\\nXCL1\\tH\\nMID1\\tH\\nLOC100996251\\tH\\nSLC38A1\\tH\\nLOC105375401\\tH\\nLOC388692\\tH\\nLINC00710\\tH\\nOAZ1\\tH\\nTHSD7A\\tH\\nMAP6D1\\tH\\nLOC102723727\\tH\\nSHH\\tH\\nLOC339666\\tH\\nGAB3\\tH\\nNSUN6\\tH\\nCGN\\tH\\nOR7E156P\\tH\\nNXF1\\tH\\nOLIG1\\tH\\nHCG2040054\\tH\\nC6orf203\\tH\\nLOC441454\\tH\\nTRPM3\\tH\\nCXCL1\\tH\\nCMC2\\tH\\nCYP27C1\\tH\\nCCL22\\tH\\nBAZ1A\\tH\\nBMS1P5\\tH\\nMS4A2\\tH\\nTCAF2\\tH\\nDCST2\\tH\\nCCEPR\\tH\\nDLEU7\\tH\\nSLC2A7\\tH\\nTEKT2\\tH\\nCRY1\\tH\\nLOC105370792\\tH\\nCT45A7\\tH\\nTPM2\\tH\\nNME1-NME2\\tH\\nCT45A10\\tH\\nSLC25A26\\tH\\nIER5L\\tH\\nLINC01111\\tH\\nLEP\\tH\\nFLVCR1\\tH\\nTES\\tH\\nPRELID3A\\tH\\nCLEC19A\\tH\\nITGAE\\tH\\nDNAJB13\\tH\\nABHD12B\\tH\\nNTRK3\\tH\\nBANCR\\tH\\nHTRA4\\tH\\nCYP2B6\\tH\\nSLC6A4\\tH\\nRPL37A\\tH\\nTRIM71\\tH\\nSNTN\\tH\\nSNHG6\\tH\\nLINC01563\\tH\\nRIMS2\\tH\\nDPM3\\tH\\nFAM46A\\tH\\nZBP1\\tH\\nSERF1B\\tH\\nSERF1A\\tH\\nPTGER4P2-CDK2AP2P2\\tH\\nGPBAR1\\tH\\nCYR61\\tH\\nMRPL37\\tH\\nBAGE3\\tH\\nBAGE2\\tH\\nELMO1\\tH\\nTROAP\\tH\\nTMEM217\\tH\\nTMPRSS11E\\tH\\nMYH1\\tH\\nLOC101929234\\tH\\nSARNP\\tH\\nCRAT37\\tH\\nBAGE5\\tH\\nBAGE4\\tH\\nLINC00844\\tH\\nSLX4IP\\tH\\nLOC101928008\\tH\\nB4GALT3\\tH\\nLINC01
206\\tH\\nNDUFA7\\tH\\nCOX14\\tH\\nMORC1\\tH\\nARID5B\\tH\\nPNKD\\tH\\nBIRC3\\tH\\nBTBD6\\tH\\nLOC101928902\\tH\\nFAM71D\\tH\\nLINC01251\\tH\\nARL2\\tH\\nLINC01265\\tH\\nTMEM205\\tH\\nLOC101929125\\tH\\nHCG22\\tH\\nLOC102724708\\tH\\nPRKCG\\tH\\nLINC01481\\tH\\nZNF98\\tH\\nPSMA8\\tH\\nCD14\\tH\\nPSMD4\\tH\\nAKR1C2\\tH\\nPSMB3\\tH\\nSMDT1\\tH\\nTCF7L1\\tH\\nMTCP1\\tH\\nHPSE\\tH\\nANGPTL5\\tH\\nFUNDC2P2\\tH\\nLINC00330\\tH\\nCACNG8\\tH\\nATRAID\\tH\\nPKHD1L1\\tH\\nHDAC11\\tH\\nC3orf18\\tH\\nSTX11\\tH\\nHIST2H2BA\\tH\\nTMTC4\\tH\\nLOC100506682\\tH\\nRPS14P3\\tH\\nELOVL7\\tH\\nTMEM156\\tH\\nBUB1B\\tH\\nLINC00477\\tH\\nMAP7D2\\tH\\nGPC6\\tH\\nPAQR5\\tH\\nPGAM2\\tH\\nPTS\\tH\\nS100A1\\tH\\nHEXIM2\\tH\\nOR4K2\\tH\\nS100G\\tH\\nATP5H\\tH\\nFKBP3\\tH\\nSCGB2A2\\tH\\nPLEKHH2\\tH\\nLOC102723322\\tH\\nACSM5\\tH\\nSFPQ\\tH\\nZNF358\\tH\\nGABRE\\tH\\nRRAGD\\tH\\nLMO7DN\\tH\\nNSMCE1\\tH\\nLINC00941\\tH\\nDAAM2\\tH\\nHPVC1\\tH\\nLINC00486\\tH\\nRPL26L1\\tH\\nLOC100287896\\tH\\nCASC6\\tH\\nREL\\tH\\nSPATA24\\tH\\nTMEM42\\tH\\nEFNB2\\tH\\nFNDC5\\tH\\nLKAAEAR1\\tH\\nCLDN4\\tH\\nTPTE2P1\\tH\\nSTEAP3\\tH\\nMLXIPL\\tH\\nCSF2\\tH\\nDYDC1\\tH\\nDPCD\\tH\\nABCB1\\tH\\nPRSS12\\tH\\nSDHB\\tH\\nTREML3P\\tH\\nLINC00911\\tH\\nFBXO25\\tH\\nLOC101928335\\tH\\nLNP1\\tH\\nLINC01138\\tH\\nLOC101928403\\tH\\nLOC101929565\\tH\\nCDCA8\\tH\\nLOC100505478\\tH\\nLY6K\\tH\\nINTS6L\\tH\\nBCAS1\\tH\\nLOC105376351\\tH\\nMRPL18\\tH\\nTRIM49\\tH\\nRUNX2\\tH\\nCITED2\\tH\\nLINC01436\\tH\\nABL2\\tH\\nUQCRFS1\\tH\\nOCLN\\tH\\nCCDC192\\tH\\nMERTK\\tH\\nSMKR1\\tH\\nCHCHD10\\tH\\nLOC100996634\\tH\\nTPI1P3\\tH\\nNTRK2\\tH\\nEMC6\\tH\\nLOC101928858\\tH\\nRARRES1\\tH\\nCLDN19\\tH\\nCLYBL\\tH\\nNDUFAF5\\tH\\nTIMM13\\tH\\nICAM1\\tH\\nRNF181\\tH\\nNCAPH\\tH\\nSAMM50\\tH\\nNDUFS2\\tH\\nPGA3\\tH\\nC4orf19\\tH\\nAIMP2\\tH\\nMARVELD3\\tH\\nLCE6A\\tH\\nRPS25\\tH\\nAP1B1P1\\tH\\nCOL12A1\\tH\\nATF4\\tH\\nGAP43\\tH\\nACKR2\\tH\\nSLMO2-ATP5E\\tH\\nARHGEF9-IT1\\tH\\nGTF3A\\tH\\nCDC26\\tH\\nTIMMDC1\\tH\\nLSM1\\tH\\nTRIM59\\tH\\nCDR2\\
tH\\nCPT1A\\tH\\nGINS4\\tH\\nLOC102546299\\tH\\nTRH\\tH\\nLINC00942\\tH\\nARHGAP11A\\tH\\nNMBR\\tH\\nPRC1\\tH\\nSERF2\\tH\\nMC5R\\tH\\nCOX11\\tH\\nEFHC2\\tH\\nPLVAP\\tH\\nFCGR1A\\tH\\nGCG\\tH\\nOR2G3\\tH\\nSNAPIN\\tH\\nWBSCR28\\tH\\nPDCL3\\tH\\nFLJ40194\\tH\\nLOC407835\\tH\\nCT45A4\\tH\\nCCHCR1\\tH\\nUCHL3\\tH\\nMEP1B\\tH\\nNPIPB6\\tH\\nLOC101926940\\tH\\nLINC00959\\tH\\nLINC01180\\tH\\nDNAJC5G\\tH\\nFZD10\\tH\\nNDUFB8\\tH\\nERCC1\\tH\\nLOC389641\\tH\\nRPS14\\tH\\nARPC5L\\tH\\nDOCK10\\tH\\nLOC101928809\\tH\\nPLEKHA5\\tH\\nLINC00449\\tH\\nTFAP2B\\tH\\nMIR503HG\\tH\\nXG\\tH\\nCXCL3\\tH\\nCSTL1\\tH\\nLOC101928161\\tH\\nCOX6B1\\tH\\nCA8\\tH\\nIL1R1\\tH\\nLINC00619\\tH\\nGAGE1\\tH\\nNDUFA4\\tH\\nLINC01549\\tH\\nCCL16\\tH\\nERN2\\tH\\nALLC\\tH\\nCCDC43\\tH\\nFAM81B\\tH\\nMT2A\\tH\\nS100B\\tH\\nZSCAN12\\tH\\nCABP5\\tH\\nVAV3\\tH\\nIKZF3\\tH\\nDEFB118\\tH\\nDGCR6\\tH\\nLOC105371795\\tH\\nSLC28A3\\tH\\nLOC100129518\\tH\\nZNF503\\tH\\nJTB\\tH\\nLY9\\tH\\nMGC27345\\tH\\nMX2\\tH\\nLOC400002\\tH\\nUGGT2\\tH\\nNDUFA2\\tH\\nMFAP5\\tH\\nITGAM\\tH\\nXKR4\\tH\\nLINC01030\\tH\\nEBAG9\\tH\\nMAGEB5\\tH\\nTMEM150A\\tH\\nLOC101927653\\tH\\nEMC7\\tH\\nSIK1\\tH\\nEMB\\tH\\nDUXA\\tH\\nMIR3663HG\\tH\\nSPATA42\\tH\\nTNFRSF12A\\tH\\nLOC100507195\\tH\\nFAM78A\\tH\\nTENM2\\tH\\nLOC102724428\\tH\\nTRABD2A\\tH\\nTPTE2P3\\tH\\nRASAL1\\tH\\nITPRIP\\tH\\nADGRG6\\tH\\nVSIG4\\tH\\nADRBK2\\tH\\nTRIM49C\\tH\\nHOXC5\\tH\\nCMAHP\\tH\\nRPSAP58\\tH\\nOR7G3\\tH\\nLOC100288069\\tH\\nKRT9\\tH\\nARL6IP1\\tH\\nLINC00635\\tH\\nGPC3\\tH\\nSNX21\\tH\\nRIN2\\tH\\nMYHAS\\tH\\nPOTEE\\tH\\nCLEC2A\\tH\\nATP1A3\\tH\\nLOC105371267\\tH\\nLINC00696\\tH\\nBEND2\\tH\\nSPECC1\\tH\\nECM1\\tH\\nTSPAN1\\tH\\nFAM86JP\\tH\\nP2RX7\\tH\\nTMEM106A\\tH\\nPTPRH\\tH\\nEIF3K\\tH\\nSYK\\tH\\nAGR3\\tH\\nLINC00396\\tH\\nMR1\\tH\\nSLC9A2\\tH\\nGSTZ1\\tH\\nDEFB1\\tH\\nLOC101928370\\tH\\nCALD1\\tH\\nLINC01351\\tH\\nBICD1\\tH\\nFAM231D\\tH\\nSFRP5\\tH\\nEFNA1\\tH\\nLOC101929054\\tH\\nMETTL21A\\tH\\nHOXB5\\tH\\nRYR2\\tH\\nTCEA3\\tH\\nGOLGA8F\\tH\\n
ARL6IP6\\tH\\nLOC105369891\\tH\\nFAM185A\\tH\\nCCDC124\\tH\\nLOC100499194\\tH\\nKDM6A\\tH\\nLONRF1\\tH\\nADRA2A\\tH\\nFAM210B\\tH\\nTRIM31\\tH\\nRAB39B\\tH\\nKIAA0513\\tH\\nIQUB\\tH\\nTLL1\\tH\\nLRRC15\\tH\\nLOC284294\\tH\\nNQO1\\tH\\nRMST\\tH\\nC12orf57\\tH\\nSIRT1\\tH\\nPDGFC\\tH\\nPPIAL4C\\tH\\nPPIAL4A\\tH\\nC18orf61\\tH\\nLOC283194\\tH\\nRPS23\\tH\\nIFNLR1\\tH\\nGOLGA8G\\tH\\nLY6G6F\\tH\\nLINC00671\\tH\\nRPL23A\\tH\\nLOC101929726\\tH\\nOR10Q1\\tH\\nRNF7\\tH\\nSMCP\\tH\\nNCK2\\tH\\nRNF148\\tH\\nMIR17HG\\tH\\nLINC00479\\tH\\nLINC00551\\tH\\nSIRT4\\tH\\nHERC5\\tH\\nZNF738\\tH\\nLINC01209\\tH\\nTOB2P1\\tH\\nESPL1\\tH\\nLINC00116\\tH\\nHK1\\tH\\nLBP\\tH\\nLOC105369632\\tH\\nVIM\\tH\\nDSEL\\tH\\nPOTEJ\\tH\\nUSP44\\tH\\nLOC101927415\\tH\\nHSPH1\\tH\\nENPP7P13\\tH\\nTNFAIP3\\tH\\nBHLHE41\\tH\\nETV7\\tH\\nKCNQ4\\tH\\nLOC100287792\\tH\\nLOC101929511\\tH\\nMROH5\\tH\\nOAZ3\\tH\\nPPP1R15A\\tH\\nIDI2\\tH\\nCYB561A3\\tH\\nARMC4\\tH\\nBHMT2\\tH\\nNETO2\\tH\\nSUCNR1\\tH\\nSSU72\\tH\\nLOC399886\\tH\\nDISC1\\tH\\nSTAMBP\\tH\\nNLGN1\\tH\\nHAX1\\tH\\nTNRC18P1\\tH\\nAKR1B1\\tH\\nULK4P3\\tH\\nC1QTNF3\\tH\\nCT47A7\\tH\\nWBSCR22\\tH\\nHCAR1\\tH\\nRGL1\\tH\\nLINC01606\\tH\\nCLPS\\tH\\nDUPD1\\tH\\nSSX1\\tH\\nGSTK1\\tH\\nSPRY4\\tH\\nNUDCD2\\tH\\nRECK\\tH\\nNOL4L\\tH\\nPCBP4\\tH\\nCNTNAP2\\tH\\nKCNE1\\tH\\nLOC400541\\tH\\nLINC00261\\tH\\nC9orf173\\tH\\nMRPL48\\tH\\nPOM121L9P\\tH\\nMKRN2OS\\tH\\nRALY\\tH\\nESM1\\tH\\nEID1\\tH\\nNUDT6\\tH\\nHINT3\\tH\\nIPMK\\tH\\nC11orf98\\tH\\nCRLF1\\tH\\nCFL1P1\\tH\\nTMPRSS9\\tH\\nCHMP2A\\tH\\nOLFM1\\tH\\nZNF511\\tH\\nB3GNT7\\tH\\nSIK3\\tH\\nACER3\\tH\\nCIDEC\\tH\\nADGRD1\\tH\\nSPC25\\tH\\nLOC101926911\\tH\\nPELI3\\tH\\nEXT1\\tH\\nPCAT5\\tH\\nGDF15\\tH\\nMRPL47\\tH\\nPLSCR1\\tH\\nTOM1\\tH\\nC6\\tH\\nWDR87\\tH\\nFXYD5\\tH\\nCOBLL1\\tH\\nANGPT2\\tH\\nSRCIN1\\tH\\nSLC10A1\\tH\\nOAS1\\tH\\nMMP21\\tH\\nCOL19A1\\tH\\nGPR18\\tH\\nTMEM219\\tH\\nZNF296\\tH\\nUSP43\\tH\\nGOLGA2P9\\tH\\nRFX2\\tH\\nRAB27A\\tH\\nLOC102467217\\tH\\nMYH13\\tH\\nPHLPP2\\tH\\nLOC101928985\
\tH\\nCDRT7\\tH\\nINTS6\\tH\\nHAS2\\tH\\nDZIP1\\tH\\nOR2V2\\tH\\nOR2H2\\tH\\nTSSC1\\tH\\nBOLA1\\tH\\nPABPC1P2\\tH\\nTMEM229A\\tH\\nATP8B1\\tH\\nLCNL1\\tH\\nDCDC5\\tH\\nSOD1\\tH\\nPAG1\\tH\\nCETN2\\tH\\nNCR1\\tH\\nTMEM100\\tH\\nURI1\\tH\\nTEKT4P2\\tH\\nPCAT1\\tH\\nSERTAD4\\tH\\nLINC00550\\tH\\nGLB1L\\tH\\nUNG\\tH\\nAGMAT\\tH\\nLOC101928540\\tH\\nZNF681\\tH\\nLINC01456\\tH\\nFCGR2C\\tH\\nABCG2\\tH\\nANAPC11\\tH\\nLOC102800447\\tH\\nCYLC2\\tH\\nC6orf226\\tH\\nREM2\\tH\\nBMPR1B\\tH\\nBECN1\\tH\\nADM\\tH\\nPDPR\\tH\\nKDM8\\tH\\nHMBS\\tH\\nMYO1H\\tH\\nLINC00493\\tH\\nFGF14\\tH\\nEIF2AK1\\tH\\nLOC101928489\\tH\\nKCNK1\\tH\\nCKS2\\tH\\nLOC101928035\\tH\\nLINC01221\\tH\\nEREG\\tH\\nNDUFB11\\tH\\nNARF\\tH\\nZC3HC1\\tH\\nADGRE2\\tH\\nUFC1\\tH\\nHOMER1\\tH\\nHDDC2\\tH\\nHIST1H3A\\tH\\nTNNT3\\tH\\nZNF670-ZNF695\\tH\\nGSR\\tH\\nNDRG4\\tH\\nTERC\\tH\\nFANCB\\tH\\nFFAR4\\tH\\nMGAM2\\tH\\nLRRTM4\\tH\\nINHBA\\tH\\nLOC403312\\tH\\nKLLN\\tH\\nDZANK1\\tH\\nRGS9BP\\tH\\nRIIAD1\\tH\\nARL2-SNX15\\tH\\nPLAU\\tH\\nSPDYE8P\\tH\\nSLC25A19\\tH\\nBMS1P6\\tH\\nZFYVE19\\tH\\nCTAGE1\\tH\\nMTIF3\\tH\\nSPACA4\\tH\\nSIPA1L1\\tH\\nSLC2A10\\tH\\nPGK1\\tH\\nGIF\\tH\\nMYH8\\tH\\nLOC101928098\\tH\\nFRMD4A\\tH\\nLINC01397\\tH\\nLIPE\\tH\\nTRIM49D2\\tH\\nPGM1\\tH\\nHRH4\\tH\\nLOC646241\\tH\\nLOC101927587\\tH\\nCTD-2201I18.1\\tH\\nRAPGEF4\\tH\\nRUNX1\\tH\\nC5\\tH\\nTRIM49D1\\tH\\nLOC100508046\\tH\\nLOC101928885\\tH\\nUCHL1\\tH\\nR3HDM4\\tH\\nMAP9\\tH\\nMIF4GD\\tH\\nLOC100190986\\tH\\nCOQ2\\tH\\nKNTC1\\tH\\nSAXO1\\tH\\nLOC105369860\\tH\\nFPR1\\tH\\nGP6\\tH\\nEIF2S2\\tH\\nLINC00461\\tH\\nHIST1H2AH\\tH\\nDHRS7\\tH\\nCHST8\\tH\\nHAGH\\tH\\nC4orf3\\tH\\nNMUR2\\tH\\nAKR1C3\\tH\\nLRRC70\\tH\\nREXO2\\tH\\nPRH1-TAS2R14\\tH\\nSLC9A1\\tH\\nMNAT1\\tH\\nSLC37A4\\tH\\nMGC34796\\tH\\nHSPB9\\tH\\nCADM3\\tH\\nMYEOV2\\tH\\nKRTAP6-3\\tH\\nARNTL2\\tH\\nENPP2\\tH\\nCUBN\\tH\\nLOC339059\\tH\\nGSDMA\\tH\\nBTG3\\tH\\nSTBD1\\tH\\nNAV3\\tH\\nALDH1L2\\tH\\nZBTB21\\tH\\nSPATA5\\tH\\nMRPL57\\tH\\nCWC15\\tH\\nNOMO3\\tH\\nUBTD1\\tH\\nIFI30
\\tH\\nFMNL2\\tH\\nPRMT3\\tH\\nLOC101927692\\tH\\nNTPCR\\tH\\nDHRS7B\\tH\\nTBCB\\tH\\nC3orf58\\tH\\nKRT222\\tH\\nWRB-SH3BGR\\tH\\nLOC101928580\\tH\\nRWDD1\\tH\\nNKIRAS1\\tH\\nABCA1\\tH\\nCASC20\\tH\\nRTN4IP1\\tH\\nSPATA6L\\tH\\nLUZP1\\tH\\nCARS2\\tH\\nC2orf61\\tH\\nLOC102467226\\tH\\nMIR3945HG\\tH\\nFGF9\\tH\\nVRTN\\tH\\nPCDH18\\tH\\nPOLR3K\\tH\\nLINC00566\\tH\\nAOX1\\tH\\nPDLIM7\\tH\\nLOC102577426\\tH\\nUSE1\\tH\\nGINS2\\tH\\nRAPGEF2\\tH\\nLINC01492\\tH\\nTMEM70\\tH\\nCOX17\\tH\\nSRRM4\\tH\\nLOC101928295\\tH\\nISCA1\\tH\\nIL18R1\\tH\\nAPOC4-APOC2\\tH\\nMT1M\\tH\\nLMO2\\tH\\nSCN4B\\tH\\nRDH12\\tH\\nFEZF2\\tH\\nTMEM150B\\tH\\nCPS1\\tH\\nSLC35G2\\tH\\nTPM3\\tH\\nREG1A\\tH\\nLINC01133\\tH\\nAFAP1L2\\tH\\nPSENEN\\tH\\nFAM72A\\tH\\nLINC00467\\tH\\nHELLS\\tH\\nLINC00367\\tH\\nPLXNA4\\tH\\nC11orf73\\tH\\nKLF7\\tH\\nYBEY\\tH\\nOIT3\\tH\\nLOC101929681\\tH\\nPTPRD\\tH\\nLOC100422737\\tH\\nLINC01411\\tH\\nTSPAN17\\tH\\nUGT1A10\\tH\\nIFT22\\tH\\nRPS10P7\\tH\\nDBIL5P2\\tH\\nIFI44\\tH\\nBTK\\tH\\nMDP1\\tH\\nLOC284080\\tH\\nCYP2C18\\tH\\nFBXW12\\tH\\nCORO7-PAM16\\tH\\nTMEM14B\\tH\\nPOLQ\\tH\\nAFF4\\tH\\nLHFPL4\\tH\\nABTB2\\tH\\nNOMO1\\tH\\nFHDC1\\tH\\nTRIM38\\tH\\nCTSV\\tH\\nGATA3\\tH\\nLINCR-0002\\tH\\nCFAP20\\tH\\nNDUFB6\\tH\\nRASA4\\tH\\nLOC100288798\\tH\\nCFAP206\\tH\\nROR1\\tH\\nACOT13\\tH\\nLOC285626\\tH\\nBANF1\\tH\\nDCAF4L2\\tH\\nSH3BGR\\tH\\nOTOA\\tH\\nCD226\\tH\\nSLC29A4\\tH\\nRPL18\\tH\\nPRDX3\\tH\\nFGB\\tH\\nTEX14\\tH\\nFBN1\\tH\\nEPHA3\\tH\\n'}}]" ] @@ -2232,7 +2264,7 @@ { "data": { "text/plain": [ - "(1675, 12)" + "(2421, 11)" ] }, "execution_count": 26, @@ -2244,16 +2276,197 @@ "combined_df.shape" ] }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
identifieridentifier.sourcetargettarget.sourceDISGENET_diseasesOpenTargets_gene_compoundsMINERVAWikiPathwaysOpenTargets_reactomeOpenTargets_goStringDB_ppi
2416PRDX3HGNCENSG00000165672Ensembl[{'disease_name': 'SPINOCEREBELLAR ATAXIA, AUT...[{'chembl_id': nan, 'drugbank_id': nan, 'compo...[{'pathway_id': 933.0, 'pathway_label': 'Elect...[{'pathway_id': nan, 'pathway_label': nan, 'pa...[{'pathway_label': 'Detoxification of Reactive...[{'go_id': 'GO:0005515', 'go_name': 'protein b...[{'stringdb_link_to': 'SIRT1', 'Ensembl': 'ENS...
2417FGBHGNCENSG00000171564Ensembl[{'disease_name': 'Cardiovascular Diseases', '...[{'chembl_id': 'CHEMBL2109072', 'drugbank_id':...[{'pathway_id': 951.0, 'pathway_label': 'Coagu...[{'pathway_id': 'WP5115', 'pathway_label': 'Ne...[{'pathway_label': 'p130Cas linkage to MAPK si...[{'go_id': 'GO:0005576', 'go_name': 'extracell...[{'stringdb_link_to': 'LBP', 'Ensembl': 'ENSP0...
2418TEX14HGNCENSG00000121101Ensembl[{'disease_name': 'Non-obstructive azoospermia...[{'chembl_id': nan, 'drugbank_id': nan, 'compo...[{'pathway_id': nan, 'pathway_label': nan, 'pa...[{'pathway_id': nan, 'pathway_label': nan, 'pa...[{'pathway_label': nan, 'pathway_id': nan}][{'go_id': 'GO:0032466', 'go_name': 'negative ...[{'stringdb_link_to': nan, 'Ensembl': nan, 'sc...
2419FBN1HGNCENSG00000166147Ensembl[{'disease_name': 'Marfan Syndrome', 'HPO': ''...[{'chembl_id': nan, 'drugbank_id': nan, 'compo...[{'pathway_id': 945.0, 'pathway_label': 'Nsp9 ...[{'pathway_id': 'WP3668', 'pathway_label': 'Hy...[{'pathway_label': 'TGF-beta receptor signalin...[{'go_id': 'GO:0005201', 'go_name': 'extracell...[{'stringdb_link_to': 'SERPINE1', 'Ensembl': '...
2420EPHA3HGNCENSG00000044524Ensembl[{'disease_name': 'Adenocarcinoma of lung (dis...[{'chembl_id': 'CHEMBL24828', 'drugbank_id': '...[{'pathway_id': nan, 'pathway_label': nan, 'pa...[{'pathway_id': 'WP2882', 'pathway_label': 'Nu...[{'pathway_label': 'EPH-Ephrin signaling', 'pa...[{'go_id': 'GO:0010717', 'go_name': 'regulatio...[{'stringdb_link_to': 'EFNA2', 'Ensembl': 'ENS...
\n", + "
" + ], + "text/plain": [ + " identifier identifier.source target target.source \\\n", + "2416 PRDX3 HGNC ENSG00000165672 Ensembl \n", + "2417 FGB HGNC ENSG00000171564 Ensembl \n", + "2418 TEX14 HGNC ENSG00000121101 Ensembl \n", + "2419 FBN1 HGNC ENSG00000166147 Ensembl \n", + "2420 EPHA3 HGNC ENSG00000044524 Ensembl \n", + "\n", + " DISGENET_diseases \\\n", + "2416 [{'disease_name': 'SPINOCEREBELLAR ATAXIA, AUT... \n", + "2417 [{'disease_name': 'Cardiovascular Diseases', '... \n", + "2418 [{'disease_name': 'Non-obstructive azoospermia... \n", + "2419 [{'disease_name': 'Marfan Syndrome', 'HPO': ''... \n", + "2420 [{'disease_name': 'Adenocarcinoma of lung (dis... \n", + "\n", + " OpenTargets_gene_compounds \\\n", + "2416 [{'chembl_id': nan, 'drugbank_id': nan, 'compo... \n", + "2417 [{'chembl_id': 'CHEMBL2109072', 'drugbank_id':... \n", + "2418 [{'chembl_id': nan, 'drugbank_id': nan, 'compo... \n", + "2419 [{'chembl_id': nan, 'drugbank_id': nan, 'compo... \n", + "2420 [{'chembl_id': 'CHEMBL24828', 'drugbank_id': '... \n", + "\n", + " MINERVA \\\n", + "2416 [{'pathway_id': 933.0, 'pathway_label': 'Elect... \n", + "2417 [{'pathway_id': 951.0, 'pathway_label': 'Coagu... \n", + "2418 [{'pathway_id': nan, 'pathway_label': nan, 'pa... \n", + "2419 [{'pathway_id': 945.0, 'pathway_label': 'Nsp9 ... \n", + "2420 [{'pathway_id': nan, 'pathway_label': nan, 'pa... \n", + "\n", + " WikiPathways \\\n", + "2416 [{'pathway_id': nan, 'pathway_label': nan, 'pa... \n", + "2417 [{'pathway_id': 'WP5115', 'pathway_label': 'Ne... \n", + "2418 [{'pathway_id': nan, 'pathway_label': nan, 'pa... \n", + "2419 [{'pathway_id': 'WP3668', 'pathway_label': 'Hy... \n", + "2420 [{'pathway_id': 'WP2882', 'pathway_label': 'Nu... \n", + "\n", + " OpenTargets_reactome \\\n", + "2416 [{'pathway_label': 'Detoxification of Reactive... \n", + "2417 [{'pathway_label': 'p130Cas linkage to MAPK si... 
\n", + "2418 [{'pathway_label': nan, 'pathway_id': nan}] \n", + "2419 [{'pathway_label': 'TGF-beta receptor signalin... \n", + "2420 [{'pathway_label': 'EPH-Ephrin signaling', 'pa... \n", + "\n", + " OpenTargets_go \\\n", + "2416 [{'go_id': 'GO:0005515', 'go_name': 'protein b... \n", + "2417 [{'go_id': 'GO:0005576', 'go_name': 'extracell... \n", + "2418 [{'go_id': 'GO:0032466', 'go_name': 'negative ... \n", + "2419 [{'go_id': 'GO:0005201', 'go_name': 'extracell... \n", + "2420 [{'go_id': 'GO:0010717', 'go_name': 'regulatio... \n", + "\n", + " StringDB_ppi \n", + "2416 [{'stringdb_link_to': 'SIRT1', 'Ensembl': 'ENS... \n", + "2417 [{'stringdb_link_to': 'LBP', 'Ensembl': 'ENSP0... \n", + "2418 [{'stringdb_link_to': nan, 'Ensembl': nan, 'sc... \n", + "2419 [{'stringdb_link_to': 'SERPINE1', 'Ensembl': '... \n", + "2420 [{'stringdb_link_to': 'EFNA2', 'Ensembl': 'ENS... " + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "combined_df.tail()" + ] + }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Exporting the database in pickle format" + "##### Exporting the combined data in pickle format" ] }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ @@ -2269,67 +2482,551 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Creating a graph from the annotated dataframe" + "### Creating a graph from the annotated data" ] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 29, "metadata": {}, "outputs": [], "source": [ - "pygraph = generator.networkx_graph(combined_df)" + "# pygraph = generator.networkx_graph(combined_df, opentargets_disease_compound_df)\n", + "# with open(\n", + "# os.path.join(os.getcwd(), \"examples\", \"usecases\", \"PCS\", \"pcs_networkx_graph.pkl\"), \"wb\"\n", + "# ) as out:\n", + "# pickle.dump(pygraph, out)\n", + "\n", + "with open(\n", + " os.path.join(os.getcwd(), \"examples\", 
\"usecases\", \"PCS\", \"pcs_networkx_graph.pkl\"),\n", + " \"rb\",\n", + ") as file:\n", + " pygraph = pickle.load(file)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Store the graph" + "### Visualize the graph" ] }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 30, "metadata": {}, "outputs": [], "source": [ - "# with open(\n", - "# os.path.join(os.getcwd(), \"examples\", \"usecases\", \"PCS\", \"pcs_networkx_graph.pkl\"), \"wb\"\n", - "# ) as out:\n", - "# pickle.dump(pygraph, out)" + "# pos = nx.circular_layout(pygraph)\n", + "\n", + "# plt.figure(3, figsize=(30, 30))\n", + "# nx.draw(pygraph, pos)\n", + "# plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Visualize the graph" + "#### Cytoscape" ] }, { "cell_type": "code", - "execution_count": 38, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# pos = nx.circular_layout(pygraph)\n", + "from pyBiodatafuse.graph import cytoscape\n", "\n", - "# plt.figure(3, figsize=(30, 30))\n", - "# nx.draw(pygraph, pos)\n", - "# plt.show()" + "cytoscape.load_graph(pygraph, network_name=\"PCS network\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Neo4j" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "metadata": {}, "outputs": [], "source": [ - "from pyBiodatafuse.graph import cytoscape, neo4j\n", + "from pyBiodatafuse.graph import neo4j\n", "\n", - "neo4j.save_graph_to_graphml(pygraph, output_path=\"graph_to-test.graphml\")\n", - "cytoscape.load_graph(pygraph, network_name=\"test\")" + "neo4j.save_graph_to_graphml(\n", + " pygraph,\n", + " output_path=os.path.join(\n", + " os.getcwd(), \"examples\", \"usecases\", \"PCS\", \"pcs_networkx_graph.graphml\"\n", + " ),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### Steps to load in Neo4j" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Add 
`.graphml` file in **import** subfolder of the DBMS folder\n", + "- Install apoc plugin\n", + "- Add `apoc.conf` file to **conf** subfolder of the DBMS folder\n", + "\n", + " `apoc.conf` file:\n", + " ```\n", + " apoc.trigger.enabled=true\n", + " apoc.import.file.enabled=true\n", + " apoc.export.file.enabled=true\n", + " apoc.import.file.use_neo4j_config=true\n", + " ```\n", + "\n", + "- Open Neo4j Browser\n", + "- (Optional, only run if you have imported a graph before) Remove all the nodes before importing `.graphml` file\n", + "\n", + " ```\n", + " neo4j$ MATCH (n) DETACH DELETE n\n", + " ```\n", + "\n", + "- Import `.graphml` file\n", + "\n", + " ```\n", + " neo4j$ call apoc.import.graphml('file:///pcs_networkx_graph.graphml',{readLabels:TRUE})\n", + " ```\n", + "\n", + "- Add indexes after importing the graph for improving the performance of queries\n", + "\n", + " ```\n", + " neo4j$ create index Gene for (n:Gene) on (n.node_type)\n", + " neo4j$ create index Pathway for (n:Pathway) on (n.node_type)\n", + " neo4j$ create index `Biological Process` for (n:`Biological Process`) on (n.node_type)\n", + " neo4j$ create index `Molecular Function` for (n:`Molecular Function`) on (n.node_type)\n", + " neo4j$ create index `Cellular Component` for (n:`Cellular Component`) on (n.node_type)\n", + " neo4j$ create index Disease for (n:Disease) on (n.node_type)\n", + " neo4j$ create index Compound for (n:Compound) on (n.node_type)\n", + " neo4j$ create index `Side Effect` for (n:`Side Effect`) on (n.node_type)\n", + " ```\n", + "\n", + "- Count the number of each node type\n", + " - total (```neo4j$ MATCH (n) RETURN count(n)```) = 19859\n", + " - Gene (```neo4j$ MATCH (n:Gene) RETURN count(n)```) = 1667\n", + " - Pathway (```neo4j$ MATCH (n:Pathway) RETURN count(n)```) = 1847\n", + " - WikiPathways (```MATCH (n:Pathway {source: \"WikiPathways\"}) RETURN count(n)```) = 678\n", + " - OpenTargets, Reactome (```MATCH (n:Pathway {source: \"OpenTargets\"}) RETURN count(n)```) = 
1154\n", + " - MINERVA (```MATCH (n:Pathway {source: \"MINERVA\"}) RETURN count(n)```) = 15\n", + " - Biological Process (```MATCH (n:`Biological Process`) RETURN count(n)```) = 4624\n", + " - Molecular Function (```MATCH (n:`Molecular Function`) RETURN count(n)```) = 1327\n", + " - Cellular Component (```MATCH (n:`Cellular Component`) RETURN count(n)```) = 736\n", + " - Disease (```MATCH (n:Disease) RETURN count(n)```) = 2913\n", + " - Compound (```MATCH (n:Compound) RETURN count(n)```) = 2244\n", + " - Side Effect (```MATCH (n:`Side Effect`) RETURN count(n)```) = 4501\n", + "- Count the number of each edge type\n", + " - total (```MATCH ()-[r]->() RETURN count(r)```) = 101630\n", + " - interacts_with (```MATCH ()-[r:interacts_with]->() RETURN count(r)```) = 16844\n", + " - part_of (```MATCH ()-[r:part_of]->() RETURN count(r)```) = 30066 \n", + " - WikiPathways (```MATCH ()-[r:part_of {source: \"WikiPathways\"}]->() RETURN count(r)```) = 3174\n", + " - OpenTargets, Reactome (```MATCH ()-[r:part_of {source: \"OpenTargets\"}]->() RETURN count(r)```) = 26784\n", + " - MINERVA (```MATCH ()-[r:part_of {source: \"MINERVA\"}]->() RETURN count(r)```) = 108\n", + " - activates (```MATCH ()-[r:activates]->() RETURN count(r)```) = 499\n", + " - treats (```MATCH ()-[r:treats]->() RETURN count(r)```) = 8215\n", + " - has_side_effect (```MATCH ()-[r:has_side_effect]->() RETURN count(r)```) = 38328\n", + " - inhibits (```MATCH ()-[r:inhibits]->() RETURN count(r)```) = 71\n", + " - associated_with (```MATCH ()-[r:associated_with]->() RETURN count(r)```) = 7607\n", + "\n", + "- Export the graph as a `.csv` file\n", + "\n", + " ```\n", + " neo4j$ call apoc.export.csv.all(\"pcs_networkx_graph.csv\",{})\n", + " ```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### DREAMwalk algorithm" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + 
"'e:\\\\BioDataFuse\\\\pyBiodatafuse\\\\examples\\\\usecases\\\\PCS\\\\DREAMwalk'" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import os\n", + "\n", + "new_path = os.path.join(os.getcwd(), \"DREAMwalk\")\n", + "\n", + "\n", + "os.chdir(new_path)\n", + "\n", + "# Set the current working directory\n", + "current_dir = os.getcwd()\n", + "current_dir" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "e:\\BioDataFuse\\pyBiodatafuse\\.venv\\lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "import DREAMwalk.generate_dis_sim as dis_gen\n", + "import DREAMwalk.generate_files as gen\n", + "import pandas as pd\n", + "from DREAMwalk.calculate_drug_scores import find_candidates\n", + "from DREAMwalk.generate_embeddings import save_embedding_files\n", + "from DREAMwalk.generate_similarity_net import save_sim_graph\n", + "from DREAMwalk.predict_associations import predict_dda" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Tooba\\AppData\\Local\\Temp\\ipykernel_696\\3278806773.py:2: DtypeWarning: Columns (1,2,3,4,5,6,7,8,9,10,11,13,16,17,18,20,21,22,23,28) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " kg_data= pd.read_csv(\"../pcs_networkx_graph.csv\")\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
_id_labelsDOEFOEnsemblHPOMESHMONDONCIOMIM...is_approvednamesource_start_end_typeeielscoresource.1
039718.0:GeneNaNNaNENSG00000152592NaNNaNNaNNaNNaN...NaNDMP1BridgeDBNaNNaNNaNNaNNaNNaNNaN
139719.0:DiseaseNaNNaNNaNHPO_HP:0004912MESH_D063730MONDO_0000044, MONDO_0024300NCI_C131449NaN...NaNHypophosphatemic RicketsDISGENETNaNNaNNaNNaNNaNNaNNaN
239720.0:DiseaseDO_0050949NaNNaNNaNMESH_C562792MONDO_0009430, MONDO_0017324NCI_C123187OMIM_241520...NaNAutosomal recessive hypophosphatemic vitamin D...DISGENETNaNNaNNaNNaNNaNNaNNaN
339721.0:DiseaseDO_0050949NaNNaNNaNMESH_C562792MONDO_0009430, MONDO_0017324NaNOMIM_600980, OMIM_241520...NaNHypophosphatemic Rickets, Autosomal Recessive, 1DISGENETNaNNaNNaNNaNNaNNaNNaN
439722.0:PathwayNaNNaNNaNNaNNaNNaNNaNNaN...NaNOSX and miRNAs in tooth developmentWikiPathwaysNaNNaNNaNNaNNaNNaNNaN
\n", + "

5 rows × 31 columns

\n", + "
" + ], + "text/plain": [ + " _id _labels DO EFO Ensembl HPO \\\n", + "0 39718.0 :Gene NaN NaN ENSG00000152592 NaN \n", + "1 39719.0 :Disease NaN NaN NaN HPO_HP:0004912 \n", + "2 39720.0 :Disease DO_0050949 NaN NaN NaN \n", + "3 39721.0 :Disease DO_0050949 NaN NaN NaN \n", + "4 39722.0 :Pathway NaN NaN NaN NaN \n", + "\n", + " MESH MONDO NCI \\\n", + "0 NaN NaN NaN \n", + "1 MESH_D063730 MONDO_0000044, MONDO_0024300 NCI_C131449 \n", + "2 MESH_C562792 MONDO_0009430, MONDO_0017324 NCI_C123187 \n", + "3 MESH_C562792 MONDO_0009430, MONDO_0017324 NaN \n", + "4 NaN NaN NaN \n", + "\n", + " OMIM ... is_approved \\\n", + "0 NaN ... NaN \n", + "1 NaN ... NaN \n", + "2 OMIM_241520 ... NaN \n", + "3 OMIM_600980, OMIM_241520 ... NaN \n", + "4 NaN ... NaN \n", + "\n", + " name source _start \\\n", + "0 DMP1 BridgeDB NaN \n", + "1 Hypophosphatemic Rickets DISGENET NaN \n", + "2 Autosomal recessive hypophosphatemic vitamin D... DISGENET NaN \n", + "3 Hypophosphatemic Rickets, Autosomal Recessive, 1 DISGENET NaN \n", + "4 OSX and miRNAs in tooth development WikiPathways NaN \n", + "\n", + " _end _type ei el score source.1 \n", + "0 NaN NaN NaN NaN NaN NaN \n", + "1 NaN NaN NaN NaN NaN NaN \n", + "2 NaN NaN NaN NaN NaN NaN \n", + "3 NaN NaN NaN NaN NaN NaN \n", + "4 NaN NaN NaN NaN NaN NaN \n", + "\n", + "[5 rows x 31 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# GENERSTE FILES\n", + "kg_data = pd.read_csv(\"../pcs_networkx_graph.csv\")\n", + "kg_data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['_id', '_labels', 'DO', 'EFO', 'Ensembl', 'HPO', 'MESH', 'MONDO', 'NCI',\n", + " 'OMIM', 'ORDO', 'UMLS', 'adverse_effect_count', 'chembl_id',\n", + " 'clincal_trial_phase', 'compound_cid', 'disease_type',\n", + " 'disease_umlscui', 'drugbank_id', 'gene_count', 'id', 'is_approved',\n", + " 'name', 'source', '_start', 
'_end', '_type', 'ei', 'el', 'score',\n", + " 'source.1'],\n", + " dtype='object')" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kg_data.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Graph file is saved!\n", + "Node types file is saved!\n" + ] + }, + { + "ename": "AttributeError", + "evalue": "'DataFrame' object has no attribute 'colmuns'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m~\\AppData\\Local\\Temp\\ipykernel_696\\2648921939.py\u001b[0m in \u001b[0;36m?\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mgen\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgenerate_files\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkg_data\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;32me:\\BioDataFuse\\pyBiodatafuse\\examples\\usecases\\PCS\\DREAMwalk\\DREAMwalk\\generate_files.py\u001b[0m in \u001b[0;36m?\u001b[1;34m(kg_data)\u001b[0m\n\u001b[0;32m 61\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 62\u001b[0m \u001b[1;31m## generate hierarchy file\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 63\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 64\u001b[0m \u001b[1;31m# filter rows with ':Compound' in '_labels'\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 65\u001b[1;33m 
\u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkg_data\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mkg_data\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'_labels'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0misin\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m':Compound'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcolmuns\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 66\u001b[0m \u001b[0mcompounds_filtered\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mkg_data\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mkg_data\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'_labels'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0misin\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m':Compound'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'compound_cid'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'atcClassification'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 67\u001b[0m \u001b[0mcompound_hierarchy\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mgenerate_drug_hierarchy\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcompounds_filtered\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 68\u001b[0m \u001b[0mcompound_hierarchy\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto_csv\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'hierarchy.csv'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msep\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m\",\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mindex\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32me:\\BioDataFuse\\pyBiodatafuse\\.venv\\lib\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36m?\u001b[1;34m(self, 
name)\u001b[0m\n\u001b[0;32m 5985\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mname\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_accessors\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5986\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_info_axis\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_can_hold_identifiers_and_holds_name\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5987\u001b[0m ):\n\u001b[0;32m 5988\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 5989\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mobject\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__getattribute__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;31mAttributeError\u001b[0m: 'DataFrame' object has no attribute 'colmuns'" + ] + } + ], + "source": [ + "gen.generate_files(kg_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "'type'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", + "File \u001b[1;32me:\\BioDataFuse\\pyBiodatafuse\\.venv\\lib\\site-packages\\pandas\\core\\indexes\\base.py:3653\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 3652\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m-> 3653\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcasted_key\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 3654\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n", + "File \u001b[1;32me:\\BioDataFuse\\pyBiodatafuse\\.venv\\lib\\site-packages\\pandas\\_libs\\index.pyx:147\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n", + "File \u001b[1;32me:\\BioDataFuse\\pyBiodatafuse\\.venv\\lib\\site-packages\\pandas\\_libs\\index.pyx:176\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n", + "File \u001b[1;32mpandas\\_libs\\hashtable_class_helper.pxi:7080\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n", + "File \u001b[1;32mpandas\\_libs\\hashtable_class_helper.pxi:7088\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[1;34m()\u001b[0m\n", + "\u001b[1;31mKeyError\u001b[0m: 'type'", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[5], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mgen\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate_files\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkg_data\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 3\u001b[0m dis_gen\u001b[38;5;241m.\u001b[39msave_dis_sim(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m../pcs_networkx_graph.csv\u001b[39m\u001b[38;5;124m'\u001b[39m,\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdis_sim.tsv\u001b[39m\u001b[38;5;124m'\u001b[39m)\n", + "File \u001b[1;32me:\\BioDataFuse\\pyBiodatafuse\\examples\\usecases\\PCS\\DREAMwalk\\DREAMwalk\\generate_files.py:22\u001b[0m, in 
\u001b[0;36mgenerate_files\u001b[1;34m(kg_data)\u001b[0m\n\u001b[0;32m 19\u001b[0m label_map_dict \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mdict\u001b[39m(\u001b[38;5;28mzip\u001b[39m(id_map[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmapped_id\u001b[39m\u001b[38;5;124m'\u001b[39m], id_map[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m_label\u001b[39m\u001b[38;5;124m'\u001b[39m]))\n\u001b[0;32m 21\u001b[0m \u001b[38;5;66;03m# filter rows with 'INTERACTS_WITH', 'TARGETS', or 'IS_ASSOCIATED_WITH' in 'type'\u001b[39;00m\n\u001b[1;32m---> 22\u001b[0m edges_filtered \u001b[38;5;241m=\u001b[39m kg_data[\u001b[43mkg_data\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mtype\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241m.\u001b[39misin([\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mactivates\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124minhibits\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124massociated_with\u001b[39m\u001b[38;5;124m'\u001b[39m,\u001b[38;5;124m'\u001b[39m\u001b[38;5;124minteracts_with\u001b[39m\u001b[38;5;124m'\u001b[39m])]\u001b[38;5;241m.\u001b[39mcopy()\n\u001b[0;32m 24\u001b[0m count \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[0;32m 25\u001b[0m output_graph \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mDataFrame(columns\u001b[38;5;241m=\u001b[39m[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124msource\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtarget\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124medgetype\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mweight\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124medge_id\u001b[39m\u001b[38;5;124m'\u001b[39m])\n", + "File 
\u001b[1;32me:\\BioDataFuse\\pyBiodatafuse\\.venv\\lib\\site-packages\\pandas\\core\\frame.py:3761\u001b[0m, in \u001b[0;36mDataFrame.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 3759\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcolumns\u001b[38;5;241m.\u001b[39mnlevels \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[0;32m 3760\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_getitem_multilevel(key)\n\u001b[1;32m-> 3761\u001b[0m indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 3762\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_integer(indexer):\n\u001b[0;32m 3763\u001b[0m indexer \u001b[38;5;241m=\u001b[39m [indexer]\n", + "File \u001b[1;32me:\\BioDataFuse\\pyBiodatafuse\\.venv\\lib\\site-packages\\pandas\\core\\indexes\\base.py:3655\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 3653\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_engine\u001b[38;5;241m.\u001b[39mget_loc(casted_key)\n\u001b[0;32m 3654\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[1;32m-> 3655\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n\u001b[0;32m 3656\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[0;32m 3657\u001b[0m \u001b[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001b[39;00m\n\u001b[0;32m 3658\u001b[0m \u001b[38;5;66;03m# InvalidIndexError. 
Otherwise we fall through and re-raise\u001b[39;00m\n\u001b[0;32m 3659\u001b[0m \u001b[38;5;66;03m# the TypeError.\u001b[39;00m\n\u001b[0;32m 3660\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_indexing_error(key)\n", + "\u001b[1;31mKeyError\u001b[0m: 'type'" + ] + } + ], + "source": [ + "dis_gen.save_dis_sim(\"../pcs_networkx_graph.csv\", \"dis_sim.tsv\")" ] } ], diff --git a/src/pyBiodatafuse/annotators/opentargets.py b/src/pyBiodatafuse/annotators/opentargets.py index 117c1be8..b81523ba 100644 --- a/src/pyBiodatafuse/annotators/opentargets.py +++ b/src/pyBiodatafuse/annotators/opentargets.py @@ -923,6 +923,7 @@ def get_disease_compound_interactions( """Get information about drugs associated with diseases of interest. :param bridgedb_df: BridgeDb output for creating the list of gene ids to query. + :raises ValueError: if failed to retrieve data :returns: a DataFrame containing the OpenTargets output and dictionary of the query metadata. """ # Check if the API is available @@ -978,7 +979,12 @@ def get_disease_compound_interactions( query_string = query_string.replace("$efoIds", str(efo_ids).replace("'", '"')) - r = requests.post(OPENTARGETS_ENDPOINT, json={"query": query_string}).json() + r = requests.post(OPENTARGETS_ENDPOINT, json={"query": query_string}) + + try: + response_data = r.json() + except ValueError: + raise ValueError(f"Failed to parse JSON response. 
Response text: {r.text}") # Record the end time end_time = datetime.datetime.now() @@ -1001,14 +1007,14 @@ def get_disease_compound_interactions( # Generate the OpenTargets DataFrame intermediate_df = pd.DataFrame() - if r["data"]["diseases"] is None: + if response_data["data"]["diseases"] is None: warnings.warn( f"There is no annotation for your input list in {OPENTARGETS_DISEASE_COMPOUND_COL}.", stacklevel=2, ) return pd.DataFrame(), opentargets_version - for disease in r["data"]["diseases"]: + for disease in response_data["data"]["diseases"]: if not disease["knownDrugs"]: continue diff --git a/src/pyBiodatafuse/constants.py b/src/pyBiodatafuse/constants.py index a049f5af..02cce29c 100644 --- a/src/pyBiodatafuse/constants.py +++ b/src/pyBiodatafuse/constants.py @@ -4,6 +4,7 @@ # Endpoints / API +BRIDGEDB_ENDPOINT = "https://webservice.bridgedb.org" BGEE_ENDPOINT = "https://www.bgee.org/sparql/" DISGENET_ENDPOINT = "https://api.disgenet.com/api/v1/gda/summary" MINERVA_ENDPOINT = "https://minerva-net.lcsb.uni.lu/api/" diff --git a/src/pyBiodatafuse/id_mapper.py b/src/pyBiodatafuse/id_mapper.py index 702bd1ee..821e8b97 100644 --- a/src/pyBiodatafuse/id_mapper.py +++ b/src/pyBiodatafuse/id_mapper.py @@ -14,6 +14,8 @@ from pubchempy import BadRequestError, PubChemHTTPError, get_compounds, get_synonyms from rdkit.Chem import CanonSmiles +from pyBiodatafuse.constants import BRIDGEDB_ENDPOINT + logger = logging.getLogger(__name__) @@ -85,7 +87,7 @@ def get_version_datasource_bridgedb(input_species: Optional[str] = None) -> List def bridgedb_xref( identifiers: pd.DataFrame, input_species: Optional[str] = None, - input_datasource: Optional[str] = None, + input_datasource: str = "HGNC", output_datasource: Optional[list] = None, ) -> Tuple[pd.DataFrame, dict]: """Map input list using BridgeDb. @@ -103,16 +105,12 @@ def bridgedb_xref( if not input_datasource: raise ValueError("Please provide the identifier datasource, e.g. 
HGNC") - if output_datasource is None: + if output_datasource is None or "All": output_datasource = [ - "RefSeq", - "WikiGenes", - "OMIM", "Uniprot-TrEMBL", "NCBI Gene", "Ensembl", "HGNC Accession Number", - "PDB", "HGNC", ] @@ -130,8 +128,7 @@ def bridgedb_xref( ) # Setting up the query url - url = "https://webservice.bridgedb.org" - query_link = f"{url}/{input_species}/xrefsBatch" + query_link = f"{BRIDGEDB_ENDPOINT}/{input_species}/xrefsBatch" # Record the start time start_time = datetime.datetime.now() @@ -139,6 +136,7 @@ def bridgedb_xref( # Getting the response to the query try: s = requests.post(url=query_link, data=post_con.encode()) + s.raise_for_status() except Exception as e: raise ValueError("Error:", e) @@ -177,9 +175,11 @@ def bridgedb_xref( data_sources.set_index("systemCode")["source"] ) + # Drop not mapped ids + bridgedb = bridgedb.dropna(subset=["target.source"]) + # Subset based on the output_datasource - if not output_datasource == "All": - bridgedb = bridgedb[bridgedb["target.source"].isin(output_datasource)] + bridgedb = bridgedb[bridgedb["target.source"].isin(output_datasource)] bridgedb = bridgedb.drop_duplicates() identifiers.columns = [