From 90bf173ef110f5d92c6f0842a9510f90439ab4b1 Mon Sep 17 00:00:00 2001 From: Tooba Abbassi-Daloii Date: Tue, 17 Sep 2024 08:35:01 +0200 Subject: [PATCH 1/7] add literature data --- examples/usecases/PCS/PCS_usecase.ipynb | 335 +++++++++++++++++++++--- src/pyBiodatafuse/constants.py | 35 ++- src/pyBiodatafuse/graph/generator.py | 50 +++- 3 files changed, 365 insertions(+), 55 deletions(-) diff --git a/examples/usecases/PCS/PCS_usecase.ipynb b/examples/usecases/PCS/PCS_usecase.ipynb index 35ae98a2..22138373 100644 --- a/examples/usecases/PCS/PCS_usecase.ipynb +++ b/examples/usecases/PCS/PCS_usecase.ipynb @@ -44,6 +44,7 @@ "import matplotlib.pyplot as plt\n", "import networkx as nx\n", "import pandas as pd\n", + "import numpy as np\n", "from dotenv import load_dotenv\n", "\n", "from pyBiodatafuse import id_mapper\n", @@ -513,6 +514,234 @@ "disgenet_df[DISGENET_DISEASE_COL][0]" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Add literature-based data\n", + "Genes found to be associated with Post-COVID-19" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Gene
0CTLA4
1PTPN22
2KIT
3KRAS
4NF1
\n", + "
" + ], + "text/plain": [ + " Gene\n", + "0 CTLA4\n", + "1 PTPN22\n", + "2 KIT\n", + "3 KRAS\n", + "4 NF1" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pcs_associated_genes = pd.read_excel(os.path.join(os.getcwd(), \"examples\", \"usecases\", \"PCS\", \"pcs_associated_genes.xlsx\"))\n", + "pcs_associated_genes.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
identifieridentifier.sourcetargettarget.sourceDISGENET_diseasesliterature_based_info
0A2ML1HGNC144568NCBI Gene[{'disease_name': 'Noonan Syndrome', 'HPO': ''...[{'disease_name': nan, 'UMLS': nan, 'source': ...
1AAMDCHGNC28971NCBI Gene[{'disease_name': nan, 'HPO': nan, 'NCI': nan,...[{'disease_name': nan, 'UMLS': nan, 'source': ...
2ABCA1HGNC19NCBI Gene[{'disease_name': 'Tangier Disease', 'HPO': ''...[{'disease_name': nan, 'UMLS': nan, 'source': ...
3ABCB1HGNC5243NCBI Gene[{'disease_name': 'Epilepsy', 'HPO': 'HPO_HP:0...[{'disease_name': nan, 'UMLS': nan, 'source': ...
4ABCC6P1HGNC653190NCBI Gene[{'disease_name': nan, 'HPO': nan, 'NCI': nan,...[{'disease_name': nan, 'UMLS': nan, 'source': ...
\n", + "
" + ], + "text/plain": [ + " identifier identifier.source target target.source \\\n", + "0 A2ML1 HGNC 144568 NCBI Gene \n", + "1 AAMDC HGNC 28971 NCBI Gene \n", + "2 ABCA1 HGNC 19 NCBI Gene \n", + "3 ABCB1 HGNC 5243 NCBI Gene \n", + "4 ABCC6P1 HGNC 653190 NCBI Gene \n", + "\n", + " DISGENET_diseases \\\n", + "0 [{'disease_name': 'Noonan Syndrome', 'HPO': ''... \n", + "1 [{'disease_name': nan, 'HPO': nan, 'NCI': nan,... \n", + "2 [{'disease_name': 'Tangier Disease', 'HPO': ''... \n", + "3 [{'disease_name': 'Epilepsy', 'HPO': 'HPO_HP:0... \n", + "4 [{'disease_name': nan, 'HPO': nan, 'NCI': nan,... \n", + "\n", + " literature_based_info \n", + "0 [{'disease_name': nan, 'UMLS': nan, 'source': ... \n", + "1 [{'disease_name': nan, 'UMLS': nan, 'source': ... \n", + "2 [{'disease_name': nan, 'UMLS': nan, 'source': ... \n", + "3 [{'disease_name': nan, 'UMLS': nan, 'source': ... \n", + "4 [{'disease_name': nan, 'UMLS': nan, 'source': ... " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def get_literature_based_info(gene):\n", + " if gene in pcs_associated_genes[\"Gene\"].values:\n", + " return [{\"disease_name\": \"Post-COVID-19\", \n", + " \"UMLS\": \"C00000\", \n", + " \"source\": \"PMID: 37675861\"}]\n", + " else:\n", + " return [{\"disease_name\": np.nan, \n", + " \"UMLS\": np.nan, \n", + " \"source\": np.nan}]\n", + "\n", + "# Apply the function to each gene identifier in disgenet_df and create the new column\n", + "disgenet_df[\"literature_based_info\"] = disgenet_df[\"identifier\"].apply(get_literature_based_info)\n", + "\n", + "disgenet_df.head()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "29\n" + ] + } + ], + "source": [ + "print(pcs_associated_genes[\"Gene\"].isin(disgenet_df[\"identifier\"]).sum())" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -529,7 +758,7 @@ }, { "cell_type": 
"code", - "execution_count": 8, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -608,7 +837,7 @@ "4 UMLS_C0010054 UMLS EFO_0001645 EFO" ] }, - "execution_count": 8, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -629,7 +858,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -721,7 +950,7 @@ "4 [{'chembl_id': 'CHEMBL628', 'drugbank_id': 'DB... " ] }, - "execution_count": 9, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -784,7 +1013,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -879,7 +1108,7 @@ " {'name': 'drug withdrawal syndrome neonatal'}]}]" ] }, - "execution_count": 10, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -897,7 +1126,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -989,7 +1218,7 @@ "4 [{'chembl_id': nan, 'drugbank_id': nan, 'compo... " ] }, - "execution_count": 11, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -1028,7 +1257,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -1073,7 +1302,7 @@ " 'adverse_effect': None}]" ] }, - "execution_count": 12, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -1091,7 +1320,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -1183,7 +1412,7 @@ "4 [{'pathway_id': nan, 'pathway_label': nan, 'pa... 
" ] }, - "execution_count": 13, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -1212,7 +1441,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -1223,7 +1452,7 @@ " 'pathway_gene_count': 45.0}]" ] }, - "execution_count": 14, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -1241,7 +1470,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -1333,7 +1562,7 @@ "4 [{'pathway_id': nan, 'pathway_label': nan, 'pa... " ] }, - "execution_count": 15, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -1365,7 +1594,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -1409,7 +1638,7 @@ " 'pathway_gene_count': 35.0}]" ] }, - "execution_count": 16, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -1427,7 +1656,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -1519,7 +1748,7 @@ "4 [{'pathway_label': nan, 'pathway_id': nan}] " ] }, - "execution_count": 17, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -1557,7 +1786,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -1572,7 +1801,7 @@ " {'pathway_label': 'HDL assembly', 'pathway_id': 'R-HSA-8963896'}]" ] }, - "execution_count": 18, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -1590,7 +1819,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -1682,7 +1911,7 @@ "4 [{'go_id': nan, 'go_name': nan, 'go_type': nan}] " ] }, - "execution_count": 19, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -1712,7 +1941,7 @@ }, { "cell_type": "code", - 
"execution_count": 20, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -1731,7 +1960,7 @@ " 'go_type': 'F'}]" ] }, - "execution_count": 20, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -1749,7 +1978,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 24, "metadata": {}, "outputs": [ { @@ -1841,7 +2070,7 @@ "4 [{'stringdb_link_to': 'DEFB118', 'Ensembl': 'E... " ] }, - "execution_count": 21, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -1869,7 +2098,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 25, "metadata": {}, "outputs": [ { @@ -1885,7 +2114,7 @@ " {'stringdb_link_to': 'RUNX2', 'Ensembl': 'ENSP00000360493', 'score': 0.713}]" ] }, - "execution_count": 22, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -1903,7 +2132,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 26, "metadata": {}, "outputs": [], "source": [ @@ -1936,7 +2165,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 27, "metadata": {}, "outputs": [ { @@ -1965,6 +2194,7 @@ " target\n", " target.source\n", " DISGENET_diseases\n", + " literature_based_info\n", " OpenTargets_gene_compounds\n", " MINERVA\n", " WikiPathways\n", @@ -1981,6 +2211,7 @@ " ENSG00000152592\n", " Ensembl\n", " [{'disease_name': 'Hypophosphatemic Rickets', ...\n", + " [{'disease_name': 'Post-COVID-19', 'UMLS': 'C0...\n", " [{'chembl_id': nan, 'drugbank_id': nan, 'compo...\n", " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", " [{'pathway_id': 'WP3971', 'pathway_label': 'OS...\n", @@ -1995,6 +2226,7 @@ " ENSG00000175535\n", " Ensembl\n", " [{'disease_name': 'Pancreatic Lipase Deficienc...\n", + " [{'disease_name': nan, 'UMLS': nan, 'source': ...\n", " [{'chembl_id': 'CHEMBL175247', 'drugbank_id': ...\n", " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", 
@@ -2009,6 +2241,7 @@ " ENSG00000259435\n", " Ensembl\n", " [{'disease_name': nan, 'HPO': nan, 'NCI': nan,...\n", + " [{'disease_name': nan, 'UMLS': nan, 'source': ...\n", " [{'chembl_id': nan, 'drugbank_id': nan, 'compo...\n", " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", @@ -2023,6 +2256,7 @@ " ENSG00000268104\n", " Ensembl\n", " [{'disease_name': 'Cystic Fibrosis', 'HPO': ''...\n", + " [{'disease_name': nan, 'UMLS': nan, 'source': ...\n", " [{'chembl_id': nan, 'drugbank_id': nan, 'compo...\n", " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", " [{'pathway_id': 'WP2882', 'pathway_label': 'Nu...\n", @@ -2047,6 +2281,12 @@ "2 [{'disease_name': nan, 'HPO': nan, 'NCI': nan,... \n", "3 [{'disease_name': 'Cystic Fibrosis', 'HPO': ''... \n", "\n", + " literature_based_info \\\n", + "0 [{'disease_name': 'Post-COVID-19', 'UMLS': 'C0... \n", + "1 [{'disease_name': nan, 'UMLS': nan, 'source': ... \n", + "2 [{'disease_name': nan, 'UMLS': nan, 'source': ... \n", + "3 [{'disease_name': nan, 'UMLS': nan, 'source': ... \n", + "\n", " OpenTargets_gene_compounds \\\n", "0 [{'chembl_id': nan, 'drugbank_id': nan, 'compo... \n", "1 [{'chembl_id': 'CHEMBL175247', 'drugbank_id': ... \n", @@ -2084,7 +2324,7 @@ "3 [{'stringdb_link_to': 'SLC7A11', 'Ensembl': 'E... 
" ] }, - "execution_count": 24, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -2095,7 +2335,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 28, "metadata": {}, "outputs": [ { @@ -2247,7 +2487,7 @@ " 'request_string': 'LOC729609\\tH\\nLOC105374060\\tH\\nDMP1\\tH\\nPNLIP\\tH\\nOR4N3P\\tH\\nSLC6A14\\tH\\nLOC101927239\\tH\\nDEFB105A\\tH\\nDEFB105B\\tH\\nGSTTP1\\tH\\nNEUROD1\\tH\\nRND1\\tH\\nVN1R10P\\tH\\nLOC440446\\tH\\nLOC152225\\tH\\nLOC101929341\\tH\\nPGLYRP3\\tH\\nLINC01533\\tH\\nLINC01090\\tH\\nSPEM1\\tH\\nC16orf82\\tH\\nMIR4432HG\\tH\\nLINC01169\\tH\\nFAM71A\\tH\\nRNASE10\\tH\\nKLF17\\tH\\nC9\\tH\\nARC\\tH\\nMYL10\\tH\\nGCM1\\tH\\nAIPL1\\tH\\nHSPA6\\tH\\nLOC101929124\\tH\\nC7orf65\\tH\\nSLC2A14\\tH\\nPNLIPRP2\\tH\\nNPAS4\\tH\\nLOC101060498\\tH\\nPROP1\\tH\\nELAVL3\\tH\\nLOC105747689\\tH\\nTNF\\tH\\nADAMTS4\\tH\\nPCDH10\\tH\\nLOC101927274\\tH\\nNR4A2\\tH\\nLOC102724612\\tH\\nCEACAM22P\\tH\\nSNAI1\\tH\\nSLC2A3\\tH\\nDLX3\\tH\\nID2\\tH\\nLOC151475\\tH\\nATF3\\tH\\nNKAIN4\\tH\\nASAP1-IT2\\tH\\nNOXRED1\\tH\\nDNM1P41\\tH\\nSLC7A11\\tH\\nC10orf82\\tH\\nULBP2\\tH\\nTPTE2P6\\tH\\nNR4A3\\tH\\nLOC399715\\tH\\nCNTN3\\tH\\nGEM\\tH\\nHSPA7\\tH\\nNCMAP\\tH\\nPNP\\tH\\nPLK2\\tH\\nATP2C2\\tH\\nTNFRSF10D\\tH\\nULBP3\\tH\\nHSPA5\\tH\\nEFHB\\tH\\nHSD17B13\\tH\\nWNK3\\tH\\nLINC01535\\tH\\nELL2\\tH\\nRND3\\tH\\nDUSP5\\tH\\nNRXN3\\tH\\nIPCEF1\\tH\\nZNF492\\tH\\nSDR16C5\\tH\\nCENPL\\tH\\nSOX11\\tH\\nMAFF\\tH\\nPRG4\\tH\\nPCDH17\\tH\\nCDKN1A\\tH\\nPELI1\\tH\\nTMEM169\\tH\\nTMEM236\\tH\\nEFNA5\\tH\\nGCH1\\tH\\nANGPTL4\\tH\\nMAP1LC3C\\tH\\nCHL1\\tH\\nMPZ\\tH\\nSERPINE1\\tH\\nSLC2A1\\tH\\nLRRC16A\\tH\\nFRZB\\tH\\nGLIS3\\tH\\nTIAM1\\tH\\nSRGAP1\\tH\\nSH2D4A\\tH\\nMYEF2\\tH\\nNT5E\\tH\\nVGLL3\\tH\\nPRTG\\tH\\nDPP4\\tH\\nKLF11\\tH\\nTAF13\\tH\\nSTRADB\\tH\\nPOMP\\tH\\nLAMTOR5\\tH\\nCCDC69\\tH\\nZNF32\\tH\\nIQSEC2\\tH\\nAPIP\\tH\\nGDF9\\tH\\nSCUBE2\\tH\\nC20orf24\\tH\\nZSWIM7\\tH\\nTIMM8B\\tH\\nLOC102724532\\tH\\nPRR16\\tH\\nAHRR\\tH\\
nLEFTY2\\tH\\nIRX3\\tH\\nVMO1\\tH\\nPVALB\\tH\\nMT1DP\\tH\\nCALML5\\tH\\nLOC101929116\\tH\\nLOC101929694\\tH\\nLINC01205\\tH\\nLINC01241\\tH\\nTMPRSS11A\\tH\\nLOC101928942\\tH\\nLOC100507461\\tH\\nLINC01565\\tH\\nLOC101928358\\tH\\nSCGB1D4\\tH\\nTTR\\tH\\nLINC01284\\tH\\nSSX8\\tH\\nTMEM225\\tH\\nNCRNA00250\\tH\\nOR13D1\\tH\\nLINC01192\\tH\\nCALCB\\tH\\nLINC00411\\tH\\nLINC01227\\tH\\nMIR5689HG\\tH\\nLINC00615\\tH\\nGHSR\\tH\\nLOC105375556\\tH\\nCT45A5\\tH\\nLOC646029\\tH\\nZFP42\\tH\\nCT45A9\\tH\\nFLJ46066\\tH\\nCGA\\tH\\nLOC285692\\tH\\nLOC105369509\\tH\\nCLEC1B\\tH\\nHIST1H4A\\tH\\nDSCAM-IT1\\tH\\nCT45A2\\tH\\nCT45A8\\tH\\nLINC00928\\tH\\nBDKRB1\\tH\\nLOC105370586\\tH\\nTRIM51\\tH\\nLOC101927480\\tH\\nLINC01568\\tH\\nCASC17\\tH\\nLOC101929631\\tH\\nLINC01233\\tH\\nLOC101927948\\tH\\nOR13C5\\tH\\nSSX2\\tH\\nSSX2B\\tH\\nCACNA1C-IT3\\tH\\nLOC100500773\\tH\\nSPATA3\\tH\\nLOC101927374\\tH\\nFBXO47\\tH\\nLINC01493\\tH\\nLOC105369431\\tH\\nLOC105376468\\tH\\nOR5W2\\tH\\nREG4\\tH\\nCD5L\\tH\\nLINC01514\\tH\\nLOC105376331\\tH\\nLOC102723557\\tH\\nPISRT1\\tH\\nHIGD2B\\tH\\nPAGE1\\tH\\nMMP26\\tH\\nLOC101928602\\tH\\nLOC102723895\\tH\\nACTR3BP2\\tH\\nLOC101927363\\tH\\nHNRNPKP3\\tH\\nLOC101927188\\tH\\nDISC1-IT1\\tH\\nLOC102467222\\tH\\nFAM9B\\tH\\nGLOD5\\tH\\nC2orf48\\tH\\nLOC100288254\\tH\\nFRG2\\tH\\nGACAT3\\tH\\nFOXCUT\\tH\\nLOC101927357\\tH\\nLOC101929260\\tH\\nOR13C2\\tH\\nLOC101929754\\tH\\nLOC146513\\tH\\nOR2AT4\\tH\\nPBOV1\\tH\\nTFDP3\\tH\\nLOC101929420\\tH\\nHRAT17\\tH\\nOR6W1P\\tH\\nSSX9\\tH\\nSSX3\\tH\\nHMGA1P7\\tH\\nLINC00374\\tH\\nLINC01288\\tH\\nLINC00836\\tH\\nLINC01320\\tH\\nTRIM64\\tH\\nSDR16C6P\\tH\\nLOC729966\\tH\\nLOC105375014\\tH\\nLINC01441\\tH\\nSCNN1G\\tH\\nC7orf69\\tH\\nOPN1LW\\tH\\nKRTAP5-4\\tH\\nANKUB1\\tH\\nTMEM213\\tH\\nTFAP2D\\tH\\nDANT2\\tH\\nLOC101927419\\tH\\nTXNDC2\\tH\\nOR11A1\\tH\\nLINC01317\\tH\\nLOC101805491\\tH\\nLOC286083\\tH\\nLOC101929563\\tH\\nLINC01216\\tH\\nLINC01163\\tH\\nLOC101927166\\tH\\nPHOX2B\\tH\\nLOC102467081\\tH\\nCT45A6\
\tH\\nSND1-IT1\\tH\\nSSX4B\\tH\\nSSX4\\tH\\nSULT1E1\\tH\\nNOL4\\tH\\nZNF716\\tH\\nSUMO1P1\\tH\\nLOC440896\\tH\\nG6PC\\tH\\nMIR31HG\\tH\\nLOC101929259\\tH\\nHTR3C\\tH\\nLOC730100\\tH\\nMAB21L3\\tH\\nIL6\\tH\\nMIP\\tH\\nTRIM64B\\tH\\nCNGB1\\tH\\nLINC01531\\tH\\nFOXL2NB\\tH\\nCXCL8\\tH\\nSLC15A1\\tH\\nGABRB1\\tH\\nLINC00862\\tH\\nZPBP2\\tH\\nLOC101928992\\tH\\nDPPA4\\tH\\nPOU2F3\\tH\\nNUTM1\\tH\\nLOC105372440\\tH\\nSELE\\tH\\nGPR143\\tH\\nFSTL5\\tH\\nAXDND1\\tH\\nLINC01619\\tH\\nSAMD7\\tH\\nLOC100131257\\tH\\nABCC13\\tH\\nC17orf78\\tH\\nCRX\\tH\\nC12orf42\\tH\\nFOXG1\\tH\\nHTR3A\\tH\\nLOC644189\\tH\\nPNPLA1\\tH\\nLINC00880\\tH\\nTOP1P2\\tH\\nCAGE1\\tH\\nLINC00670\\tH\\nLOC101928231\\tH\\nFAM138C\\tH\\nRTP1\\tH\\nLOC101928617\\tH\\nSPAG11B\\tH\\nLOC101927691\\tH\\nSLC35G3\\tH\\nBCO1\\tH\\nSLC35G4\\tH\\nLINC00636\\tH\\nEPGN\\tH\\nPTGS2\\tH\\nPGC\\tH\\nLOC102724467\\tH\\nLOC101928103\\tH\\nTRPC5OS\\tH\\nLOC338694\\tH\\nLINC01036\\tH\\nDLX6\\tH\\nLINC00426\\tH\\nCXorf65\\tH\\nHP09025\\tH\\nLOC389273\\tH\\nDPCR1\\tH\\nC5orf60\\tH\\nPCSK1\\tH\\nLOC494141\\tH\\nGADD45B\\tH\\nC1orf87\\tH\\nANKS4B\\tH\\nJAKMIP2\\tH\\nLINC00266-3\\tH\\nDRAIC\\tH\\nTCAM1P\\tH\\nMIR202HG\\tH\\nSPRR2F\\tH\\nFAM138B\\tH\\nLINC00907\\tH\\nCCL19\\tH\\nASCL1\\tH\\nNUP210L\\tH\\nLINC01170\\tH\\nLINC00264\\tH\\nANKRD7\\tH\\nLOC102724601\\tH\\nSH2D6\\tH\\nFAM138F\\tH\\nFAM138A\\tH\\nGYPE\\tH\\nDDX4\\tH\\nIL5RA\\tH\\nTNFRSF9\\tH\\nLINC00368\\tH\\nLGSN\\tH\\nNEK5\\tH\\nLOC105374177\\tH\\nGLB1L3\\tH\\nLOC105379511\\tH\\nMT1A\\tH\\nFAM138E\\tH\\nTEKT3\\tH\\nSV2C\\tH\\nNR2E3\\tH\\nPLA2G10\\tH\\nLOC101927770\\tH\\nENO4\\tH\\nSBK2\\tH\\nA2ML1\\tH\\nLOC101927257\\tH\\nSPRY4-IT1\\tH\\nDNAH8\\tH\\nAK7\\tH\\nASXL3\\tH\\nTEX38\\tH\\nDNM1P35\\tH\\nCCL26\\tH\\nPPP3R2\\tH\\nCTSLP2\\tH\\nACBD7\\tH\\nSOX2-OT\\tH\\nSTC1\\tH\\nLOC284865\\tH\\nFDPSP2\\tH\\nMARVELD2\\tH\\nCDKL2\\tH\\nDCX\\tH\\nSHISA9\\tH\\nC4orf26\\tH\\nDNAH5\\tH\\nCD3G\\tH\\nTTC23L\\tH\\nPDE6A\\tH\\nAPOBEC3H\\tH\\nLINC00311\\tH\\nCXCL2\\tH\\nLINC00632\\tH\\n
SALL4\\tH\\nLOC105372582\\tH\\nFAM106CP\\tH\\nRASD1\\tH\\nCACNA1F\\tH\\nELAVL2\\tH\\nKIAA0087\\tH\\nGIPR\\tH\\nCIDEA\\tH\\nBCL11B\\tH\\nTNFRSF11B\\tH\\nCA13\\tH\\nANKRD20A9P\\tH\\nFAM106B\\tH\\nSEMA3E\\tH\\nGPRC5A\\tH\\nLOC285819\\tH\\nLOC730101\\tH\\nIL1RL1\\tH\\nRGS2\\tH\\nRYBP\\tH\\nC3orf52\\tH\\nHOOK1\\tH\\nPCDH9\\tH\\nCDH19\\tH\\nPGA4\\tH\\nSTARD4\\tH\\nCYP2B7P\\tH\\nTFPI2\\tH\\nPDK4\\tH\\nPGA5\\tH\\nKCNAB3\\tH\\nLINC00641\\tH\\nLOC102724571\\tH\\nSEZ6L\\tH\\nTNFSF9\\tH\\nZNF483\\tH\\nM1AP\\tH\\nFAAP24\\tH\\nKLHL15\\tH\\nCHD1\\tH\\nAP1S3\\tH\\nCDS1\\tH\\nCRTAC1\\tH\\nGYG2\\tH\\nGRHL1\\tH\\nFSIP1\\tH\\nSYT1\\tH\\nPLCXD3\\tH\\nLOC101928371\\tH\\nPEG10\\tH\\nMPZL3\\tH\\nZNF331\\tH\\nKCNQ1OT1\\tH\\nLOC388436\\tH\\nLOC79999\\tH\\nFAM106A\\tH\\nRPS6KA6\\tH\\nBCL2L15\\tH\\nTBX5\\tH\\nEMP1\\tH\\nPPP2R2B\\tH\\nTACR1\\tH\\nSLC7A10\\tH\\nELOVL6\\tH\\nATP1B3\\tH\\nSEMA4A\\tH\\nCEP152\\tH\\nLINC01296\\tH\\nNRXN1\\tH\\nADGRG2\\tH\\nCLDN1\\tH\\nZSWIM6\\tH\\nWNT3\\tH\\nCCDC170\\tH\\nTHBS1\\tH\\nSLC35F2\\tH\\nZC3H12B\\tH\\nPLIN1\\tH\\nLOC401052\\tH\\nCATSPERG\\tH\\nIFRD1\\tH\\nGAS2L3\\tH\\nAPOBEC3D\\tH\\nPOU2F2\\tH\\nERRFI1\\tH\\nARSJ\\tH\\nFOXC1\\tH\\nPRDM1\\tH\\nRASGRP1\\tH\\nKIAA1683\\tH\\nPRELP\\tH\\nTIPARP\\tH\\nZC3H12A\\tH\\nSGIP1\\tH\\nPDE8B\\tH\\nGFPT2\\tH\\nCABP4\\tH\\nRAD51B\\tH\\nMICB\\tH\\nEIF4A3\\tH\\nFAM72C\\tH\\nC7\\tH\\nQPCT\\tH\\nMAP3K8\\tH\\nTUFT1\\tH\\nDUXAP10\\tH\\nSHROOM3\\tH\\nZC3HAV1\\tH\\nS1PR2\\tH\\nFAM122C\\tH\\nHRH1\\tH\\nUGCG\\tH\\nSOX9\\tH\\nLYVE1\\tH\\nBCL2L11\\tH\\nEIF2AK3\\tH\\nC11orf63\\tH\\nSERPINB8\\tH\\nLEPR\\tH\\nCACNB2\\tH\\nCACNA2D4\\tH\\nNR2F1\\tH\\nCLCF1\\tH\\nPSD3\\tH\\nADNP2\\tH\\nDYNC2H1\\tH\\nOR2A20P\\tH\\nSYT17\\tH\\nVASH2\\tH\\nTMEM2\\tH\\nOR2A9P\\tH\\nUSP32P2\\tH\\nEDIL3\\tH\\nLOX\\tH\\nMXD1\\tH\\nNHSL1\\tH\\nDLC1\\tH\\nCYBB\\tH\\nETV5\\tH\\nCEP126\\tH\\nPTPRF\\tH\\nCOCH\\tH\\nSCRN1\\tH\\nPPM1D\\tH\\nLILRB4\\tH\\nMFSD4A\\tH\\nCCDC144B\\tH\\nPXDNL\\tH\\nAHR\\tH\\nTRIM14\\tH\\nFRMD4B\\tH\\nCD84\\tH\\nTIAM2\\tH\\nADAMTS5\\tH\\nXYLT1\
\tH\\nMYOF\\tH\\nSLC7A1\\tH\\nSMG1P3\\tH\\nUGDH\\tH\\nPMP22\\tH\\nAMPH\\tH\\nNPIPB5\\tH\\nNT5DC3\\tH\\nUBE2D2\\tH\\nPIGX\\tH\\nTTC1\\tH\\nSRP14\\tH\\nGKAP1\\tH\\nFIBP\\tH\\nMED11\\tH\\nVTI1B\\tH\\nATPAF1\\tH\\nDNAJC19\\tH\\nMRPL24\\tH\\nTRIM16L\\tH\\nPOLR2F\\tH\\nGCSH\\tH\\nTMEM147\\tH\\nLSM10\\tH\\nMRPL40\\tH\\nC11orf74\\tH\\nSERF2-C15ORF63\\tH\\nNDUFAF2\\tH\\nUBE3D\\tH\\nMALSU1\\tH\\nCOA4\\tH\\nELP6\\tH\\nMTX2\\tH\\nCMC4\\tH\\nMON1A\\tH\\nCABP7\\tH\\nMID1IP1\\tH\\nCOA6\\tH\\nKIF22\\tH\\nTSEN15\\tH\\nNDFIP2\\tH\\nHYPK\\tH\\nZCRB1\\tH\\nPARK7\\tH\\nCOX16\\tH\\nGTF3C6\\tH\\nMINOS1\\tH\\nMRPS15\\tH\\nSTOML2\\tH\\nKCNS3\\tH\\nCACNA2D3\\tH\\nCTNNBIP1\\tH\\nC7orf55\\tH\\nCOPS5\\tH\\nCHCHD5\\tH\\nYBX3P1\\tH\\nSPAG7\\tH\\nNDUFS3\\tH\\nTPI1\\tH\\nPET100\\tH\\nST3GAL2\\tH\\nMRPL21\\tH\\nTP53TG1\\tH\\nCDKN2AIPNL\\tH\\nOIP5\\tH\\nRPS20\\tH\\nATP5E\\tH\\nCBWD2\\tH\\nCDK5\\tH\\nTOMM5\\tH\\nPRR34\\tH\\nHINT1\\tH\\nBAD\\tH\\nATP5L\\tH\\nSFXN5\\tH\\nAAMDC\\tH\\nMRPL51\\tH\\nKIAA0930\\tH\\nVAMP5\\tH\\nSEPW1\\tH\\nNDUFA6\\tH\\nSLIRP\\tH\\nSHISA2\\tH\\nNUDT2\\tH\\nCOX5B\\tH\\nSNRPN\\tH\\nSNURF\\tH\\nAURKA\\tH\\nCBWD1\\tH\\nNDUFB2\\tH\\nNAA38\\tH\\nCKM\\tH\\nGPD1\\tH\\nRPS29\\tH\\nDHRS4L1\\tH\\nMRPL33\\tH\\nLOC100507291\\tH\\nATP23\\tH\\nUQCRQ\\tH\\nNDUFC2\\tH\\nBOLA3\\tH\\nTCEB2\\tH\\nCOX7A1\\tH\\nDHRS4\\tH\\nCOX6C\\tH\\nFHL2\\tH\\nSLN\\tH\\nNDUFA1\\tH\\nRPL21P28\\tH\\nRPL21\\tH\\nNDUFC2-KCTD14\\tH\\nATP5I\\tH\\nUQCC2\\tH\\nLOC101929231\\tH\\nDBNDD1\\tH\\nNDUFB9\\tH\\nLAMB3\\tH\\nCSF3R\\tH\\nUSMG5\\tH\\nDHRS4L2\\tH\\nSERPINA1\\tH\\nC1orf53\\tH\\nGLT1D1\\tH\\nGREM2\\tH\\nUQCRBP1\\tH\\nFAM24B\\tH\\nS100A8\\tH\\nCDH22\\tH\\nLEFTY1\\tH\\nC3orf14\\tH\\nLINC01291\\tH\\nTPI1P2\\tH\\nCHAF1B\\tH\\nCENPE\\tH\\nE2F2\\tH\\nOSMR\\tH\\nNDUFC1\\tH\\nGP9\\tH\\nCDON\\tH\\nPOU3F3\\tH\\nLINC01224\\tH\\nOR7G1\\tH\\nZNF735\\tH\\nRPL23AP53\\tH\\nSAMD12\\tH\\nPAMR1\\tH\\nHIST3H2A\\tH\\nLOC101927798\\tH\\nFMOD\\tH\\nOR8S1\\tH\\nKLHL11\\tH\\nLOC105375429\\tH\\nLINC01122\\tH\\nTMCO2\\tH\\nDNAH12\\tH\\nKLF4\\tH
\\nCHRM4\\tH\\nLOC101928505\\tH\\nADAMTS1\\tH\\nBEX2\\tH\\nMCTP1\\tH\\nHSD3BP4\\tH\\nLINC01053\\tH\\nELK2AP\\tH\\nLOC105377458\\tH\\nFAM71E2\\tH\\nHAO1\\tH\\nCD68\\tH\\nLOC101928728\\tH\\nSYT15\\tH\\nBAGE\\tH\\nBPIFC\\tH\\nRAET1K\\tH\\nTMPRSS11BNL\\tH\\nTOMM7\\tH\\nHESX1\\tH\\nLRRC72\\tH\\nTUSC5\\tH\\nMUC13\\tH\\nLOC101929227\\tH\\nEDA2R\\tH\\nTM2D1\\tH\\nBCAT1\\tH\\nF13B\\tH\\nLINC00958\\tH\\nRFX4\\tH\\nBRD2\\tH\\nSCN3B\\tH\\nNANOS1\\tH\\nLINC01252\\tH\\nPHLDA2\\tH\\nSNAI3\\tH\\nLOC100506274\\tH\\nLINC01021\\tH\\nCHI3L1\\tH\\nTIMM10\\tH\\nKRTAP5-2\\tH\\nLY6G6C\\tH\\nLOC101927476\\tH\\nZNF169\\tH\\nTINCR\\tH\\nUBL5\\tH\\nLINC01551\\tH\\nFIRRE\\tH\\nRPS28\\tH\\nCYP2G1P\\tH\\nCASC21\\tH\\nWDR76\\tH\\nAGBL4-IT1\\tH\\nLINC01483\\tH\\nYEATS4\\tH\\nNUGGC\\tH\\nAPOBEC1\\tH\\nZAN\\tH\\nCNNM1\\tH\\nTMC1\\tH\\nAPOPT1\\tH\\nNT5M\\tH\\nLINC00877\\tH\\nLOC100133050\\tH\\nMRPL53\\tH\\nCBWD3\\tH\\nJMJD1C\\tH\\nNDUFA11\\tH\\nPLA2G2A\\tH\\nARRDC5\\tH\\nENPP1\\tH\\nNDUFB1\\tH\\nTSHZ2\\tH\\nCRIP3\\tH\\nSMIM4\\tH\\nNANOG\\tH\\nFBXO36\\tH\\nDGCR6L\\tH\\nFAM138D\\tH\\nARAP2\\tH\\nBMP6\\tH\\nMRPL20\\tH\\nMRPS18C\\tH\\nTGIF2-C20orf24\\tH\\nTPM1\\tH\\nSCML4\\tH\\nHRASLS\\tH\\nLOC105379450\\tH\\nNHS\\tH\\nLINC00888\\tH\\nLUADT1\\tH\\nTHBS2\\tH\\nSFTPB\\tH\\nSCN8A\\tH\\nCBWD6\\tH\\nSLC24A4\\tH\\nSRPX2\\tH\\nLCE3D\\tH\\nLCN12\\tH\\nGATA2\\tH\\nLINC00578\\tH\\nLOC101928449\\tH\\nGYPC\\tH\\nPDCL2\\tH\\nCHCHD3\\tH\\nGHET1\\tH\\nLOC101927284\\tH\\nC19orf35\\tH\\nPARP11\\tH\\nLOC100268168\\tH\\nANKRD45\\tH\\nCT45A3\\tH\\nAZGP1\\tH\\nARPC2\\tH\\nLINC01516\\tH\\nPTGER3\\tH\\nUROS\\tH\\nLOC101928887\\tH\\nFCGR1CP\\tH\\nLOC105375396\\tH\\nLOC727924\\tH\\nST20-MTHFS\\tH\\nTNIP3\\tH\\nTDGF1P3\\tH\\nCCL28\\tH\\nGALNT15\\tH\\nNME9\\tH\\nRSPH14\\tH\\nLINC00608\\tH\\nPCDH8\\tH\\nSHISA4\\tH\\nLVCAT5\\tH\\nDCUN1D3\\tH\\nLOC401463\\tH\\nLOC105375483\\tH\\nMRPL15\\tH\\nHS3ST2\\tH\\nC1orf194\\tH\\nRAB3B\\tH\\nTMEM251\\tH\\nLINC00152\\tH\\nLINC00102\\tH\\nCORO2B\\tH\\nBSPRY\\tH\\nCCR7\\tH\\nGLI3\\tH\\nAPOL4\\tH\\n
KERA\\tH\\nGAMT\\tH\\nRBP4\\tH\\nLMO1\\tH\\nSNHG12\\tH\\nLINC01410\\tH\\nZNF280C\\tH\\nCCDC144A\\tH\\nSNRNP27\\tH\\nNDUFA3\\tH\\nSKIDA1\\tH\\nFZD5\\tH\\nRUNDC3B\\tH\\nSHFM1\\tH\\nZMAT5\\tH\\nGGT7\\tH\\nTXLNG\\tH\\nSMG1P1\\tH\\nMMADHC\\tH\\nKPNA2\\tH\\nPAM16\\tH\\nLOC101929697\\tH\\nCXCL13\\tH\\nIMPA2\\tH\\nPRKAG2\\tH\\nMEX3B\\tH\\nNCCRP1\\tH\\nMAFA\\tH\\nHIST1H3J\\tH\\nLDLR\\tH\\nKANK4\\tH\\nSHC4\\tH\\nMACROD1\\tH\\nTAC3\\tH\\nNKX2-5\\tH\\nCOX8A\\tH\\nCREB5\\tH\\nTIMM17B\\tH\\nCBWD5\\tH\\nMTFR2\\tH\\nGSTTP2\\tH\\nLINC01504\\tH\\nEMC4\\tH\\nLOC101928272\\tH\\nCWH43\\tH\\nAPOC4\\tH\\nCCND2\\tH\\nSDHAF4\\tH\\nC2orf91\\tH\\nMYCNOS\\tH\\nZNF80\\tH\\nSIK2\\tH\\nMRPL52\\tH\\nBAK1\\tH\\nEZH2\\tH\\nABCC6P1\\tH\\nHIST1H2BO\\tH\\nNRG1-IT1\\tH\\nWWC1\\tH\\nFAM183A\\tH\\nPABPC1L\\tH\\nTPTE\\tH\\nBRS3\\tH\\nPCDH19\\tH\\nAKR1D1\\tH\\nSLC4A8\\tH\\nLOC105377651\\tH\\nLDHA\\tH\\nRPGRIP1\\tH\\nPPP1R1B\\tH\\nATP5EP2\\tH\\nCACYBP\\tH\\nCHURC1-FNTB\\tH\\nBARX2\\tH\\nHELB\\tH\\nCTCFL\\tH\\nPTPN13\\tH\\nPGR\\tH\\nTMEM261\\tH\\nTRIM49B\\tH\\nMYLPF\\tH\\nLOC100131047\\tH\\nPAPPA\\tH\\nPGM2\\tH\\nMRC1\\tH\\nSNX29P2\\tH\\nLOC101929159\\tH\\nNAP1L3\\tH\\nHILPDA\\tH\\nEFNA2\\tH\\nTMEM35\\tH\\nLOC101243545\\tH\\nLOC101927829\\tH\\nHEPHL1\\tH\\nACER1\\tH\\nLYPD4\\tH\\nLOC101928510\\tH\\nLOC101929577\\tH\\nRELL1\\tH\\nSLC20A1\\tH\\nSSNA1\\tH\\nATP5G1\\tH\\nLRIT2\\tH\\nGDF6\\tH\\nNDUFA13\\tH\\nFAM227A\\tH\\nLOC101929431\\tH\\nGAPDH\\tH\\nSOAT1\\tH\\nPWRN2\\tH\\nLINC00173\\tH\\nFOXL2\\tH\\nUQCRHL\\tH\\nLINC00906\\tH\\nCA5A\\tH\\nAPOBEC2\\tH\\nCT45A1\\tH\\nPSMC3\\tH\\nPART1\\tH\\nLINC00305\\tH\\nLOC400655\\tH\\nSYT11\\tH\\nLINC01361\\tH\\nANGPTL7\\tH\\nMPC2\\tH\\nLGALS9B\\tH\\nLINC01276\\tH\\nRIPK2\\tH\\nHEPACAM\\tH\\nDKFZp779M0652\\tH\\nSOX4\\tH\\nSPATA21\\tH\\nEFCAB5\\tH\\nNDUFB5\\tH\\nTRAF3IP2\\tH\\nTRAPPC3\\tH\\nGADD45G\\tH\\nCXXC4\\tH\\nLINC00676\\tH\\nSOX1\\tH\\nC15orf61\\tH\\nHIST1H2BK\\tH\\nHIST1H2AC\\tH\\nLOC284950\\tH\\nTMEM266\\tH\\nMMP19\\tH\\nPLAUR\\tH\\nC20orf96\\tH\\nSLC9C2\\tH\\nLOC101
060524\\tH\\nDRD5P2\\tH\\nMRPL11\\tH\\nAPOF\\tH\\nLRRC23\\tH\\nECT2L\\tH\\nNMNAT1\\tH\\nCCDC144CP\\tH\\nLOC101928539\\tH\\nRNLS\\tH\\nLOC105372179\\tH\\nMS4A10\\tH\\nTRAPPC2B\\tH\\nCHCHD2\\tH\\nLOC102724434\\tH\\nC7orf31\\tH\\nMIEN1\\tH\\nLOC100506444\\tH\\nPPP1R36\\tH\\nCCL2\\tH\\nSLC19A3\\tH\\nENDOU\\tH\\nLOC440028\\tH\\nPSMB10\\tH\\nFAM72D\\tH\\nGNG4\\tH\\nFOXO1\\tH\\nATP6V0A4\\tH\\nSKA1\\tH\\nPPP1R15B\\tH\\nTRPM5\\tH\\nANKRD33B\\tH\\nC1orf210\\tH\\nLOC101927058\\tH\\nMCF2\\tH\\nGALNT16\\tH\\nFRMD5\\tH\\nPCK1\\tH\\nPALM2\\tH\\nFIS1\\tH\\nKIAA0040\\tH\\nCIB2\\tH\\nNHEG1\\tH\\nCLDN11\\tH\\nPTGER4\\tH\\nCD83\\tH\\nNENF\\tH\\nLOC101928107\\tH\\nGLB1L2\\tH\\nLOC100505918\\tH\\nC2orf66\\tH\\nS100P\\tH\\nMBD3L3\\tH\\nLOC729970\\tH\\nREPS2\\tH\\nSNRPD2\\tH\\nCYP27A1\\tH\\nCDC20B\\tH\\nTAT\\tH\\nMDH1\\tH\\nCOX4I1\\tH\\nNHLH1\\tH\\nTMIGD1\\tH\\nTSACC\\tH\\nLOC101927596\\tH\\nWBSCR17\\tH\\nCYP1A2\\tH\\nPLK4\\tH\\nPSMD14\\tH\\nLOC105373782\\tH\\nMRPS28\\tH\\nARMC9\\tH\\nLINC01213\\tH\\nTGFBR3\\tH\\nARMCX4\\tH\\nLINC00243\\tH\\nDSC2\\tH\\nLOC105371335\\tH\\nLOC101927780\\tH\\nCXADR\\tH\\nDSG2\\tH\\nLPAR4\\tH\\nDAW1\\tH\\nBTG1\\tH\\nGLRX3\\tH\\nDUXAP8\\tH\\nMRPL34\\tH\\nSAT1\\tH\\nDHRS7C\\tH\\nOLR1\\tH\\nTM4SF1\\tH\\nSEMA3D\\tH\\nLOC101927650\\tH\\nLINC00668\\tH\\nRGS4\\tH\\nLOC644838\\tH\\nUBB\\tH\\nLOC101928514\\tH\\nELF4\\tH\\nCH25H\\tH\\nNCOA7\\tH\\nLINC01387\\tH\\nMSR1\\tH\\nNUTF2\\tH\\nZNF367\\tH\\nTSPAN5\\tH\\nATP5O\\tH\\nNKAIN3\\tH\\nCD44\\tH\\nFASN\\tH\\nMYBPC2\\tH\\nZNF611\\tH\\nLOC100287036\\tH\\nMTSS1L\\tH\\nGABRG2\\tH\\nZNF829\\tH\\nLOC100271832\\tH\\nUQCRH\\tH\\nPIGH\\tH\\nPOM121L8P\\tH\\nCTH\\tH\\nAK1\\tH\\nSLC7A14\\tH\\nFGF21\\tH\\nPAIP1\\tH\\nUBA3\\tH\\nMAPKAP1\\tH\\nZIM3\\tH\\nILDR1\\tH\\nFAHD1\\tH\\nMELK\\tH\\nTRIM29\\tH\\nNTM-IT\\tH\\nTPH1\\tH\\nSMIM10L1\\tH\\nCRYGB\\tH\\nSNAP91\\tH\\nNEURL1\\tH\\nLOC101929504\\tH\\nLOC102724053\\tH\\nLINC01268\\tH\\nFAM171B\\tH\\nFOSL1\\tH\\nC10orf126\\tH\\nLOC286059\\tH\\nLOC100506747\\tH\\nCXCR2\\tH\\nLINC00294\\tH\\nPPP
1R7\\tH\\nTMA7\\tH\\nERC2-IT1\\tH\\nANTXR1\\tH\\nPRKACG\\tH\\nPIGR\\tH\\nTF\\tH\\nNME2\\tH\\nINE1\\tH\\nLCE3B\\tH\\nIMMP1L\\tH\\nLOC101927142\\tH\\nDNAJB1\\tH\\nVSTM1\\tH\\nLOC105372626\\tH\\nEPHA7\\tH\\nGUCY2F\\tH\\nANXA1\\tH\\nLOC101928973\\tH\\nLOC102723427\\tH\\nCD109\\tH\\nIER3\\tH\\nOVOL1\\tH\\nLOC101927630\\tH\\nRGS14\\tH\\nLOC100289333\\tH\\nMRGPRE\\tH\\nTRPC1\\tH\\nPDZK1\\tH\\nLOC285889\\tH\\nLOC100130899\\tH\\nLOC642929\\tH\\nGYPB\\tH\\nSF3B5\\tH\\nCRAT8\\tH\\nRDH14\\tH\\nIRGC\\tH\\nIGF2BP1\\tH\\nSep-14\\tH\\nCTD-2201E9.1\\tH\\nLOC100506085\\tH\\nCDH16\\tH\\nUGT8\\tH\\nCCL11\\tH\\nULK4P2\\tH\\nULK4P1\\tH\\nNDUFB10\\tH\\nLOC101927526\\tH\\nLOC440910\\tH\\nTLR6\\tH\\nZNF724P\\tH\\nTBX18\\tH\\nISCA2\\tH\\nINSC\\tH\\nISY1\\tH\\nTGIF2\\tH\\nIKBKB\\tH\\nXCL1\\tH\\nMID1\\tH\\nLOC100996251\\tH\\nSLC38A1\\tH\\nLOC105375401\\tH\\nLOC388692\\tH\\nLINC00710\\tH\\nOAZ1\\tH\\nTHSD7A\\tH\\nMAP6D1\\tH\\nLOC102723727\\tH\\nSHH\\tH\\nLOC339666\\tH\\nGAB3\\tH\\nNSUN6\\tH\\nCGN\\tH\\nOR7E156P\\tH\\nNXF1\\tH\\nOLIG1\\tH\\nHCG2040054\\tH\\nC6orf203\\tH\\nLOC441454\\tH\\nTRPM3\\tH\\nCXCL1\\tH\\nCMC2\\tH\\nCYP27C1\\tH\\nCCL22\\tH\\nBAZ1A\\tH\\nBMS1P5\\tH\\nMS4A2\\tH\\nTCAF2\\tH\\nDCST2\\tH\\nCCEPR\\tH\\nDLEU7\\tH\\nSLC2A7\\tH\\nTEKT2\\tH\\nCRY1\\tH\\nLOC105370792\\tH\\nCT45A7\\tH\\nTPM2\\tH\\nNME1-NME2\\tH\\nCT45A10\\tH\\nSLC25A26\\tH\\nIER5L\\tH\\nLINC01111\\tH\\nLEP\\tH\\nFLVCR1\\tH\\nTES\\tH\\nPRELID3A\\tH\\nCLEC19A\\tH\\nITGAE\\tH\\nDNAJB13\\tH\\nABHD12B\\tH\\nNTRK3\\tH\\nBANCR\\tH\\nHTRA4\\tH\\nCYP2B6\\tH\\nSLC6A4\\tH\\nRPL37A\\tH\\nTRIM71\\tH\\nSNTN\\tH\\nSNHG6\\tH\\nLINC01563\\tH\\nRIMS2\\tH\\nDPM3\\tH\\nFAM46A\\tH\\nZBP1\\tH\\nSERF1B\\tH\\nSERF1A\\tH\\nPTGER4P2-CDK2AP2P2\\tH\\nGPBAR1\\tH\\nCYR61\\tH\\nMRPL37\\tH\\nBAGE3\\tH\\nBAGE2\\tH\\nELMO1\\tH\\nTROAP\\tH\\nTMEM217\\tH\\nTMPRSS11E\\tH\\nMYH1\\tH\\nLOC101929234\\tH\\nSARNP\\tH\\nCRAT37\\tH\\nBAGE5\\tH\\nBAGE4\\tH\\nLINC00844\\tH\\nSLX4IP\\tH\\nLOC101928008\\tH\\nB4GALT3\\tH\\nLINC01206\\tH\\nNDUFA7\\tH\\nCOX14\\tH\\nMO
RC1\\tH\\nARID5B\\tH\\nPNKD\\tH\\nBIRC3\\tH\\nBTBD6\\tH\\nLOC101928902\\tH\\nFAM71D\\tH\\nLINC01251\\tH\\nARL2\\tH\\nLINC01265\\tH\\nTMEM205\\tH\\nLOC101929125\\tH\\nHCG22\\tH\\nLOC102724708\\tH\\nPRKCG\\tH\\nLINC01481\\tH\\nZNF98\\tH\\nPSMA8\\tH\\nCD14\\tH\\nPSMD4\\tH\\nAKR1C2\\tH\\nPSMB3\\tH\\nSMDT1\\tH\\nTCF7L1\\tH\\nMTCP1\\tH\\nHPSE\\tH\\nANGPTL5\\tH\\nFUNDC2P2\\tH\\nLINC00330\\tH\\nCACNG8\\tH\\nATRAID\\tH\\nPKHD1L1\\tH\\nHDAC11\\tH\\nC3orf18\\tH\\nSTX11\\tH\\nHIST2H2BA\\tH\\nTMTC4\\tH\\nLOC100506682\\tH\\nRPS14P3\\tH\\nELOVL7\\tH\\nTMEM156\\tH\\nBUB1B\\tH\\nLINC00477\\tH\\nMAP7D2\\tH\\nGPC6\\tH\\nPAQR5\\tH\\nPGAM2\\tH\\nPTS\\tH\\nS100A1\\tH\\nHEXIM2\\tH\\nOR4K2\\tH\\nS100G\\tH\\nATP5H\\tH\\nFKBP3\\tH\\nSCGB2A2\\tH\\nPLEKHH2\\tH\\nLOC102723322\\tH\\nACSM5\\tH\\nSFPQ\\tH\\nZNF358\\tH\\nGABRE\\tH\\nRRAGD\\tH\\nLMO7DN\\tH\\nNSMCE1\\tH\\nLINC00941\\tH\\nDAAM2\\tH\\nHPVC1\\tH\\nLINC00486\\tH\\nRPL26L1\\tH\\nLOC100287896\\tH\\nCASC6\\tH\\nREL\\tH\\nSPATA24\\tH\\nTMEM42\\tH\\nEFNB2\\tH\\nFNDC5\\tH\\nLKAAEAR1\\tH\\nCLDN4\\tH\\nTPTE2P1\\tH\\nSTEAP3\\tH\\nMLXIPL\\tH\\nCSF2\\tH\\nDYDC1\\tH\\nDPCD\\tH\\nABCB1\\tH\\nPRSS12\\tH\\nSDHB\\tH\\nTREML3P\\tH\\nLINC00911\\tH\\nFBXO25\\tH\\nLOC101928335\\tH\\nLNP1\\tH\\nLINC01138\\tH\\nLOC101928403\\tH\\nLOC101929565\\tH\\nCDCA8\\tH\\nLOC100505478\\tH\\nLY6K\\tH\\nINTS6L\\tH\\nBCAS1\\tH\\nLOC105376351\\tH\\nMRPL18\\tH\\nTRIM49\\tH\\nRUNX2\\tH\\nCITED2\\tH\\nLINC01436\\tH\\nABL2\\tH\\nUQCRFS1\\tH\\nOCLN\\tH\\nCCDC192\\tH\\nMERTK\\tH\\nSMKR1\\tH\\nCHCHD10\\tH\\nLOC100996634\\tH\\nTPI1P3\\tH\\nNTRK2\\tH\\nEMC6\\tH\\nLOC101928858\\tH\\nRARRES1\\tH\\nCLDN19\\tH\\nCLYBL\\tH\\nNDUFAF5\\tH\\nTIMM13\\tH\\nICAM1\\tH\\nRNF181\\tH\\nNCAPH\\tH\\nSAMM50\\tH\\nNDUFS2\\tH\\nPGA3\\tH\\nC4orf19\\tH\\nAIMP2\\tH\\nMARVELD3\\tH\\nLCE6A\\tH\\nRPS25\\tH\\nAP1B1P1\\tH\\nCOL12A1\\tH\\nATF4\\tH\\nGAP43\\tH\\nACKR2\\tH\\nSLMO2-ATP5E\\tH\\nARHGEF9-IT1\\tH\\nGTF3A\\tH\\nCDC26\\tH\\nTIMMDC1\\tH\\nLSM1\\tH\\nTRIM59\\tH\\nCDR2\\tH\\nCPT1A\\tH\\nGINS4\\tH\\nLOC10254
6299\\tH\\nTRH\\tH\\nLINC00942\\tH\\nARHGAP11A\\tH\\nNMBR\\tH\\nPRC1\\tH\\nSERF2\\tH\\nMC5R\\tH\\nCOX11\\tH\\nEFHC2\\tH\\nPLVAP\\tH\\nFCGR1A\\tH\\nGCG\\tH\\nOR2G3\\tH\\nSNAPIN\\tH\\nWBSCR28\\tH\\nPDCL3\\tH\\nFLJ40194\\tH\\nLOC407835\\tH\\nCT45A4\\tH\\nCCHCR1\\tH\\nUCHL3\\tH\\nMEP1B\\tH\\nNPIPB6\\tH\\nLOC101926940\\tH\\nLINC00959\\tH\\nLINC01180\\tH\\nDNAJC5G\\tH\\nFZD10\\tH\\nNDUFB8\\tH\\nERCC1\\tH\\nLOC389641\\tH\\nRPS14\\tH\\nARPC5L\\tH\\nDOCK10\\tH\\nLOC101928809\\tH\\nPLEKHA5\\tH\\nLINC00449\\tH\\nTFAP2B\\tH\\nMIR503HG\\tH\\nXG\\tH\\nCXCL3\\tH\\nCSTL1\\tH\\nLOC101928161\\tH\\nCOX6B1\\tH\\nCA8\\tH\\nIL1R1\\tH\\nLINC00619\\tH\\nGAGE1\\tH\\nNDUFA4\\tH\\nLINC01549\\tH\\nCCL16\\tH\\nERN2\\tH\\nALLC\\tH\\nCCDC43\\tH\\nFAM81B\\tH\\nMT2A\\tH\\nS100B\\tH\\nZSCAN12\\tH\\nCABP5\\tH\\nVAV3\\tH\\nIKZF3\\tH\\nDEFB118\\tH\\nDGCR6\\tH\\nLOC105371795\\tH\\nSLC28A3\\tH\\nLOC100129518\\tH\\nZNF503\\tH\\nJTB\\tH\\nLY9\\tH\\nMGC27345\\tH\\nMX2\\tH\\nLOC400002\\tH\\nUGGT2\\tH\\nNDUFA2\\tH\\nMFAP5\\tH\\nITGAM\\tH\\nXKR4\\tH\\nLINC01030\\tH\\nEBAG9\\tH\\nMAGEB5\\tH\\nTMEM150A\\tH\\nLOC101927653\\tH\\nEMC7\\tH\\nSIK1\\tH\\nEMB\\tH\\nDUXA\\tH\\nMIR3663HG\\tH\\nSPATA42\\tH\\nTNFRSF12A\\tH\\nLOC100507195\\tH\\nFAM78A\\tH\\nTENM2\\tH\\nLOC102724428\\tH\\nTRABD2A\\tH\\nTPTE2P3\\tH\\nRASAL1\\tH\\nITPRIP\\tH\\nADGRG6\\tH\\nVSIG4\\tH\\nADRBK2\\tH\\nTRIM49C\\tH\\nHOXC5\\tH\\nCMAHP\\tH\\nRPSAP58\\tH\\nOR7G3\\tH\\nLOC100288069\\tH\\nKRT9\\tH\\nARL6IP1\\tH\\nLINC00635\\tH\\nGPC3\\tH\\nSNX21\\tH\\nRIN2\\tH\\nMYHAS\\tH\\nPOTEE\\tH\\nCLEC2A\\tH\\nATP1A3\\tH\\nLOC105371267\\tH\\nLINC00696\\tH\\nBEND2\\tH\\nSPECC1\\tH\\nECM1\\tH\\nTSPAN1\\tH\\nFAM86JP\\tH\\nP2RX7\\tH\\nTMEM106A\\tH\\nPTPRH\\tH\\nEIF3K\\tH\\nSYK\\tH\\nAGR3\\tH\\nLINC00396\\tH\\nMR1\\tH\\nSLC9A2\\tH\\nGSTZ1\\tH\\nDEFB1\\tH\\nLOC101928370\\tH\\nCALD1\\tH\\nLINC01351\\tH\\nBICD1\\tH\\nFAM231D\\tH\\nSFRP5\\tH\\nEFNA1\\tH\\nLOC101929054\\tH\\nMETTL21A\\tH\\nHOXB5\\tH\\nRYR2\\tH\\nTCEA3\\tH\\nGOLGA8F\\tH\\nARL6IP6\\tH\\nLOC105369891\\tH\\nFAM1
85A\\tH\\nCCDC124\\tH\\nLOC100499194\\tH\\nKDM6A\\tH\\nLONRF1\\tH\\nADRA2A\\tH\\nFAM210B\\tH\\nTRIM31\\tH\\nRAB39B\\tH\\nKIAA0513\\tH\\nIQUB\\tH\\nTLL1\\tH\\nLRRC15\\tH\\nLOC284294\\tH\\nNQO1\\tH\\nRMST\\tH\\nC12orf57\\tH\\nSIRT1\\tH\\nPDGFC\\tH\\nPPIAL4C\\tH\\nPPIAL4A\\tH\\nC18orf61\\tH\\nLOC283194\\tH\\nRPS23\\tH\\nIFNLR1\\tH\\nGOLGA8G\\tH\\nLY6G6F\\tH\\nLINC00671\\tH\\nRPL23A\\tH\\nLOC101929726\\tH\\nOR10Q1\\tH\\nRNF7\\tH\\nSMCP\\tH\\nNCK2\\tH\\nRNF148\\tH\\nMIR17HG\\tH\\nLINC00479\\tH\\nLINC00551\\tH\\nSIRT4\\tH\\nHERC5\\tH\\nZNF738\\tH\\nLINC01209\\tH\\nTOB2P1\\tH\\nESPL1\\tH\\nLINC00116\\tH\\nHK1\\tH\\nLBP\\tH\\nLOC105369632\\tH\\nVIM\\tH\\nDSEL\\tH\\nPOTEJ\\tH\\nUSP44\\tH\\nLOC101927415\\tH\\nHSPH1\\tH\\nENPP7P13\\tH\\nTNFAIP3\\tH\\nBHLHE41\\tH\\nETV7\\tH\\nKCNQ4\\tH\\nLOC100287792\\tH\\nLOC101929511\\tH\\nMROH5\\tH\\nOAZ3\\tH\\nPPP1R15A\\tH\\nIDI2\\tH\\nCYB561A3\\tH\\nARMC4\\tH\\nBHMT2\\tH\\nNETO2\\tH\\nSUCNR1\\tH\\nSSU72\\tH\\nLOC399886\\tH\\nDISC1\\tH\\nSTAMBP\\tH\\nNLGN1\\tH\\nHAX1\\tH\\nTNRC18P1\\tH\\nAKR1B1\\tH\\nULK4P3\\tH\\nC1QTNF3\\tH\\nCT47A7\\tH\\nWBSCR22\\tH\\nHCAR1\\tH\\nRGL1\\tH\\nLINC01606\\tH\\nCLPS\\tH\\nDUPD1\\tH\\nSSX1\\tH\\nGSTK1\\tH\\nSPRY4\\tH\\nNUDCD2\\tH\\nRECK\\tH\\nNOL4L\\tH\\nPCBP4\\tH\\nCNTNAP2\\tH\\nKCNE1\\tH\\nLOC400541\\tH\\nLINC00261\\tH\\nC9orf173\\tH\\nMRPL48\\tH\\nPOM121L9P\\tH\\nMKRN2OS\\tH\\nRALY\\tH\\nESM1\\tH\\nEID1\\tH\\nNUDT6\\tH\\nHINT3\\tH\\nIPMK\\tH\\nC11orf98\\tH\\nCRLF1\\tH\\nCFL1P1\\tH\\nTMPRSS9\\tH\\nCHMP2A\\tH\\nOLFM1\\tH\\nZNF511\\tH\\nB3GNT7\\tH\\nSIK3\\tH\\nACER3\\tH\\nCIDEC\\tH\\nADGRD1\\tH\\nSPC25\\tH\\nLOC101926911\\tH\\nPELI3\\tH\\nEXT1\\tH\\nPCAT5\\tH\\nGDF15\\tH\\nMRPL47\\tH\\nPLSCR1\\tH\\nTOM1\\tH\\nC6\\tH\\nWDR87\\tH\\nFXYD5\\tH\\nCOBLL1\\tH\\nANGPT2\\tH\\nSRCIN1\\tH\\nSLC10A1\\tH\\nOAS1\\tH\\nMMP21\\tH\\nCOL19A1\\tH\\nGPR18\\tH\\nTMEM219\\tH\\nZNF296\\tH\\nUSP43\\tH\\nGOLGA2P9\\tH\\nRFX2\\tH\\nRAB27A\\tH\\nLOC102467217\\tH\\nMYH13\\tH\\nPHLPP2\\tH\\nLOC101928985\\tH\\nCDRT7\\tH\\nINTS6\\tH\\nHAS2\\t
H\\nDZIP1\\tH\\nOR2V2\\tH\\nOR2H2\\tH\\nTSSC1\\tH\\nBOLA1\\tH\\nPABPC1P2\\tH\\nTMEM229A\\tH\\nATP8B1\\tH\\nLCNL1\\tH\\nDCDC5\\tH\\nSOD1\\tH\\nPAG1\\tH\\nCETN2\\tH\\nNCR1\\tH\\nTMEM100\\tH\\nURI1\\tH\\nTEKT4P2\\tH\\nPCAT1\\tH\\nSERTAD4\\tH\\nLINC00550\\tH\\nGLB1L\\tH\\nUNG\\tH\\nAGMAT\\tH\\nLOC101928540\\tH\\nZNF681\\tH\\nLINC01456\\tH\\nFCGR2C\\tH\\nABCG2\\tH\\nANAPC11\\tH\\nLOC102800447\\tH\\nCYLC2\\tH\\nC6orf226\\tH\\nREM2\\tH\\nBMPR1B\\tH\\nBECN1\\tH\\nADM\\tH\\nPDPR\\tH\\nKDM8\\tH\\nHMBS\\tH\\nMYO1H\\tH\\nLINC00493\\tH\\nFGF14\\tH\\nEIF2AK1\\tH\\nLOC101928489\\tH\\nKCNK1\\tH\\nCKS2\\tH\\nLOC101928035\\tH\\nLINC01221\\tH\\nEREG\\tH\\nNDUFB11\\tH\\nNARF\\tH\\nZC3HC1\\tH\\nADGRE2\\tH\\nUFC1\\tH\\nHOMER1\\tH\\nHDDC2\\tH\\nHIST1H3A\\tH\\nTNNT3\\tH\\nZNF670-ZNF695\\tH\\nGSR\\tH\\nNDRG4\\tH\\nTERC\\tH\\nFANCB\\tH\\nFFAR4\\tH\\nMGAM2\\tH\\nLRRTM4\\tH\\nINHBA\\tH\\nLOC403312\\tH\\nKLLN\\tH\\nDZANK1\\tH\\nRGS9BP\\tH\\nRIIAD1\\tH\\nARL2-SNX15\\tH\\nPLAU\\tH\\nSPDYE8P\\tH\\nSLC25A19\\tH\\nBMS1P6\\tH\\nZFYVE19\\tH\\nCTAGE1\\tH\\nMTIF3\\tH\\nSPACA4\\tH\\nSIPA1L1\\tH\\nSLC2A10\\tH\\nPGK1\\tH\\nGIF\\tH\\nMYH8\\tH\\nLOC101928098\\tH\\nFRMD4A\\tH\\nLINC01397\\tH\\nLIPE\\tH\\nTRIM49D2\\tH\\nPGM1\\tH\\nHRH4\\tH\\nLOC646241\\tH\\nLOC101927587\\tH\\nCTD-2201I18.1\\tH\\nRAPGEF4\\tH\\nRUNX1\\tH\\nC5\\tH\\nTRIM49D1\\tH\\nLOC100508046\\tH\\nLOC101928885\\tH\\nUCHL1\\tH\\nR3HDM4\\tH\\nMAP9\\tH\\nMIF4GD\\tH\\nLOC100190986\\tH\\nCOQ2\\tH\\nKNTC1\\tH\\nSAXO1\\tH\\nLOC105369860\\tH\\nFPR1\\tH\\nGP6\\tH\\nEIF2S2\\tH\\nLINC00461\\tH\\nHIST1H2AH\\tH\\nDHRS7\\tH\\nCHST8\\tH\\nHAGH\\tH\\nC4orf3\\tH\\nNMUR2\\tH\\nAKR1C3\\tH\\nLRRC70\\tH\\nREXO2\\tH\\nPRH1-TAS2R14\\tH\\nSLC9A1\\tH\\nMNAT1\\tH\\nSLC37A4\\tH\\nMGC34796\\tH\\nHSPB9\\tH\\nCADM3\\tH\\nMYEOV2\\tH\\nKRTAP6-3\\tH\\nARNTL2\\tH\\nENPP2\\tH\\nCUBN\\tH\\nLOC339059\\tH\\nGSDMA\\tH\\nBTG3\\tH\\nSTBD1\\tH\\nNAV3\\tH\\nALDH1L2\\tH\\nZBTB21\\tH\\nSPATA5\\tH\\nMRPL57\\tH\\nCWC15\\tH\\nNOMO3\\tH\\nUBTD1\\tH\\nIFI30\\tH\\nFMNL2\\tH\\nPRMT3\\tH\\nLOC101
927692\\tH\\nNTPCR\\tH\\nDHRS7B\\tH\\nTBCB\\tH\\nC3orf58\\tH\\nKRT222\\tH\\nWRB-SH3BGR\\tH\\nLOC101928580\\tH\\nRWDD1\\tH\\nNKIRAS1\\tH\\nABCA1\\tH\\nCASC20\\tH\\nRTN4IP1\\tH\\nSPATA6L\\tH\\nLUZP1\\tH\\nCARS2\\tH\\nC2orf61\\tH\\nLOC102467226\\tH\\nMIR3945HG\\tH\\nFGF9\\tH\\nVRTN\\tH\\nPCDH18\\tH\\nPOLR3K\\tH\\nLINC00566\\tH\\nAOX1\\tH\\nPDLIM7\\tH\\nLOC102577426\\tH\\nUSE1\\tH\\nGINS2\\tH\\nRAPGEF2\\tH\\nLINC01492\\tH\\nTMEM70\\tH\\nCOX17\\tH\\nSRRM4\\tH\\nLOC101928295\\tH\\nISCA1\\tH\\nIL18R1\\tH\\nAPOC4-APOC2\\tH\\nMT1M\\tH\\nLMO2\\tH\\nSCN4B\\tH\\nRDH12\\tH\\nFEZF2\\tH\\nTMEM150B\\tH\\nCPS1\\tH\\nSLC35G2\\tH\\nTPM3\\tH\\nREG1A\\tH\\nLINC01133\\tH\\nAFAP1L2\\tH\\nPSENEN\\tH\\nFAM72A\\tH\\nLINC00467\\tH\\nHELLS\\tH\\nLINC00367\\tH\\nPLXNA4\\tH\\nC11orf73\\tH\\nKLF7\\tH\\nYBEY\\tH\\nOIT3\\tH\\nLOC101929681\\tH\\nPTPRD\\tH\\nLOC100422737\\tH\\nLINC01411\\tH\\nTSPAN17\\tH\\nUGT1A10\\tH\\nIFT22\\tH\\nRPS10P7\\tH\\nDBIL5P2\\tH\\nIFI44\\tH\\nBTK\\tH\\nMDP1\\tH\\nLOC284080\\tH\\nCYP2C18\\tH\\nFBXW12\\tH\\nCORO7-PAM16\\tH\\nTMEM14B\\tH\\nPOLQ\\tH\\nAFF4\\tH\\nLHFPL4\\tH\\nABTB2\\tH\\nNOMO1\\tH\\nFHDC1\\tH\\nTRIM38\\tH\\nCTSV\\tH\\nGATA3\\tH\\nLINCR-0002\\tH\\nCFAP20\\tH\\nNDUFB6\\tH\\nRASA4\\tH\\nLOC100288798\\tH\\nCFAP206\\tH\\nROR1\\tH\\nACOT13\\tH\\nLOC285626\\tH\\nBANF1\\tH\\nDCAF4L2\\tH\\nSH3BGR\\tH\\nOTOA\\tH\\nCD226\\tH\\nSLC29A4\\tH\\nRPL18\\tH\\nPRDX3\\tH\\nFGB\\tH\\nTEX14\\tH\\nFBN1\\tH\\nEPHA3\\tH\\n'}}]" ] }, - "execution_count": 25, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -2258,16 +2498,16 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(2421, 11)" + "(2421, 12)" ] }, - "execution_count": 26, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -2278,7 +2518,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 30, "metadata": {}, "outputs": [ { @@ -2307,6 +2547,7 @@ " target\n", " 
target.source\n", " DISGENET_diseases\n", + " literature_based_info\n", " OpenTargets_gene_compounds\n", " MINERVA\n", " WikiPathways\n", @@ -2323,6 +2564,7 @@ " ENSG00000165672\n", " Ensembl\n", " [{'disease_name': 'SPINOCEREBELLAR ATAXIA, AUT...\n", + " [{'disease_name': nan, 'UMLS': nan, 'source': ...\n", " [{'chembl_id': nan, 'drugbank_id': nan, 'compo...\n", " [{'pathway_id': 933.0, 'pathway_label': 'Elect...\n", " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", @@ -2337,6 +2579,7 @@ " ENSG00000171564\n", " Ensembl\n", " [{'disease_name': 'Cardiovascular Diseases', '...\n", + " [{'disease_name': nan, 'UMLS': nan, 'source': ...\n", " [{'chembl_id': 'CHEMBL2109072', 'drugbank_id':...\n", " [{'pathway_id': 951.0, 'pathway_label': 'Coagu...\n", " [{'pathway_id': 'WP5115', 'pathway_label': 'Ne...\n", @@ -2351,6 +2594,7 @@ " ENSG00000121101\n", " Ensembl\n", " [{'disease_name': 'Non-obstructive azoospermia...\n", + " [{'disease_name': nan, 'UMLS': nan, 'source': ...\n", " [{'chembl_id': nan, 'drugbank_id': nan, 'compo...\n", " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", @@ -2365,6 +2609,7 @@ " ENSG00000166147\n", " Ensembl\n", " [{'disease_name': 'Marfan Syndrome', 'HPO': ''...\n", + " [{'disease_name': nan, 'UMLS': nan, 'source': ...\n", " [{'chembl_id': nan, 'drugbank_id': nan, 'compo...\n", " [{'pathway_id': 945.0, 'pathway_label': 'Nsp9 ...\n", " [{'pathway_id': 'WP3668', 'pathway_label': 'Hy...\n", @@ -2379,6 +2624,7 @@ " ENSG00000044524\n", " Ensembl\n", " [{'disease_name': 'Adenocarcinoma of lung (dis...\n", + " [{'disease_name': nan, 'UMLS': nan, 'source': ...\n", " [{'chembl_id': 'CHEMBL24828', 'drugbank_id': '...\n", " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", " [{'pathway_id': 'WP2882', 'pathway_label': 'Nu...\n", @@ -2405,6 +2651,13 @@ "2419 [{'disease_name': 'Marfan Syndrome', 'HPO': ''... \n", "2420 [{'disease_name': 'Adenocarcinoma of lung (dis... 
\n", "\n", + " literature_based_info \\\n", + "2416 [{'disease_name': nan, 'UMLS': nan, 'source': ... \n", + "2417 [{'disease_name': nan, 'UMLS': nan, 'source': ... \n", + "2418 [{'disease_name': nan, 'UMLS': nan, 'source': ... \n", + "2419 [{'disease_name': nan, 'UMLS': nan, 'source': ... \n", + "2420 [{'disease_name': nan, 'UMLS': nan, 'source': ... \n", + "\n", " OpenTargets_gene_compounds \\\n", "2416 [{'chembl_id': nan, 'drugbank_id': nan, 'compo... \n", "2417 [{'chembl_id': 'CHEMBL2109072', 'drugbank_id':... \n", @@ -2448,7 +2701,7 @@ "2420 [{'stringdb_link_to': 'EFNA2', 'Ensembl': 'ENS... " ] }, - "execution_count": 27, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -2466,7 +2719,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 31, "metadata": {}, "outputs": [], "source": [ diff --git a/src/pyBiodatafuse/constants.py b/src/pyBiodatafuse/constants.py index 02cce29c..4a438fab 100644 --- a/src/pyBiodatafuse/constants.py +++ b/src/pyBiodatafuse/constants.py @@ -271,29 +271,42 @@ "disease_umlscui": None, "labels": DISEASE_NODE_LABELS, } -DISGENET_GENE_DISEASE_EDGE_LABEL = "associated_with" +GENE_DISEASE_EDGE_LABEL = "associated_with" DISGENET_EDGE_ATTRS = { "source": DISGENET, "score": None, "ei": None, "el": None, - "label": DISGENET_GENE_DISEASE_EDGE_LABEL, + "label": GENE_DISEASE_EDGE_LABEL, } - -# Open Targets - Disease -OPENTARGETS_DISEASE_NODE_ATTRS = { - "source": OPENTARGETS, +# Literature +LITERATURE_DISEASE_NODE_ATTRS = { + "source": None, "name": None, "id": None, - "therapeutic_areas": None, + "UMLS": None, "labels": DISEASE_NODE_LABELS, } -OPENTARGETS_GENE_DISEASE_EDGE_LABEL = "associated_with" -OPENTARGETS_DISEASE_EDGE_ATTRS = { - "source": OPENTARGETS, - "label": OPENTARGETS_GENE_DISEASE_EDGE_LABEL, +LITERATURE_DISEASE_EDGE_ATTRS = { + "source": None, + "label": GENE_DISEASE_EDGE_LABEL, } +# TODO: The disease annotations are not curated and will be used again when the OpenTarget annotation 
improves. +# Open Targets - Disease +# OPENTARGETS_DISEASE_NODE_ATTRS = { +# "source": OPENTARGETS, +# "name": None, +# "id": None, +# "therapeutic_areas": None, +# "labels": DISEASE_NODE_LABELS, +# } +# OPENTARGETS_DISEASE_EDGE_ATTRS = { +# "source": OPENTARGETS, +# "label": GENE_DISEASE_EDGE_LABEL, +# } + + # Pathway node # MINERVA, WikiPathways, Open Targets - Reactome PATHWAY_NODE_LABELS = "Pathway" diff --git a/src/pyBiodatafuse/graph/generator.py b/src/pyBiodatafuse/graph/generator.py index 1c9a9302..c4150274 100644 --- a/src/pyBiodatafuse/graph/generator.py +++ b/src/pyBiodatafuse/graph/generator.py @@ -22,7 +22,7 @@ DISGENET_DISEASE_COL, DISGENET_DISEASE_NODE_ATTRS, DISGENET_EDGE_ATTRS, - DISGENET_GENE_DISEASE_EDGE_LABEL, + GENE_DISEASE_EDGE_LABEL, GENE_GO_EDGE_ATTRS, GENE_GO_EDGE_LABEL, GENE_NODE_LABELS, @@ -33,6 +33,8 @@ GO_MF_NODE_LABELS, GO_NODE_ATTRS, GO_NODE_MAIN_LABEL, + LITERATURE_DISEASE_EDGE_ATTRS, + LITERATURE_DISEASE_NODE_ATTRS, MINERVA, MOLMEDB_COMPOUND_NODE_ATTRS, MOLMEDB_PROTEIN_COMPOUND_COL, @@ -187,7 +189,7 @@ def add_disgenet_gene_disease_subgraph(g, gene_node_label, annot_list): g.add_node(annot_node_label, attr_dict=annot_node_attrs) edge_attrs = DISGENET_EDGE_ATTRS.copy() - edge_attrs["score"] = edge_attrs["score"] + edge_attrs["score"] = annot["score"] if not pd.isna(annot["ei"]): edge_attrs["ei"] = annot["ei"] @@ -206,7 +208,49 @@ def add_disgenet_gene_disease_subgraph(g, gene_node_label, annot_list): g.add_edge( gene_node_label, annot_node_label, - label=DISGENET_GENE_DISEASE_EDGE_LABEL, + label=GENE_DISEASE_EDGE_LABEL, + attr_dict=edge_attrs, + ) + + return g + + +def add_literature_gene_disease_subgraph(g, gene_node_label, annot_list): + """Construct part of the graph by linking the gene to diseases form literature. + + :param g: the input graph to extend with new nodes and edges. + :param gene_node_label: the gene node to be linked to diseases. + :param annot_list: list of diseases from DisGeNET. 
+ :returns: a NetworkX MultiDiGraph + """ + for annot in annot_list: + if not pd.isna(annot["disease_name"]): + annot_node_label = annot[DISEASE_NODE_MAIN_LABEL] + annot_node_attrs = LITERATURE_DISEASE_NODE_ATTRS.copy() + annot_node_attrs["source"] = annot["source"] + annot_node_attrs["name"] = annot["disease_name"] + annot_node_attrs["id"] = annot["UMLS"] + annot_node_attrs["UMLS"] = annot["UMLS"] + + + g.add_node(annot_node_label, attr_dict=annot_node_attrs) + + edge_attrs = LITERATURE_DISEASE_EDGE_ATTRS.copy() + edge_attrs["source"] = annot["source"] + + edge_hash = hash(frozenset(edge_attrs.items())) + edge_attrs["edge_hash"] = edge_hash + edge_data = g.get_edge_data(gene_node_label, annot_node_label) + edge_data = {} if edge_data is None else edge_data + node_exists = [ + x for x, y in edge_data.items() if y["attr_dict"]["edge_hash"] == edge_hash + ] + + if len(node_exists) == 0: + g.add_edge( + gene_node_label, + annot_node_label, + label=GENE_DISEASE_EDGE_LABEL, attr_dict=edge_attrs, ) From c4306424f8802f4f56c1949936f35a5917eeea4d Mon Sep 17 00:00:00 2001 From: Tooba Abbassi-Daloii Date: Tue, 17 Sep 2024 08:41:53 +0200 Subject: [PATCH 2/7] tox fix --- examples/usecases/PCS/PCS_usecase.ipynb | 58 +++++++++++++++---------- src/pyBiodatafuse/graph/generator.py | 1 - 2 files changed, 35 insertions(+), 24 deletions(-) diff --git a/examples/usecases/PCS/PCS_usecase.ipynb b/examples/usecases/PCS/PCS_usecase.ipynb index 22138373..75aea419 100644 --- a/examples/usecases/PCS/PCS_usecase.ipynb +++ b/examples/usecases/PCS/PCS_usecase.ipynb @@ -43,8 +43,8 @@ "\n", "import matplotlib.pyplot as plt\n", "import networkx as nx\n", - "import pandas as pd\n", "import numpy as np\n", + "import pandas as pd\n", "from dotenv import load_dotenv\n", "\n", "from pyBiodatafuse import id_mapper\n", @@ -591,10 +591,19 @@ } ], "source": [ - "pcs_associated_genes = pd.read_excel(os.path.join(os.getcwd(), \"examples\", \"usecases\", \"PCS\", \"pcs_associated_genes.xlsx\"))\n", + 
"pcs_associated_genes = pd.read_excel(\n", + " os.path.join(os.getcwd(), \"examples\", \"usecases\", \"PCS\", \"pcs_associated_genes.xlsx\")\n", + ")\n", "pcs_associated_genes.head()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Define the literature based info" + ] + }, { "cell_type": "code", "execution_count": 9, @@ -708,21 +717,24 @@ } ], "source": [ + "from pyBiodatafuse.constants import LITERATURE_DISEASE_NODE_ATTRS\n", + "\n", + "\n", + "literature_disease_attrs = LITERATURE_DISEASE_NODE_ATTRS.copy()\n", + "literature_disease_attrs[\"disease_name\"] = \"Post-COVID-19\"\n", + "literature_disease_attrs[\"UMLS\"] = \"C00000\"\n", + "literature_disease_attrs[\"source\"] = \"PMID: 37675861\"\n", + "\n", + "\n", "def get_literature_based_info(gene):\n", " if gene in pcs_associated_genes[\"Gene\"].values:\n", - " return [{\"disease_name\": \"Post-COVID-19\", \n", - " \"UMLS\": \"C00000\", \n", - " \"source\": \"PMID: 37675861\"}]\n", + " return literature_disease_attrs\n", " else:\n", - " return [{\"disease_name\": np.nan, \n", - " \"UMLS\": np.nan, \n", - " \"source\": np.nan}]\n", + " return [{\"disease_name\": np.nan, \"UMLS\": np.nan, \"source\": np.nan}]\n", "\n", - "# Apply the function to each row in bridgedb_df and create the new column\n", "disgenet_df[\"literature_based_info\"] = disgenet_df[\"identifier\"].apply(get_literature_based_info)\n", "\n", - "disgenet_df.head()\n", - "\n" + "disgenet_df.head()" ] }, { @@ -2211,7 +2223,7 @@ " ENSG00000152592\n", " Ensembl\n", " [{'disease_name': 'Hypophosphatemic Rickets', ...\n", - " [{'disease_name': 'Post-COVID-19', 'UMLS': 'C0...\n", + " {'source': 'PMID: 37675861', 'name': None, 'id...\n", " [{'chembl_id': nan, 'drugbank_id': nan, 'compo...\n", " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", " [{'pathway_id': 'WP3971', 'pathway_label': 'OS...\n", @@ -2282,7 +2294,7 @@ "3 [{'disease_name': 'Cystic Fibrosis', 'HPO': ''... 
\n", "\n", " literature_based_info \\\n", - "0 [{'disease_name': 'Post-COVID-19', 'UMLS': 'C0... \n", + "0 {'source': 'PMID: 37675861', 'name': None, 'id... \n", "1 [{'disease_name': nan, 'UMLS': nan, 'source': ... \n", "2 [{'disease_name': nan, 'UMLS': nan, 'source': ... \n", "3 [{'disease_name': nan, 'UMLS': nan, 'source': ... \n", @@ -2744,17 +2756,17 @@ "metadata": {}, "outputs": [], "source": [ - "# pygraph = generator.networkx_graph(combined_df, opentargets_disease_compound_df)\n", - "# with open(\n", - "# os.path.join(os.getcwd(), \"examples\", \"usecases\", \"PCS\", \"pcs_networkx_graph.pkl\"), \"wb\"\n", - "# ) as out:\n", - "# pickle.dump(pygraph, out)\n", - "\n", + "pygraph = generator.networkx_graph(combined_df, opentargets_disease_compound_df)\n", "with open(\n", - " os.path.join(os.getcwd(), \"examples\", \"usecases\", \"PCS\", \"pcs_networkx_graph.pkl\"),\n", - " \"rb\",\n", - ") as file:\n", - " pygraph = pickle.load(file)" + " os.path.join(os.getcwd(), \"examples\", \"usecases\", \"PCS\", \"pcs_networkx_graph.pkl\"), \"wb\"\n", + ") as out:\n", + " pickle.dump(pygraph, out)\n", + "\n", + "# with open(\n", + "# os.path.join(os.getcwd(), \"examples\", \"usecases\", \"PCS\", \"pcs_networkx_graph.pkl\"),\n", + "# \"rb\",\n", + "# ) as file:\n", + "# pygraph = pickle.load(file)" ] }, { diff --git a/src/pyBiodatafuse/graph/generator.py b/src/pyBiodatafuse/graph/generator.py index c4150274..99c4ee5e 100644 --- a/src/pyBiodatafuse/graph/generator.py +++ b/src/pyBiodatafuse/graph/generator.py @@ -232,7 +232,6 @@ def add_literature_gene_disease_subgraph(g, gene_node_label, annot_list): annot_node_attrs["id"] = annot["UMLS"] annot_node_attrs["UMLS"] = annot["UMLS"] - g.add_node(annot_node_label, attr_dict=annot_node_attrs) edge_attrs = LITERATURE_DISEASE_EDGE_ATTRS.copy() From 5430fa7695679bd71c0738b399dcbd13e9cbd90d Mon Sep 17 00:00:00 2001 From: Tooba Abbassi-Daloii Date: Tue, 17 Sep 2024 09:28:29 +0200 Subject: [PATCH 3/7] add literature --- 
src/pyBiodatafuse/constants.py | 4 ++++ src/pyBiodatafuse/graph/generator.py | 2 ++ 2 files changed, 6 insertions(+) diff --git a/src/pyBiodatafuse/constants.py b/src/pyBiodatafuse/constants.py index 4a438fab..280c263e 100644 --- a/src/pyBiodatafuse/constants.py +++ b/src/pyBiodatafuse/constants.py @@ -97,6 +97,9 @@ } DISGENET_DISEASE_COL = f"{DISGENET}_diseases" +# literature based disease info +LITERATURE_DISEASE_COL = "literature_based_info" + # Open Targets - Disease OPENTARGETS_DISEASE_OUTPUT_DICT = { "disease_name": str, @@ -280,6 +283,7 @@ "label": GENE_DISEASE_EDGE_LABEL, } # Literature + LITERATURE_DISEASE_NODE_ATTRS = { "source": None, "name": None, diff --git a/src/pyBiodatafuse/graph/generator.py b/src/pyBiodatafuse/graph/generator.py index 4a8c7968..3ab1af8c 100644 --- a/src/pyBiodatafuse/graph/generator.py +++ b/src/pyBiodatafuse/graph/generator.py @@ -33,6 +33,7 @@ GO_MF_NODE_LABELS, GO_NODE_ATTRS, GO_NODE_MAIN_LABEL, + LITERATURE_DISEASE_COL, LITERATURE_DISEASE_EDGE_ATTRS, LITERATURE_DISEASE_NODE_ATTRS, MINERVA, @@ -883,6 +884,7 @@ def networkx_graph(combined_df: pd.DataFrame, disease_compound=None): func_dict = { BGEE_GENE_EXPRESSION_LEVELS_COL: add_gene_bgee_subgraph, DISGENET_DISEASE_COL: add_disgenet_gene_disease_subgraph, + LITERATURE_DISEASE_COL: add_literature_gene_disease_subgraph, MINERVA: add_minerva_gene_pathway_subgraph, WIKIPATHWAYS: add_wikipathways_gene_pathway_subgraph, OPENTARGETS_REACTOME_COL: add_opentargets_gene_reactome_pathway_subgraph, From fd795bd2951a200e21333b8fd4ecb53307aae478 Mon Sep 17 00:00:00 2001 From: Tooba Abbassi-Daloii Date: Wed, 18 Sep 2024 11:18:06 +0200 Subject: [PATCH 4/7] Update PCS_usecase.ipynb --- examples/usecases/PCS/PCS_usecase.ipynb | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/examples/usecases/PCS/PCS_usecase.ipynb b/examples/usecases/PCS/PCS_usecase.ipynb index 75aea419..ca53973e 100644 --- a/examples/usecases/PCS/PCS_usecase.ipynb +++ 
b/examples/usecases/PCS/PCS_usecase.ipynb @@ -717,8 +717,7 @@ } ], "source": [ - "from pyBiodatafuse.constants import LITERATURE_DISEASE_NODE_ATTRS\n", - "\n", + "from pyBiodatafuse.constants import LITERATURE_DISEASE_COL, LITERATURE_DISEASE_NODE_ATTRS\n", "\n", "literature_disease_attrs = LITERATURE_DISEASE_NODE_ATTRS.copy()\n", "literature_disease_attrs[\"disease_name\"] = \"Post-COVID-19\"\n", @@ -732,7 +731,7 @@ " else:\n", " return [{\"disease_name\": np.nan, \"UMLS\": np.nan, \"source\": np.nan}]\n", "\n", - "disgenet_df[\"literature_based_info\"] = disgenet_df[\"identifier\"].apply(get_literature_based_info)\n", + "disgenet_df[LITERATURE_DISEASE_COL] = disgenet_df[\"identifier\"].apply(get_literature_based_info)\n", "\n", "disgenet_df.head()" ] @@ -2752,7 +2751,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 32, "metadata": {}, "outputs": [], "source": [ From c76c4a6001624bac022d63d60fc935b264fccd7c Mon Sep 17 00:00:00 2001 From: Tooba Abbassi-Daloii Date: Wed, 18 Sep 2024 11:34:55 +0200 Subject: [PATCH 5/7] minor bug fix --- examples/usecases/PCS/PCS_usecase.ipynb | 189 ++++++++++++++---------- src/pyBiodatafuse/constants.py | 5 + src/pyBiodatafuse/graph/generator.py | 4 +- 3 files changed, 122 insertions(+), 76 deletions(-) diff --git a/examples/usecases/PCS/PCS_usecase.ipynb b/examples/usecases/PCS/PCS_usecase.ipynb index ca53973e..d67554f2 100644 --- a/examples/usecases/PCS/PCS_usecase.ipynb +++ b/examples/usecases/PCS/PCS_usecase.ipynb @@ -646,7 +646,7 @@ " 144568\n", " NCBI Gene\n", " [{'disease_name': 'Noonan Syndrome', 'HPO': ''...\n", - " [{'disease_name': nan, 'UMLS': nan, 'source': ...\n", + " [{'disease_name': nan, 'id': nan, 'source': nan}]\n", " \n", " \n", " 1\n", @@ -655,7 +655,7 @@ " 28971\n", " NCBI Gene\n", " [{'disease_name': nan, 'HPO': nan, 'NCI': nan,...\n", - " [{'disease_name': nan, 'UMLS': nan, 'source': ...\n", + " [{'disease_name': nan, 'id': nan, 'source': nan}]\n", " \n", " \n", " 2\n", @@ -664,7 
+664,7 @@ " 19\n", " NCBI Gene\n", " [{'disease_name': 'Tangier Disease', 'HPO': ''...\n", - " [{'disease_name': nan, 'UMLS': nan, 'source': ...\n", + " [{'disease_name': nan, 'id': nan, 'source': nan}]\n", " \n", " \n", " 3\n", @@ -673,7 +673,7 @@ " 5243\n", " NCBI Gene\n", " [{'disease_name': 'Epilepsy', 'HPO': 'HPO_HP:0...\n", - " [{'disease_name': nan, 'UMLS': nan, 'source': ...\n", + " [{'disease_name': nan, 'id': nan, 'source': nan}]\n", " \n", " \n", " 4\n", @@ -682,7 +682,7 @@ " 653190\n", " NCBI Gene\n", " [{'disease_name': nan, 'HPO': nan, 'NCI': nan,...\n", - " [{'disease_name': nan, 'UMLS': nan, 'source': ...\n", + " [{'disease_name': nan, 'id': nan, 'source': nan}]\n", " \n", " \n", "\n", @@ -704,11 +704,11 @@ "4 [{'disease_name': nan, 'HPO': nan, 'NCI': nan,... \n", "\n", " literature_based_info \n", - "0 [{'disease_name': nan, 'UMLS': nan, 'source': ... \n", - "1 [{'disease_name': nan, 'UMLS': nan, 'source': ... \n", - "2 [{'disease_name': nan, 'UMLS': nan, 'source': ... \n", - "3 [{'disease_name': nan, 'UMLS': nan, 'source': ... \n", - "4 [{'disease_name': nan, 'UMLS': nan, 'source': ... 
" + "0 [{'disease_name': nan, 'id': nan, 'source': nan}] \n", + "1 [{'disease_name': nan, 'id': nan, 'source': nan}] \n", + "2 [{'disease_name': nan, 'id': nan, 'source': nan}] \n", + "3 [{'disease_name': nan, 'id': nan, 'source': nan}] \n", + "4 [{'disease_name': nan, 'id': nan, 'source': nan}] " ] }, "execution_count": 9, @@ -717,11 +717,11 @@ } ], "source": [ - "from pyBiodatafuse.constants import LITERATURE_DISEASE_COL, LITERATURE_DISEASE_NODE_ATTRS\n", + "from pyBiodatafuse.constants import LITERATURE_DISEASE_COL, LITERATURE_DISEASE_OUTPUT_DICT\n", "\n", - "literature_disease_attrs = LITERATURE_DISEASE_NODE_ATTRS.copy()\n", + "literature_disease_attrs = LITERATURE_DISEASE_OUTPUT_DICT.copy()\n", "literature_disease_attrs[\"disease_name\"] = \"Post-COVID-19\"\n", - "literature_disease_attrs[\"UMLS\"] = \"C00000\"\n", + "literature_disease_attrs[\"id\"] = \"C00000\"\n", "literature_disease_attrs[\"source\"] = \"PMID: 37675861\"\n", "\n", "\n", @@ -729,7 +729,7 @@ " if gene in pcs_associated_genes[\"Gene\"].values:\n", " return literature_disease_attrs\n", " else:\n", - " return [{\"disease_name\": np.nan, \"UMLS\": np.nan, \"source\": np.nan}]\n", + " return [{\"disease_name\": np.nan, \"id\": np.nan, \"source\": np.nan}]\n", "\n", "disgenet_df[LITERATURE_DISEASE_COL] = disgenet_df[\"identifier\"].apply(get_literature_based_info)\n", "\n", @@ -740,6 +740,27 @@ "cell_type": "code", "execution_count": 10, "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "362 {'disease_name': 'Post-COVID-19', 'id': 'C0000...\n", + "Name: literature_based_info, dtype: object" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "disgenet_df[disgenet_df[\"identifier\"] == \"DMP1\"][LITERATURE_DISEASE_COL]" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -769,7 +790,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, 
"metadata": {}, "outputs": [ { @@ -848,7 +869,7 @@ "4 UMLS_C0010054 UMLS EFO_0001645 EFO" ] }, - "execution_count": 11, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -869,7 +890,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -961,7 +982,7 @@ "4 [{'chembl_id': 'CHEMBL628', 'drugbank_id': 'DB... " ] }, - "execution_count": 12, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -1024,7 +1045,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -1119,7 +1140,7 @@ " {'name': 'drug withdrawal syndrome neonatal'}]}]" ] }, - "execution_count": 13, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -1137,7 +1158,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -1229,7 +1250,7 @@ "4 [{'chembl_id': nan, 'drugbank_id': nan, 'compo... " ] }, - "execution_count": 14, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -1268,7 +1289,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -1313,7 +1334,7 @@ " 'adverse_effect': None}]" ] }, - "execution_count": 15, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -1331,7 +1352,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -1423,7 +1444,7 @@ "4 [{'pathway_id': nan, 'pathway_label': nan, 'pa... 
" ] }, - "execution_count": 16, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -1452,7 +1473,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -1463,7 +1484,7 @@ " 'pathway_gene_count': 45.0}]" ] }, - "execution_count": 17, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -1481,7 +1502,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -1573,7 +1594,7 @@ "4 [{'pathway_id': nan, 'pathway_label': nan, 'pa... " ] }, - "execution_count": 18, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -1605,7 +1626,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -1649,7 +1670,7 @@ " 'pathway_gene_count': 35.0}]" ] }, - "execution_count": 19, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -1667,7 +1688,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -1759,7 +1780,7 @@ "4 [{'pathway_label': nan, 'pathway_id': nan}] " ] }, - "execution_count": 20, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -1797,7 +1818,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -1812,7 +1833,7 @@ " {'pathway_label': 'HDL assembly', 'pathway_id': 'R-HSA-8963896'}]" ] }, - "execution_count": 21, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -1830,7 +1851,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -1922,7 +1943,7 @@ "4 [{'go_id': nan, 'go_name': nan, 'go_type': nan}] " ] }, - "execution_count": 22, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -1952,7 +1973,7 @@ }, { "cell_type": "code", - 
"execution_count": 23, + "execution_count": 24, "metadata": {}, "outputs": [ { @@ -1971,7 +1992,7 @@ " 'go_type': 'F'}]" ] }, - "execution_count": 23, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -1989,7 +2010,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 25, "metadata": {}, "outputs": [ { @@ -2081,7 +2102,7 @@ "4 [{'stringdb_link_to': 'DEFB118', 'Ensembl': 'E... " ] }, - "execution_count": 24, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -2109,7 +2130,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 26, "metadata": {}, "outputs": [ { @@ -2125,7 +2146,7 @@ " {'stringdb_link_to': 'RUNX2', 'Ensembl': 'ENSP00000360493', 'score': 0.713}]" ] }, - "execution_count": 25, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -2143,7 +2164,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 27, "metadata": {}, "outputs": [], "source": [ @@ -2176,7 +2197,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 28, "metadata": {}, "outputs": [ { @@ -2222,7 +2243,7 @@ " ENSG00000152592\n", " Ensembl\n", " [{'disease_name': 'Hypophosphatemic Rickets', ...\n", - " {'source': 'PMID: 37675861', 'name': None, 'id...\n", + " {'disease_name': 'Post-COVID-19', 'id': 'C0000...\n", " [{'chembl_id': nan, 'drugbank_id': nan, 'compo...\n", " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", " [{'pathway_id': 'WP3971', 'pathway_label': 'OS...\n", @@ -2237,7 +2258,7 @@ " ENSG00000175535\n", " Ensembl\n", " [{'disease_name': 'Pancreatic Lipase Deficienc...\n", - " [{'disease_name': nan, 'UMLS': nan, 'source': ...\n", + " [{'disease_name': nan, 'id': nan, 'source': nan}]\n", " [{'chembl_id': 'CHEMBL175247', 'drugbank_id': ...\n", " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", @@ -2252,7 +2273,7 @@ " ENSG00000259435\n", " Ensembl\n", " 
[{'disease_name': nan, 'HPO': nan, 'NCI': nan,...\n", - " [{'disease_name': nan, 'UMLS': nan, 'source': ...\n", + " [{'disease_name': nan, 'id': nan, 'source': nan}]\n", " [{'chembl_id': nan, 'drugbank_id': nan, 'compo...\n", " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", @@ -2267,7 +2288,7 @@ " ENSG00000268104\n", " Ensembl\n", " [{'disease_name': 'Cystic Fibrosis', 'HPO': ''...\n", - " [{'disease_name': nan, 'UMLS': nan, 'source': ...\n", + " [{'disease_name': nan, 'id': nan, 'source': nan}]\n", " [{'chembl_id': nan, 'drugbank_id': nan, 'compo...\n", " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", " [{'pathway_id': 'WP2882', 'pathway_label': 'Nu...\n", @@ -2293,10 +2314,10 @@ "3 [{'disease_name': 'Cystic Fibrosis', 'HPO': ''... \n", "\n", " literature_based_info \\\n", - "0 {'source': 'PMID: 37675861', 'name': None, 'id... \n", - "1 [{'disease_name': nan, 'UMLS': nan, 'source': ... \n", - "2 [{'disease_name': nan, 'UMLS': nan, 'source': ... \n", - "3 [{'disease_name': nan, 'UMLS': nan, 'source': ... \n", + "0 {'disease_name': 'Post-COVID-19', 'id': 'C0000... \n", + "1 [{'disease_name': nan, 'id': nan, 'source': nan}] \n", + "2 [{'disease_name': nan, 'id': nan, 'source': nan}] \n", + "3 [{'disease_name': nan, 'id': nan, 'source': nan}] \n", "\n", " OpenTargets_gene_compounds \\\n", "0 [{'chembl_id': nan, 'drugbank_id': nan, 'compo... \n", @@ -2335,7 +2356,7 @@ "3 [{'stringdb_link_to': 'SLC7A11', 'Ensembl': 'E... 
" ] }, - "execution_count": 27, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -2346,7 +2367,27 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'disease_name': 'Post-COVID-19', 'id': 'C00000', 'source': 'PMID: 37675861'}" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "combined_df[LITERATURE_DISEASE_COL][0]" + ] + }, + { + "cell_type": "code", + "execution_count": 30, "metadata": {}, "outputs": [ { @@ -2498,7 +2539,7 @@ " 'request_string': 'LOC729609\\tH\\nLOC105374060\\tH\\nDMP1\\tH\\nPNLIP\\tH\\nOR4N3P\\tH\\nSLC6A14\\tH\\nLOC101927239\\tH\\nDEFB105A\\tH\\nDEFB105B\\tH\\nGSTTP1\\tH\\nNEUROD1\\tH\\nRND1\\tH\\nVN1R10P\\tH\\nLOC440446\\tH\\nLOC152225\\tH\\nLOC101929341\\tH\\nPGLYRP3\\tH\\nLINC01533\\tH\\nLINC01090\\tH\\nSPEM1\\tH\\nC16orf82\\tH\\nMIR4432HG\\tH\\nLINC01169\\tH\\nFAM71A\\tH\\nRNASE10\\tH\\nKLF17\\tH\\nC9\\tH\\nARC\\tH\\nMYL10\\tH\\nGCM1\\tH\\nAIPL1\\tH\\nHSPA6\\tH\\nLOC101929124\\tH\\nC7orf65\\tH\\nSLC2A14\\tH\\nPNLIPRP2\\tH\\nNPAS4\\tH\\nLOC101060498\\tH\\nPROP1\\tH\\nELAVL3\\tH\\nLOC105747689\\tH\\nTNF\\tH\\nADAMTS4\\tH\\nPCDH10\\tH\\nLOC101927274\\tH\\nNR4A2\\tH\\nLOC102724612\\tH\\nCEACAM22P\\tH\\nSNAI1\\tH\\nSLC2A3\\tH\\nDLX3\\tH\\nID2\\tH\\nLOC151475\\tH\\nATF3\\tH\\nNKAIN4\\tH\\nASAP1-IT2\\tH\\nNOXRED1\\tH\\nDNM1P41\\tH\\nSLC7A11\\tH\\nC10orf82\\tH\\nULBP2\\tH\\nTPTE2P6\\tH\\nNR4A3\\tH\\nLOC399715\\tH\\nCNTN3\\tH\\nGEM\\tH\\nHSPA7\\tH\\nNCMAP\\tH\\nPNP\\tH\\nPLK2\\tH\\nATP2C2\\tH\\nTNFRSF10D\\tH\\nULBP3\\tH\\nHSPA5\\tH\\nEFHB\\tH\\nHSD17B13\\tH\\nWNK3\\tH\\nLINC01535\\tH\\nELL2\\tH\\nRND3\\tH\\nDUSP5\\tH\\nNRXN3\\tH\\nIPCEF1\\tH\\nZNF492\\tH\\nSDR16C5\\tH\\nCENPL\\tH\\nSOX11\\tH\\nMAFF\\tH\\nPRG4\\tH\\nPCDH17\\tH\\nCDKN1A\\tH\\nPELI1\\tH\\nTMEM169\\tH\\nTMEM236\\tH\\nEFNA5\\tH\\nGCH1\\tH\\nANGPTL4\\tH\\nMAP1LC3C\\tH\\nCHL1\\tH\\nMPZ\\tH\\nSERPINE1\\tH\\
nSLC2A1\\tH\\nLRRC16A\\tH\\nFRZB\\tH\\nGLIS3\\tH\\nTIAM1\\tH\\nSRGAP1\\tH\\nSH2D4A\\tH\\nMYEF2\\tH\\nNT5E\\tH\\nVGLL3\\tH\\nPRTG\\tH\\nDPP4\\tH\\nKLF11\\tH\\nTAF13\\tH\\nSTRADB\\tH\\nPOMP\\tH\\nLAMTOR5\\tH\\nCCDC69\\tH\\nZNF32\\tH\\nIQSEC2\\tH\\nAPIP\\tH\\nGDF9\\tH\\nSCUBE2\\tH\\nC20orf24\\tH\\nZSWIM7\\tH\\nTIMM8B\\tH\\nLOC102724532\\tH\\nPRR16\\tH\\nAHRR\\tH\\nLEFTY2\\tH\\nIRX3\\tH\\nVMO1\\tH\\nPVALB\\tH\\nMT1DP\\tH\\nCALML5\\tH\\nLOC101929116\\tH\\nLOC101929694\\tH\\nLINC01205\\tH\\nLINC01241\\tH\\nTMPRSS11A\\tH\\nLOC101928942\\tH\\nLOC100507461\\tH\\nLINC01565\\tH\\nLOC101928358\\tH\\nSCGB1D4\\tH\\nTTR\\tH\\nLINC01284\\tH\\nSSX8\\tH\\nTMEM225\\tH\\nNCRNA00250\\tH\\nOR13D1\\tH\\nLINC01192\\tH\\nCALCB\\tH\\nLINC00411\\tH\\nLINC01227\\tH\\nMIR5689HG\\tH\\nLINC00615\\tH\\nGHSR\\tH\\nLOC105375556\\tH\\nCT45A5\\tH\\nLOC646029\\tH\\nZFP42\\tH\\nCT45A9\\tH\\nFLJ46066\\tH\\nCGA\\tH\\nLOC285692\\tH\\nLOC105369509\\tH\\nCLEC1B\\tH\\nHIST1H4A\\tH\\nDSCAM-IT1\\tH\\nCT45A2\\tH\\nCT45A8\\tH\\nLINC00928\\tH\\nBDKRB1\\tH\\nLOC105370586\\tH\\nTRIM51\\tH\\nLOC101927480\\tH\\nLINC01568\\tH\\nCASC17\\tH\\nLOC101929631\\tH\\nLINC01233\\tH\\nLOC101927948\\tH\\nOR13C5\\tH\\nSSX2\\tH\\nSSX2B\\tH\\nCACNA1C-IT3\\tH\\nLOC100500773\\tH\\nSPATA3\\tH\\nLOC101927374\\tH\\nFBXO47\\tH\\nLINC01493\\tH\\nLOC105369431\\tH\\nLOC105376468\\tH\\nOR5W2\\tH\\nREG4\\tH\\nCD5L\\tH\\nLINC01514\\tH\\nLOC105376331\\tH\\nLOC102723557\\tH\\nPISRT1\\tH\\nHIGD2B\\tH\\nPAGE1\\tH\\nMMP26\\tH\\nLOC101928602\\tH\\nLOC102723895\\tH\\nACTR3BP2\\tH\\nLOC101927363\\tH\\nHNRNPKP3\\tH\\nLOC101927188\\tH\\nDISC1-IT1\\tH\\nLOC102467222\\tH\\nFAM9B\\tH\\nGLOD5\\tH\\nC2orf48\\tH\\nLOC100288254\\tH\\nFRG2\\tH\\nGACAT3\\tH\\nFOXCUT\\tH\\nLOC101927357\\tH\\nLOC101929260\\tH\\nOR13C2\\tH\\nLOC101929754\\tH\\nLOC146513\\tH\\nOR2AT4\\tH\\nPBOV1\\tH\\nTFDP3\\tH\\nLOC101929420\\tH\\nHRAT17\\tH\\nOR6W1P\\tH\\nSSX9\\tH\\nSSX3\\tH\\nHMGA1P7\\tH\\nLINC00374\\tH\\nLINC01288\\tH\\nLINC00836\\tH\\nLINC01320\\tH\\nTRIM64\\tH\\nSDR16C6P\\tH\\n
LOC729966\\tH\\nLOC105375014\\tH\\nLINC01441\\tH\\nSCNN1G\\tH\\nC7orf69\\tH\\nOPN1LW\\tH\\nKRTAP5-4\\tH\\nANKUB1\\tH\\nTMEM213\\tH\\nTFAP2D\\tH\\nDANT2\\tH\\nLOC101927419\\tH\\nTXNDC2\\tH\\nOR11A1\\tH\\nLINC01317\\tH\\nLOC101805491\\tH\\nLOC286083\\tH\\nLOC101929563\\tH\\nLINC01216\\tH\\nLINC01163\\tH\\nLOC101927166\\tH\\nPHOX2B\\tH\\nLOC102467081\\tH\\nCT45A6\\tH\\nSND1-IT1\\tH\\nSSX4B\\tH\\nSSX4\\tH\\nSULT1E1\\tH\\nNOL4\\tH\\nZNF716\\tH\\nSUMO1P1\\tH\\nLOC440896\\tH\\nG6PC\\tH\\nMIR31HG\\tH\\nLOC101929259\\tH\\nHTR3C\\tH\\nLOC730100\\tH\\nMAB21L3\\tH\\nIL6\\tH\\nMIP\\tH\\nTRIM64B\\tH\\nCNGB1\\tH\\nLINC01531\\tH\\nFOXL2NB\\tH\\nCXCL8\\tH\\nSLC15A1\\tH\\nGABRB1\\tH\\nLINC00862\\tH\\nZPBP2\\tH\\nLOC101928992\\tH\\nDPPA4\\tH\\nPOU2F3\\tH\\nNUTM1\\tH\\nLOC105372440\\tH\\nSELE\\tH\\nGPR143\\tH\\nFSTL5\\tH\\nAXDND1\\tH\\nLINC01619\\tH\\nSAMD7\\tH\\nLOC100131257\\tH\\nABCC13\\tH\\nC17orf78\\tH\\nCRX\\tH\\nC12orf42\\tH\\nFOXG1\\tH\\nHTR3A\\tH\\nLOC644189\\tH\\nPNPLA1\\tH\\nLINC00880\\tH\\nTOP1P2\\tH\\nCAGE1\\tH\\nLINC00670\\tH\\nLOC101928231\\tH\\nFAM138C\\tH\\nRTP1\\tH\\nLOC101928617\\tH\\nSPAG11B\\tH\\nLOC101927691\\tH\\nSLC35G3\\tH\\nBCO1\\tH\\nSLC35G4\\tH\\nLINC00636\\tH\\nEPGN\\tH\\nPTGS2\\tH\\nPGC\\tH\\nLOC102724467\\tH\\nLOC101928103\\tH\\nTRPC5OS\\tH\\nLOC338694\\tH\\nLINC01036\\tH\\nDLX6\\tH\\nLINC00426\\tH\\nCXorf65\\tH\\nHP09025\\tH\\nLOC389273\\tH\\nDPCR1\\tH\\nC5orf60\\tH\\nPCSK1\\tH\\nLOC494141\\tH\\nGADD45B\\tH\\nC1orf87\\tH\\nANKS4B\\tH\\nJAKMIP2\\tH\\nLINC00266-3\\tH\\nDRAIC\\tH\\nTCAM1P\\tH\\nMIR202HG\\tH\\nSPRR2F\\tH\\nFAM138B\\tH\\nLINC00907\\tH\\nCCL19\\tH\\nASCL1\\tH\\nNUP210L\\tH\\nLINC01170\\tH\\nLINC00264\\tH\\nANKRD7\\tH\\nLOC102724601\\tH\\nSH2D6\\tH\\nFAM138F\\tH\\nFAM138A\\tH\\nGYPE\\tH\\nDDX4\\tH\\nIL5RA\\tH\\nTNFRSF9\\tH\\nLINC00368\\tH\\nLGSN\\tH\\nNEK5\\tH\\nLOC105374177\\tH\\nGLB1L3\\tH\\nLOC105379511\\tH\\nMT1A\\tH\\nFAM138E\\tH\\nTEKT3\\tH\\nSV2C\\tH\\nNR2E3\\tH\\nPLA2G10\\tH\\nLOC101927770\\tH\\nENO4\\tH\\nSBK2\\tH\\nA2ML1\\tH\\nLOC1019
27257\\tH\\nSPRY4-IT1\\tH\\nDNAH8\\tH\\nAK7\\tH\\nASXL3\\tH\\nTEX38\\tH\\nDNM1P35\\tH\\nCCL26\\tH\\nPPP3R2\\tH\\nCTSLP2\\tH\\nACBD7\\tH\\nSOX2-OT\\tH\\nSTC1\\tH\\nLOC284865\\tH\\nFDPSP2\\tH\\nMARVELD2\\tH\\nCDKL2\\tH\\nDCX\\tH\\nSHISA9\\tH\\nC4orf26\\tH\\nDNAH5\\tH\\nCD3G\\tH\\nTTC23L\\tH\\nPDE6A\\tH\\nAPOBEC3H\\tH\\nLINC00311\\tH\\nCXCL2\\tH\\nLINC00632\\tH\\nSALL4\\tH\\nLOC105372582\\tH\\nFAM106CP\\tH\\nRASD1\\tH\\nCACNA1F\\tH\\nELAVL2\\tH\\nKIAA0087\\tH\\nGIPR\\tH\\nCIDEA\\tH\\nBCL11B\\tH\\nTNFRSF11B\\tH\\nCA13\\tH\\nANKRD20A9P\\tH\\nFAM106B\\tH\\nSEMA3E\\tH\\nGPRC5A\\tH\\nLOC285819\\tH\\nLOC730101\\tH\\nIL1RL1\\tH\\nRGS2\\tH\\nRYBP\\tH\\nC3orf52\\tH\\nHOOK1\\tH\\nPCDH9\\tH\\nCDH19\\tH\\nPGA4\\tH\\nSTARD4\\tH\\nCYP2B7P\\tH\\nTFPI2\\tH\\nPDK4\\tH\\nPGA5\\tH\\nKCNAB3\\tH\\nLINC00641\\tH\\nLOC102724571\\tH\\nSEZ6L\\tH\\nTNFSF9\\tH\\nZNF483\\tH\\nM1AP\\tH\\nFAAP24\\tH\\nKLHL15\\tH\\nCHD1\\tH\\nAP1S3\\tH\\nCDS1\\tH\\nCRTAC1\\tH\\nGYG2\\tH\\nGRHL1\\tH\\nFSIP1\\tH\\nSYT1\\tH\\nPLCXD3\\tH\\nLOC101928371\\tH\\nPEG10\\tH\\nMPZL3\\tH\\nZNF331\\tH\\nKCNQ1OT1\\tH\\nLOC388436\\tH\\nLOC79999\\tH\\nFAM106A\\tH\\nRPS6KA6\\tH\\nBCL2L15\\tH\\nTBX5\\tH\\nEMP1\\tH\\nPPP2R2B\\tH\\nTACR1\\tH\\nSLC7A10\\tH\\nELOVL6\\tH\\nATP1B3\\tH\\nSEMA4A\\tH\\nCEP152\\tH\\nLINC01296\\tH\\nNRXN1\\tH\\nADGRG2\\tH\\nCLDN1\\tH\\nZSWIM6\\tH\\nWNT3\\tH\\nCCDC170\\tH\\nTHBS1\\tH\\nSLC35F2\\tH\\nZC3H12B\\tH\\nPLIN1\\tH\\nLOC401052\\tH\\nCATSPERG\\tH\\nIFRD1\\tH\\nGAS2L3\\tH\\nAPOBEC3D\\tH\\nPOU2F2\\tH\\nERRFI1\\tH\\nARSJ\\tH\\nFOXC1\\tH\\nPRDM1\\tH\\nRASGRP1\\tH\\nKIAA1683\\tH\\nPRELP\\tH\\nTIPARP\\tH\\nZC3H12A\\tH\\nSGIP1\\tH\\nPDE8B\\tH\\nGFPT2\\tH\\nCABP4\\tH\\nRAD51B\\tH\\nMICB\\tH\\nEIF4A3\\tH\\nFAM72C\\tH\\nC7\\tH\\nQPCT\\tH\\nMAP3K8\\tH\\nTUFT1\\tH\\nDUXAP10\\tH\\nSHROOM3\\tH\\nZC3HAV1\\tH\\nS1PR2\\tH\\nFAM122C\\tH\\nHRH1\\tH\\nUGCG\\tH\\nSOX9\\tH\\nLYVE1\\tH\\nBCL2L11\\tH\\nEIF2AK3\\tH\\nC11orf63\\tH\\nSERPINB8\\tH\\nLEPR\\tH\\nCACNB2\\tH\\nCACNA2D4\\tH\\nNR2F1\\tH\\nCLCF1\\tH\\nPSD3\\tH\\nADNP2\\tH\
\nDYNC2H1\\tH\\nOR2A20P\\tH\\nSYT17\\tH\\nVASH2\\tH\\nTMEM2\\tH\\nOR2A9P\\tH\\nUSP32P2\\tH\\nEDIL3\\tH\\nLOX\\tH\\nMXD1\\tH\\nNHSL1\\tH\\nDLC1\\tH\\nCYBB\\tH\\nETV5\\tH\\nCEP126\\tH\\nPTPRF\\tH\\nCOCH\\tH\\nSCRN1\\tH\\nPPM1D\\tH\\nLILRB4\\tH\\nMFSD4A\\tH\\nCCDC144B\\tH\\nPXDNL\\tH\\nAHR\\tH\\nTRIM14\\tH\\nFRMD4B\\tH\\nCD84\\tH\\nTIAM2\\tH\\nADAMTS5\\tH\\nXYLT1\\tH\\nMYOF\\tH\\nSLC7A1\\tH\\nSMG1P3\\tH\\nUGDH\\tH\\nPMP22\\tH\\nAMPH\\tH\\nNPIPB5\\tH\\nNT5DC3\\tH\\nUBE2D2\\tH\\nPIGX\\tH\\nTTC1\\tH\\nSRP14\\tH\\nGKAP1\\tH\\nFIBP\\tH\\nMED11\\tH\\nVTI1B\\tH\\nATPAF1\\tH\\nDNAJC19\\tH\\nMRPL24\\tH\\nTRIM16L\\tH\\nPOLR2F\\tH\\nGCSH\\tH\\nTMEM147\\tH\\nLSM10\\tH\\nMRPL40\\tH\\nC11orf74\\tH\\nSERF2-C15ORF63\\tH\\nNDUFAF2\\tH\\nUBE3D\\tH\\nMALSU1\\tH\\nCOA4\\tH\\nELP6\\tH\\nMTX2\\tH\\nCMC4\\tH\\nMON1A\\tH\\nCABP7\\tH\\nMID1IP1\\tH\\nCOA6\\tH\\nKIF22\\tH\\nTSEN15\\tH\\nNDFIP2\\tH\\nHYPK\\tH\\nZCRB1\\tH\\nPARK7\\tH\\nCOX16\\tH\\nGTF3C6\\tH\\nMINOS1\\tH\\nMRPS15\\tH\\nSTOML2\\tH\\nKCNS3\\tH\\nCACNA2D3\\tH\\nCTNNBIP1\\tH\\nC7orf55\\tH\\nCOPS5\\tH\\nCHCHD5\\tH\\nYBX3P1\\tH\\nSPAG7\\tH\\nNDUFS3\\tH\\nTPI1\\tH\\nPET100\\tH\\nST3GAL2\\tH\\nMRPL21\\tH\\nTP53TG1\\tH\\nCDKN2AIPNL\\tH\\nOIP5\\tH\\nRPS20\\tH\\nATP5E\\tH\\nCBWD2\\tH\\nCDK5\\tH\\nTOMM5\\tH\\nPRR34\\tH\\nHINT1\\tH\\nBAD\\tH\\nATP5L\\tH\\nSFXN5\\tH\\nAAMDC\\tH\\nMRPL51\\tH\\nKIAA0930\\tH\\nVAMP5\\tH\\nSEPW1\\tH\\nNDUFA6\\tH\\nSLIRP\\tH\\nSHISA2\\tH\\nNUDT2\\tH\\nCOX5B\\tH\\nSNRPN\\tH\\nSNURF\\tH\\nAURKA\\tH\\nCBWD1\\tH\\nNDUFB2\\tH\\nNAA38\\tH\\nCKM\\tH\\nGPD1\\tH\\nRPS29\\tH\\nDHRS4L1\\tH\\nMRPL33\\tH\\nLOC100507291\\tH\\nATP23\\tH\\nUQCRQ\\tH\\nNDUFC2\\tH\\nBOLA3\\tH\\nTCEB2\\tH\\nCOX7A1\\tH\\nDHRS4\\tH\\nCOX6C\\tH\\nFHL2\\tH\\nSLN\\tH\\nNDUFA1\\tH\\nRPL21P28\\tH\\nRPL21\\tH\\nNDUFC2-KCTD14\\tH\\nATP5I\\tH\\nUQCC2\\tH\\nLOC101929231\\tH\\nDBNDD1\\tH\\nNDUFB9\\tH\\nLAMB3\\tH\\nCSF3R\\tH\\nUSMG5\\tH\\nDHRS4L2\\tH\\nSERPINA1\\tH\\nC1orf53\\tH\\nGLT1D1\\tH\\nGREM2\\tH\\nUQCRBP1\\tH\\nFAM24B\\tH\\nS100A8\\tH\\nCDH22\\tH\\nLEFTY1\
\tH\\nC3orf14\\tH\\nLINC01291\\tH\\nTPI1P2\\tH\\nCHAF1B\\tH\\nCENPE\\tH\\nE2F2\\tH\\nOSMR\\tH\\nNDUFC1\\tH\\nGP9\\tH\\nCDON\\tH\\nPOU3F3\\tH\\nLINC01224\\tH\\nOR7G1\\tH\\nZNF735\\tH\\nRPL23AP53\\tH\\nSAMD12\\tH\\nPAMR1\\tH\\nHIST3H2A\\tH\\nLOC101927798\\tH\\nFMOD\\tH\\nOR8S1\\tH\\nKLHL11\\tH\\nLOC105375429\\tH\\nLINC01122\\tH\\nTMCO2\\tH\\nDNAH12\\tH\\nKLF4\\tH\\nCHRM4\\tH\\nLOC101928505\\tH\\nADAMTS1\\tH\\nBEX2\\tH\\nMCTP1\\tH\\nHSD3BP4\\tH\\nLINC01053\\tH\\nELK2AP\\tH\\nLOC105377458\\tH\\nFAM71E2\\tH\\nHAO1\\tH\\nCD68\\tH\\nLOC101928728\\tH\\nSYT15\\tH\\nBAGE\\tH\\nBPIFC\\tH\\nRAET1K\\tH\\nTMPRSS11BNL\\tH\\nTOMM7\\tH\\nHESX1\\tH\\nLRRC72\\tH\\nTUSC5\\tH\\nMUC13\\tH\\nLOC101929227\\tH\\nEDA2R\\tH\\nTM2D1\\tH\\nBCAT1\\tH\\nF13B\\tH\\nLINC00958\\tH\\nRFX4\\tH\\nBRD2\\tH\\nSCN3B\\tH\\nNANOS1\\tH\\nLINC01252\\tH\\nPHLDA2\\tH\\nSNAI3\\tH\\nLOC100506274\\tH\\nLINC01021\\tH\\nCHI3L1\\tH\\nTIMM10\\tH\\nKRTAP5-2\\tH\\nLY6G6C\\tH\\nLOC101927476\\tH\\nZNF169\\tH\\nTINCR\\tH\\nUBL5\\tH\\nLINC01551\\tH\\nFIRRE\\tH\\nRPS28\\tH\\nCYP2G1P\\tH\\nCASC21\\tH\\nWDR76\\tH\\nAGBL4-IT1\\tH\\nLINC01483\\tH\\nYEATS4\\tH\\nNUGGC\\tH\\nAPOBEC1\\tH\\nZAN\\tH\\nCNNM1\\tH\\nTMC1\\tH\\nAPOPT1\\tH\\nNT5M\\tH\\nLINC00877\\tH\\nLOC100133050\\tH\\nMRPL53\\tH\\nCBWD3\\tH\\nJMJD1C\\tH\\nNDUFA11\\tH\\nPLA2G2A\\tH\\nARRDC5\\tH\\nENPP1\\tH\\nNDUFB1\\tH\\nTSHZ2\\tH\\nCRIP3\\tH\\nSMIM4\\tH\\nNANOG\\tH\\nFBXO36\\tH\\nDGCR6L\\tH\\nFAM138D\\tH\\nARAP2\\tH\\nBMP6\\tH\\nMRPL20\\tH\\nMRPS18C\\tH\\nTGIF2-C20orf24\\tH\\nTPM1\\tH\\nSCML4\\tH\\nHRASLS\\tH\\nLOC105379450\\tH\\nNHS\\tH\\nLINC00888\\tH\\nLUADT1\\tH\\nTHBS2\\tH\\nSFTPB\\tH\\nSCN8A\\tH\\nCBWD6\\tH\\nSLC24A4\\tH\\nSRPX2\\tH\\nLCE3D\\tH\\nLCN12\\tH\\nGATA2\\tH\\nLINC00578\\tH\\nLOC101928449\\tH\\nGYPC\\tH\\nPDCL2\\tH\\nCHCHD3\\tH\\nGHET1\\tH\\nLOC101927284\\tH\\nC19orf35\\tH\\nPARP11\\tH\\nLOC100268168\\tH\\nANKRD45\\tH\\nCT45A3\\tH\\nAZGP1\\tH\\nARPC2\\tH\\nLINC01516\\tH\\nPTGER3\\tH\\nUROS\\tH\\nLOC101928887\\tH\\nFCGR1CP\\tH\\nLOC105375396\\tH\\nLOC7279
24\\tH\\nST20-MTHFS\\tH\\nTNIP3\\tH\\nTDGF1P3\\tH\\nCCL28\\tH\\nGALNT15\\tH\\nNME9\\tH\\nRSPH14\\tH\\nLINC00608\\tH\\nPCDH8\\tH\\nSHISA4\\tH\\nLVCAT5\\tH\\nDCUN1D3\\tH\\nLOC401463\\tH\\nLOC105375483\\tH\\nMRPL15\\tH\\nHS3ST2\\tH\\nC1orf194\\tH\\nRAB3B\\tH\\nTMEM251\\tH\\nLINC00152\\tH\\nLINC00102\\tH\\nCORO2B\\tH\\nBSPRY\\tH\\nCCR7\\tH\\nGLI3\\tH\\nAPOL4\\tH\\nKERA\\tH\\nGAMT\\tH\\nRBP4\\tH\\nLMO1\\tH\\nSNHG12\\tH\\nLINC01410\\tH\\nZNF280C\\tH\\nCCDC144A\\tH\\nSNRNP27\\tH\\nNDUFA3\\tH\\nSKIDA1\\tH\\nFZD5\\tH\\nRUNDC3B\\tH\\nSHFM1\\tH\\nZMAT5\\tH\\nGGT7\\tH\\nTXLNG\\tH\\nSMG1P1\\tH\\nMMADHC\\tH\\nKPNA2\\tH\\nPAM16\\tH\\nLOC101929697\\tH\\nCXCL13\\tH\\nIMPA2\\tH\\nPRKAG2\\tH\\nMEX3B\\tH\\nNCCRP1\\tH\\nMAFA\\tH\\nHIST1H3J\\tH\\nLDLR\\tH\\nKANK4\\tH\\nSHC4\\tH\\nMACROD1\\tH\\nTAC3\\tH\\nNKX2-5\\tH\\nCOX8A\\tH\\nCREB5\\tH\\nTIMM17B\\tH\\nCBWD5\\tH\\nMTFR2\\tH\\nGSTTP2\\tH\\nLINC01504\\tH\\nEMC4\\tH\\nLOC101928272\\tH\\nCWH43\\tH\\nAPOC4\\tH\\nCCND2\\tH\\nSDHAF4\\tH\\nC2orf91\\tH\\nMYCNOS\\tH\\nZNF80\\tH\\nSIK2\\tH\\nMRPL52\\tH\\nBAK1\\tH\\nEZH2\\tH\\nABCC6P1\\tH\\nHIST1H2BO\\tH\\nNRG1-IT1\\tH\\nWWC1\\tH\\nFAM183A\\tH\\nPABPC1L\\tH\\nTPTE\\tH\\nBRS3\\tH\\nPCDH19\\tH\\nAKR1D1\\tH\\nSLC4A8\\tH\\nLOC105377651\\tH\\nLDHA\\tH\\nRPGRIP1\\tH\\nPPP1R1B\\tH\\nATP5EP2\\tH\\nCACYBP\\tH\\nCHURC1-FNTB\\tH\\nBARX2\\tH\\nHELB\\tH\\nCTCFL\\tH\\nPTPN13\\tH\\nPGR\\tH\\nTMEM261\\tH\\nTRIM49B\\tH\\nMYLPF\\tH\\nLOC100131047\\tH\\nPAPPA\\tH\\nPGM2\\tH\\nMRC1\\tH\\nSNX29P2\\tH\\nLOC101929159\\tH\\nNAP1L3\\tH\\nHILPDA\\tH\\nEFNA2\\tH\\nTMEM35\\tH\\nLOC101243545\\tH\\nLOC101927829\\tH\\nHEPHL1\\tH\\nACER1\\tH\\nLYPD4\\tH\\nLOC101928510\\tH\\nLOC101929577\\tH\\nRELL1\\tH\\nSLC20A1\\tH\\nSSNA1\\tH\\nATP5G1\\tH\\nLRIT2\\tH\\nGDF6\\tH\\nNDUFA13\\tH\\nFAM227A\\tH\\nLOC101929431\\tH\\nGAPDH\\tH\\nSOAT1\\tH\\nPWRN2\\tH\\nLINC00173\\tH\\nFOXL2\\tH\\nUQCRHL\\tH\\nLINC00906\\tH\\nCA5A\\tH\\nAPOBEC2\\tH\\nCT45A1\\tH\\nPSMC3\\tH\\nPART1\\tH\\nLINC00305\\tH\\nLOC400655\\tH\\nSYT11\\tH\\nLINC01361\\tH\\nANGPTL
7\\tH\\nMPC2\\tH\\nLGALS9B\\tH\\nLINC01276\\tH\\nRIPK2\\tH\\nHEPACAM\\tH\\nDKFZp779M0652\\tH\\nSOX4\\tH\\nSPATA21\\tH\\nEFCAB5\\tH\\nNDUFB5\\tH\\nTRAF3IP2\\tH\\nTRAPPC3\\tH\\nGADD45G\\tH\\nCXXC4\\tH\\nLINC00676\\tH\\nSOX1\\tH\\nC15orf61\\tH\\nHIST1H2BK\\tH\\nHIST1H2AC\\tH\\nLOC284950\\tH\\nTMEM266\\tH\\nMMP19\\tH\\nPLAUR\\tH\\nC20orf96\\tH\\nSLC9C2\\tH\\nLOC101060524\\tH\\nDRD5P2\\tH\\nMRPL11\\tH\\nAPOF\\tH\\nLRRC23\\tH\\nECT2L\\tH\\nNMNAT1\\tH\\nCCDC144CP\\tH\\nLOC101928539\\tH\\nRNLS\\tH\\nLOC105372179\\tH\\nMS4A10\\tH\\nTRAPPC2B\\tH\\nCHCHD2\\tH\\nLOC102724434\\tH\\nC7orf31\\tH\\nMIEN1\\tH\\nLOC100506444\\tH\\nPPP1R36\\tH\\nCCL2\\tH\\nSLC19A3\\tH\\nENDOU\\tH\\nLOC440028\\tH\\nPSMB10\\tH\\nFAM72D\\tH\\nGNG4\\tH\\nFOXO1\\tH\\nATP6V0A4\\tH\\nSKA1\\tH\\nPPP1R15B\\tH\\nTRPM5\\tH\\nANKRD33B\\tH\\nC1orf210\\tH\\nLOC101927058\\tH\\nMCF2\\tH\\nGALNT16\\tH\\nFRMD5\\tH\\nPCK1\\tH\\nPALM2\\tH\\nFIS1\\tH\\nKIAA0040\\tH\\nCIB2\\tH\\nNHEG1\\tH\\nCLDN11\\tH\\nPTGER4\\tH\\nCD83\\tH\\nNENF\\tH\\nLOC101928107\\tH\\nGLB1L2\\tH\\nLOC100505918\\tH\\nC2orf66\\tH\\nS100P\\tH\\nMBD3L3\\tH\\nLOC729970\\tH\\nREPS2\\tH\\nSNRPD2\\tH\\nCYP27A1\\tH\\nCDC20B\\tH\\nTAT\\tH\\nMDH1\\tH\\nCOX4I1\\tH\\nNHLH1\\tH\\nTMIGD1\\tH\\nTSACC\\tH\\nLOC101927596\\tH\\nWBSCR17\\tH\\nCYP1A2\\tH\\nPLK4\\tH\\nPSMD14\\tH\\nLOC105373782\\tH\\nMRPS28\\tH\\nARMC9\\tH\\nLINC01213\\tH\\nTGFBR3\\tH\\nARMCX4\\tH\\nLINC00243\\tH\\nDSC2\\tH\\nLOC105371335\\tH\\nLOC101927780\\tH\\nCXADR\\tH\\nDSG2\\tH\\nLPAR4\\tH\\nDAW1\\tH\\nBTG1\\tH\\nGLRX3\\tH\\nDUXAP8\\tH\\nMRPL34\\tH\\nSAT1\\tH\\nDHRS7C\\tH\\nOLR1\\tH\\nTM4SF1\\tH\\nSEMA3D\\tH\\nLOC101927650\\tH\\nLINC00668\\tH\\nRGS4\\tH\\nLOC644838\\tH\\nUBB\\tH\\nLOC101928514\\tH\\nELF4\\tH\\nCH25H\\tH\\nNCOA7\\tH\\nLINC01387\\tH\\nMSR1\\tH\\nNUTF2\\tH\\nZNF367\\tH\\nTSPAN5\\tH\\nATP5O\\tH\\nNKAIN3\\tH\\nCD44\\tH\\nFASN\\tH\\nMYBPC2\\tH\\nZNF611\\tH\\nLOC100287036\\tH\\nMTSS1L\\tH\\nGABRG2\\tH\\nZNF829\\tH\\nLOC100271832\\tH\\nUQCRH\\tH\\nPIGH\\tH\\nPOM121L8P\\tH\\nCTH\\tH\\nAK1\\tH\
\nSLC7A14\\tH\\nFGF21\\tH\\nPAIP1\\tH\\nUBA3\\tH\\nMAPKAP1\\tH\\nZIM3\\tH\\nILDR1\\tH\\nFAHD1\\tH\\nMELK\\tH\\nTRIM29\\tH\\nNTM-IT\\tH\\nTPH1\\tH\\nSMIM10L1\\tH\\nCRYGB\\tH\\nSNAP91\\tH\\nNEURL1\\tH\\nLOC101929504\\tH\\nLOC102724053\\tH\\nLINC01268\\tH\\nFAM171B\\tH\\nFOSL1\\tH\\nC10orf126\\tH\\nLOC286059\\tH\\nLOC100506747\\tH\\nCXCR2\\tH\\nLINC00294\\tH\\nPPP1R7\\tH\\nTMA7\\tH\\nERC2-IT1\\tH\\nANTXR1\\tH\\nPRKACG\\tH\\nPIGR\\tH\\nTF\\tH\\nNME2\\tH\\nINE1\\tH\\nLCE3B\\tH\\nIMMP1L\\tH\\nLOC101927142\\tH\\nDNAJB1\\tH\\nVSTM1\\tH\\nLOC105372626\\tH\\nEPHA7\\tH\\nGUCY2F\\tH\\nANXA1\\tH\\nLOC101928973\\tH\\nLOC102723427\\tH\\nCD109\\tH\\nIER3\\tH\\nOVOL1\\tH\\nLOC101927630\\tH\\nRGS14\\tH\\nLOC100289333\\tH\\nMRGPRE\\tH\\nTRPC1\\tH\\nPDZK1\\tH\\nLOC285889\\tH\\nLOC100130899\\tH\\nLOC642929\\tH\\nGYPB\\tH\\nSF3B5\\tH\\nCRAT8\\tH\\nRDH14\\tH\\nIRGC\\tH\\nIGF2BP1\\tH\\nSep-14\\tH\\nCTD-2201E9.1\\tH\\nLOC100506085\\tH\\nCDH16\\tH\\nUGT8\\tH\\nCCL11\\tH\\nULK4P2\\tH\\nULK4P1\\tH\\nNDUFB10\\tH\\nLOC101927526\\tH\\nLOC440910\\tH\\nTLR6\\tH\\nZNF724P\\tH\\nTBX18\\tH\\nISCA2\\tH\\nINSC\\tH\\nISY1\\tH\\nTGIF2\\tH\\nIKBKB\\tH\\nXCL1\\tH\\nMID1\\tH\\nLOC100996251\\tH\\nSLC38A1\\tH\\nLOC105375401\\tH\\nLOC388692\\tH\\nLINC00710\\tH\\nOAZ1\\tH\\nTHSD7A\\tH\\nMAP6D1\\tH\\nLOC102723727\\tH\\nSHH\\tH\\nLOC339666\\tH\\nGAB3\\tH\\nNSUN6\\tH\\nCGN\\tH\\nOR7E156P\\tH\\nNXF1\\tH\\nOLIG1\\tH\\nHCG2040054\\tH\\nC6orf203\\tH\\nLOC441454\\tH\\nTRPM3\\tH\\nCXCL1\\tH\\nCMC2\\tH\\nCYP27C1\\tH\\nCCL22\\tH\\nBAZ1A\\tH\\nBMS1P5\\tH\\nMS4A2\\tH\\nTCAF2\\tH\\nDCST2\\tH\\nCCEPR\\tH\\nDLEU7\\tH\\nSLC2A7\\tH\\nTEKT2\\tH\\nCRY1\\tH\\nLOC105370792\\tH\\nCT45A7\\tH\\nTPM2\\tH\\nNME1-NME2\\tH\\nCT45A10\\tH\\nSLC25A26\\tH\\nIER5L\\tH\\nLINC01111\\tH\\nLEP\\tH\\nFLVCR1\\tH\\nTES\\tH\\nPRELID3A\\tH\\nCLEC19A\\tH\\nITGAE\\tH\\nDNAJB13\\tH\\nABHD12B\\tH\\nNTRK3\\tH\\nBANCR\\tH\\nHTRA4\\tH\\nCYP2B6\\tH\\nSLC6A4\\tH\\nRPL37A\\tH\\nTRIM71\\tH\\nSNTN\\tH\\nSNHG6\\tH\\nLINC01563\\tH\\nRIMS2\\tH\\nDPM3\\tH\\nFAM46A\\tH\\
nZBP1\\tH\\nSERF1B\\tH\\nSERF1A\\tH\\nPTGER4P2-CDK2AP2P2\\tH\\nGPBAR1\\tH\\nCYR61\\tH\\nMRPL37\\tH\\nBAGE3\\tH\\nBAGE2\\tH\\nELMO1\\tH\\nTROAP\\tH\\nTMEM217\\tH\\nTMPRSS11E\\tH\\nMYH1\\tH\\nLOC101929234\\tH\\nSARNP\\tH\\nCRAT37\\tH\\nBAGE5\\tH\\nBAGE4\\tH\\nLINC00844\\tH\\nSLX4IP\\tH\\nLOC101928008\\tH\\nB4GALT3\\tH\\nLINC01206\\tH\\nNDUFA7\\tH\\nCOX14\\tH\\nMORC1\\tH\\nARID5B\\tH\\nPNKD\\tH\\nBIRC3\\tH\\nBTBD6\\tH\\nLOC101928902\\tH\\nFAM71D\\tH\\nLINC01251\\tH\\nARL2\\tH\\nLINC01265\\tH\\nTMEM205\\tH\\nLOC101929125\\tH\\nHCG22\\tH\\nLOC102724708\\tH\\nPRKCG\\tH\\nLINC01481\\tH\\nZNF98\\tH\\nPSMA8\\tH\\nCD14\\tH\\nPSMD4\\tH\\nAKR1C2\\tH\\nPSMB3\\tH\\nSMDT1\\tH\\nTCF7L1\\tH\\nMTCP1\\tH\\nHPSE\\tH\\nANGPTL5\\tH\\nFUNDC2P2\\tH\\nLINC00330\\tH\\nCACNG8\\tH\\nATRAID\\tH\\nPKHD1L1\\tH\\nHDAC11\\tH\\nC3orf18\\tH\\nSTX11\\tH\\nHIST2H2BA\\tH\\nTMTC4\\tH\\nLOC100506682\\tH\\nRPS14P3\\tH\\nELOVL7\\tH\\nTMEM156\\tH\\nBUB1B\\tH\\nLINC00477\\tH\\nMAP7D2\\tH\\nGPC6\\tH\\nPAQR5\\tH\\nPGAM2\\tH\\nPTS\\tH\\nS100A1\\tH\\nHEXIM2\\tH\\nOR4K2\\tH\\nS100G\\tH\\nATP5H\\tH\\nFKBP3\\tH\\nSCGB2A2\\tH\\nPLEKHH2\\tH\\nLOC102723322\\tH\\nACSM5\\tH\\nSFPQ\\tH\\nZNF358\\tH\\nGABRE\\tH\\nRRAGD\\tH\\nLMO7DN\\tH\\nNSMCE1\\tH\\nLINC00941\\tH\\nDAAM2\\tH\\nHPVC1\\tH\\nLINC00486\\tH\\nRPL26L1\\tH\\nLOC100287896\\tH\\nCASC6\\tH\\nREL\\tH\\nSPATA24\\tH\\nTMEM42\\tH\\nEFNB2\\tH\\nFNDC5\\tH\\nLKAAEAR1\\tH\\nCLDN4\\tH\\nTPTE2P1\\tH\\nSTEAP3\\tH\\nMLXIPL\\tH\\nCSF2\\tH\\nDYDC1\\tH\\nDPCD\\tH\\nABCB1\\tH\\nPRSS12\\tH\\nSDHB\\tH\\nTREML3P\\tH\\nLINC00911\\tH\\nFBXO25\\tH\\nLOC101928335\\tH\\nLNP1\\tH\\nLINC01138\\tH\\nLOC101928403\\tH\\nLOC101929565\\tH\\nCDCA8\\tH\\nLOC100505478\\tH\\nLY6K\\tH\\nINTS6L\\tH\\nBCAS1\\tH\\nLOC105376351\\tH\\nMRPL18\\tH\\nTRIM49\\tH\\nRUNX2\\tH\\nCITED2\\tH\\nLINC01436\\tH\\nABL2\\tH\\nUQCRFS1\\tH\\nOCLN\\tH\\nCCDC192\\tH\\nMERTK\\tH\\nSMKR1\\tH\\nCHCHD10\\tH\\nLOC100996634\\tH\\nTPI1P3\\tH\\nNTRK2\\tH\\nEMC6\\tH\\nLOC101928858\\tH\\nRARRES1\\tH\\nCLDN19\\tH\\nCLYBL\\tH\\nNDUFAF5
\\tH\\nTIMM13\\tH\\nICAM1\\tH\\nRNF181\\tH\\nNCAPH\\tH\\nSAMM50\\tH\\nNDUFS2\\tH\\nPGA3\\tH\\nC4orf19\\tH\\nAIMP2\\tH\\nMARVELD3\\tH\\nLCE6A\\tH\\nRPS25\\tH\\nAP1B1P1\\tH\\nCOL12A1\\tH\\nATF4\\tH\\nGAP43\\tH\\nACKR2\\tH\\nSLMO2-ATP5E\\tH\\nARHGEF9-IT1\\tH\\nGTF3A\\tH\\nCDC26\\tH\\nTIMMDC1\\tH\\nLSM1\\tH\\nTRIM59\\tH\\nCDR2\\tH\\nCPT1A\\tH\\nGINS4\\tH\\nLOC102546299\\tH\\nTRH\\tH\\nLINC00942\\tH\\nARHGAP11A\\tH\\nNMBR\\tH\\nPRC1\\tH\\nSERF2\\tH\\nMC5R\\tH\\nCOX11\\tH\\nEFHC2\\tH\\nPLVAP\\tH\\nFCGR1A\\tH\\nGCG\\tH\\nOR2G3\\tH\\nSNAPIN\\tH\\nWBSCR28\\tH\\nPDCL3\\tH\\nFLJ40194\\tH\\nLOC407835\\tH\\nCT45A4\\tH\\nCCHCR1\\tH\\nUCHL3\\tH\\nMEP1B\\tH\\nNPIPB6\\tH\\nLOC101926940\\tH\\nLINC00959\\tH\\nLINC01180\\tH\\nDNAJC5G\\tH\\nFZD10\\tH\\nNDUFB8\\tH\\nERCC1\\tH\\nLOC389641\\tH\\nRPS14\\tH\\nARPC5L\\tH\\nDOCK10\\tH\\nLOC101928809\\tH\\nPLEKHA5\\tH\\nLINC00449\\tH\\nTFAP2B\\tH\\nMIR503HG\\tH\\nXG\\tH\\nCXCL3\\tH\\nCSTL1\\tH\\nLOC101928161\\tH\\nCOX6B1\\tH\\nCA8\\tH\\nIL1R1\\tH\\nLINC00619\\tH\\nGAGE1\\tH\\nNDUFA4\\tH\\nLINC01549\\tH\\nCCL16\\tH\\nERN2\\tH\\nALLC\\tH\\nCCDC43\\tH\\nFAM81B\\tH\\nMT2A\\tH\\nS100B\\tH\\nZSCAN12\\tH\\nCABP5\\tH\\nVAV3\\tH\\nIKZF3\\tH\\nDEFB118\\tH\\nDGCR6\\tH\\nLOC105371795\\tH\\nSLC28A3\\tH\\nLOC100129518\\tH\\nZNF503\\tH\\nJTB\\tH\\nLY9\\tH\\nMGC27345\\tH\\nMX2\\tH\\nLOC400002\\tH\\nUGGT2\\tH\\nNDUFA2\\tH\\nMFAP5\\tH\\nITGAM\\tH\\nXKR4\\tH\\nLINC01030\\tH\\nEBAG9\\tH\\nMAGEB5\\tH\\nTMEM150A\\tH\\nLOC101927653\\tH\\nEMC7\\tH\\nSIK1\\tH\\nEMB\\tH\\nDUXA\\tH\\nMIR3663HG\\tH\\nSPATA42\\tH\\nTNFRSF12A\\tH\\nLOC100507195\\tH\\nFAM78A\\tH\\nTENM2\\tH\\nLOC102724428\\tH\\nTRABD2A\\tH\\nTPTE2P3\\tH\\nRASAL1\\tH\\nITPRIP\\tH\\nADGRG6\\tH\\nVSIG4\\tH\\nADRBK2\\tH\\nTRIM49C\\tH\\nHOXC5\\tH\\nCMAHP\\tH\\nRPSAP58\\tH\\nOR7G3\\tH\\nLOC100288069\\tH\\nKRT9\\tH\\nARL6IP1\\tH\\nLINC00635\\tH\\nGPC3\\tH\\nSNX21\\tH\\nRIN2\\tH\\nMYHAS\\tH\\nPOTEE\\tH\\nCLEC2A\\tH\\nATP1A3\\tH\\nLOC105371267\\tH\\nLINC00696\\tH\\nBEND2\\tH\\nSPECC1\\tH\\nECM1\\tH\\nTSPAN1\\tH\\nFAM
86JP\\tH\\nP2RX7\\tH\\nTMEM106A\\tH\\nPTPRH\\tH\\nEIF3K\\tH\\nSYK\\tH\\nAGR3\\tH\\nLINC00396\\tH\\nMR1\\tH\\nSLC9A2\\tH\\nGSTZ1\\tH\\nDEFB1\\tH\\nLOC101928370\\tH\\nCALD1\\tH\\nLINC01351\\tH\\nBICD1\\tH\\nFAM231D\\tH\\nSFRP5\\tH\\nEFNA1\\tH\\nLOC101929054\\tH\\nMETTL21A\\tH\\nHOXB5\\tH\\nRYR2\\tH\\nTCEA3\\tH\\nGOLGA8F\\tH\\nARL6IP6\\tH\\nLOC105369891\\tH\\nFAM185A\\tH\\nCCDC124\\tH\\nLOC100499194\\tH\\nKDM6A\\tH\\nLONRF1\\tH\\nADRA2A\\tH\\nFAM210B\\tH\\nTRIM31\\tH\\nRAB39B\\tH\\nKIAA0513\\tH\\nIQUB\\tH\\nTLL1\\tH\\nLRRC15\\tH\\nLOC284294\\tH\\nNQO1\\tH\\nRMST\\tH\\nC12orf57\\tH\\nSIRT1\\tH\\nPDGFC\\tH\\nPPIAL4C\\tH\\nPPIAL4A\\tH\\nC18orf61\\tH\\nLOC283194\\tH\\nRPS23\\tH\\nIFNLR1\\tH\\nGOLGA8G\\tH\\nLY6G6F\\tH\\nLINC00671\\tH\\nRPL23A\\tH\\nLOC101929726\\tH\\nOR10Q1\\tH\\nRNF7\\tH\\nSMCP\\tH\\nNCK2\\tH\\nRNF148\\tH\\nMIR17HG\\tH\\nLINC00479\\tH\\nLINC00551\\tH\\nSIRT4\\tH\\nHERC5\\tH\\nZNF738\\tH\\nLINC01209\\tH\\nTOB2P1\\tH\\nESPL1\\tH\\nLINC00116\\tH\\nHK1\\tH\\nLBP\\tH\\nLOC105369632\\tH\\nVIM\\tH\\nDSEL\\tH\\nPOTEJ\\tH\\nUSP44\\tH\\nLOC101927415\\tH\\nHSPH1\\tH\\nENPP7P13\\tH\\nTNFAIP3\\tH\\nBHLHE41\\tH\\nETV7\\tH\\nKCNQ4\\tH\\nLOC100287792\\tH\\nLOC101929511\\tH\\nMROH5\\tH\\nOAZ3\\tH\\nPPP1R15A\\tH\\nIDI2\\tH\\nCYB561A3\\tH\\nARMC4\\tH\\nBHMT2\\tH\\nNETO2\\tH\\nSUCNR1\\tH\\nSSU72\\tH\\nLOC399886\\tH\\nDISC1\\tH\\nSTAMBP\\tH\\nNLGN1\\tH\\nHAX1\\tH\\nTNRC18P1\\tH\\nAKR1B1\\tH\\nULK4P3\\tH\\nC1QTNF3\\tH\\nCT47A7\\tH\\nWBSCR22\\tH\\nHCAR1\\tH\\nRGL1\\tH\\nLINC01606\\tH\\nCLPS\\tH\\nDUPD1\\tH\\nSSX1\\tH\\nGSTK1\\tH\\nSPRY4\\tH\\nNUDCD2\\tH\\nRECK\\tH\\nNOL4L\\tH\\nPCBP4\\tH\\nCNTNAP2\\tH\\nKCNE1\\tH\\nLOC400541\\tH\\nLINC00261\\tH\\nC9orf173\\tH\\nMRPL48\\tH\\nPOM121L9P\\tH\\nMKRN2OS\\tH\\nRALY\\tH\\nESM1\\tH\\nEID1\\tH\\nNUDT6\\tH\\nHINT3\\tH\\nIPMK\\tH\\nC11orf98\\tH\\nCRLF1\\tH\\nCFL1P1\\tH\\nTMPRSS9\\tH\\nCHMP2A\\tH\\nOLFM1\\tH\\nZNF511\\tH\\nB3GNT7\\tH\\nSIK3\\tH\\nACER3\\tH\\nCIDEC\\tH\\nADGRD1\\tH\\nSPC25\\tH\\nLOC101926911\\tH\\nPELI3\\tH\\nEXT1\\tH\\nPCAT5
\\tH\\nGDF15\\tH\\nMRPL47\\tH\\nPLSCR1\\tH\\nTOM1\\tH\\nC6\\tH\\nWDR87\\tH\\nFXYD5\\tH\\nCOBLL1\\tH\\nANGPT2\\tH\\nSRCIN1\\tH\\nSLC10A1\\tH\\nOAS1\\tH\\nMMP21\\tH\\nCOL19A1\\tH\\nGPR18\\tH\\nTMEM219\\tH\\nZNF296\\tH\\nUSP43\\tH\\nGOLGA2P9\\tH\\nRFX2\\tH\\nRAB27A\\tH\\nLOC102467217\\tH\\nMYH13\\tH\\nPHLPP2\\tH\\nLOC101928985\\tH\\nCDRT7\\tH\\nINTS6\\tH\\nHAS2\\tH\\nDZIP1\\tH\\nOR2V2\\tH\\nOR2H2\\tH\\nTSSC1\\tH\\nBOLA1\\tH\\nPABPC1P2\\tH\\nTMEM229A\\tH\\nATP8B1\\tH\\nLCNL1\\tH\\nDCDC5\\tH\\nSOD1\\tH\\nPAG1\\tH\\nCETN2\\tH\\nNCR1\\tH\\nTMEM100\\tH\\nURI1\\tH\\nTEKT4P2\\tH\\nPCAT1\\tH\\nSERTAD4\\tH\\nLINC00550\\tH\\nGLB1L\\tH\\nUNG\\tH\\nAGMAT\\tH\\nLOC101928540\\tH\\nZNF681\\tH\\nLINC01456\\tH\\nFCGR2C\\tH\\nABCG2\\tH\\nANAPC11\\tH\\nLOC102800447\\tH\\nCYLC2\\tH\\nC6orf226\\tH\\nREM2\\tH\\nBMPR1B\\tH\\nBECN1\\tH\\nADM\\tH\\nPDPR\\tH\\nKDM8\\tH\\nHMBS\\tH\\nMYO1H\\tH\\nLINC00493\\tH\\nFGF14\\tH\\nEIF2AK1\\tH\\nLOC101928489\\tH\\nKCNK1\\tH\\nCKS2\\tH\\nLOC101928035\\tH\\nLINC01221\\tH\\nEREG\\tH\\nNDUFB11\\tH\\nNARF\\tH\\nZC3HC1\\tH\\nADGRE2\\tH\\nUFC1\\tH\\nHOMER1\\tH\\nHDDC2\\tH\\nHIST1H3A\\tH\\nTNNT3\\tH\\nZNF670-ZNF695\\tH\\nGSR\\tH\\nNDRG4\\tH\\nTERC\\tH\\nFANCB\\tH\\nFFAR4\\tH\\nMGAM2\\tH\\nLRRTM4\\tH\\nINHBA\\tH\\nLOC403312\\tH\\nKLLN\\tH\\nDZANK1\\tH\\nRGS9BP\\tH\\nRIIAD1\\tH\\nARL2-SNX15\\tH\\nPLAU\\tH\\nSPDYE8P\\tH\\nSLC25A19\\tH\\nBMS1P6\\tH\\nZFYVE19\\tH\\nCTAGE1\\tH\\nMTIF3\\tH\\nSPACA4\\tH\\nSIPA1L1\\tH\\nSLC2A10\\tH\\nPGK1\\tH\\nGIF\\tH\\nMYH8\\tH\\nLOC101928098\\tH\\nFRMD4A\\tH\\nLINC01397\\tH\\nLIPE\\tH\\nTRIM49D2\\tH\\nPGM1\\tH\\nHRH4\\tH\\nLOC646241\\tH\\nLOC101927587\\tH\\nCTD-2201I18.1\\tH\\nRAPGEF4\\tH\\nRUNX1\\tH\\nC5\\tH\\nTRIM49D1\\tH\\nLOC100508046\\tH\\nLOC101928885\\tH\\nUCHL1\\tH\\nR3HDM4\\tH\\nMAP9\\tH\\nMIF4GD\\tH\\nLOC100190986\\tH\\nCOQ2\\tH\\nKNTC1\\tH\\nSAXO1\\tH\\nLOC105369860\\tH\\nFPR1\\tH\\nGP6\\tH\\nEIF2S2\\tH\\nLINC00461\\tH\\nHIST1H2AH\\tH\\nDHRS7\\tH\\nCHST8\\tH\\nHAGH\\tH\\nC4orf3\\tH\\nNMUR2\\tH\\nAKR1C3\\tH\\nLRRC70\\tH\\nREX
O2\\tH\\nPRH1-TAS2R14\\tH\\nSLC9A1\\tH\\nMNAT1\\tH\\nSLC37A4\\tH\\nMGC34796\\tH\\nHSPB9\\tH\\nCADM3\\tH\\nMYEOV2\\tH\\nKRTAP6-3\\tH\\nARNTL2\\tH\\nENPP2\\tH\\nCUBN\\tH\\nLOC339059\\tH\\nGSDMA\\tH\\nBTG3\\tH\\nSTBD1\\tH\\nNAV3\\tH\\nALDH1L2\\tH\\nZBTB21\\tH\\nSPATA5\\tH\\nMRPL57\\tH\\nCWC15\\tH\\nNOMO3\\tH\\nUBTD1\\tH\\nIFI30\\tH\\nFMNL2\\tH\\nPRMT3\\tH\\nLOC101927692\\tH\\nNTPCR\\tH\\nDHRS7B\\tH\\nTBCB\\tH\\nC3orf58\\tH\\nKRT222\\tH\\nWRB-SH3BGR\\tH\\nLOC101928580\\tH\\nRWDD1\\tH\\nNKIRAS1\\tH\\nABCA1\\tH\\nCASC20\\tH\\nRTN4IP1\\tH\\nSPATA6L\\tH\\nLUZP1\\tH\\nCARS2\\tH\\nC2orf61\\tH\\nLOC102467226\\tH\\nMIR3945HG\\tH\\nFGF9\\tH\\nVRTN\\tH\\nPCDH18\\tH\\nPOLR3K\\tH\\nLINC00566\\tH\\nAOX1\\tH\\nPDLIM7\\tH\\nLOC102577426\\tH\\nUSE1\\tH\\nGINS2\\tH\\nRAPGEF2\\tH\\nLINC01492\\tH\\nTMEM70\\tH\\nCOX17\\tH\\nSRRM4\\tH\\nLOC101928295\\tH\\nISCA1\\tH\\nIL18R1\\tH\\nAPOC4-APOC2\\tH\\nMT1M\\tH\\nLMO2\\tH\\nSCN4B\\tH\\nRDH12\\tH\\nFEZF2\\tH\\nTMEM150B\\tH\\nCPS1\\tH\\nSLC35G2\\tH\\nTPM3\\tH\\nREG1A\\tH\\nLINC01133\\tH\\nAFAP1L2\\tH\\nPSENEN\\tH\\nFAM72A\\tH\\nLINC00467\\tH\\nHELLS\\tH\\nLINC00367\\tH\\nPLXNA4\\tH\\nC11orf73\\tH\\nKLF7\\tH\\nYBEY\\tH\\nOIT3\\tH\\nLOC101929681\\tH\\nPTPRD\\tH\\nLOC100422737\\tH\\nLINC01411\\tH\\nTSPAN17\\tH\\nUGT1A10\\tH\\nIFT22\\tH\\nRPS10P7\\tH\\nDBIL5P2\\tH\\nIFI44\\tH\\nBTK\\tH\\nMDP1\\tH\\nLOC284080\\tH\\nCYP2C18\\tH\\nFBXW12\\tH\\nCORO7-PAM16\\tH\\nTMEM14B\\tH\\nPOLQ\\tH\\nAFF4\\tH\\nLHFPL4\\tH\\nABTB2\\tH\\nNOMO1\\tH\\nFHDC1\\tH\\nTRIM38\\tH\\nCTSV\\tH\\nGATA3\\tH\\nLINCR-0002\\tH\\nCFAP20\\tH\\nNDUFB6\\tH\\nRASA4\\tH\\nLOC100288798\\tH\\nCFAP206\\tH\\nROR1\\tH\\nACOT13\\tH\\nLOC285626\\tH\\nBANF1\\tH\\nDCAF4L2\\tH\\nSH3BGR\\tH\\nOTOA\\tH\\nCD226\\tH\\nSLC29A4\\tH\\nRPL18\\tH\\nPRDX3\\tH\\nFGB\\tH\\nTEX14\\tH\\nFBN1\\tH\\nEPHA3\\tH\\n'}}]" ] }, - "execution_count": 28, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -2509,7 +2550,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 31, 
"metadata": {}, "outputs": [ { @@ -2518,7 +2559,7 @@ "(2421, 12)" ] }, - "execution_count": 29, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -2529,7 +2570,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 32, "metadata": {}, "outputs": [ { @@ -2575,7 +2616,7 @@ " ENSG00000165672\n", " Ensembl\n", " [{'disease_name': 'SPINOCEREBELLAR ATAXIA, AUT...\n", - " [{'disease_name': nan, 'UMLS': nan, 'source': ...\n", + " [{'disease_name': nan, 'id': nan, 'source': nan}]\n", " [{'chembl_id': nan, 'drugbank_id': nan, 'compo...\n", " [{'pathway_id': 933.0, 'pathway_label': 'Elect...\n", " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", @@ -2590,7 +2631,7 @@ " ENSG00000171564\n", " Ensembl\n", " [{'disease_name': 'Cardiovascular Diseases', '...\n", - " [{'disease_name': nan, 'UMLS': nan, 'source': ...\n", + " [{'disease_name': nan, 'id': nan, 'source': nan}]\n", " [{'chembl_id': 'CHEMBL2109072', 'drugbank_id':...\n", " [{'pathway_id': 951.0, 'pathway_label': 'Coagu...\n", " [{'pathway_id': 'WP5115', 'pathway_label': 'Ne...\n", @@ -2605,7 +2646,7 @@ " ENSG00000121101\n", " Ensembl\n", " [{'disease_name': 'Non-obstructive azoospermia...\n", - " [{'disease_name': nan, 'UMLS': nan, 'source': ...\n", + " [{'disease_name': nan, 'id': nan, 'source': nan}]\n", " [{'chembl_id': nan, 'drugbank_id': nan, 'compo...\n", " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", @@ -2620,7 +2661,7 @@ " ENSG00000166147\n", " Ensembl\n", " [{'disease_name': 'Marfan Syndrome', 'HPO': ''...\n", - " [{'disease_name': nan, 'UMLS': nan, 'source': ...\n", + " [{'disease_name': nan, 'id': nan, 'source': nan}]\n", " [{'chembl_id': nan, 'drugbank_id': nan, 'compo...\n", " [{'pathway_id': 945.0, 'pathway_label': 'Nsp9 ...\n", " [{'pathway_id': 'WP3668', 'pathway_label': 'Hy...\n", @@ -2635,7 +2676,7 @@ " ENSG00000044524\n", " Ensembl\n", " [{'disease_name': 'Adenocarcinoma of lung 
(dis...\n", - " [{'disease_name': nan, 'UMLS': nan, 'source': ...\n", + " [{'disease_name': nan, 'id': nan, 'source': nan}]\n", " [{'chembl_id': 'CHEMBL24828', 'drugbank_id': '...\n", " [{'pathway_id': nan, 'pathway_label': nan, 'pa...\n", " [{'pathway_id': 'WP2882', 'pathway_label': 'Nu...\n", @@ -2663,11 +2704,11 @@ "2420 [{'disease_name': 'Adenocarcinoma of lung (dis... \n", "\n", " literature_based_info \\\n", - "2416 [{'disease_name': nan, 'UMLS': nan, 'source': ... \n", - "2417 [{'disease_name': nan, 'UMLS': nan, 'source': ... \n", - "2418 [{'disease_name': nan, 'UMLS': nan, 'source': ... \n", - "2419 [{'disease_name': nan, 'UMLS': nan, 'source': ... \n", - "2420 [{'disease_name': nan, 'UMLS': nan, 'source': ... \n", + "2416 [{'disease_name': nan, 'id': nan, 'source': nan}] \n", + "2417 [{'disease_name': nan, 'id': nan, 'source': nan}] \n", + "2418 [{'disease_name': nan, 'id': nan, 'source': nan}] \n", + "2419 [{'disease_name': nan, 'id': nan, 'source': nan}] \n", + "2420 [{'disease_name': nan, 'id': nan, 'source': nan}] \n", "\n", " OpenTargets_gene_compounds \\\n", "2416 [{'chembl_id': nan, 'drugbank_id': nan, 'compo... \n", @@ -2712,7 +2753,7 @@ "2420 [{'stringdb_link_to': 'EFNA2', 'Ensembl': 'ENS... 
" ] }, - "execution_count": 30, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -2730,7 +2771,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 33, "metadata": {}, "outputs": [], "source": [ @@ -2815,7 +2856,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 33, "metadata": {}, "outputs": [], "source": [ @@ -2833,7 +2874,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "##### Steps to load in Neo4j" + "##### Steps to load graph in Neo4j" ] }, { diff --git a/src/pyBiodatafuse/constants.py b/src/pyBiodatafuse/constants.py index 280c263e..632270d6 100644 --- a/src/pyBiodatafuse/constants.py +++ b/src/pyBiodatafuse/constants.py @@ -99,6 +99,11 @@ # literature based disease info LITERATURE_DISEASE_COL = "literature_based_info" +LITERATURE_DISEASE_OUTPUT_DICT = { + "disease_name": str, + "id": str, + "source": str, +} # Open Targets - Disease OPENTARGETS_DISEASE_OUTPUT_DICT = { diff --git a/src/pyBiodatafuse/graph/generator.py b/src/pyBiodatafuse/graph/generator.py index 3ab1af8c..e6486cd4 100644 --- a/src/pyBiodatafuse/graph/generator.py +++ b/src/pyBiodatafuse/graph/generator.py @@ -230,8 +230,8 @@ def add_literature_gene_disease_subgraph(g, gene_node_label, annot_list): annot_node_attrs = LITERATURE_DISEASE_NODE_ATTRS.copy() annot_node_attrs["source"] = annot["source"] annot_node_attrs["name"] = annot["disease_name"] - annot_node_attrs["id"] = annot["UMLS"] - annot_node_attrs["UMLS"] = annot["UMLS"] + annot_node_attrs["id"] = annot["id"] + annot_node_attrs["UMLS"] = annot["id"] g.add_node(annot_node_label, attr_dict=annot_node_attrs) From 36176f9830b23371bb56560bdb0d3cd902027e53 Mon Sep 17 00:00:00 2001 From: Tooba Abbassi-Daloii Date: Wed, 18 Sep 2024 11:47:07 +0200 Subject: [PATCH 6/7] update notebooks --- examples/gene_to_graph_workflow.ipynb | 318 +++++++++++++----------- examples/usecases/PCS/PCS_usecase.ipynb | 41 +-- 2 files changed, 193 insertions(+), 166 deletions(-) 
diff --git a/examples/gene_to_graph_workflow.ipynb b/examples/gene_to_graph_workflow.ipynb index d295b201..ee307947 100644 --- a/examples/gene_to_graph_workflow.ipynb +++ b/examples/gene_to_graph_workflow.ipynb @@ -11,7 +11,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -36,7 +36,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -83,7 +83,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -92,7 +92,7 @@ "6" ] }, - "execution_count": 4, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -154,7 +154,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -215,7 +215,7 @@ "4 SLC25A1" ] }, - "execution_count": 5, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -269,48 +269,48 @@ " 0\n", " CHRNG\n", " HGNC\n", - " GO:0042391\n", - " Gene Ontology\n", + " P07510\n", + " Uniprot-TrEMBL\n", " \n", " \n", " 1\n", " CHRNG\n", " HGNC\n", - " A_23_P5718\n", - " Agilent\n", + " ENSG00000196811\n", + " Ensembl\n", " \n", " \n", " 2\n", " CHRNG\n", " HGNC\n", - " GO:0016021\n", - " Gene Ontology\n", + " HGNC:1967\n", + " HGNC Accession Number\n", " \n", " \n", " 3\n", " CHRNG\n", " HGNC\n", - " GO:0016020\n", - " Gene Ontology\n", + " A0A6F7YAP6\n", + " Uniprot-TrEMBL\n", " \n", " \n", " 4\n", " CHRNG\n", " HGNC\n", - " GO:0006936\n", - " Gene Ontology\n", + " CHRNG\n", + " HGNC\n", " \n", " \n", "\n", "" ], "text/plain": [ - " identifier identifier.source target target.source\n", - "0 CHRNG HGNC GO:0042391 Gene Ontology\n", - "1 CHRNG HGNC A_23_P5718 Agilent\n", - "2 CHRNG HGNC GO:0016021 Gene Ontology\n", - "3 CHRNG HGNC GO:0016020 Gene Ontology\n", - "4 CHRNG HGNC GO:0006936 Gene Ontology" + " identifier identifier.source target target.source\n", + "0 CHRNG HGNC 
P07510 Uniprot-TrEMBL\n", + "1 CHRNG HGNC ENSG00000196811 Ensembl\n", + "2 CHRNG HGNC HGNC:1967 HGNC Accession Number\n", + "3 CHRNG HGNC A0A6F7YAP6 Uniprot-TrEMBL\n", + "4 CHRNG HGNC CHRNG HGNC" ] }, "execution_count": 5, @@ -1968,111 +1968,7 @@ "cell_type": "code", "execution_count": 12, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "E:\\BioDataFuse\\pyBiodatafuse\\src\\pyBiodatafuse\\annotators\\opentargets.py:1075: UserWarning: Not all values in column 'adverse_effect_count' have the correct type ''.\n", - " check_columns_against_constants(\n", - "E:\\BioDataFuse\\pyBiodatafuse\\src\\pyBiodatafuse\\annotators\\opentargets.py:1075: UserWarning: Not all values in column 'adverse_effect' have the correct type ''.\n", - " check_columns_against_constants(\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
identifieridentifier.sourcetargettarget.sourceOpenTargets_disease_compounds
0UMLS_C0000786UMLSEFO_1001255EFO[{'chembl_id': 'CHEMBL1276308', 'drugbank_id':...
1UMLS_C0000889UMLSEFO_1000660EFO[{'chembl_id': 'CHEMBL1431', 'drugbank_id': 'D...
2UMLS_C0002940UMLSEFO_0004264EFO[{'chembl_id': 'CHEMBL1491', 'drugbank_id': 'D...
3UMLS_C0002940UMLSEFO_0009659EFO[{'chembl_id': 'CHEMBL526', 'drugbank_id': 'DB...
4UMLS_C0003873UMLSEFO_0000685EFO[{'chembl_id': 'CHEMBL2103743', 'drugbank_id':...
\n", - "
" - ], - "text/plain": [ - " identifier identifier.source target target.source \\\n", - "0 UMLS_C0000786 UMLS EFO_1001255 EFO \n", - "1 UMLS_C0000889 UMLS EFO_1000660 EFO \n", - "2 UMLS_C0002940 UMLS EFO_0004264 EFO \n", - "3 UMLS_C0002940 UMLS EFO_0009659 EFO \n", - "4 UMLS_C0003873 UMLS EFO_0000685 EFO \n", - "\n", - " OpenTargets_disease_compounds \n", - "0 [{'chembl_id': 'CHEMBL1276308', 'drugbank_id':... \n", - "1 [{'chembl_id': 'CHEMBL1431', 'drugbank_id': 'D... \n", - "2 [{'chembl_id': 'CHEMBL1491', 'drugbank_id': 'D... \n", - "3 [{'chembl_id': 'CHEMBL526', 'drugbank_id': 'DB... \n", - "4 [{'chembl_id': 'CHEMBL2103743', 'drugbank_id':... " - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "(\n", " opentargets_disease_compound_df,\n", @@ -2196,7 +2092,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -2288,7 +2184,7 @@ "4 [{'pathway_id': nan, 'pathway_label': nan, 'pa... 
" ] }, - "execution_count": 14, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -2300,6 +2196,36 @@ "minerva_df.head()" ] }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'datasource': 'MINERVA',\n", + " 'metadata': {'source_version': '17.1.3'},\n", + " 'query': {'size': 6,\n", + " 'input_type': 'Ensembl',\n", + " 'MINERVA project': 'COVID19 Disease Map',\n", + " 'MINERVA project URL': 'https://covid19map.elixir-luxembourg.org/minerva/',\n", + " 'time': '0:00:47.612140',\n", + " 'date': '2024-09-10 15:23:35',\n", + " 'url': 'https://covid19map.elixir-luxembourg.org/minerva/',\n", + " 'number_of_added_nodes': 1,\n", + " 'number_of_added_edges': 1}}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "minerva_metadata" + ] + }, { "cell_type": "code", "execution_count": 15, @@ -4002,10 +3928,10 @@ } ], "source": [ - "combined_df = generator.load_dataframe_from_pickle(\"combined_df.pkl\")\n", - "opentargets_disease_compound_df = generator.load_dataframe_from_pickle(\n", - " \"opentargets_disease_compound_df.pkl\"\n", - ")\n", + "# combined_df = generator.load_dataframe_from_pickle(\"combined_df.pkl\")\n", + "# opentargets_disease_compound_df = generator.load_dataframe_from_pickle(\n", + "# \"opentargets_disease_compound_df.pkl\"\n", + "# )\n", "\n", "combined_df.head()" ] @@ -4159,25 +4085,123 @@ "# plt.show()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Cytoscape\n", + "Make sure that Cytoscape is open" + ] + }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pyBiodatafuse.graph import cytoscape\n", + "\n", + "cytoscape.load_graph(pygraph, network_name=\"PCS network\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Neo4j" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pyBiodatafuse.graph import neo4j\n", + "\n", + "neo4j.save_graph_to_graphml(pygraph, \"pcs_networkx_graph.graphml\")" + ] + }, + { + "cell_type": "markdown", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Applying default style...\n", - "Applying preferred layout\n" - ] - } - ], "source": [ - "from pyBiodatafuse.graph import cytoscape, neo4j\n", + "##### Steps to load the graph in Neo4j" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Add `.graphml` file in **import** subfolder of the DBMS folder\n", + "- Install apoc plugin\n", + "- Add `apoc.conf` file to **conf** subfolder of the DBMS folder\n", + "\n", + " `apoc.conf` file:\n", + " ```\n", + " apoc.trigger.enabled=true\n", + " apoc.import.file.enabled=true\n", + " apoc.export.file.enabled=true\n", + " apoc.import.file.use_neo4j_config=true\n", + " ```\n", + "\n", + "- Open Neo4j Browser\n", + "- (Optional, only run if you have imported a graph before) Remove all the nodes before importing `.graphml` file\n", + "\n", + " ```MATCH (n) DETACH DELETE n```\n", + "\n", + "- Import `.graphml` file\n", + "\n", + " ```call apoc.import.graphml('file:///pcs_networkx_graph.graphml',{readLabels:TRUE})```\n", + "\n", + "- Add indexes after importing the graph for improving the performance of queries\n", + "\n", + " ```create index Gene for (n:Gene) on (n.node_type)```\n", + "\n", + " ```create index Pathway for (n:Pathway) on (n.node_type)```\n", + "\n", + " ```create index `Biological Process` for (n:`Biological Process`) on (n.node_type)```\n", + "\n", + " ```create index `Molecular Function` for (n:`Molecular Function`) on (n.node_type)```\n", + "\n", + " ```create index `Cellular Component` for (n:`Cellular Component`) on (n.node_type)```\n", + "\n", + " ```create index Disease for (n:Disease) on (n.node_type)```\n", + "\n", + " ```create index Compound 
for (n:Compound) on (n.node_type)```\n", + "\n", + " ```create index `Side Effect` for (n:`Side Effect`) on (n.node_type)```\n", + " \n", + "\n", + "- Count the number of each node type\n", + " - total (```MATCH (n) RETURN count(n)```) \n", + " - Gene (```MATCH (n:Gene) RETURN count(n)```)\n", + " - Pathway (```MATCH (n:Pathway) RETURN count(n)```)\n", + " - WikiPathways (```MATCH (n:Pathway {source: \"WikiPathways\"}) RETURN count(n)```) \n", + " - OpenTargets, Reactome (```MATCH (n:Pathway {source: \"OpenTargets\"}) RETURN count(n)```) \n", + " - MINERVA (```MATCH (n:Pathway {source: \"MINERVA\"}) RETURN count(n)```) \n", + " - Biological Process (```MATCH (n:`Biological Process`) RETURN count(n)```) \n", + " - Molecular Function (```MATCH (n:`Molecular Function`) RETURN count(n)```) \n", + " - Cellular Component (```MATCH (n:`Cellular Component`) RETURN count(n)```) \n", + " - Disease (```MATCH (n:Disease) RETURN count(n)```) \n", + " - Compound (```MATCH (n:Compound) RETURN count(n)```)\n", + " - Side Effect (```MATCH (n:`Side Effect`) RETURN count(n)```) \n", + "- Count the number of each edge type\n", + " - total (```MATCH ()-[r]->() RETURN count(r)```) \n", + " - interacts_with (```MATCH ()-[r:interacts_with]->() RETURN count(r)```) \n", + " - part_of (```MATCH ()-[r:part_of]->() RETURN count(r)```) \n", + " - WikiPathways (```MATCH ()-[r:part_of {source: \"WikiPathways\"}]->() RETURN count(r)```) \n", + " - OpenTargets, Reactome (```MATCH ()-[r:part_of {source: \"OpenTargets\"}]->() RETURN count(r)```) \n", + " - MINERVA (```MATCH ()-[r:part_of {source: \"MINERVA\"}]->() RETURN count(r)```) \n", + " - activates (```MATCH ()-[r:activates]->() RETURN count(r)```) \n", + " - treats (```MATCH ()-[r:treats]->() RETURN count(r)```) \n", + " - has_side_effect (```MATCH ()-[r:has_side_effect]->() RETURN count(r)```) \n", + " - inhibits (```MATCH ()-[r:inhibits]->() RETURN count(r)```) = 71\n", + " - associated_with (```MATCH ()-[r:associated_with]->() RETURN 
count(r)```) \n", + "\n", + "- Export the graph as a `.csv` file\n", "\n", - "neo4j.save_graph_to_graphml(pygraph, output_path=\"graph_to-test.graphml\")\n", - "cytoscape.load_graph(pygraph, network_name=\"test_graph\")" + " ```call apoc.export.csv.all(\"pcs_networkx_graph.csv\",{})```" ] } ], diff --git a/examples/usecases/PCS/PCS_usecase.ipynb b/examples/usecases/PCS/PCS_usecase.ipynb index d67554f2..7241b81c 100644 --- a/examples/usecases/PCS/PCS_usecase.ipynb +++ b/examples/usecases/PCS/PCS_usecase.ipynb @@ -2874,7 +2874,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "##### Steps to load graph in Neo4j" + "##### Steps to load the graph in Neo4j" ] }, { @@ -2897,32 +2897,37 @@ "- (Optionl, only run if you have imported a graph before) Remove all the nodes before importing `.graphml` file\n", "\n", " ```\n", - " neo4j$ MATCH (n) DETACH DELETE n\n", + " MATCH (n) DETACH DELETE n\n", " ```\n", "\n", "- Import `.graphml` file\n", "\n", " ```\n", - " neo4j$ call apoc.import.graphml('file:///pcs_networkx_graph.graphml',{readLabels:TRUE})\n", + " call apoc.import.graphml('file:///pcs_networkx_graph.graphml',{readLabels:TRUE})\n", " ```\n", "\n", "- Add indexes after importing the graph for improving the performance of queries\n", "\n", - " ```\n", - " neo4j$ create index Gene for (n:Gene) on (n.node_type)\n", - " neo4j$ create index Pathway for (n:Pathway) on (n.node_type)\n", - " neo4j$ create index `Biological Process` for (n:`Biological Process`) on (n.node_type)\n", - " neo4j$ create index `Molecular Function` for (n:`Molecular Function`) on (n.node_type)\n", - " neo4j$ create index `Cellular Component` for (n:`Cellular Component`) on (n.node_type)\n", - " neo4j$ create index Disease for (n:Disease) on (n.node_type)\n", - " neo4j$ create index Compound for (n:Compound) on (n.node_type)\n", - " neo4j$ create index `Side Effect` for (n:`Side Effect`) on (n.node_type)\n", - " ```\n", + " ```create index Gene for (n:Gene) on (n.node_type)```\n", + 
"\n", + " ```create index Pathway for (n:Pathway) on (n.node_type)```\n", + "\n", + " ```create index `Biological Process` for (n:`Biological Process`) on (n.node_type)```\n", + "\n", + " ```create index `Molecular Function` for (n:`Molecular Function`) on (n.node_type)```\n", + "\n", + " ```create index `Cellular Component` for (n:`Cellular Component`) on (n.node_type)```\n", + "\n", + " ```create index Disease for (n:Disease) on (n.node_type)```\n", + "\n", + " ```create index Compound for (n:Compound) on (n.node_type)```\n", + "\n", + " ```create index `Side Effect` for (n:`Side Effect`) on (n.node_type)```\n", "\n", "- Count the number of each node type\n", - " - total (```neo4j$ MATCH (n) RETURN count(n)```) = 19859\n", - " - Gene (```neo4j$ MATCH (n:Gene) RETURN count(n)```) = 1667\n", - " - Pathway (```neo4j$ MATCH (n:Pathway) RETURN count(n)```) = 1847\n", + " - total (```MATCH (n) RETURN count(n)```) = 19859\n", + " - Gene (```MATCH (n:Gene) RETURN count(n)```) = 1667\n", + " - Pathway (```MATCH (n:Pathway) RETURN count(n)```) = 1847\n", " - WikiPathways (```MATCH (n:Pathway {source: \"WikiPathways\"}) RETURN count(n)```) = 678\n", " - OpenTargets, Reactome (```MATCH (n:Pathway {source: \"OpenTargets\"}) RETURN count(n)```) = 1154\n", " - MINERVA (```MATCH (n:Pathway {source: \"MINERVA\"}) RETURN count(n)```) = 15\n", @@ -2947,9 +2952,7 @@ "\n", "- Export the graph as a `.csv` file\n", "\n", - " ```\n", - " neo4j$ call apoc.export.csv.all(\"pcs_networkx_graph.csv\",{})\n", - " ```" + " ```call apoc.export.csv.all(\"pcs_networkx_graph.csv\",{})```" ] }, { From e860c719848b3250986503cdeb2c9beee57fb8af Mon Sep 17 00:00:00 2001 From: Tooba Abbassi-Daloii Date: Wed, 18 Sep 2024 11:47:59 +0200 Subject: [PATCH 7/7] tox fix --- examples/usecases/PCS/PCS_usecase.ipynb | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/usecases/PCS/PCS_usecase.ipynb b/examples/usecases/PCS/PCS_usecase.ipynb index 7241b81c..cd68ffda 100644 --- 
a/examples/usecases/PCS/PCS_usecase.ipynb +++ b/examples/usecases/PCS/PCS_usecase.ipynb @@ -731,6 +731,7 @@ " else:\n", " return [{\"disease_name\": np.nan, \"id\": np.nan, \"source\": np.nan}]\n", "\n", + "\n", "disgenet_df[LITERATURE_DISEASE_COL] = disgenet_df[\"identifier\"].apply(get_literature_based_info)\n", "\n", "disgenet_df.head()"