diff --git a/examples/rdf_example.ipynb b/examples/rdf_example.ipynb index 06bbadc..83f52e6 100644 --- a/examples/rdf_example.ipynb +++ b/examples/rdf_example.ipynb @@ -11,9 +11,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/lib/python3/dist-packages/requests/__init__.py:87: RequestsDependencyWarning: urllib3 (1.26.5) or chardet (5.2.0) doesn't match a supported version!\n", + " warnings.warn(\"urllib3 ({}) or chardet ({}) doesn't match a supported \"\n", + "/home/javi/.local/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], "source": [ "import os\n", "os.chdir(os.path.abspath(os.path.join(os.getcwd(), \"..\", \"src\")))\n", @@ -478,7 +489,7 @@ { "data": { "text/plain": [ - ")>" + ")>" ] }, "execution_count": 6, @@ -492,7 +503,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -75921,7 +75932,7 @@ " a obo:NCIT_C25338 ;\n", " sio:has_value 6e-01 .\n", "\n", - " dcterms:created \"2024-09-20T12:03:58.779325+00:00\"^^xsd:dateTime ;\n", + " dcterms:created \"2024-09-20T13:21:14.649645+00:00\"^^xsd:dateTime ;\n", " dcterms:creator ;\n", " dcterms:identifier .\n", "\n", @@ -76350,7 +76361,8 @@ " obo:OMIM_620369,\n", " obo:UMLS_C5830501 .\n", "\n", - " sio:SIO_000068 .\n", + " a obo:NCIT_C17021 ;\n", + " sio:SIO_000068 .\n", "\n", " a obo:PW_0000001 ;\n", " rdfs:label \"Arrhythmogenic right ventricular cardiomyopathy\"^^xsd:string ;\n", diff --git a/src/pyBiodatafuse/constants.py b/src/pyBiodatafuse/constants.py index 6b78614..4951c6c 100644 --- a/src/pyBiodatafuse/constants.py +++ b/src/pyBiodatafuse/constants.py @@ -489,13 +489,14 @@ 
"gene_expression_value_node": f"{NAMESPACE_BINDINGS['sio']}SIO_001077",
     "anatomical_entity_node": f"{NAMESPACE_BINDINGS['sio']}UBERON_0001062",
     "tested_substance_node": "http://www.bioassayontology.org/bao#BAO_0003059",
-    "source_database" : f"{NAMESPACE_BINDINGS['sio']}SIO_000750",
-    "experimental_process_node" : f"{NAMESPACE_BINDINGS['obo']}EFO_0002694",
+    "source_database": f"{NAMESPACE_BINDINGS['sio']}SIO_000750",
+    "experimental_process_node": f"{NAMESPACE_BINDINGS['obo']}EFO_0002694",
     "pathway_node": f"{NAMESPACE_BINDINGS['obo']}PW_0000001",
-    "adverse_event_node" : f"{NAMESPACE_BINDINGS['obo']}OAE_0000001",
-    "ensemble" : "http://identifiers.org/ensembl/",
-    "ncbi_disease" : "https://www.ncbi.nlm.nih.gov/medgen/",
-    "article" : f"{NAMESPACE_BINDINGS['obo']}IAO:0000013",
+    "adverse_event_node": f"{NAMESPACE_BINDINGS['obo']}OAE_0000001",
+    "ensemble": "http://identifiers.org/ensembl/",
+    "ncbi_disease": "https://www.ncbi.nlm.nih.gov/medgen/",
+    "article": f"{NAMESPACE_BINDINGS['obo']}IAO:0000013",
+    "protein_node": "http://purl.obolibrary.org/obo/NCIT_C17021",
 }
 
 ## PREDICATES
diff --git a/src/pyBiodatafuse/graph/rdf.py b/src/pyBiodatafuse/graph/rdf.py
index f42ef7f..d5633b3 100644
--- a/src/pyBiodatafuse/graph/rdf.py
+++ b/src/pyBiodatafuse/graph/rdf.py
@@ -871,7 +871,75 @@ def add_transporter_inhibitor_node(g: Graph, transporter_inhibitor_data:dict, ba
             URIRef("https://purl.obolibrary.org/GO_0032410"),
         )
     )
-    
+    g.add(
+        (
+            URIRef(f"https://www.uniprot.org/uniprotkb/{uniprot_trembl_id}"),
+            RDF.type,
+            URIRef(NODE_TYPES['protein_node']),
+        )
+    )
+
+
+def add_ppi_data(g: Graph, entry: dict, base_uri: str, new_uris: dict) -> URIRef:
+    """Add a protein-protein interaction (PPI) node to the graph.
+
+    The interaction node is typed as NCIT_C18469. The interacting protein and
+    the partner gene are each typed and attached to the interaction node with
+    ``sio_is_part_of``. A score node carrying the numeric score is added too.
+
+    :param g: RDFLib graph that receives the triples
+    :param entry: the PPI dictionary; ``stringdb_link_to``, ``Ensembl`` and
+        ``score`` are read from it
+    :param base_uri: the base URI for the project
+    :param new_uris: dictionary with project node URIs; ``score_base_node`` is read
+    :returns: the PPI node, or ``None`` (graph left untouched) when the entry
+        has no interactor identifier or no numeric score
+    :raises KeyError: if ``new_uris`` lacks ``score_base_node``
+    """
+    stringdb_link_to = entry.get("stringdb_link_to")
+    if not stringdb_link_to:
+        return None
+
+    # NOTE(review): the submitted code read "stringdb_link_to" a second time
+    # here, so the gene URI reused the identifier of the protein URI. "Ensembl"
+    # is an assumed key name -- confirm against the StringDB annotator output.
+    # When the key is absent the previous value is used, as before.
+    ensembl = entry.get("Ensembl", stringdb_link_to)
+
+    # Convert before touching the graph so a bad score leaves no partial
+    # triples behind. float (not int) keeps fractional scores and matches the
+    # xsd:double literal written below.
+    try:
+        score = float(entry.get("score"))
+    except (TypeError, ValueError):
+        return None
+
+    pair_id = f"{stringdb_link_to}_{ensembl}"
+    # NOTE(review): "inhibition/" looks like a leftover from another node type;
+    # kept as submitted -- confirm the intended path segment for PPI nodes.
+    ppi_node = URIRef(base_uri + f"inhibition/{pair_id}")
+    protein_node = URIRef(f"https://www.uniprot.org/uniprotkb/{stringdb_link_to}")
+    gene_node = URIRef(f"http://identifiers.org/ensembl/{ensembl}")
+    score_node = URIRef(f"{new_uris['score_base_node']}/{pair_id}")
+    part_of = URIRef(PREDICATES["sio_is_part_of"])
+
+    g.add((ppi_node, RDF.type, URIRef("http://purl.obolibrary.org/obo/NCIT_C18469")))
+    g.add((protein_node, RDF.type, URIRef(NODE_TYPES["protein_node"])))
+    g.add((protein_node, part_of, ppi_node))
+    g.add((gene_node, part_of, ppi_node))
+    g.add((gene_node, RDF.type, URIRef(NODE_TYPES["gene_node"])))
+    # NOTE(review): no triple links the score node to ppi_node.
+    g.add((score_node, RDF.type, URIRef(NODE_TYPES["score_node"])))
+    g.add(
+        (
+            score_node,
+            URIRef(NAMESPACE_BINDINGS["sio"] + "has_value"),
+            Literal(score, datatype=XSD.double),
+        )
+    )
+    return ppi_node
+
+
 def generate_rdf(
     df: pd.DataFrame, base_uri: str, version_iri: str, author: str, orcid: str, metadata: dict
 ) -> Graph:
@@ -1016,7 +1084,11 @@ def generate_rdf(
         if transporter_inhibitor_data:
             for entry in transporter_inhibitor_data:
                 add_transporter_inhibitor_node(g, entry, base_uri)
-    
+        if stringdb_data:
+            # NOTE(review): assumes stringdb_data is a list of PPI dicts, like the loop above.
+            for entry in stringdb_data:
+                add_ppi_data(g, entry, base_uri, new_uris)
+
     # Add metadata to the RDF graph
     add_metadata(
         g=g,