Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WikiPathways consistent spelling #13

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/pyBiodatafuse/annotators/opentargets.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,9 @@ def get_gene_drug_interactions(bridgedb_df: pd.DataFrame) -> Tuple[pd.DataFrame,
return merged_df, version_metadata


def get_gene_disease_associations(bridgedb_df: pd.DataFrame) -> Tuple[pd.DataFrame, dict]:
def get_gene_disease_associations(
bridgedb_df: pd.DataFrame,
) -> Tuple[pd.DataFrame, dict]:
"""Get information about diseases associated with genes based on OpenTargets.

:param bridgedb_df: BridgeDb output for creating the list of gene ids to query
Expand Down
32 changes: 21 additions & 11 deletions src/pyBiodatafuse/annotators/stringdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@


def get_version_stringdb() -> dict:
"""Get version of StringDB API.
"""Get version of STRING-DB API.

:returns: a dictionary containing the version information
"""
Expand All @@ -32,11 +32,11 @@


def _format_data(row, network_df):
"""Helper function to reformat StringDB response.
"""Helper function to reformat STRING-DB response.

:param row: input_df row
:param network_df: StringDB response annotation DataFrame
:returns: StringDB reformatted annotation.
:param network_df: STRING-DB response annotation DataFrame
:returns: STRING-DB reformatted annotation.
"""
gene_ppi_links = list()

Expand All @@ -46,25 +46,31 @@
if row_arr["preferredName_A"] == row["identifier"]:
if row_arr["preferredName_B"] not in target_links_set:
gene_ppi_links.append(
{"stringdb_link_to": row_arr["preferredName_B"], "score": row_arr["score"]}
{
"stringdb_link_to": row_arr["preferredName_B"],
"score": row_arr["score"],
}
)
target_links_set.add(row_arr["preferredName_B"])

elif row_arr["preferredName_B"] == row["identifier"]:
if row_arr["preferredName_A"] not in target_links_set:
gene_ppi_links.append(
{"stringdb_link_to": row_arr["preferredName_A"], "score": row_arr["score"]}
{
"stringdb_link_to": row_arr["preferredName_A"],
"score": row_arr["score"],
}
)
target_links_set.add(row_arr["preferredName_A"])

return gene_ppi_links


def get_ppi(bridgedb_df: pd.DataFrame):
"""Annotate genes with protein-protein interactions from StringDB.
"""Annotate genes with protein-protein interactions from STRING-DB.

:param bridgedb_df: BridgeDb output for creating the list of gene ids to query
:returns: a DataFrame containing the StringDB output and dictionary of the metadata.
:returns: a DataFrame containing the STRING-DB output and dictionary of the metadata.
"""
# Record the start time
start_time = datetime.datetime.now()
Expand All @@ -89,7 +95,9 @@

results = requests.post(request_url, data=params)

stringdb_ids_df = pd.read_csv(io.StringIO(results.content.decode("utf-8")), sep="\t")
stringdb_ids_df = pd.read_csv(

Check warning on line 98 in src/pyBiodatafuse/annotators/stringdb.py

View check run for this annotation

Codecov / codecov/patch

src/pyBiodatafuse/annotators/stringdb.py#L98

Added line #L98 was not covered by tests
io.StringIO(results.content.decode("utf-8")), sep="\t"
)
stringdb_ids_df.queryIndex = stringdb_ids_df.queryIndex.astype(str)

# ---------- Get String PPI network using the String identifiers ---------------#
Expand All @@ -98,7 +106,9 @@
request_url = "/".join([string_api_url, output_format, method])

params = {
"identifiers": "%0d".join(list(stringdb_ids_df.stringId.unique())), # your protein
"identifiers": "%0d".join(
list(stringdb_ids_df.stringId.unique())
), # your protein
"species": 9606, # species NCBI identifier
"caller_identity": "github.com", # your app name
}
Expand All @@ -125,7 +135,7 @@

# Add the datasource, query, query time, and the date to metadata
string_metadata = {
"datasource": "StringDB",
"datasource": "STRING-DB",
"metadata": {"source_version": string_version},
"query": {
"size": len(gene_list),
Expand Down
16 changes: 11 additions & 5 deletions src/pyBiodatafuse/annotators/wikipathways.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""Python file for querying Wikipathways SPARQL endpoint (https://sparql.wikipathways.org/sparql)."""
"""Python file for querying WikiPathways SPARQL endpoint (https://sparql.wikipathways.org/sparql)."""

Check warning on line 4 in src/pyBiodatafuse/annotators/wikipathways.py

View check run for this annotation

Codecov / codecov/patch

src/pyBiodatafuse/annotators/wikipathways.py#L4

Added line #L4 was not covered by tests

import datetime
import os
Expand All @@ -14,11 +14,13 @@


def get_version_wikipathways() -> dict:
"""Get version of Wikipathways.
"""Get version of WikiPathways.

:returns: a dictionary containing the version information
"""
with open(os.path.dirname(__file__) + "/queries/wikipathways-metadata.rq", "r") as fin:
with open(
os.path.dirname(__file__) + "/queries/wikipathways-metadata.rq", "r"
) as fin:
sparql_query = fin.read()

sparql = SPARQLWrapper("https://sparql.wikipathways.org/sparql")
Expand All @@ -28,7 +30,9 @@

res = sparql.queryAndConvert()

wikipathways_version = {"wikipathways_version": res["results"]["bindings"][0]["title"]["value"]}
wikipathways_version = {

Check warning on line 33 in src/pyBiodatafuse/annotators/wikipathways.py

View check run for this annotation

Codecov / codecov/patch

src/pyBiodatafuse/annotators/wikipathways.py#L33

Added line #L33 was not covered by tests
"wikipathways_version": res["results"]["bindings"][0]["title"]["value"]
}

return wikipathways_version

Expand Down Expand Up @@ -57,7 +61,9 @@
else:
query_gene_lists.append(" ".join(f'"{g}"' for g in hgnc_gene_list))

with open(os.path.dirname(__file__) + "/queries/wikipathways-genes-pathways.rq", "r") as fin:
with open(
os.path.dirname(__file__) + "/queries/wikipathways-genes-pathways.rq", "r"
) as fin:
sparql_query = fin.read()

sparql = SPARQLWrapper("https://sparql.wikipathways.org/sparql")
Expand Down
Loading