Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WikiPathways consistent spelling #13

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/pyBiodatafuse/annotators/opentargets.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,9 @@ def get_gene_drug_interactions(bridgedb_df: pd.DataFrame) -> Tuple[pd.DataFrame,
return merged_df, version_metadata


def get_gene_disease_associations(bridgedb_df: pd.DataFrame) -> Tuple[pd.DataFrame, dict]:
def get_gene_disease_associations(
bridgedb_df: pd.DataFrame,
) -> Tuple[pd.DataFrame, dict]:
"""Get information about diseases associated with genes based on OpenTargets.

:param bridgedb_df: BridgeDb output for creating the list of gene ids to query
Expand Down
22 changes: 17 additions & 5 deletions src/pyBiodatafuse/annotators/stringdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,9 @@ def annotate_genes_with_stringdb(bridgedb_df: pd.DataFrame):

results = requests.post(request_url, data=params)

stringdb_ids_df = pd.read_csv(io.StringIO(results.content.decode("utf-8")), sep="\t")
stringdb_ids_df = pd.read_csv(
io.StringIO(results.content.decode("utf-8")), sep="\t"
)
stringdb_ids_df.queryIndex = stringdb_ids_df.queryIndex.astype(str)

# for i, row in stringdb_ids_df.iterrows():
Expand All @@ -75,7 +77,9 @@ def annotate_genes_with_stringdb(bridgedb_df: pd.DataFrame):
request_url = "/".join([string_api_url, output_format, method])

params = {
"identifiers": "%0d".join(list(stringdb_ids_df.stringId.unique())), # your protein
"identifiers": "%0d".join(
list(stringdb_ids_df.stringId.unique())
), # your protein
"species": 9606, # species NCBI identifier
"caller_identity": "github.com", # your app name
}
Expand All @@ -86,7 +90,9 @@ def annotate_genes_with_stringdb(bridgedb_df: pd.DataFrame):

# ---------- Add the interactions of each protein (row) to a new column ('stringdb') ---------------#

data_df["stringdb"] = data_df.apply(get_protein_interactions, network_df=network_df, axis=1)
data_df["stringdb"] = data_df.apply(
get_protein_interactions, network_df=network_df, axis=1
)

# Record the end time
end_time = datetime.datetime.now()
Expand Down Expand Up @@ -130,14 +136,20 @@ def get_protein_interactions(row, network_df):
if row_arr["preferredName_A"] == row["identifier"]:
if row_arr["preferredName_B"] not in target_links_set:
gene_ppi_links.append(
{"stringdb_link_to": row_arr["preferredName_B"], "score": row_arr["score"]}
{
"stringdb_link_to": row_arr["preferredName_B"],
"score": row_arr["score"],
}
)
target_links_set.add(row_arr["preferredName_B"])

elif row_arr["preferredName_B"] == row["identifier"]:
if row_arr["preferredName_A"] not in target_links_set:
gene_ppi_links.append(
{"stringdb_link_to": row_arr["preferredName_A"], "score": row_arr["score"]}
{
"stringdb_link_to": row_arr["preferredName_A"],
"score": row_arr["score"],
}
)
target_links_set.add(row_arr["preferredName_A"])

Expand Down
16 changes: 11 additions & 5 deletions src/pyBiodatafuse/annotators/wikipathways.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""Python file for querying Wikipathways SPARQL endpoint (https://sparql.wikipathways.org/sparql)."""
"""Python file for querying WikiPathways SPARQL endpoint (https://sparql.wikipathways.org/sparql)."""

import datetime
import os
Expand All @@ -14,11 +14,13 @@


def get_version_wikipathways() -> dict:
"""Get version of Wikipathways.
"""Get version of WikiPathways.

:returns: a dictionary containing the version information
"""
with open(os.path.dirname(__file__) + "/queries/wikipathways-metadata.rq", "r") as fin:
with open(
os.path.dirname(__file__) + "/queries/wikipathways-metadata.rq", "r"
) as fin:
sparql_query = fin.read()

sparql = SPARQLWrapper("https://sparql.wikipathways.org/sparql")
Expand All @@ -28,7 +30,9 @@ def get_version_wikipathways() -> dict:

res = sparql.queryAndConvert()

wikipathways_version = {"wikipathways_version": res["results"]["bindings"][0]["title"]["value"]}
wikipathways_version = {
"wikipathways_version": res["results"]["bindings"][0]["title"]["value"]
}

return wikipathways_version

Expand Down Expand Up @@ -57,7 +61,9 @@ def get_gene_wikipathway(bridgedb_df: pd.DataFrame):
else:
query_gene_lists.append(" ".join(f'"{g}"' for g in hgnc_gene_list))

with open(os.path.dirname(__file__) + "/queries/wikipathways-genes-pathways.rq", "r") as fin:
with open(
os.path.dirname(__file__) + "/queries/wikipathways-genes-pathways.rq", "r"
) as fin:
sparql_query = fin.read()

sparql = SPARQLWrapper("https://sparql.wikipathways.org/sparql")
Expand Down