Skip to content

Commit

Permalink
Small fixes: Pubchem and Bridgedb (#197)
Browse files (browse the repository at this point in the history)
  • Loading branch information
YojanaGadiya authored Nov 21, 2024
1 parent a7b14dd commit 05f56af
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 6 deletions.
3 changes: 2 additions & 1 deletion src/pyBiodatafuse/annotators/pubchem.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -5,6 +5,7 @@

import datetime
import os
from tqdm import tqdm
import warnings
from string import Template
from typing import Any, Dict, Tuple
Expand Down Expand Up @@ -95,7 +96,7 @@ def get_protein_compound_screened(bridgedb_df: pd.DataFrame) -> Tuple[pd.DataFra

intermediate_df = pd.DataFrame()

for protein_str in query_protein_list:
for protein_str in tqdm(query_protein_list, desc="Querying PubChem"):
query_count += 1

sparql_query_template = Template(sparql_query)
Expand Down
12 changes: 7 additions & 5 deletions src/pyBiodatafuse/id_mapper.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -108,14 +108,16 @@ def bridgedb_xref(
if not input_datasource:
raise ValueError("Please provide the identifier datasource, e.g. HGNC")

if output_datasource is None or "All":
if output_datasource is None:
output_datasource = [
"Uniprot-TrEMBL",
"NCBI Gene",
"Ensembl",
"HGNC Accession Number",
"HGNC",
]
else:
assert isinstance(output_datasource, list), "output_datasource must be a list"

data_sources = read_resource_files()
input_source = data_sources.loc[data_sources["source"] == input_datasource, "systemCode"].iloc[
Expand Down Expand Up @@ -182,13 +184,13 @@ def bridgedb_xref(
bridgedb = bridgedb.dropna(subset=["target.source"])

# Subset based on the output_datasource
bridgedb = bridgedb[bridgedb["target.source"].isin(output_datasource)]
bridgedb_subset = bridgedb[bridgedb["target.source"].isin(output_datasource)]

bridgedb = bridgedb.drop_duplicates()
bridgedb_subset = bridgedb_subset.drop_duplicates()
identifiers.columns = [
"{}{}".format(c, "" if c in "identifier" else "_dea") for c in identifiers.columns
]
bridgedb = bridgedb.merge(identifiers, on="identifier")
bridgedb_subset = bridgedb_subset.merge(identifiers, on="identifier")

"""Metadata details"""
# Get the current date and time
Expand Down Expand Up @@ -216,7 +218,7 @@ def bridgedb_xref(
},
}

return bridgedb, bridgedb_metadata
return bridgedb_subset, bridgedb_metadata


"""PubChem helper functions."""
Expand Down

0 comments on commit 05f56af

Please sign in to comment.