Skip to content

Commit

Permalink
update method name
Browse files: browse the repository at this point in the history
  • Loading branch information
apriltuesday committed Apr 25, 2024
1 parent 2c17176 commit 51a69c5
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 12 deletions.
12 changes: 6 additions & 6 deletions opentargets_pharmgkb/evidence_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

from opentargets_pharmgkb.counts import ClinicalAnnotationCounts
from opentargets_pharmgkb.ontology_apis import get_efo_iri
from opentargets_pharmgkb.pandas_utils import none_to_nan, explode_column, read_tsv_to_df
from opentargets_pharmgkb.pandas_utils import none_to_nan, split_and_explode_column, read_tsv_to_df
from opentargets_pharmgkb.validation import validate_evidence_string
from opentargets_pharmgkb.variant_coordinates import Fasta, parse_genotype

Expand Down Expand Up @@ -53,7 +53,7 @@ def pipeline(data_dir, fasta_path, created_date, output_path):
merged_with_alleles_table = pd.merge(merged_with_variants_table, clinical_alleles_table, on=ID_COL_NAME, how='left')
counts.exploded_alleles = len(merged_with_alleles_table)

exploded_pgx_cat = explode_column(merged_with_alleles_table, 'Phenotype Category', 'split_pgx_category')
exploded_pgx_cat = split_and_explode_column(merged_with_alleles_table, 'Phenotype Category', 'split_pgx_category')
counts.exploded_pgx_cat = len(exploded_pgx_cat)

mapped_drugs = explode_drugs(exploded_pgx_cat)
Expand Down Expand Up @@ -281,7 +281,7 @@ def explode_and_map_genes(df):
:param df: dataframe to annotate (should have a 'Gene' column)
:return: dataframe with 'ensembl_gene_id' column added
"""
split_genes = explode_column(df, 'Gene', 'split_gene')
split_genes = split_and_explode_column(df, 'Gene', 'split_gene')
ensembl_ids = query_biomart(
('hgnc_symbol', 'split_gene'),
[('ensembl_gene_id', 'gene_from_pgkb')],
Expand All @@ -301,9 +301,9 @@ def explode_drugs(df):
:return: dataframe with 'split_drug' column added
"""
# Drugs on same row but not explicitly annotated as combinations
split_drugs = explode_column(df, 'Drug(s)', 'split_drug')
split_drugs = split_and_explode_column(df, 'Drug(s)', 'split_drug')
# Drugs explicitly annotated as combinations are kept as a list of drug names
split_drugs = explode_column(split_drugs, 'split_drug', 'split_drug', sep='/', split_only=True)
split_drugs = split_and_explode_column(split_drugs, 'split_drug', 'split_drug', sep='/', split_only=True)
return split_drugs


Expand All @@ -315,7 +315,7 @@ def explode_and_map_phenotypes(df):
:return: dataframe with 'efo' column added
"""
df['Phenotype(s)'].fillna('', inplace=True)
split_phenotypes = explode_column(df, 'Phenotype(s)', 'split_phenotype')
split_phenotypes = split_and_explode_column(df, 'Phenotype(s)', 'split_phenotype')
with multiprocessing.Pool(processes=24) as pool:
str_to_iri = {
s: pool.apply(get_efo_iri, args=(s,))
Expand Down
2 changes: 1 addition & 1 deletion opentargets_pharmgkb/pandas_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def none_to_nan(x):
return np.nan if x is None else x


def explode_column(df, source_col, target_col, sep=';', split_only=False):
def split_and_explode_column(df, source_col, target_col, sep=';', split_only=False):
"""
Splits a string-valued column in dataframe and explodes on the values, storing them in the specified target column.
Any white space around the separator will be stripped.
Expand Down
10 changes: 5 additions & 5 deletions tests/test_pandas_utils.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import numpy as np
import pandas as pd

from opentargets_pharmgkb.pandas_utils import explode_column
from opentargets_pharmgkb.pandas_utils import split_and_explode_column


def test_explode_column():
def test_split_and_explode_column():
df = pd.DataFrame([
[1, 'apple; pear; banana'],
[2, 'cat;frog'],
Expand All @@ -21,12 +21,12 @@ def test_explode_column():
[3, 'something', 'something'],
[4, np.nan, np.nan]
], columns=['A', 'B', 'C'])
result = explode_column(df, 'B', 'C')
result = split_and_explode_column(df, 'B', 'C')

assert result.equals(expected)


def test_explode_column_split_only():
def test_split_and_explode_column_split_only():
df = pd.DataFrame([
[1, 'apple1;apple2 / pear / banana'],
[2, 'cat/frog'],
Expand All @@ -40,6 +40,6 @@ def test_explode_column_split_only():
[3, 'something', ['something']],
[4, np.nan, np.nan]
], columns=['A', 'B', 'C'])
result = explode_column(df, 'B', 'C', sep='/', split_only=True)
result = split_and_explode_column(df, 'B', 'C', sep='/', split_only=True)

assert result.equals(expected)

0 comments on commit 51a69c5

Please sign in to comment.