Rename function explode_column to split_and_explode_column

EBIvariation · Apr 25, 2024 · 51a69c5 · 51a69c5
1 parent 2c17176
commit 51a69c5
Show file tree

Hide file tree

Showing 3 changed files with 12 additions and 12 deletions.
diff --git a/opentargets_pharmgkb/evidence_generation.py b/opentargets_pharmgkb/evidence_generation.py
@@ -14,7 +14,7 @@
 
 from opentargets_pharmgkb.counts import ClinicalAnnotationCounts
 from opentargets_pharmgkb.ontology_apis import get_efo_iri
-from opentargets_pharmgkb.pandas_utils import none_to_nan, explode_column, read_tsv_to_df
+from opentargets_pharmgkb.pandas_utils import none_to_nan, split_and_explode_column, read_tsv_to_df
 from opentargets_pharmgkb.validation import validate_evidence_string
 from opentargets_pharmgkb.variant_coordinates import Fasta, parse_genotype
 
@@ -53,7 +53,7 @@ def pipeline(data_dir, fasta_path, created_date, output_path):
     merged_with_alleles_table = pd.merge(merged_with_variants_table, clinical_alleles_table, on=ID_COL_NAME, how='left')
     counts.exploded_alleles = len(merged_with_alleles_table)
 
-    exploded_pgx_cat = explode_column(merged_with_alleles_table, 'Phenotype Category', 'split_pgx_category')
+    exploded_pgx_cat = split_and_explode_column(merged_with_alleles_table, 'Phenotype Category', 'split_pgx_category')
     counts.exploded_pgx_cat = len(exploded_pgx_cat)
 
     mapped_drugs = explode_drugs(exploded_pgx_cat)
@@ -281,7 +281,7 @@ def explode_and_map_genes(df):
     :param df: dataframe to annotate (should have a 'Gene' column)
     :return: dataframe with 'ensembl_gene_id' column added
     """
-    split_genes = explode_column(df, 'Gene', 'split_gene')
+    split_genes = split_and_explode_column(df, 'Gene', 'split_gene')
     ensembl_ids = query_biomart(
         ('hgnc_symbol', 'split_gene'),
         [('ensembl_gene_id', 'gene_from_pgkb')],
@@ -301,9 +301,9 @@ def explode_drugs(df):
     :return: dataframe with 'split_drug' column added
     """
     # Drugs on same row but not explicitly annotated as combinations
-    split_drugs = explode_column(df, 'Drug(s)', 'split_drug')
+    split_drugs = split_and_explode_column(df, 'Drug(s)', 'split_drug')
     # Drugs explicitly annotated as combinations are kept as a list of drug names
-    split_drugs = explode_column(split_drugs, 'split_drug', 'split_drug', sep='/', split_only=True)
+    split_drugs = split_and_explode_column(split_drugs, 'split_drug', 'split_drug', sep='/', split_only=True)
     return split_drugs
 
 
@@ -315,7 +315,7 @@ def explode_and_map_phenotypes(df):
     :return: dataframe with 'efo' column added
     """
     df['Phenotype(s)'].fillna('', inplace=True)
-    split_phenotypes = explode_column(df, 'Phenotype(s)', 'split_phenotype')
+    split_phenotypes = split_and_explode_column(df, 'Phenotype(s)', 'split_phenotype')
     with multiprocessing.Pool(processes=24) as pool:
         str_to_iri = {
             s: pool.apply(get_efo_iri, args=(s,))

diff --git a/opentargets_pharmgkb/pandas_utils.py b/opentargets_pharmgkb/pandas_utils.py
@@ -10,7 +10,7 @@ def none_to_nan(x):
     return np.nan if x is None else x
 
 
-def explode_column(df, source_col, target_col, sep=';', split_only=False):
+def split_and_explode_column(df, source_col, target_col, sep=';', split_only=False):
     """
     Splits a string-valued column in dataframe and explodes on the values, storing them in the specified target column.
     Any white space around the separator will be stripped.

diff --git a/tests/test_pandas_utils.py b/tests/test_pandas_utils.py
@@ -1,10 +1,10 @@
 import numpy as np
 import pandas as pd
 
-from opentargets_pharmgkb.pandas_utils import explode_column
+from opentargets_pharmgkb.pandas_utils import split_and_explode_column
 
 
-def test_explode_column():
+def test_split_and_explode_column():
     df = pd.DataFrame([
         [1, 'apple; pear; banana'],
         [2, 'cat;frog'],
@@ -21,12 +21,12 @@ def test_explode_column():
         [3, 'something', 'something'],
         [4, np.nan, np.nan]
     ], columns=['A', 'B', 'C'])
-    result = explode_column(df, 'B', 'C')
+    result = split_and_explode_column(df, 'B', 'C')
 
     assert result.equals(expected)
 
 
-def test_explode_column_split_only():
+def test_split_and_explode_column_split_only():
     df = pd.DataFrame([
         [1, 'apple1;apple2 / pear / banana'],
         [2, 'cat/frog'],
@@ -40,6 +40,6 @@ def test_explode_column_split_only():
         [3, 'something', ['something']],
         [4, np.nan, np.nan]
     ], columns=['A', 'B', 'C'])
-    result = explode_column(df, 'B', 'C', sep='/', split_only=True)
+    result = split_and_explode_column(df, 'B', 'C', sep='/', split_only=True)
 
     assert result.equals(expected)