From 51a69c544efb3d4c91186d986b6ff92cf47db02b Mon Sep 17 00:00:00 2001 From: April Shen Date: Thu, 25 Apr 2024 14:30:57 +0100 Subject: [PATCH] update method name --- opentargets_pharmgkb/evidence_generation.py | 12 ++++++------ opentargets_pharmgkb/pandas_utils.py | 2 +- tests/test_pandas_utils.py | 10 +++++----- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/opentargets_pharmgkb/evidence_generation.py b/opentargets_pharmgkb/evidence_generation.py index 81a686a..de2dddb 100644 --- a/opentargets_pharmgkb/evidence_generation.py +++ b/opentargets_pharmgkb/evidence_generation.py @@ -14,7 +14,7 @@ from opentargets_pharmgkb.counts import ClinicalAnnotationCounts from opentargets_pharmgkb.ontology_apis import get_efo_iri -from opentargets_pharmgkb.pandas_utils import none_to_nan, explode_column, read_tsv_to_df +from opentargets_pharmgkb.pandas_utils import none_to_nan, split_and_explode_column, read_tsv_to_df from opentargets_pharmgkb.validation import validate_evidence_string from opentargets_pharmgkb.variant_coordinates import Fasta, parse_genotype @@ -53,7 +53,7 @@ def pipeline(data_dir, fasta_path, created_date, output_path): merged_with_alleles_table = pd.merge(merged_with_variants_table, clinical_alleles_table, on=ID_COL_NAME, how='left') counts.exploded_alleles = len(merged_with_alleles_table) - exploded_pgx_cat = explode_column(merged_with_alleles_table, 'Phenotype Category', 'split_pgx_category') + exploded_pgx_cat = split_and_explode_column(merged_with_alleles_table, 'Phenotype Category', 'split_pgx_category') counts.exploded_pgx_cat = len(exploded_pgx_cat) mapped_drugs = explode_drugs(exploded_pgx_cat) @@ -281,7 +281,7 @@ def explode_and_map_genes(df): :param df: dataframe to annotate (should have a 'Gene' column) :return: dataframe with 'ensembl_gene_id' column added """ - split_genes = explode_column(df, 'Gene', 'split_gene') + split_genes = split_and_explode_column(df, 'Gene', 'split_gene') ensembl_ids = query_biomart( ('hgnc_symbol', 'split_gene'), [('ensembl_gene_id', 'gene_from_pgkb')], @@ -301,9 +301,9 @@ def explode_drugs(df): :return: dataframe with 'split_drug' column added """ # Drugs on same row but not explicitly annotated as combinations - split_drugs = explode_column(df, 'Drug(s)', 'split_drug') + split_drugs = split_and_explode_column(df, 'Drug(s)', 'split_drug') # Drugs explicitly annotated as combinations are kept as a list of drug names - split_drugs = explode_column(split_drugs, 'split_drug', 'split_drug', sep='/', split_only=True) + split_drugs = split_and_explode_column(split_drugs, 'split_drug', 'split_drug', sep='/', split_only=True) return split_drugs @@ -315,7 +315,7 @@ def explode_and_map_phenotypes(df): :return: dataframe with 'efo' column added """ df['Phenotype(s)'].fillna('', inplace=True) - split_phenotypes = explode_column(df, 'Phenotype(s)', 'split_phenotype') + split_phenotypes = split_and_explode_column(df, 'Phenotype(s)', 'split_phenotype') with multiprocessing.Pool(processes=24) as pool: str_to_iri = { s: pool.apply(get_efo_iri, args=(s,)) diff --git a/opentargets_pharmgkb/pandas_utils.py b/opentargets_pharmgkb/pandas_utils.py index 2ad6335..d4dd88e 100644 --- a/opentargets_pharmgkb/pandas_utils.py +++ b/opentargets_pharmgkb/pandas_utils.py @@ -10,7 +10,7 @@ def none_to_nan(x): return np.nan if x is None else x -def explode_column(df, source_col, target_col, sep=';', split_only=False): +def split_and_explode_column(df, source_col, target_col, sep=';', split_only=False): """ Splits a string-valued column in dataframe and explodes on the values, storing them in the specified target column. Any white space around the separator will be stripped. diff --git a/tests/test_pandas_utils.py b/tests/test_pandas_utils.py index a383858..5699847 100644 --- a/tests/test_pandas_utils.py +++ b/tests/test_pandas_utils.py @@ -1,10 +1,10 @@ import numpy as np import pandas as pd -from opentargets_pharmgkb.pandas_utils import explode_column +from opentargets_pharmgkb.pandas_utils import split_and_explode_column -def test_explode_column(): +def test_split_and_explode_column(): df = pd.DataFrame([ [1, 'apple; pear; banana'], [2, 'cat;frog'], @@ -21,12 +21,12 @@ def test_explode_column(): [3, 'something', 'something'], [4, np.nan, np.nan] ], columns=['A', 'B', 'C']) - result = explode_column(df, 'B', 'C') + result = split_and_explode_column(df, 'B', 'C') assert result.equals(expected) -def test_explode_column_split_only(): +def test_split_and_explode_column_split_only(): df = pd.DataFrame([ [1, 'apple1;apple2 / pear / banana'], [2, 'cat/frog'], @@ -40,6 +40,6 @@ def test_explode_column_split_only(): [3, 'something', ['something']], [4, np.nan, np.nan] ], columns=['A', 'B', 'C']) - result = explode_column(df, 'B', 'C', sep='/', split_only=True) + result = split_and_explode_column(df, 'B', 'C', sep='/', split_only=True) assert result.equals(expected)