From 78a56337d2ecf69eaa8102bd25101a88da3de23c Mon Sep 17 00:00:00 2001 From: Mario Picciani Date: Wed, 10 Apr 2024 14:43:21 +0200 Subject: [PATCH] added custom init with xl type parameter --- spectrum_io/search_result/xisearch.py | 22 +++++++++++++++++++--- tests/unit_tests/test_xisearch.py | 4 +++- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/spectrum_io/search_result/xisearch.py b/spectrum_io/search_result/xisearch.py index ad32ca1..5493bbc 100644 --- a/spectrum_io/search_result/xisearch.py +++ b/spectrum_io/search_result/xisearch.py @@ -17,6 +17,22 @@ class Xisearch(SearchResults): """Handle search results from xisearch.""" + def __init__(self, xl: str, *args, **kwargs): + """ + Init XISearchResults object. + + :param xl: type of crosslinker used. Can be 'DSSO' or 'DSVO'. + :param args: additional positional arguments forwarded to SearchResults constructor. + :param kwargs: additional keyword arguments forwarded to SearchResults constructor. + :raises ValueError: if the type of crosslinker is unknown. + """ + if xl.lower() in ["dsso", "dsvo"]: + self.xl = xl.lower() + else: + raise ValueError(f"Unknown crosslinker type provided: {xl}. Only 'DSSO' and 'DSVO' are supported.") + + super().__init__(*args, **kwargs) + def read_result(self, tmt_labeled: str = "") -> pd.DataFrame: """ Function to read a csv of CSMs and perform some basic formatting. @@ -24,7 +40,7 @@ def read_result(self, tmt_labeled: str = "") -> pd.DataFrame: :param tmt_labeled: tmt label as str :return: pd.DataFrame with the formatted data """ - logger.info("Reading msms.csv file") + logger.info("Reading search results file...") columns_to_read = [ "run_name", "scan_number", @@ -50,7 +66,7 @@ def read_result(self, tmt_labeled: str = "") -> pd.DataFrame: ] df = pd.read_csv(self.path, sep="\t", usecols=columns_to_read) - logger.info("Finished reading msms.tsv file") + logger.info("Finished reading search results file.") # Standardize column names df = Xisearch.filter_xisearch_result(df) df = Xisearch.update_columns_for_prosit(df) @@ -183,7 +199,7 @@ def update_columns_for_prosit(df: pd.DataFrame) -> pd.DataFrame: df["ModificationPositions2"] = df["mod_pos_p2"] df["PEPTIDE_LENGTH_A"] = df["aa_len_p1"] df["PEPTIDE_LENGTH_B"] = df["aa_len_p2"] - logger.info("Converting xisearch peptide sequence to internal format") + logger.info("Converting XIsearch peptide sequence to internal format...") df["RAW_FILE"] = df["RAW_FILE"].str.replace(".raw", "") df["Modifications_A"] = df["Modifications_A"].astype("str") diff --git a/tests/unit_tests/test_xisearch.py b/tests/unit_tests/test_xisearch.py index d838c0f..ad4f3ae 100644 --- a/tests/unit_tests/test_xisearch.py +++ b/tests/unit_tests/test_xisearch.py @@ -14,7 +14,9 @@ def test_read_xisearch(self): """Test function for reading Xisearch results and transforming to Prosit format.""" expected_xisearch_internal_path = Path(__file__).parent / "data" / "xisearch_output_internal.tsv" - internal_search_results_df = Xisearch(Path(__file__).parent / "data" / "xisearch_output.tsv").read_result() + internal_search_results_df = Xisearch( + xl="DSSO", path=Path(__file__).parent / "data" / "xisearch_output.tsv" + ).read_result() internal_search_results_df.reset_index(drop=True, inplace=True) expected_df = pd.read_csv(expected_xisearch_internal_path) expected_df["Modifications_A"] = expected_df["Modifications_A"].fillna("nan")