From 78a56337d2ecf69eaa8102bd25101a88da3de23c Mon Sep 17 00:00:00 2001
From: Mario Picciani <mario.picciani@tum.de>
Date: Wed, 10 Apr 2024 14:43:21 +0200
Subject: [PATCH] added custom init with xl type parameter

---
 spectrum_io/search_result/xisearch.py | 22 +++++++++++++++++++---
 tests/unit_tests/test_xisearch.py     |  4 +++-
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/spectrum_io/search_result/xisearch.py b/spectrum_io/search_result/xisearch.py
index ad32ca1..5493bbc 100644
--- a/spectrum_io/search_result/xisearch.py
+++ b/spectrum_io/search_result/xisearch.py
@@ -17,6 +17,22 @@
 class Xisearch(SearchResults):
     """Handle search results from xisearch."""
 
+    def __init__(self, xl: str, *args, **kwargs):
+        """
+        Create an Xisearch results handler for a given crosslinker.
+
+        :param xl: crosslinker type, 'DSSO' or 'DSVO' (case-insensitive); stored lower-cased in self.xl.
+        :param args: positional arguments passed through to the SearchResults constructor.
+        :param kwargs: keyword arguments passed through to the SearchResults constructor.
+        :raises ValueError: if xl is neither 'DSSO' nor 'DSVO'.
+        """
+        normalized_xl = xl.lower()
+        if normalized_xl not in ["dsso", "dsvo"]:
+            raise ValueError(f"Unknown crosslinker type provided: {xl}. Only 'DSSO' and 'DSVO' are supported.")
+        self.xl = normalized_xl
+
+        super().__init__(*args, **kwargs)
+
     def read_result(self, tmt_labeled: str = "") -> pd.DataFrame:
         """
         Function to read a csv of CSMs and perform some basic formatting.
@@ -24,7 +40,7 @@ def read_result(self, tmt_labeled: str = "") -> pd.DataFrame:
         :param tmt_labeled: tmt label as str
         :return: pd.DataFrame with the formatted data
         """
-        logger.info("Reading msms.csv file")
+        logger.info("Reading search results file...")
         columns_to_read = [
             "run_name",
             "scan_number",
@@ -50,7 +66,7 @@ def read_result(self, tmt_labeled: str = "") -> pd.DataFrame:
         ]
 
         df = pd.read_csv(self.path, sep="\t", usecols=columns_to_read)
-        logger.info("Finished reading msms.tsv file")
+        logger.info("Finished reading search results file.")
         # Standardize column names
         df = Xisearch.filter_xisearch_result(df)
         df = Xisearch.update_columns_for_prosit(df)
@@ -183,7 +199,7 @@ def update_columns_for_prosit(df: pd.DataFrame) -> pd.DataFrame:
         df["ModificationPositions2"] = df["mod_pos_p2"]
         df["PEPTIDE_LENGTH_A"] = df["aa_len_p1"]
         df["PEPTIDE_LENGTH_B"] = df["aa_len_p2"]
-        logger.info("Converting xisearch peptide sequence to internal format")
+        logger.info("Converting XIsearch peptide sequence to internal format...")
 
         df["RAW_FILE"] = df["RAW_FILE"].str.replace(".raw", "")
         df["Modifications_A"] = df["Modifications_A"].astype("str")
diff --git a/tests/unit_tests/test_xisearch.py b/tests/unit_tests/test_xisearch.py
index d838c0f..ad4f3ae 100644
--- a/tests/unit_tests/test_xisearch.py
+++ b/tests/unit_tests/test_xisearch.py
@@ -14,7 +14,9 @@ def test_read_xisearch(self):
         """Test function for reading Xisearch results and transforming to Prosit format."""
         expected_xisearch_internal_path = Path(__file__).parent / "data" / "xisearch_output_internal.tsv"
 
-        internal_search_results_df = Xisearch(Path(__file__).parent / "data" / "xisearch_output.tsv").read_result()
+        internal_search_results_df = Xisearch(
+            xl="DSSO", path=Path(__file__).parent / "data" / "xisearch_output.tsv"
+        ).read_result()
         internal_search_results_df.reset_index(drop=True, inplace=True)
         expected_df = pd.read_csv(expected_xisearch_internal_path)
         expected_df["Modifications_A"] = expected_df["Modifications_A"].fillna("nan")