Skip to content

Commit

Permalink
custom init with xl arg and raise on tmt label
Browse files Browse the repository at this point in the history
  • Loading branch information
picciama committed Apr 10, 2024
1 parent 4a1f735 commit 4147606
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 4 deletions.
26 changes: 23 additions & 3 deletions spectrum_io/search_result/xisearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,34 @@
class Xisearch(SearchResults):
"""Handle search results from xisearch."""

def __init__(self, xl: str, *args, **kwargs):
    """
    Set up the Xisearch results handler for a given crosslinker.

    The crosslinker name is matched case-insensitively and stored lower-cased in ``self.xl``.
    Validation happens before the parent constructor is called, so an unsupported
    crosslinker aborts initialization without invoking ``SearchResults.__init__``.

    :param xl: type of crosslinker used. Can be 'DSSO' or 'DSVO'.
    :param args: additional positional arguments forwarded to SearchResults constructor.
    :param kwargs: additional keyword arguments forwarded to SearchResults constructor.
    :raises ValueError: if the type of crosslinker is unknown.
    """
    crosslinker = xl.lower()

    # Guard clause: reject anything outside the supported crosslinkers up front.
    if crosslinker not in ["dsso", "dsvo"]:
        raise ValueError(f"Unknown crosslinker type provided: {xl}. Only 'DSSO' and 'DSVO' are supported.")

    self.xl = crosslinker
    super().__init__(*args, **kwargs)

def read_result(self, tmt_labeled: str = "") -> pd.DataFrame:
"""
Function to read a csv of CSMs and perform some basic formatting.
:param tmt_labeled: tmt label as str
:raises NotImplementedError: if a tmt label is provided
:return: pd.DataFrame with the formatted data
"""
logger.info("Reading msms.csv file")
if tmt_labeled != "":
raise NotImplementedError("TMT is not supported for XIsearch")

logger.info("Reading search results file...")
columns_to_read = [
"run_name",
"scan_number",
Expand All @@ -50,7 +70,7 @@ def read_result(self, tmt_labeled: str = "") -> pd.DataFrame:
]

df = pd.read_csv(self.path, sep="\t", usecols=columns_to_read)
logger.info("Finished reading msms.tsv file")
logger.info("Finished reading search results file.")
# Standardize column names
df = Xisearch.filter_xisearch_result(df)
df = Xisearch.update_columns_for_prosit(df)
Expand Down Expand Up @@ -183,7 +203,7 @@ def update_columns_for_prosit(df: pd.DataFrame) -> pd.DataFrame:
df["ModificationPositions2"] = df["mod_pos_p2"]
df["PEPTIDE_LENGTH_A"] = df["aa_len_p1"]
df["PEPTIDE_LENGTH_B"] = df["aa_len_p2"]
logger.info("Converting xisearch peptide sequence to internal format")
logger.info("Converting XIsearch peptide sequence to internal format...")

df["RAW_FILE"] = df["RAW_FILE"].str.replace(".raw", "")
df["Modifications_A"] = df["Modifications_A"].astype("str")
Expand Down
4 changes: 3 additions & 1 deletion tests/unit_tests/test_xisearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ def test_read_xisearch(self):
"""Test function for reading Xisearch results and transforming to Prosit format."""
expected_xisearch_internal_path = Path(__file__).parent / "data" / "xisearch_output_internal.tsv"

internal_search_results_df = Xisearch(Path(__file__).parent / "data" / "xisearch_output.tsv").read_result()
internal_search_results_df = Xisearch(
xl="DSSO", path=Path(__file__).parent / "data" / "xisearch_output.tsv"
).read_result()
internal_search_results_df.reset_index(drop=True, inplace=True)
expected_df = pd.read_csv(expected_xisearch_internal_path)
expected_df["Modifications_A"] = expected_df["Modifications_A"].fillna("nan")
Expand Down

0 comments on commit 4147606

Please sign in to comment.