Skip to content

Commit

Permalink
Fixed reading and testing of Sage search results
Browse files (browse the repository at this point in the history)
  • Loading branch information
picciama committed Nov 10, 2023
1 parent 55279ac commit 2a0221a
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 16 deletions.
16 changes: 4 additions & 12 deletions spectrum_io/search_result/sage.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,7 @@ def read_result(path: Union[str, Path], tmt_labeled: str = "") -> pd.DataFrame:
logger.info("Reading msms.tsv file")
df = pd.read_csv(
path,
usecols=[
"filename",
"scannr",
"peptide",
"charge",
"hyperscore",
"calcmass",
"proteins"
],
usecols=["filename", "scannr", "peptide", "charge", "hyperscore", "calcmass", "proteins"],
sep="\t",
)
logger.info("Finished reading msms.tsv file")
Expand Down Expand Up @@ -65,9 +57,9 @@ def update_columns_for_prosit(df: pd.DataFrame, tmt_labeled: str) -> pd.DataFram
)

# removing .mzML
df['RAW_FILE'] = df['RAW_FILE'].str.replace(".mzML","",regex=True)
# extracting only the scan number
df['SCAN_NUMBER'] = df['SCAN_NUMBER'].str.split('=').str[3:].str.join('=')
df["RAW_FILE"] = df["RAW_FILE"].str.replace(".mzML", "", regex=True)
# extracting only the scan number
df["SCAN_NUMBER"] = [int(x.rsplit("=", 1)[-1]) for x in df["SCAN_NUMBER"]]
# creating a column of decoys and targets
df["REVERSE"] = df["PROTEINS"].str.startswith("rev_")
# removing modification to create the unmodified sequences
Expand Down
7 changes: 3 additions & 4 deletions tests/unit_tests/test_sage.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from pathlib import Path

import pandas as pd

from spectrum_io.search_result import Sage


Expand All @@ -14,8 +15,6 @@ def test_read_sage(self):
expected_sage_internal_path = Path(__file__).parent / "data" / "sage_output_internal.csv"

internal_search_results_df = Sage.read_result(sage_output_path)
expected_df = pd.read_csv(expected_sage_internal_path, index_col=0)

# execute only once, then remove and test again
internal_search_results_df.to_csv(expected_sage_internal_path)

self.assertEqual(internal_search_results_df, pd.read_csv(expected_sage_internal_path))
pd.testing.assert_frame_equal(internal_search_results_df, expected_df)

0 comments on commit 2a0221a

Please sign in to comment.