From 64104433acffa56ddc014f4afe6a1e15cab7ccdc Mon Sep 17 00:00:00 2001 From: NormanRog <89592194+NormanRog@users.noreply.github.com> Date: Wed, 27 Mar 2024 15:36:17 +0100 Subject: [PATCH] Update search_results.py Remove r"\[[0-9]+\]" from the unsupported mods so that MSFragger pepxml files can be read --- spectrum_io/search_result/search_results.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/spectrum_io/search_result/search_results.py b/spectrum_io/search_result/search_results.py index 4a38cbe..e158ce1 100644 --- a/spectrum_io/search_result/search_results.py +++ b/spectrum_io/search_result/search_results.py @@ -22,7 +22,8 @@ def filter_valid_prosit_sequences(df: pd.DataFrame) -> pd.DataFrame: # retain only peptides that fall within [7, 30] length supported by Prosit df = df[(df["PEPTIDE_LENGTH"] <= 30) & (df["PEPTIDE_LENGTH"] >= 7)] # remove unsupported mods to exclude - unsupported_mods = [r"Acetyl \(Protein N\-term\)", "ac", r"\[[0-9]+\]"] + # for reading MSFragger pepxml files removed r"\[[0-9]+\]" from unsupported mods + unsupported_mods = [r"Acetyl \(Protein N\-term\)", "ac"] exclude_mods_pattern = re.compile("|".join(unsupported_mods)) df = df[~df["MODIFIED_SEQUENCE"].str.contains(exclude_mods_pattern)] # remove non-canonical aas