diff --git a/spectrum_io/search_result/filter.py b/spectrum_io/search_result/filter.py index ef9afde..e8bd429 100644 --- a/spectrum_io/search_result/filter.py +++ b/spectrum_io/search_result/filter.py @@ -2,6 +2,7 @@ import re import pandas as pd +import spectrum_fundamentals.constants as c logger = logging.getLogger(__name__) @@ -27,3 +28,17 @@ def filter_valid_prosit_sequences(df: pd.DataFrame) -> pd.DataFrame: logger.info(f"#sequences after filtering for valid prosit sequences: {len(df.index)}") return df + + +def add_tmt_mod(mass: float, seq: str, unimod_tag: str) -> float: + """ + Add tmt modification. + + :param mass: mass without tmt modification + :param seq: sequence of the peptide + :param unimod_tag: UNIMOD tag for the modification + :return: mass as float + """ + num_of_tmt = seq.count(unimod_tag) + mass += num_of_tmt * c.MOD_MASSES[f"{unimod_tag}"] + return mass diff --git a/spectrum_io/search_result/mascot.py b/spectrum_io/search_result/mascot.py index 8915513..356dc42 100644 --- a/spectrum_io/search_result/mascot.py +++ b/spectrum_io/search_result/mascot.py @@ -7,7 +7,7 @@ import spectrum_fundamentals.constants as c from spectrum_fundamentals.mod_string import internal_without_mods -from .filter import filter_valid_prosit_sequences +from .filter import add_tmt_mod, filter_valid_prosit_sequences logger = logging.getLogger(__name__) @@ -92,6 +92,13 @@ def read_mascot(path: Union[str, Path], tmt_labeled: str) -> pd.DataFrame: df["MODIFIED_SEQUENCE"] = sequences + if tmt_labeled != "": + unimod_tag = c.TMT_MODS[tmt_labeled] + logger.info("Adding TMT fixed modifications") + df["MODIFIED_SEQUENCE"] = df["MODIFIED_SEQUENCE"].str.replace("K", f"K{unimod_tag}") + df["MODIFIED_SEQUENCE"] = unimod_tag + "-" + df["MODIFIED_SEQUENCE"] + df["MASS"] = df.apply(lambda x: add_tmt_mod(x.MASS, x.MODIFIED_SEQUENCE, unimod_tag), axis=1) + df["SEQUENCE"] = internal_without_mods(df["MODIFIED_SEQUENCE"]) df["PEPTIDE_LENGTH"] = df["SEQUENCE"].apply(lambda x: len(x)) diff --git a/spectrum_io/search_result/maxquant.py b/spectrum_io/search_result/maxquant.py index dd79058..a133848 100644 --- a/spectrum_io/search_result/maxquant.py +++ b/spectrum_io/search_result/maxquant.py @@ -6,7 +6,7 @@ import spectrum_fundamentals.constants as c from spectrum_fundamentals.mod_string import internal_without_mods, maxquant_to_internal -from .filter import filter_valid_prosit_sequences +from .filter import add_tmt_mod, filter_valid_prosit_sequences logger = logging.getLogger(__name__) @@ -88,17 +88,3 @@ def update_columns_for_prosit(df: pd.DataFrame, tmt_labeled: str) -> pd.DataFram df["PEPTIDE_LENGTH"] = df["SEQUENCE"].apply(lambda x: len(x)) return df - - -def add_tmt_mod(mass: float, seq: str, unimod_tag: str) -> float: - """ - Add tmt modification. - - :param mass: mass without tmt modification - :param seq: sequence of the peptide - :param unimod_tag: UNIMOD tag for the modification - :return: mass as float - """ - num_of_tmt = seq.count(unimod_tag) - mass += num_of_tmt * c.MOD_MASSES[f"{unimod_tag}"] - return mass diff --git a/spectrum_io/search_result/msfragger.py b/spectrum_io/search_result/msfragger.py index f87953d..7768aec 100644 --- a/spectrum_io/search_result/msfragger.py +++ b/spectrum_io/search_result/msfragger.py @@ -8,7 +8,7 @@ from spectrum_fundamentals.mod_string import internal_without_mods from tqdm import tqdm -from .filter import filter_valid_prosit_sequences +from .filter import add_tmt_mod, filter_valid_prosit_sequences logger = logging.getLogger(__name__) @@ -38,7 +38,7 @@ def read_msfragger(path: Union[str, Path], tmt_labeled: str) -> pd.DataFrame: df = pd.concat(ms_frag_results) - df = update_columns_for_prosit(df, "") + df = update_columns_for_prosit(df, tmt_labeled) return filter_valid_prosit_sequences(df) @@ -55,6 +55,11 @@ def update_columns_for_prosit(df, tmt_labeled: str) -> pd.DataFrame: df["MASS"] = df["precursor_neutral_mass"] df["PEPTIDE_LENGTH"] = df["peptide"].apply(lambda x: len(x)) df["MODIFIED_SEQUENCE"] = msfragger_to_internal(df["modified_peptide"]) + if tmt_labeled != "": + unimod_tag = c.TMT_MODS[tmt_labeled] + df["MODIFIED_SEQUENCE"] = df["MODIFIED_SEQUENCE"].str.replace("K", f"K{unimod_tag}") + df["MODIFIED_SEQUENCE"] = unimod_tag + "-" + df["MODIFIED_SEQUENCE"] + df["MASS"] = df.apply(lambda x: add_tmt_mod(x.MASS, x.MODIFIED_SEQUENCE, unimod_tag), axis=1) df.rename( columns={ "assumed_charge": "PRECURSOR_CHARGE",