Skip to content

Commit

Permalink
Added tmt modifications for msfragger and mascot
Browse files Browse the repository at this point in the history
  • Loading branch information
victorgiurcoiu committed Sep 15, 2023
1 parent 89dae69 commit 2b156e8
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 18 deletions.
15 changes: 15 additions & 0 deletions spectrum_io/search_result/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import re

import pandas as pd
import spectrum_fundamentals.constants as c

logger = logging.getLogger(__name__)

Expand All @@ -27,3 +28,17 @@ def filter_valid_prosit_sequences(df: pd.DataFrame) -> pd.DataFrame:
logger.info(f"#sequences after filtering for valid prosit sequences: {len(df.index)}")

return df


def add_tmt_mod(mass: float, seq: str, unimod_tag: str) -> float:
    """
    Return the mass with the modification mass added once per tag occurrence.

    :param mass: mass without tmt modification
    :param seq: sequence of the peptide, searched for occurrences of unimod_tag
    :param unimod_tag: UNIMOD tag for the modification, also used as the key into c.MOD_MASSES
    :return: mass plus (number of unimod_tag occurrences in seq) times the tag's mass, as float
    """
    # Count first, then look up the per-tag mass, so a malformed seq fails before the lookup.
    tag_occurrences = seq.count(unimod_tag)
    mass_per_tag = c.MOD_MASSES[f"{unimod_tag}"]
    return mass + tag_occurrences * mass_per_tag
9 changes: 8 additions & 1 deletion spectrum_io/search_result/mascot.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import spectrum_fundamentals.constants as c
from spectrum_fundamentals.mod_string import internal_without_mods

from .filter import filter_valid_prosit_sequences
from .filter import add_tmt_mod, filter_valid_prosit_sequences

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -92,6 +92,13 @@ def read_mascot(path: Union[str, Path], tmt_labeled: str) -> pd.DataFrame:

df["MODIFIED_SEQUENCE"] = sequences

if tmt_labeled != "":
unimod_tag = c.TMT_MODS[tmt_labeled]
logger.info("Adding TMT fixed modifications")
df["MODIFIED_SEQUENCE"] = df["MODIFIED_SEQUENCE"].str.replace("K", f"K{unimod_tag}")
df["MODIFIED_SEQUENCE"] = unimod_tag + "-" + df["MODIFIED_SEQUENCE"]
df["MASS"] = df.apply(lambda x: add_tmt_mod(x.MASS, x.MODIFIED_SEQUENCE, unimod_tag), axis=1)

df["SEQUENCE"] = internal_without_mods(df["MODIFIED_SEQUENCE"])
df["PEPTIDE_LENGTH"] = df["SEQUENCE"].apply(lambda x: len(x))

Expand Down
16 changes: 1 addition & 15 deletions spectrum_io/search_result/maxquant.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import spectrum_fundamentals.constants as c
from spectrum_fundamentals.mod_string import internal_without_mods, maxquant_to_internal

from .filter import filter_valid_prosit_sequences
from .filter import add_tmt_mod, filter_valid_prosit_sequences

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -88,17 +88,3 @@ def update_columns_for_prosit(df: pd.DataFrame, tmt_labeled: str) -> pd.DataFram
df["PEPTIDE_LENGTH"] = df["SEQUENCE"].apply(lambda x: len(x))

return df


def add_tmt_mod(mass: float, seq: str, unimod_tag: str) -> float:
    """
    Return the mass with the modification mass added once per tag occurrence.

    :param mass: mass without tmt modification
    :param seq: sequence of the peptide, searched for occurrences of unimod_tag
    :param unimod_tag: UNIMOD tag for the modification, also used as the key into c.MOD_MASSES
    :return: mass plus (number of unimod_tag occurrences in seq) times the tag's mass, as float
    """
    # Count first, then look up the per-tag mass, so a malformed seq fails before the lookup.
    tag_occurrences = seq.count(unimod_tag)
    mass_per_tag = c.MOD_MASSES[f"{unimod_tag}"]
    return mass + tag_occurrences * mass_per_tag
9 changes: 7 additions & 2 deletions spectrum_io/search_result/msfragger.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from spectrum_fundamentals.mod_string import internal_without_mods
from tqdm import tqdm

from .filter import filter_valid_prosit_sequences
from .filter import add_tmt_mod, filter_valid_prosit_sequences

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -38,7 +38,7 @@ def read_msfragger(path: Union[str, Path], tmt_labeled: str) -> pd.DataFrame:

df = pd.concat(ms_frag_results)

df = update_columns_for_prosit(df, "")
df = update_columns_for_prosit(df, tmt_labeled)
return filter_valid_prosit_sequences(df)


Expand All @@ -55,6 +55,11 @@ def update_columns_for_prosit(df, tmt_labeled: str) -> pd.DataFrame:
df["MASS"] = df["precursor_neutral_mass"]
df["PEPTIDE_LENGTH"] = df["peptide"].apply(lambda x: len(x))
df["MODIFIED_SEQUENCE"] = msfragger_to_internal(df["modified_peptide"])
if tmt_labeled != "":
unimod_tag = c.TMT_MODS[tmt_labeled]
df["MODIFIED_SEQUENCE"] = df["MODIFIED_SEQUENCE"].str.replace("K", f"K{unimod_tag}")
df["MODIFIED_SEQUENCE"] = unimod_tag + "-" + df["MODIFIED_SEQUENCE"]
df["MASS"] = df.apply(lambda x: add_tmt_mod(x.MASS, x.MODIFIED_SEQUENCE, unimod_tag), axis=1)
df.rename(
columns={
"assumed_charge": "PRECURSOR_CHARGE",
Expand Down

0 comments on commit 2b156e8

Please sign in to comment.