Skip to content

Commit

Permalink
Merge pull request #78 from wilhelm-lab/feature/sage_dict_parse
Browse files Browse the repository at this point in the history
added sage reversed unimod dictionary
  • Loading branch information
picciama authored Nov 10, 2023
2 parents e024c59 + 8f55924 commit ae40d8b
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 2 deletions.
14 changes: 13 additions & 1 deletion spectrum_fundamentals/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,19 @@
"[UNIMOD:35]": 15.9949146, # Oxidation
"[UNIMOD:1]": 42.010565, # Acetylation
}

# Reverse lookup used when parsing Sage output: mass shift (four decimal places) -> UNIMOD identifier.
# NOTE: lookups are by exact float equality, so a parsed mass shift must have exactly these decimals.
# Modification names in the comments below follow the Unimod database accessions.
MOD_MASSES_SAGE = {
    229.1629: "[UNIMOD:737]",  # TMT6plex
    304.2071: "[UNIMOD:2016]",  # TMTpro
    144.1020: "[UNIMOD:214]",  # iTRAQ4plex
    304.2053: "[UNIMOD:730]",  # iTRAQ8plex
    8.0141: "[UNIMOD:259]",  # SILAC heavy label 13C(6)15N(2)
    10.0082: "[UNIMOD:267]",  # SILAC heavy label 13C(6)15N(4)
    79.9663: "[UNIMOD:21]",  # Phosphorylation
    -18.0105: "[UNIMOD:23]",  # Dehydration (water loss)
    57.0214: "[UNIMOD:4]",  # Carbamidomethylation
    15.9949: "[UNIMOD:35]",  # Oxidation
    42.0105: "[UNIMOD:1]",  # Acetylation
}
# these are only used for prosit_grpc, oktoberfest uses the masses from MOD_MASSES
AA_MOD_MASSES = {
"K[UNIMOD:737]": AA_MASSES["K"] + MOD_MASSES["[UNIMOD:737]"],
Expand Down
50 changes: 49 additions & 1 deletion spectrum_fundamentals/mod_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,55 @@
from itertools import repeat
from typing import Dict, List, Optional, Tuple

from .constants import MAXQUANT_VAR_MODS, MOD_MASSES, MOD_NAMES, MSFRAGGER_VAR_MODS, SPECTRONAUT_MODS
from .constants import MAXQUANT_VAR_MODS, MOD_MASSES, MOD_MASSES_SAGE, MOD_NAMES, MSFRAGGER_VAR_MODS, SPECTRONAUT_MODS


def sage_to_internal(sequences: List[str]) -> List[str]:
    """
    Convert mod string from sage to the internal format.

    This function converts sequences using the mass change of a modification in
    square brackets as done by Sage to the internal format by replacing the mass
    shift with the corresponding UNIMOD identifier of known and supported
    modifications defined in the constants (MOD_MASSES_SAGE).

    Only the bracketed mass shift itself is rewritten. The preceding residue
    letter and a terminal "-" separator are never touched, so the conversion
    works for modifications on any residue as well as on a terminus. Mass
    shifts without an entry in MOD_MASSES_SAGE are left in the sequence unchanged.

    :param sequences: A list of sequences with signed mass shifts inside square
        brackets, e.g. "C[+57.0214]" or "[+229.1629]-ABC".
    :return: A list of sequences (same order and length as the input) with known
        mass shifts replaced by their UNIMOD identifier.
    """
    # Match only the bracketed, signed decimal mass shift, e.g. [+57.0214] or [-18.0105].
    # Leaving residue letters and "-" out of the match means they cannot be dropped or duplicated.
    mass_shift_pattern = re.compile(r"\[([+-]\d+\.\d+)\]")

    def replace(match: "re.Match[str]") -> str:
        # Look up the mass shift as a float; fall back to the original bracket
        # expression when the mass is not a known modification.
        return MOD_MASSES_SAGE.get(float(match.group(1)), match.group(0))

    return [mass_shift_pattern.sub(replace, sequence) for sequence in sequences]


def internal_to_spectronaut(sequences: List[str]) -> List[str]:
Expand Down
19 changes: 19 additions & 0 deletions tests/unit_tests/test_mod_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,25 @@ def test_internal_to_mod_names(self):
]


class TestSageToInternal(unittest.TestCase):
    """Class to test Sage to internal."""

    def test_sage_to_internal_carbamidomethylation(self):
        """Test sage_to_internal with a carbamidomethylated cysteine."""
        self.assertEqual(mod.sage_to_internal(["ABC[+57.0214]DEFGH"]), ["ABC[UNIMOD:4]DEFGH"])

    def test_sage_to_internal_variable_oxidation(self):
        """Test sage_to_internal with carbamidomethylation and a variable methionine oxidation."""
        self.assertEqual(mod.sage_to_internal(["ABC[+57.0214]DM[+15.9949]EFGH"]), ["ABC[UNIMOD:4]DM[UNIMOD:35]EFGH"])

    def test_sage_to_internal_tmt(self):
        """Test sage_to_internal with TMT on the N-terminus and on a lysine."""
        self.assertEqual(
            mod.sage_to_internal(["[+229.1629]-ABC[+57.0214]DEFGHK[+229.1629]"]),
            ["[UNIMOD:737]-ABC[UNIMOD:4]DEFGHK[UNIMOD:737]"],
        )


class TestMaxQuantToInternal(unittest.TestCase):
"""Class to test MaxQuant to internal."""

Expand Down

0 comments on commit ae40d8b

Please sign in to comment.