####################
# OpenMS constants #
####################

# NOTE: in the originating diff this mapping is added to
# spectrum_fundamentals/constants.py and imported into
# spectrum_fundamentals/mod_string.py, where openms_to_internal lives.
# Maps an OpenMS-style modified residue to its internal UNIMOD notation.
OPENMS_VAR_MODS = {
    "M(Oxidation)": "M[UNIMOD:35]",
    "C(Carbamidomethyl)": "C[UNIMOD:4]",
}


def openms_to_internal(sequences: List[str], fixed_mods: Optional[Dict[str, str]] = None) -> List[str]:
    """
    Translate OpenMS modstrings to the internal (Prosit) format.

    Every occurrence of a key from OPENMS_VAR_MODS or from fixed_mods is replaced
    by its mapped value in a single left-to-right pass per sequence, so text that
    was produced by a replacement is never replaced again.

    :param sequences: List[str] of sequences
    :param fixed_mods: Optional dictionary of modifications with key aa and value mod,
        e.g. 'C': 'C[UNIMOD:4]'. The values of the fixed modifications must be among the
        values of the variable modifications dictionary (OPENMS_VAR_MODS).
        By default, i.e. if nothing is supplied to fixed_mods, carbamidomethylation on
        cysteine will be included in the fixed modifications. If you want to have no fixed
        modifications at all, supply fixed_mods={}
    :raises AssertionError: if illegal modification was provided in the fixed_mods dictionary.
    :return: a list of modified sequences
    """
    if fixed_mods is None:
        fixed_mods = {"C": "C[UNIMOD:4]"}

    # Validate explicitly instead of using an `assert` statement: asserts are
    # removed when Python runs with -O, which would let illegal mods through.
    # AssertionError is kept because it is the documented exception type.
    allowed_mods = set(OPENMS_VAR_MODS.values())
    if any(mod not in allowed_mods for mod in fixed_mods.values()):
        raise AssertionError(f"Provided illegal fixed mod, supported modifications are {allowed_mods}.")

    # Fixed mods are merged last so they override a variable mod with the same key.
    replacements = {**OPENMS_VAR_MODS, **fixed_mods}

    # Longest keys first: regex alternation picks the first alternative that
    # matches, so "C(Carbamidomethyl)" must be tried before a bare "C".
    # re.escape makes every key match literally (parentheses included).
    ordered_keys = sorted(replacements, key=len, reverse=True)
    regex = re.compile("|".join(re.escape(key) for key in ordered_keys))

    def _substitute(match: re.Match) -> str:
        """
        Look up the substitution for a match.

        :param match: an re.Match object found by re.sub
        :return: substitution string for the given match
        """
        return replacements[match.group()]

    return [regex.sub(_substitute, seq) for seq in sequences]