diff --git a/docs/reference.rst b/docs/reference.rst
index ff7ad07..0323c01 100644
--- a/docs/reference.rst
+++ b/docs/reference.rst
@@ -1,7 +1,7 @@
 How to cite
 ===========
 
-If you want to cite spectrum_fundamentals in your work, please cite the main Oktoberfest publication following:
+If you want to cite spectrum_fundamentals in your work, please cite the main Oktoberfest publication:
 
 .. [1] Picciani M, Gabriel W, Giurcoiu VG et al. (2023),
     *Oktoberfest: Open-source spectral library generation and rescoring pipeline based on Prosit*,
diff --git a/spectrum_fundamentals/mod_string.py b/spectrum_fundamentals/mod_string.py
index 4c14f21..2016c02 100644
--- a/spectrum_fundamentals/mod_string.py
+++ b/spectrum_fundamentals/mod_string.py
@@ -217,6 +217,49 @@ def internal_to_mod_mass(
     return [regex.sub(replacement_func, seq) for seq in sequences]
 
 
+def internal_to_msp(
+    sequences: Union[List[str], pd.Series],
+    mods: Dict[str, str],
+) -> List[Tuple[str, str]]:
+    """
+    Function to translate an internal modstring to modstring and Mods for MSP format.
+
+    Every match of a key of ``mods`` is reported at its 0-based start position in the sequence, counting each
+    earlier match as a single character.
+
+    :param sequences: sequences to translate
+    :param mods: dictionary mapping from internal unimod format (keys) to MSP format (values). The keys are joined
+        into one regular expression as they are and matched text is looked up as ``re.escape(text)``, so each key
+        must be the ``re.escape``-d form of the text it should match.
+    :return: a tuple for each sequence, containing (Mods, mod_string) for the MSP format
+    """
+    if not mods:
+        # An empty pattern matches the empty string at every position and looking "" up in mods raises a KeyError,
+        # so report every sequence as unmodified instead.
+        return [("0", "") for _ in sequences]
+
+    ret_vals = []
+    p = re.compile("|".join(mods.keys()))
+    for seq in sequences:
+        offset = 0
+        mod_list = []
+        for match in p.finditer(seq):
+            replacement = mods[re.escape(match.group())]
+            start, end = match.span()
+            mod_list.append((start - offset, replacement))
+            # each match counts as one character for all positions that follow it
+            offset += end - start - 1
+
+        mod_string = "; ".join([f"{mod[2:]}@{mod[0]}{pos}" for pos, mod in mod_list])
+        n_mods = len(mod_list)
+        if n_mods > 0:
+            mods_field = f"{n_mods}/{'/'.join([f'{pos},{mod}' for pos, mod in mod_list])}"
+        else:
+            mods_field = "0"
+        ret_vals.append((mods_field, mod_string))
+    return ret_vals
+
+
 def internal_to_mod_names(
     sequences: List[str],
 ) -> List[Tuple[str, str]]: