Skip to content

Commit

Permalink
added new internal_to_msp function
Browse files Browse the repository at this point in the history
  • Loading branch information
picciama committed Aug 5, 2024
1 parent 859a8ee commit ca09c0b
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 4 deletions.
2 changes: 1 addition & 1 deletion docs/reference.rst
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
How to cite
===========

If you want to cite spectrum_fundamentals in your work, please cite the main Oktoberfest publication following:
If you want to cite spectrum_fundamentals in your work, please cite the main Oktoberfest publication:

.. [1] Picciani M, Gabriel W, Giurcoiu VG et al. (2023),
*Oktoberfest: Open-source spectral library generation and rescoring pipeline based on Prosit*,
Expand Down
39 changes: 36 additions & 3 deletions spectrum_fundamentals/mod_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,9 +200,7 @@ def internal_without_mods(sequences: List[str]) -> List[str]:
return [re.sub(regex, "", seq) for seq in sequences]


def internal_to_mod_mass(
sequences: List[str], custom_mods: Optional[Dict[str, Dict[str, Tuple[str, float]]]] = None
) -> List[str]:
def internal_to_mod_mass(sequences: List[str], custom_mods: Optional[Dict[str, float]] = None) -> List[str]:
"""
Function to exchange the internal mod identifiers with the masses of the specific modification.
Expand All @@ -217,6 +215,41 @@ def internal_to_mod_mass(
return [regex.sub(replacement_func, seq) for seq in sequences]


def internal_to_msp(
    sequences: Union[List[str], pd.Series],
    mods: Dict[str, str],
) -> List[Tuple[str, str]]:
    """
    Function to translate an internal modstring to modstring and Mods for MSP format.

    The keys of ``mods`` are joined with ``|`` and compiled as one regular expression, so they
    must already be regex-escaped; the value for a match is looked up via ``re.escape`` of the
    matched text. Each value is expected to have the form ``"<residue>,<name>"``: its first
    character is used as the residue and everything from index 2 onwards as the modification name.

    Reported positions are 0-based indices into the sequence with all previously matched
    modifications collapsed to a single character.

    :param sequences: sequences to translate
    :param mods: dictionary mapping from internal unimod format (keys) to MSP format (values).
    :raises KeyError: if the escaped text of a match is not itself a key of ``mods``
    :return: a tuple for each sequence, containing (Mods, mod_string) for the MSP format
    """
    if not mods:
        # An empty alternation would compile to a pattern matching the empty string at every
        # position; without any known modification there is simply nothing to report.
        return [("0", "") for _ in sequences]

    pattern = re.compile("|".join(mods.keys()))
    ret_vals = []
    for seq in sequences:
        offset = 0
        mod_list = []
        for match in pattern.finditer(seq):
            replacement = mods[re.escape(match.group())]
            start, end = match.span()
            mod_list.append((start - offset, replacement))
            # The whole match collapses to one character, so every later match
            # is shifted left by the remaining length of this one.
            offset += end - start - 1

        mod_string = "; ".join(f"{mod[2:]}@{mod[0]}{pos}" for pos, mod in mod_list)
        n_mods = len(mod_list)
        # Use a dedicated name here: rebinding ``mods`` would replace the lookup
        # dictionary with a string and break every following sequence.
        if n_mods > 0:
            msp_mods = f"{n_mods}/{'/'.join(f'{pos},{mod}' for pos, mod in mod_list)}"
        else:
            msp_mods = "0"
        ret_vals.append((msp_mods, mod_string))
    return ret_vals


def internal_to_mod_names(
sequences: List[str],
) -> List[Tuple[str, str]]:
Expand Down

0 comments on commit ca09c0b

Please sign in to comment.