Skip to content

Commit

Permalink
Merge pull request #128 from wilhelm-lab/feature/internal_to_msp
Browse files Browse the repository at this point in the history
added new internal_to_msp function
  • Loading branch information
picciama authored Aug 5, 2024
2 parents 859a8ee + 347979c commit c54bcc6
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 1 deletion.
2 changes: 1 addition & 1 deletion docs/reference.rst
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
How to cite
===========

If you want to cite spectrum_fundamentals in your work, please cite the main Oktoberfest publication following:
If you want to cite spectrum_fundamentals in your work, please cite the main Oktoberfest publication:

.. [1] Picciani M, Gabriel W, Giurcoiu VG et al. (2023),
*Oktoberfest: Open-source spectral library generation and rescoring pipeline based on Prosit*,
Expand Down
35 changes: 35 additions & 0 deletions spectrum_fundamentals/mod_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,41 @@ def internal_to_mod_mass(
return [regex.sub(replacement_func, seq) for seq in sequences]


def internal_to_msp(
    sequences: Union[List[str], pd.Series],
    mods: Dict[str, str],
) -> List[Tuple[str, str]]:
    """
    Function to translate an internal modstring to modstring and Mods for MSP format.

    Each reported position is the index of the match within the sequence after every
    preceding modification annotation has been collapsed to a single character.

    :param sequences: sequences to translate
    :param mods: dictionary mapping from internal unimod format (keys) to MSP format (values).
        The keys are joined with "|" and compiled as one regular expression, so they must
        be valid (escaped) patterns. Of each value, the first character and everything from
        the third character onwards are used, e.g. "M,Oxidation" (presumably
        "<residue>,<modification name>" -- confirm against the caller's mapping).
    :raises KeyError: if a matched substring cannot be attributed to any key of `mods`
    :return: a tuple for each sequence, containing (Mods, mod_string) for the MSP format
    """
    if not mods:
        # An empty alternation compiles to "", which matches at every position and
        # would make the lookup below fail; without mappings nothing is modified.
        return [("0", "") for _ in sequences]

    pattern = re.compile("|".join(mods.keys()))
    resolved: Dict[str, str] = {}  # matched text -> MSP value, filled lazily

    def lookup(text: str) -> str:
        """Return the MSP value belonging to the key that produced `text`."""
        if text not in resolved:
            try:
                # Fast path: the key is exactly what re.escape produces for the text.
                resolved[text] = mods[re.escape(text)]
            except KeyError:
                # Keys written by hand may escape differently than re.escape does
                # while still matching; find the key that matches the whole text.
                for key, value in mods.items():
                    if re.fullmatch(key, text):
                        resolved[text] = value
                        break
                else:
                    raise
        return resolved[text]

    ret_vals = []
    for seq in sequences:
        offset = 0
        mod_list = []
        for match in pattern.finditer(seq):
            start, end = match.span()
            mod_list.append((start - offset, lookup(match.group())))
            # Every annotation counts as one character for the positions that follow.
            offset += end - start - 1

        mod_string = "; ".join(f"{mod[2:]}@{mod[0]}{pos}" for pos, mod in mod_list)
        if mod_list:
            joined = "/".join(f"{pos},{mod}" for pos, mod in mod_list)
            mods_field = f"{len(mod_list)}/{joined}"
        else:
            mods_field = "0"
        ret_vals.append((mods_field, mod_string))
    return ret_vals


def internal_to_mod_names(
sequences: List[str],
) -> List[Tuple[str, str]]:
Expand Down

0 comments on commit c54bcc6

Please sign in to comment.