Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/fix/custom_mods_tokens' into fea…
Browse files Browse the repository at this point in the history
…ture/add_new_iontypes
  • Loading branch information
JSchlensok committed Aug 9, 2024
2 parents afdd07a + 04bec44 commit 30f9071
Showing 1 changed file with 11 additions and 3 deletions.
14 changes: 11 additions & 3 deletions spectrum_fundamentals/mod_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,10 +311,17 @@ def split_modstring(sequence: str, r_pattern):
# Quick and dirty fix for reading modifications as-is from MaxQuant; we should reconsider how to fix this properly.
# sequence = sequence.replace('M(ox)','M(U:35)')
# sequence = sequence.replace('C','C(U:4)')
val = max(alphabet.values()) + 1
split_seq = r_pattern.findall(sequence)
if "".join(split_seq) == sequence:
if translate:
return [alphabet[aa] for aa in split_seq]
results = []
for aa in split_seq:
if aa not in alphabet: # does not exist
alphabet[aa] = val
val += 1
results.append(alphabet[aa])
return results
else:
return split_seq
elif filter:
Expand All @@ -327,9 +334,10 @@ def split_modstring(sequence: str, r_pattern):
f"The element(s) [{not_parsable_elements}] " f"in the sequence [{sequence}] could not be parsed"
)

pattern = sorted(alphabet, key=len, reverse=True)
unimod_pattern = r"[A-Z]\[UNIMOD:\d+\]"
alphabet_pattern = [re.escape(i) for i in sorted(alphabet, key=len, reverse=True)]

pattern = [re.escape(i) for i in pattern]
pattern = [unimod_pattern] + alphabet_pattern
regex_pattern = re.compile("|".join(pattern))
return map(split_modstring, sequences, repeat(regex_pattern))

Expand Down

0 comments on commit 30f9071

Please sign in to comment.