From 5df1448232e7fa6922e9f778f9cb0356ca2e07ff Mon Sep 17 00:00:00 2001
From: Fabian Basso <fabian.basso@tum.de>
Date: Fri, 9 Aug 2024 12:12:46 +0000
Subject: [PATCH 1/3] Assign new tokens to custom mods missing from the alphabet

---
 spectrum_fundamentals/mod_string.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/spectrum_fundamentals/mod_string.py b/spectrum_fundamentals/mod_string.py
index 99b3702..685cb24 100644
--- a/spectrum_fundamentals/mod_string.py
+++ b/spectrum_fundamentals/mod_string.py
@@ -311,10 +311,17 @@ def split_modstring(sequence: str, r_pattern):
         # Ugly and fast fix for reading modifications as is from maxquant we should reconsider how to fix it.
         # sequence = sequence.replace('M(ox)','M(U:35)')
         # sequence = sequence.replace('C','C(U:4)')
+        val = max(alphabet.values()) + 1
         split_seq = r_pattern.findall(sequence)
         if "".join(split_seq) == sequence:
             if translate:
-                return [alphabet[aa] for aa in split_seq]
+                results = []
+                for aa in split_seq:
+                    if aa not in alphabet: #does not exist
+                        alphabet[aa] = val
+                        val += 1
+                    results.append(alphabet[aa])
+                return results
             else:
                 return split_seq
         elif filter:
@@ -327,9 +334,10 @@ def split_modstring(sequence: str, r_pattern):
                 f"The element(s) [{not_parsable_elements}] " f"in the sequence [{sequence}] could not be parsed"
             )
 
-    pattern = sorted(alphabet, key=len, reverse=True)
+    pattern = r"[A-Z]\[UNIMOD:\d+\]"
+    alphabet_pattern = [re.escape(i) for i in sorted(alphabet, key=len, reverse=True)]
 
-    pattern = [re.escape(i) for i in pattern]
+    pattern = [pattern] + alphabet_pattern
     regex_pattern = re.compile("|".join(pattern))
     return map(split_modstring, sequences, repeat(regex_pattern))
 

From 4e38d9d693ed0d4770435f4f033afbc58c2beab5 Mon Sep 17 00:00:00 2001
From: Fabian Basso <fabian.basso@tum.de>
Date: Fri, 9 Aug 2024 12:15:13 +0000
Subject: [PATCH 2/3] Fix comment spacing in custom mod token assignment

---
 spectrum_fundamentals/mod_string.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spectrum_fundamentals/mod_string.py b/spectrum_fundamentals/mod_string.py
index 685cb24..8456124 100644
--- a/spectrum_fundamentals/mod_string.py
+++ b/spectrum_fundamentals/mod_string.py
@@ -317,7 +317,7 @@ def split_modstring(sequence: str, r_pattern):
             if translate:
                 results = []
                 for aa in split_seq:
-                    if aa not in alphabet: #does not exist
+                    if aa not in alphabet:  # does not exist
                         alphabet[aa] = val
                         val += 1
                     results.append(alphabet[aa])

From 04bec442b9400a2c5287d122395c25ff36b01a24 Mon Sep 17 00:00:00 2001
From: Fabian Basso <fabian.basso@tum.de>
Date: Fri, 9 Aug 2024 12:22:01 +0000
Subject: [PATCH 3/3] Rename UNIMOD regex variable to satisfy mypy

---
 spectrum_fundamentals/mod_string.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/spectrum_fundamentals/mod_string.py b/spectrum_fundamentals/mod_string.py
index 8456124..47a839e 100644
--- a/spectrum_fundamentals/mod_string.py
+++ b/spectrum_fundamentals/mod_string.py
@@ -334,10 +334,10 @@ def split_modstring(sequence: str, r_pattern):
                 f"The element(s) [{not_parsable_elements}] " f"in the sequence [{sequence}] could not be parsed"
             )
 
-    pattern = r"[A-Z]\[UNIMOD:\d+\]"
+    unimod_pattern = r"[A-Z]\[UNIMOD:\d+\]"
     alphabet_pattern = [re.escape(i) for i in sorted(alphabet, key=len, reverse=True)]
 
-    pattern = [pattern] + alphabet_pattern
+    pattern = [unimod_pattern] + alphabet_pattern
     regex_pattern = re.compile("|".join(pattern))
     return map(split_modstring, sequences, repeat(regex_pattern))