From 8710a79efb86096aeed5482ed4617256daa9468f Mon Sep 17 00:00:00 2001 From: WassimG Date: Thu, 19 Sep 2024 09:42:52 +0000 Subject: [PATCH] Added flag for different number of mods and unit tests for the function --- spectrum_fundamentals/mod_string.py | 15 +++++--- tests/unit_tests/test_mod_string.py | 54 +++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 4 deletions(-) diff --git a/spectrum_fundamentals/mod_string.py b/spectrum_fundamentals/mod_string.py index cbbe4f6..9715859 100644 --- a/spectrum_fundamentals/mod_string.py +++ b/spectrum_fundamentals/mod_string.py @@ -351,22 +351,29 @@ def get_all_tokens(sequences: List[str]) -> Set[str]: return tokens -def add_permutations(modified_sequence: str, unimod_id: int, residues: List[str]): +def add_permutations(modified_sequence: str, unimod_id: int, residues: List[str], allow_one_less_modification: bool = False): """ Generate different peptide sequences with moving the modification to all possible residues. :param modified_sequence: Peptide sequence :param unimod_id: modification unimod id to be used for generating different permutations. :param residues: possible amino acids where this mod can exist + :param allow_one_less_modification: Flag to indicate if permutations with one less modification should be generated to check + whether the modification mass was mistakenly picked as the monoisotopic peak. Mainly used for Citrullination. 
:return: list of possible sequence permutations """ - sequence = modified_sequence.replace("[UNIMOD:" + str(unimod_id) + "]", "") - modifications = len(re.findall("UNIMOD:" + str(unimod_id), modified_sequence)) + sequence = modified_sequence.replace("[unimod:" + str(unimod_id) + "]", "") + modifications = len(re.findall("unimod:" + str(unimod_id), modified_sequence)) if modifications == 0: - return modified_sequence + return [modified_sequence] possible_positions = [i for i, ltr in enumerate(sequence) if ltr in residues] possible_positions.sort(reverse=True) all_combinations = [list(each_permutation) for each_permutation in combinations(possible_positions, modifications)] + + if allow_one_less_modification: + all_combinations_1 = [list(each_permutation) for each_permutation in combinations(possible_positions, modifications-1)] + all_combinations = all_combinations + all_combinations_1 + modified_sequences_comb = [] for comb in all_combinations: modified_sequence = sequence diff --git a/tests/unit_tests/test_mod_string.py b/tests/unit_tests/test_mod_string.py index d03c736..1feb365 100644 --- a/tests/unit_tests/test_mod_string.py +++ b/tests/unit_tests/test_mod_string.py @@ -331,3 +331,57 @@ def test_custom_to_internal_custom_mods(self): custom_mod = {"M[35]": "[UNIMOD:35]"} mods = {**fixed_mods, **custom_mod} self.assertEqual(mod.custom_to_internal(["ABCDEFGHM[35]"], mods), ["ABC[UNIMOD:4]DEFGHM[UNIMOD:35]"]) + +class TestAddPermutations(unittest.TestCase): + """Class to test add permutations.""" + def test_no_modifications(self): + """Test case where no modifications are present in the sequence""" + modified_sequence = "PEPTIDE" + unimod_id = 123 + residues = ['P', 'E'] + result = mod.add_permutations(modified_sequence, unimod_id, residues) + self.assertEqual(result, ["PEPTIDE"]) + + def test_single_modification(self): + """Test case with a single modification""" + modified_sequence = "P[unimod:123]EPTIDE" + unimod_id = 123 + residues = ['P', 'E'] + result = 
mod.add_permutations(modified_sequence, unimod_id, residues) + expected_result = ['PEPTIDE[unimod:123]', + 'PEP[unimod:123]TIDE', + 'PE[unimod:123]PTIDE', + 'P[unimod:123]EPTIDE'] + self.assertEqual(result, expected_result) + + def test_multiple_modifications(self): + """Test case with multiple modifications""" + modified_sequence = "PEP[unimod:123]TIDE[unimod:123]" + unimod_id = 123 + residues = ['P', 'E'] + result = mod.add_permutations(modified_sequence, unimod_id, residues) + expected_result = ['PEP[unimod:123]TIDE[unimod:123]', + 'PE[unimod:123]PTIDE[unimod:123]', + 'P[unimod:123]EPTIDE[unimod:123]', + 'PE[unimod:123]P[unimod:123]TIDE', + 'P[unimod:123]EP[unimod:123]TIDE', + 'P[unimod:123]E[unimod:123]PTIDE'] + self.assertEqual(result, expected_result) + + def test_check_monoisotopic_peak_enabled(self): + """Test case with allow_one_less_modification flag enabled""" + modified_sequence = "PEP[unimod:123]TID[unimod:123]E" + unimod_id = 123 + residues = ['P', 'E'] + result = mod.add_permutations(modified_sequence, unimod_id, residues, allow_one_less_modification=True) + expected_result = ['PEP[unimod:123]TIDE[unimod:123]', + 'PE[unimod:123]PTIDE[unimod:123]', + 'P[unimod:123]EPTIDE[unimod:123]', + 'PE[unimod:123]P[unimod:123]TIDE', + 'P[unimod:123]EP[unimod:123]TIDE', + 'P[unimod:123]E[unimod:123]PTIDE', + 'PEPTIDE[unimod:123]', + 'PEP[unimod:123]TIDE', + 'PE[unimod:123]PTIDE', + 'P[unimod:123]EPTIDE'] + self.assertEqual(result, expected_result) \ No newline at end of file