diff --git a/vermouth/dssp/dssp.py b/vermouth/dssp/dssp.py index 888d8c178..50ec414b6 100644 --- a/vermouth/dssp/dssp.py +++ b/vermouth/dssp/dssp.py @@ -421,8 +421,15 @@ def convert_dssp_to_martini(sequence): cg_sequence = ''.join(ss_cg[secstruct] for secstruct in sequence) wildcard_sequence = ''.join('H' if secstruct == 'H' else '.' for secstruct in cg_sequence) + # Flank the sequence with dots. Otherwise a sequence consisting of only + # H will not have a start or end. See also issue 566. + # This should not cause further issues, since '..' doesn't map to anything + wildcard_sequence = '.' + wildcard_sequence + '.' for pattern, replacement in patterns.items(): - wildcard_sequence = wildcard_sequence.replace(pattern, replacement) + while pattern in wildcard_sequence: # EXPENSIVE! :'( + wildcard_sequence = wildcard_sequence.replace(pattern, replacement) + # And remove the flanking dots again + wildcard_sequence = wildcard_sequence[1:-1] result = ''.join( wildcard if wildcard != '.' else cg for wildcard, cg in zip(wildcard_sequence, cg_sequence) diff --git a/vermouth/tests/test_dssp.py b/vermouth/tests/test_dssp.py index 89c52fe01..28474ff43 100644 --- a/vermouth/tests/test_dssp.py +++ b/vermouth/tests/test_dssp.py @@ -680,3 +680,18 @@ def test_cterm_atomnames(): vermouth.processors.CanonicalizeModifications().run_system(system) dssp_out = dssp.run_dssp(system, executable=DSSP_EXECUTABLE) assert dssp_out == list("CC") + + +@pytest.mark.parametrize('sequence, expected', [ + ('H', '3'), + ('HH', '33'), + ('CHH', 'C33'), + ('HHHHHH', '113322'), + ('EHHHHHHC', 'E113322C'), + ('HHHHHHHHH', '1111H2222'), + ('CHHHHHHHHHC', 'C1111H2222C'), + ('CHHHHEHHHHC', 'C3333E3333C'), +]) +def test_convert_dssp_to_martini(sequence, expected): + found = dssp.convert_dssp_to_martini(sequence) + assert expected == found