From 78e3aca54915f3a921aac0f7608b4ce1e5de4706 Mon Sep 17 00:00:00 2001 From: Gabe Kneisley Date: Mon, 1 Apr 2019 13:37:19 -0500 Subject: [PATCH 1/5] Update README.rst to correct usage examples parse_list and validate_list both need an iterable as their first argument rather than N positional args (as suggested by the examples). --- README.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index 44ebd093..9a36a57b 100644 --- a/README.rst +++ b/README.rst @@ -89,7 +89,7 @@ To parse an address list: >>> from flanker.addresslib import address >>> - >>> address.parse_list('foo@example.com, bar@example.com, @example.com') + >>> address.parse_list(['foo@example.com, bar@example.com, @example.com']) [foo@example.com, bar@example.com] To parse an address list as well as return a tuple containing the parsed @@ -99,7 +99,7 @@ addresses and the unparsable portions >>> from flanker.addresslib import address >>> - >>> address.parse_list('foo@example.com, bar@example.com, @example.com', as_tuple=True) + >>> address.parse_list(['foo@example.com, bar@example.com, @example.com'], as_tuple=True) [foo@example.com, bar@example.com], ['@example.com'] To parse an address list in strict mode: @@ -108,7 +108,7 @@ To parse an address list in strict mode: >>> from flanker.addresslib import address >>> - >>> address.parse_list('foo@example.com, bar@example.com, @example.com', strict=True) + >>> address.parse_list(['foo@example.com, bar@example.com, @example.com'], strict=True) [foo@example.com, bar@example.com] To validate an email address (parse as well as DNS, MX existence, and ESP grammar checks): @@ -126,7 +126,7 @@ To validate an address list: >>> from flanker.addresslib import address >>> - >>> address.validate_list('foo@mailgun.com, bar@mailgun.com, @mailgun.com', as_tuple=True) + >>> address.validate_list(['foo@mailgun.com, bar@mailgun.com, @mailgun.com'], as_tuple=True) ([foo@mailgun.com, bar@mailgun.com], ['@mailgun.com']) MIME 
Parsing From b64bc835cf1f84dfae20621841d98c06dc1b821f Mon Sep 17 00:00:00 2001 From: Alexander Schrijver Date: Mon, 9 Sep 2019 10:34:19 +0200 Subject: [PATCH 2/5] Issue #208 -- Disable generating parser.out By default the ply/yacc parser generator runs in debug mode. This creates a parser.out file in the directory where parser.py is located. The user who runs flanker does not always have access to this directory, which causes 'permission denied' errors. --- flanker/addresslib/_parser/parser.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/flanker/addresslib/_parser/parser.py b/flanker/addresslib/_parser/parser.py index d1a6593e..b503959e 100644 --- a/flanker/addresslib/_parser/parser.py +++ b/flanker/addresslib/_parser/parser.py @@ -158,27 +158,32 @@ def p_error(p): log.debug('building mailbox parser') mailbox_parser = yacc.yacc(start='mailbox', errorlog=log, - tabmodule='mailbox_parsetab') + tabmodule='mailbox_parsetab', + debug=False) log.debug('building addr_spec parser') addr_spec_parser = yacc.yacc(start='addr_spec', errorlog=log, - tabmodule='addr_spec_parsetab') + tabmodule='addr_spec_parsetab', + debug=False) log.debug('building url parser') url_parser = yacc.yacc(start='url', errorlog=log, - tabmodule='url_parsetab') + tabmodule='url_parsetab', + debug=False) log.debug('building mailbox_or_url parser') mailbox_or_url_parser = yacc.yacc(start='mailbox_or_url', errorlog=log, - tabmodule='mailbox_or_url_parsetab') + tabmodule='mailbox_or_url_parsetab', + debug=False) log.debug('building mailbox_or_url_list parser') mailbox_or_url_list_parser = yacc.yacc(start='mailbox_or_url_list', errorlog=log, - tabmodule='mailbox_or_url_list_parsetab') + tabmodule='mailbox_or_url_list_parsetab', + debug=False) # Interactive prompt for easy debugging From f87cbfd7c34f89739c0a95cba7c73353428bbdd9 Mon Sep 17 00:00:00 2001 From: "Derrick J. Wippler" Date: Fri, 13 Sep 2019 16:52:27 -0500 Subject: [PATCH 3/5] Replacing the leading '.' 
in a quoted-printable encoded mime part to avoid SMTP bug --- .gitignore | 6 + flanker/mime/message/part.py | 90 ++++++++++++++- setup.py | 2 +- tests/mime/message/headers/part_test.py | 146 ++++++++++++++++++++++++ 4 files changed, 241 insertions(+), 3 deletions(-) create mode 100644 tests/mime/message/headers/part_test.py diff --git a/.gitignore b/.gitignore index a6834947..e718e4e0 100644 --- a/.gitignore +++ b/.gitignore @@ -41,3 +41,9 @@ nosetests.xml # PLY parser.out + +# OSX +.DS_Store + +# Goland +.idea/ diff --git a/flanker/mime/message/part.py b/flanker/mime/message/part.py index 3a795ebf..008d51a3 100644 --- a/flanker/mime/message/part.py +++ b/flanker/mime/message/part.py @@ -631,7 +631,7 @@ def _encode_charset(preferred_charset, text): def _encode_transfer_encoding(encoding, body): if six.PY3: if encoding == 'quoted-printable': - body = quopri.encodestring(body, quotetabs=False) + body = fix_leading_dot(quopri.encodestring(body, quotetabs=False)) return body.decode('utf-8') if encoding == 'base64': @@ -647,13 +647,99 @@ def _encode_transfer_encoding(encoding, body): return body if encoding == 'quoted-printable': - return quopri.encodestring(body, quotetabs=False) + return fix_leading_dot(quopri.encodestring(body, quotetabs=False)) elif encoding == 'base64': return _email.encode_base64(body) else: return body +def fix_leading_dot(s): + """ + From SMTP RFC: https://tools.ietf.org/html/rfc5321#section-4.5.2 + + ----- + When a line of mail text is received by the SMTP server, it checks + the line. If the line is composed of a single period, it is + treated as the end of mail indicator. If the first character is a + period and there are other characters on the line, the first + character is deleted. + ----- + + We have observed some remote SMTP servers have an intermittent obscure bug + where the leading '.' is removed according to the above spec. Even when the '.' + is obviously within the bounds of a mime part. To combat this we convert any + leading '.' 
to a '=2E' + """ + infp = StringIO(s) + outfp = StringIO() + + # TODO(thrawn01): We could scan the entire string looking for leading '.' + # If none found return the original string. This would save memory at the + # expense of some additional processing + + while 1: + line = infp.readline() + if not line: + break + + if line[0] == '.': + line = _quote_and_cut(line) + + outfp.write(line) + + return outfp.getvalue() + + +def _quote_and_cut(ln): + """ + Quotes the leading '.', if the resulting line is longer than 76 characters + cut the line in half without dividing any quoted characters and + conforming to the quoted-printable RFC in regards to ending characters. + """ + q = quopri.quote(ln[0]) + ln = q + ln[1:] + + # If the line is under the 76 + '\n' character limit + if len(ln) <= 77: + return ln + + # Find a suitable cut point that doesn't divide a quoted character + in_quote, pos = 0, -1 + for pos, c in enumerate(ln): + + # Skip quoted (=XX) characters + if in_quote != 0: + in_quote += 1 + if in_quote <= 3: + continue + in_quote = 0 + + # If we are past the half way mark, make our cut here + if pos > len(ln)/2: + break + + # Should be a quoted character + if c == '=': + # Peak ahead, do the next 2 chars appear to be a hex values? + if quopri.ishex(ln[pos+1]) and quopri.ishex(ln[pos+2]): + in_quote = 1 + continue + + new_line = ln[:pos] + next_line = ln[pos:] + + # If new line ends with a :space or :tab + if new_line[-1:] in ' \t': + new_line = new_line[:-1] + quopri.quote(new_line[-1:]) + + # If the next line starts with a '.' 
+ if next_line[0] == '.': + next_line = quopri.quote(next_line[0]) + next_line[1:] + + return new_line + "=\n" + next_line + + def _choose_text_encoding(charset, preferred_encoding, body): if charset in ('ascii', 'iso-8859-1', 'us-ascii'): if has_long_lines(body): diff --git a/setup.py b/setup.py index 1d885770..fdc27a4e 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ ], setup(name='flanker', - version='0.9.8', + version='0.10.0', description='Mailgun Parsing Tools', long_description=open('README.rst').read(), classifiers=[ diff --git a/tests/mime/message/headers/part_test.py b/tests/mime/message/headers/part_test.py new file mode 100644 index 00000000..7d82a8b3 --- /dev/null +++ b/tests/mime/message/headers/part_test.py @@ -0,0 +1,146 @@ +# coding:utf-8 + +import flanker.mime.message.part as part +from nose.tools import eq_ + +STRINGS = ( + # Some normal strings + ('', ''), + ('hello', 'hello'), + ('''hello + there + world''', '''hello + there + world'''), + ('''hello + there + world +''', '''hello + there + world +'''), + ('\201\202\203', '=81=82=83'), + # Add some trailing MUST QUOTE strings + ('hello ', 'hello=20'), + ('hello\t', 'hello=09'), + + # Some long lines. First, a single line of 108 characters + ('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\xd8\xd9\xda\xdb\xdc\xdd\xde\xdfxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx', + '''xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx=D8=D9=DA=DB=DC=DD=DE=DFx= +xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'''), + + # A line of exactly 76 characters, no soft line break should be needed + ('yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy', + 'yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy'), + + # A line of 77 characters, forcing a soft line break at position 75, + # and a second line of exactly 2 characters (because the soft line + # break `=' sign counts against the line length limit). 
+ ('zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz', + '''zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz= +zz'''), + + # A line of 151 characters, forcing a soft line break at position 75, + # with a second line of exactly 76 characters and no trailing = + ('zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz', + '''zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz= +zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'''), + + # A string containing a hard line break, but which the first line is + # 151 characters and the second line is exactly 76 characters. This + # should leave us with three lines, the first which has a soft line + # break, and which the second and third do not. + ('''yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy +zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz''', + '''yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy= +yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy +zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'''), + + # Lines that end with space or tab should be quoted + ('yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy ', + '''yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy= +=20'''), + + # Lines that end with a partial quoted character + ('yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy=y', + '''yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy= +=3Dy'''), + + # Lines that lead with a dot '.' 
should have the dot quoted + ('zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz.z', + 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' + + '=2Ez'), + + # Lines that end with a dot '.' are not quoted + ('zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz.zz', + 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz.=\n' + + 'zz'), + + # Lines that lead with a dot '.' should have the dot quoted and cut + # if the quoted line is longer than 76 characters. + ('zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz.zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz', + 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' + + '=2Ezzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\nzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' + + 'zz'), + + # Respect quoted characters when considering leading '.' + ('zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz' + + '.\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f', + 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' + + '=2E=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=\n' + + '=7F=7F=7F'), + + # Should cut somewhere near the middle of the line + ('zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz' + + '.quick brown fox, quick brown cat, quick hot dog, quick read dog, quick white bird', + 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' + '=2Equick brown fox, quick brown cat, qui=\n' + + 'ck hot dog, quick read dog, quick whi=\n' + + 'te bird'), + + # Respect quoted character when considering where to cut + ('zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz' + + '.quick brown fox, quick brown cat\x7f\x7f\x7f\x7f\x7f, quick read dog, quick white 
bird', + 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' + + '=2Equick brown fox, quick brown cat=7F=7F=\n' + + '=7F=7F=7F, quick read dog, quick whi=\n' + + 'te bird'), + + # Avoid considering non quoted characters when cutting + ('zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz' + + '.quick brown fox, quick brown cat=20=================, quick read dog, quick white bird', + 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' + + '=2Equick brown fox, quick brown cat=3D20=\n' + + '=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=\n' + + '=3D=3D=3D=3D=3D, quick read dog, quick white bird'), + + # Should quote leading '.' if the cut results in a '.' on the next line + ('zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz' + + '.quick brown fox, quick brown cat..................... quick read dog, quick white bird', + 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' + + '=2Equick brown fox, quick brown cat.....=\n' + + '=2E............... 
quick read dog, quic=\n' + + 'k white bird'), + + # Should quote :space if the cut results in a :space at the end of the next line + ('zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz' + + '.quick brown fox, quick brown cat quick read dog, quick white bird', + 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' + + '=2Equick brown fox, quick brown cat =20=\n' + + ' quick read dog, quic=\n' + + 'k white bird'), + # Should quote :tab if the cut results in a :tab at the end of the next line + ('zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz' + + '.quick brown fox, quick brown cat \t quick read dog, quick white bird', + 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' + + '=2Equick brown fox, quick brown cat =09=\n' + + ' quick read dog, quic=\n' + + 'k white bird'), +) + + +def test_encode(): + for p, e in STRINGS: + enc = part._encode_transfer_encoding('quoted-printable', p) + eq_(enc, e) + From 60442160a4ccc4f67922f0fb4543d7c82b8057c7 Mon Sep 17 00:00:00 2001 From: "Derrick J. Wippler" Date: Mon, 16 Sep 2019 13:41:20 -0500 Subject: [PATCH 4/5] Now python 3 compatible --- CHANGELOG.md | 5 ++ flanker/mime/message/part.py | 43 +++++++++++------ tests/mime/message/headers/part_test.py | 62 ++++++++++++------------- 3 files changed, 64 insertions(+), 46 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6b1b2af7..77294e96 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## [0.9.9] - 2019-09-25 +### Changed +- Replace the leading '.' 
in a quoted-printable encoded mime part to avoid + obscure SMTP bug + ## [0.9.0] - 2018-05-16 ### Changed - Support for Python 3 was added with preserving the Python 2 behavior in mind. diff --git a/flanker/mime/message/part.py b/flanker/mime/message/part.py index 008d51a3..d35c934c 100644 --- a/flanker/mime/message/part.py +++ b/flanker/mime/message/part.py @@ -631,7 +631,8 @@ def _encode_charset(preferred_charset, text): def _encode_transfer_encoding(encoding, body): if six.PY3: if encoding == 'quoted-printable': - body = fix_leading_dot(quopri.encodestring(body, quotetabs=False)) + body = quopri.encodestring(body, quotetabs=False) + body = fix_leading_dot(body) return body.decode('utf-8') if encoding == 'base64': @@ -647,7 +648,8 @@ def _encode_transfer_encoding(encoding, body): return body if encoding == 'quoted-printable': - return fix_leading_dot(quopri.encodestring(body, quotetabs=False)) + body = quopri.encodestring(body, quotetabs=False) + return fix_leading_dot(body) elif encoding == 'base64': return _email.encode_base64(body) else: @@ -668,22 +670,27 @@ def fix_leading_dot(s): We have observed some remote SMTP servers have an intermittent obscure bug where the leading '.' is removed according to the above spec. Even when the '.' - is obviously within the bounds of a mime part. To combat this we convert any - leading '.' to a '=2E' + is obviously within the bounds of a mime part, and with our sending SMTP + clients dot stuffing the line. To combat this we convert any leading '.' + to a '=2E'. """ - infp = StringIO(s) - outfp = StringIO() + infp = six.BytesIO(s) + outfp = six.BytesIO() # TODO(thrawn01): We could scan the entire string looking for leading '.' # If none found return the original string. This would save memory at the # expense of some additional processing + dot = b"." 
+ if six.PY3: + dot = ord('.') + while 1: line = infp.readline() if not line: break - if line[0] == '.': + if line[0] == dot: line = _quote_and_cut(line) outfp.write(line) @@ -697,8 +704,7 @@ def _quote_and_cut(ln): cut the line in half without dividing any quoted characters and conforming to the quoted-printable RFC in regards to ending characters. """ - q = quopri.quote(ln[0]) - ln = q + ln[1:] + ln = quopri.quote(ln[0:1]) + ln[1:] # If the line is under the 76 + '\n' character limit if len(ln) <= 77: @@ -719,10 +725,13 @@ def _quote_and_cut(ln): if pos > len(ln)/2: break + if six.PY3: + c = bytes((c,)) + # Should be a quoted character - if c == '=': + if c == b'=': # Peak ahead, do the next 2 chars appear to be a hex values? - if quopri.ishex(ln[pos+1]) and quopri.ishex(ln[pos+2]): + if quopri.ishex(ln[pos+1:pos+3]): in_quote = 1 continue @@ -730,14 +739,18 @@ def _quote_and_cut(ln): next_line = ln[pos:] # If new line ends with a :space or :tab - if new_line[-1:] in ' \t': + if new_line[-1:] in b' \t': new_line = new_line[:-1] + quopri.quote(new_line[-1:]) + dot = b'.' + if six.PY3: + dot = ord('.') + # If the next line starts with a '.' 
- if next_line[0] == '.': - next_line = quopri.quote(next_line[0]) + next_line[1:] + if next_line[0] == dot: + next_line = quopri.quote(next_line[0:1]) + next_line[1:] - return new_line + "=\n" + next_line + return new_line + b"=\n" + next_line def _choose_text_encoding(charset, preferred_encoding, body): diff --git a/tests/mime/message/headers/part_test.py b/tests/mime/message/headers/part_test.py index 7d82a8b3..8ffc9fbd 100644 --- a/tests/mime/message/headers/part_test.py +++ b/tests/mime/message/headers/part_test.py @@ -5,44 +5,44 @@ STRINGS = ( # Some normal strings - ('', ''), - ('hello', 'hello'), - ('''hello + (b'', ''), + (b'hello', 'hello'), + (b'''hello there world''', '''hello there world'''), - ('''hello + (b'''hello there world ''', '''hello there world '''), - ('\201\202\203', '=81=82=83'), + (b'\201\202\203', '=81=82=83'), # Add some trailing MUST QUOTE strings - ('hello ', 'hello=20'), - ('hello\t', 'hello=09'), + (b'hello ', 'hello=20'), + (b'hello\t', 'hello=09'), # Some long lines. First, a single line of 108 characters - ('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\xd8\xd9\xda\xdb\xdc\xdd\xde\xdfxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx', + (b'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\xd8\xd9\xda\xdb\xdc\xdd\xde\xdfxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx', '''xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx=D8=D9=DA=DB=DC=DD=DE=DFx= xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'''), # A line of exactly 76 characters, no soft line break should be needed - ('yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy', + (b'yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy', 'yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy'), # A line of 77 characters, forcing a soft line break at position 75, # and a second line of exactly 2 characters (because the soft line # break `=' sign counts against the line length limit). 
- ('zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz', + (b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz', '''zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz= zz'''), # A line of 151 characters, forcing a soft line break at position 75, # with a second line of exactly 76 characters and no trailing = - ('zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz', + (b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz', '''zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz= zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'''), @@ -50,88 +50,88 @@ # 151 characters and the second line is exactly 76 characters. This # should leave us with three lines, the first which has a soft line # break, and which the second and third do not. 
- ('''yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy + (b'''yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz''', '''yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy= yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'''), # Lines that end with space or tab should be quoted - ('yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy ', + (b'yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy ', '''yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy= =20'''), # Lines that end with a partial quoted character - ('yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy=y', + (b'yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy=y', '''yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy= =3Dy'''), # Lines that lead with a dot '.' should have the dot quoted - ('zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz.z', + (b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz.z', 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' + '=2Ez'), # Lines that end with a dot '.' are not quoted - ('zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz.zz', + (b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz.zz', 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz.=\n' + 'zz'), # Lines that lead with a dot '.' should have the dot quoted and cut # if the quoted line is longer than 76 characters. 
- ('zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz.zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz', + (b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz.zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz', 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' + '=2Ezzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\nzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' + 'zz'), # Respect quoted characters when considering leading '.' - ('zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz' + - '.\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f', + (b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz' + + b'.\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f', 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' + '=2E=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=\n' + '=7F=7F=7F'), # Should cut somewhere near the middle of the line - ('zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz' + - '.quick brown fox, quick brown cat, quick hot dog, quick read dog, quick white bird', + (b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz' + + b'.quick brown fox, quick brown cat, quick hot dog, quick read dog, quick white bird', 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' '=2Equick brown fox, quick brown cat, qui=\n' + 'ck hot dog, quick read dog, quick whi=\n' + 'te bird'), # Respect quoted character when considering where to cut - ('zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz' + - '.quick brown fox, quick brown cat\x7f\x7f\x7f\x7f\x7f, quick read dog, quick white bird', + 
(b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz' + + b'.quick brown fox, quick brown cat\x7f\x7f\x7f\x7f\x7f, quick read dog, quick white bird', 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' + '=2Equick brown fox, quick brown cat=7F=7F=\n' + '=7F=7F=7F, quick read dog, quick whi=\n' + 'te bird'), # Avoid considering non quoted characters when cutting - ('zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz' + - '.quick brown fox, quick brown cat=20=================, quick read dog, quick white bird', + (b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz' + + b'.quick brown fox, quick brown cat=20=================, quick read dog, quick white bird', 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' + '=2Equick brown fox, quick brown cat=3D20=\n' + '=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=\n' + '=3D=3D=3D=3D=3D, quick read dog, quick white bird'), # Should quote leading '.' if the cut results in a '.' on the next line - ('zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz' + - '.quick brown fox, quick brown cat..................... quick read dog, quick white bird', + (b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz' + + b'.quick brown fox, quick brown cat..................... quick read dog, quick white bird', 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' + '=2Equick brown fox, quick brown cat.....=\n' + '=2E............... 
quick read dog, quic=\n' + 'k white bird'), # Should quote :space if the cut results in a :space at the end of the next line - ('zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz' + - '.quick brown fox, quick brown cat quick read dog, quick white bird', + (b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz' + + b'.quick brown fox, quick brown cat quick read dog, quick white bird', 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' + '=2Equick brown fox, quick brown cat =20=\n' + ' quick read dog, quic=\n' + 'k white bird'), # Should quote :tab if the cut results in a :tab at the end of the next line - ('zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz' + - '.quick brown fox, quick brown cat \t quick read dog, quick white bird', + (b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz' + + b'.quick brown fox, quick brown cat \t quick read dog, quick white bird', 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' + '=2Equick brown fox, quick brown cat =09=\n' + ' quick read dog, quic=\n' + From 2b98a64d3d3a170f4127811e6eaff98117b39b58 Mon Sep 17 00:00:00 2001 From: "Derrick J. Wippler" Date: Tue, 22 Oct 2019 15:02:52 -0500 Subject: [PATCH 5/5] quoted-printable fix_leading_dot() no longer cuts the line in the middle of a quoted character --- flanker/mime/message/part.py | 4 ++-- tests/mime/message/headers/part_test.py | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/flanker/mime/message/part.py b/flanker/mime/message/part.py index d35c934c..c69b1da3 100644 --- a/flanker/mime/message/part.py +++ b/flanker/mime/message/part.py @@ -730,8 +730,8 @@ def _quote_and_cut(ln): # Should be a quoted character if c == b'=': - # Peak ahead, do the next 2 chars appear to be a hex values? - if quopri.ishex(ln[pos+1:pos+3]): + # Peak ahead, does the next char appear to be a hex value? 
+ if quopri.ishex(ln[pos+1:pos+2]): in_quote = 1 continue diff --git a/tests/mime/message/headers/part_test.py b/tests/mime/message/headers/part_test.py index 8ffc9fbd..c375e373 100644 --- a/tests/mime/message/headers/part_test.py +++ b/tests/mime/message/headers/part_test.py @@ -136,6 +136,14 @@ '=2Equick brown fox, quick brown cat =09=\n' + ' quick read dog, quic=\n' + 'k white bird'), + # Should avoid cutting in the middle of multiple quoted characters near the cut point + (b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz' + + b'.foo. \xF0\x9F\x99\x82 also there is \xF0\x9F\x99\x82 more in \xF0\x9F\x99\x82 ' + + b'this \xF0\x9F\x99\x82 message', + 'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' + + '=2Efoo. =F0=9F=99=82 also there is =F0=9F=\n' + + '=99=82 more in =F0=9F=99=82 this =F0=\n' + '=9F=99=82 message'), )