Skip to content

Commit

Permalink
Merge pull request #233 from mailgun/thrawn/develop
Browse files Browse the repository at this point in the history
PIP-631: quoted-printable encoding should convert a `.` at the start of a line to `=2E`
  • Loading branch information
thrawn01 authored Sep 25, 2019
2 parents 6ca60e9 + 6044216 commit 12532b3
Show file tree
Hide file tree
Showing 5 changed files with 258 additions and 2 deletions.
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,9 @@ nosetests.xml

# PLY
parser.out

# OSX
.DS_Store

# Goland
.idea/
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).

## [0.9.9] - 2019-09-25
### Changed
- Replace the leading '.' of any line in a quoted-printable encoded MIME part
with '=2E' to avoid an obscure SMTP bug

## [0.9.0] - 2018-05-16
### Changed
- Support for Python 3 was added with preserving the Python 2 behavior in mind.
Expand Down
101 changes: 100 additions & 1 deletion flanker/mime/message/part.py
Original file line number Diff line number Diff line change
Expand Up @@ -632,6 +632,7 @@ def _encode_transfer_encoding(encoding, body):
if six.PY3:
if encoding == 'quoted-printable':
body = quopri.encodestring(body, quotetabs=False)
body = fix_leading_dot(body)
return body.decode('utf-8')

if encoding == 'base64':
Expand All @@ -647,13 +648,111 @@ def _encode_transfer_encoding(encoding, body):
return body

if encoding == 'quoted-printable':
return quopri.encodestring(body, quotetabs=False)
body = quopri.encodestring(body, quotetabs=False)
return fix_leading_dot(body)
elif encoding == 'base64':
return _email.encode_base64(body)
else:
return body


def fix_leading_dot(s):
    """
    Escape every line-leading '.' in quoted-printable encoded bytes as '=2E'.

    From SMTP RFC: https://tools.ietf.org/html/rfc5321#section-4.5.2
    -----
    When a line of mail text is received by the SMTP server, it checks
    the line. If the line is composed of a single period, it is
    treated as the end of mail indicator. If the first character is a
    period and there are other characters on the line, the first
    character is deleted.
    -----
    We have observed some remote SMTP servers have an intermittent obscure bug
    where the leading '.' is removed according to the above spec. Even when the '.'
    is obviously within the bounds of a mime part, and with our sending SMTP
    clients dot stuffing the line. To combat this we convert any leading '.'
    to a '=2E'.

    `s` is the quoted-printable encoded body as bytes. Lines are delimited by
    LF only (a CR, if present, stays attached to the end of its line). The
    return value is bytes; when no line starts with a '.', `s` itself is
    returned without being copied.
    """
    # Fast path: a leading '.' can only sit at offset 0 or right after an LF.
    # Both checks run at C speed, so the common "nothing to fix" case costs
    # one scan and no per-line allocations.
    if not s.startswith(b'.') and b'\n.' not in s:
        return s

    pieces = s.split(b'\n')

    # Every piece except the last one was terminated by an LF. Put it back so
    # each line reaches _quote_and_cut with its terminator, which that helper
    # accounts for in its length check.
    lines = [piece + b'\n' for piece in pieces[:-1]]
    if pieces[-1]:
        # Final line without a trailing LF
        lines.append(pieces[-1])

    return b''.join(
        _quote_and_cut(line) if line.startswith(b'.') else line
        for line in lines
    )


def _quote_and_cut(ln):
"""
Quotes the leading '.', if the resulting line is longer than 76 characters
cut the line in half without dividing any quoted characters and
conforming to the quoted-printable RFC in regards to ending characters.
"""
ln = quopri.quote(ln[0:1]) + ln[1:]

# If the line is under the 76 + '\n' character limit
if len(ln) <= 77:
return ln

# Find a suitable cut point that doesn't divide a quoted character
in_quote, pos = 0, -1
for pos, c in enumerate(ln):

# Skip quoted (=XX) characters
if in_quote != 0:
in_quote += 1
if in_quote <= 3:
continue
in_quote = 0

# If we are past the half way mark, make our cut here
if pos > len(ln)/2:
break

if six.PY3:
c = bytes((c,))

# Should be a quoted character
if c == b'=':
# Peak ahead, do the next 2 chars appear to be a hex values?
if quopri.ishex(ln[pos+1:pos+3]):
in_quote = 1
continue

new_line = ln[:pos]
next_line = ln[pos:]

# If new line ends with a :space or :tab
if new_line[-1:] in b' \t':
new_line = new_line[:-1] + quopri.quote(new_line[-1:])

dot = b'.'
if six.PY3:
dot = ord('.')

# If the next line starts with a '.'
if next_line[0] == dot:
next_line = quopri.quote(next_line[0:1]) + next_line[1:]

return new_line + b"=\n" + next_line


def _choose_text_encoding(charset, preferred_encoding, body):
if charset in ('ascii', 'iso-8859-1', 'us-ascii'):
if has_long_lines(body):
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
],

setup(name='flanker',
version='0.9.8',
version='0.10.0',
description='Mailgun Parsing Tools',
long_description=open('README.rst').read(),
classifiers=[
Expand Down
146 changes: 146 additions & 0 deletions tests/mime/message/headers/part_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
# coding:utf-8

import flanker.mime.message.part as part
from nose.tools import eq_

STRINGS = (
# Some normal strings
(b'', ''),
(b'hello', 'hello'),
(b'''hello
there
world''', '''hello
there
world'''),
(b'''hello
there
world
''', '''hello
there
world
'''),
(b'\201\202\203', '=81=82=83'),
# Add some trailing MUST QUOTE strings
(b'hello ', 'hello=20'),
(b'hello\t', 'hello=09'),

# Some long lines. First, a single line of 108 characters
(b'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\xd8\xd9\xda\xdb\xdc\xdd\xde\xdfxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx',
'''xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx=D8=D9=DA=DB=DC=DD=DE=DFx=
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'''),

# A line of exactly 76 characters, no soft line break should be needed
(b'yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy',
'yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy'),

# A line of 77 characters, forcing a soft line break at position 75,
# and a second line of exactly 2 characters (because the soft line
# break `=' sign counts against the line length limit).
(b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz',
'''zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=
zz'''),

# A line of 151 characters, forcing a soft line break at position 75,
# with a second line of exactly 76 characters and no trailing =
(b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz',
'''zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=
zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'''),

# A string containing a hard line break, but which the first line is
# 151 characters and the second line is exactly 76 characters. This
# should leave us with three lines, the first which has a soft line
# break, and which the second and third do not.
(b'''yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz''',
'''yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy=
yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'''),

# Lines that end with space or tab should be quoted
(b'yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy ',
'''yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy=
=20'''),

# Lines that end with a partial quoted character
(b'yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy=y',
'''yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy=
=3Dy'''),

# Lines that lead with a dot '.' should have the dot quoted
(b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz.z',
'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' +
'=2Ez'),

# Lines that end with a dot '.' are not quoted
(b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz.zz',
'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz.=\n' +
'zz'),

# Lines that lead with a dot '.' should have the dot quoted and cut
# if the quoted line is longer than 76 characters.
(b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz.zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz',
'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' +
'=2Ezzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\nzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' +
'zz'),

# Respect quoted characters when considering leading '.'
(b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz' +
b'.\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f',
'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' +
'=2E=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=\n' +
'=7F=7F=7F'),

# Should cut somewhere near the middle of the line
(b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz' +
b'.quick brown fox, quick brown cat, quick hot dog, quick read dog, quick white bird',
'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n'
'=2Equick brown fox, quick brown cat, qui=\n' +
'ck hot dog, quick read dog, quick whi=\n'
+ 'te bird'),

# Respect quoted character when considering where to cut
(b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz' +
b'.quick brown fox, quick brown cat\x7f\x7f\x7f\x7f\x7f, quick read dog, quick white bird',
'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' +
'=2Equick brown fox, quick brown cat=7F=7F=\n' +
'=7F=7F=7F, quick read dog, quick whi=\n' +
'te bird'),

# Avoid considering non quoted characters when cutting
(b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz' +
b'.quick brown fox, quick brown cat=20=================, quick read dog, quick white bird',
'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' +
'=2Equick brown fox, quick brown cat=3D20=\n' +
'=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=\n' +
'=3D=3D=3D=3D=3D, quick read dog, quick white bird'),

# Should quote leading '.' if the cut results in a '.' on the next line
(b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz' +
b'.quick brown fox, quick brown cat..................... quick read dog, quick white bird',
'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' +
'=2Equick brown fox, quick brown cat.....=\n' +
'=2E............... quick read dog, quic=\n' +
'k white bird'),

# Should quote :space if the cut results in a :space at the end of the next line
(b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz' +
b'.quick brown fox, quick brown cat quick read dog, quick white bird',
'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' +
'=2Equick brown fox, quick brown cat =20=\n' +
' quick read dog, quic=\n' +
'k white bird'),
# Should quote :tab if the cut results in a :tab at the end of the next line
(b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz' +
b'.quick brown fox, quick brown cat \t quick read dog, quick white bird',
'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' +
'=2Equick brown fox, quick brown cat =09=\n' +
' quick read dog, quic=\n' +
'k white bird'),
)


def test_encode():
    """
    Check that every (raw bytes, expected text) pair in STRINGS encodes to the
    expected quoted-printable output.
    """
    for raw, expected in STRINGS:
        eq_(part._encode_transfer_encoding('quoted-printable', raw), expected)

0 comments on commit 12532b3

Please sign in to comment.