Skip to content

Commit

Permalink
Merge branch 'master' into faster-unfold
Browse files Browse the repository at this point in the history
  • Loading branch information
thrawn01 authored Oct 24, 2019
2 parents 91c583e + e689d48 commit ba8a898
Show file tree
Hide file tree
Showing 7 changed files with 280 additions and 11 deletions.
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,9 @@ nosetests.xml

# PLY
parser.out

# OSX
.DS_Store

# Goland
.idea/
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).

## [0.9.9] - 2019-09-25
### Changed
- Replace the leading '.' in a quoted-printable encoded MIME part to avoid
an obscure SMTP bug

## [0.9.0] - 2018-05-16
### Changed
- Support for Python 3 was added with preserving the Python 2 behavior in mind.
Expand Down
8 changes: 4 additions & 4 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ To parse an address list:
>>> from flanker.addresslib import address
>>>
>>> address.parse_list('foo@example.com, bar@example.com, @example.com')
>>> address.parse_list(['foo@example.com, bar@example.com, @example.com'])
[foo@example.com, bar@example.com]
To parse an address list as well as return a tuple containing the parsed
Expand All @@ -99,7 +99,7 @@ addresses and the unparsable portions
>>> from flanker.addresslib import address
>>>
>>> address.parse_list('foo@example.com, bar@example.com, @example.com', as_tuple=True)
>>> address.parse_list(['foo@example.com, bar@example.com, @example.com'], as_tuple=True)
[foo@example.com, bar@example.com], ['@example.com']
To parse an address list in strict mode:
Expand All @@ -108,7 +108,7 @@ To parse an address list in strict mode:
>>> from flanker.addresslib import address
>>>
>>> address.parse_list('foo@example.com, bar@example.com, @example.com', strict=True)
>>> address.parse_list(['foo@example.com, bar@example.com, @example.com'], strict=True)
[foo@example.com, bar@example.com]
To validate an email address (parse as well as DNS, MX existence, and ESP grammar checks):
Expand All @@ -126,7 +126,7 @@ To validate an address list:
>>> from flanker.addresslib import address
>>>
>>> address.validate_list('foo@mailgun.com, bar@mailgun.com, @mailgun.com', as_tuple=True)
>>> address.validate_list(['foo@mailgun.com, bar@mailgun.com, @mailgun.com'], as_tuple=True)
([foo@mailgun.com, bar@mailgun.com], ['@mailgun.com'])
MIME Parsing
Expand Down
15 changes: 10 additions & 5 deletions flanker/addresslib/_parser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,27 +158,32 @@ def p_error(p):
log.debug('building mailbox parser')
mailbox_parser = yacc.yacc(start='mailbox',
errorlog=log,
tabmodule='mailbox_parsetab')
tabmodule='mailbox_parsetab',
debug=False)

log.debug('building addr_spec parser')
addr_spec_parser = yacc.yacc(start='addr_spec',
errorlog=log,
tabmodule='addr_spec_parsetab')
tabmodule='addr_spec_parsetab',
debug=False)

log.debug('building url parser')
url_parser = yacc.yacc(start='url',
errorlog=log,
tabmodule='url_parsetab')
tabmodule='url_parsetab',
debug=False)

log.debug('building mailbox_or_url parser')
mailbox_or_url_parser = yacc.yacc(start='mailbox_or_url',
errorlog=log,
tabmodule='mailbox_or_url_parsetab')
tabmodule='mailbox_or_url_parsetab',
debug=False)

log.debug('building mailbox_or_url_list parser')
mailbox_or_url_list_parser = yacc.yacc(start='mailbox_or_url_list',
errorlog=log,
tabmodule='mailbox_or_url_list_parsetab')
tabmodule='mailbox_or_url_list_parsetab',
debug=False)


# Interactive prompt for easy debugging
Expand Down
101 changes: 100 additions & 1 deletion flanker/mime/message/part.py
Original file line number Diff line number Diff line change
Expand Up @@ -632,6 +632,7 @@ def _encode_transfer_encoding(encoding, body):
if six.PY3:
if encoding == 'quoted-printable':
body = quopri.encodestring(body, quotetabs=False)
body = fix_leading_dot(body)
return body.decode('utf-8')

if encoding == 'base64':
Expand All @@ -647,13 +648,111 @@ def _encode_transfer_encoding(encoding, body):
return body

if encoding == 'quoted-printable':
return quopri.encodestring(body, quotetabs=False)
body = quopri.encodestring(body, quotetabs=False)
return fix_leading_dot(body)
elif encoding == 'base64':
return _email.encode_base64(body)
else:
return body


def fix_leading_dot(s):
    """
    Replace every line-leading '.' in quoted-printable data with '=2E'.

    From SMTP RFC: https://tools.ietf.org/html/rfc5321#section-4.5.2
    -----
    When a line of mail text is received by the SMTP server, it checks
    the line. If the line is composed of a single period, it is
    treated as the end of mail indicator. If the first character is a
    period and there are other characters on the line, the first
    character is deleted.
    -----
    We have observed that some remote SMTP servers have an intermittent,
    obscure bug where the leading '.' is removed according to the above spec,
    even when the '.' is obviously within the bounds of a mime part and our
    sending SMTP clients dot-stuff the line. To combat this we convert any
    leading '.' to a '=2E'.

    :param s: quoted-printable encoded bytes, split into lines on '\\n'.
    :return: the same bytes with each line-leading '.' quoted; lines that
        become too long after quoting are soft-wrapped by _quote_and_cut.
        When no line starts with '.', `s` itself is returned.
    """
    # Fast path: a leading dot can only sit at the very start of the data or
    # directly after a newline. One scan avoids copying the whole payload
    # line by line in the (overwhelmingly common) case that there is none.
    if not s.startswith(b'.') and b'\n.' not in s:
        return s

    infp = six.BytesIO(s)
    fixed = []

    # readline() yields b'' only at end of input, so that is the sentinel.
    for line in iter(infp.readline, b''):
        # startswith() compares bytes to bytes on both Python 2 and 3,
        # unlike indexing, which yields an int on Python 3.
        if line.startswith(b'.'):
            line = _quote_and_cut(line)
        fixed.append(line)

    return b''.join(fixed)


def _quote_and_cut(ln):
"""
Quotes the leading '.', if the resulting line is longer than 76 characters
cut the line in half without dividing any quoted characters and
conforming to the quoted-printable RFC in regards to ending characters.
"""
ln = quopri.quote(ln[0:1]) + ln[1:]

# If the line is under the 76 + '\n' character limit
if len(ln) <= 77:
return ln

# Find a suitable cut point that doesn't divide a quoted character
in_quote, pos = 0, -1
for pos, c in enumerate(ln):

# Skip quoted (=XX) characters
if in_quote != 0:
in_quote += 1
if in_quote <= 3:
continue
in_quote = 0

# If we are past the half way mark, make our cut here
if pos > len(ln)/2:
break

if six.PY3:
c = bytes((c,))

# Should be a quoted character
if c == b'=':
# Peak ahead, does the next char appear to be a hex value?
if quopri.ishex(ln[pos+1:pos+2]):
in_quote = 1
continue

new_line = ln[:pos]
next_line = ln[pos:]

# If new line ends with a :space or :tab
if new_line[-1:] in b' \t':
new_line = new_line[:-1] + quopri.quote(new_line[-1:])

dot = b'.'
if six.PY3:
dot = ord('.')

# If the next line starts with a '.'
if next_line[0] == dot:
next_line = quopri.quote(next_line[0:1]) + next_line[1:]

return new_line + b"=\n" + next_line


def _choose_text_encoding(charset, preferred_encoding, body):
if charset in ('ascii', 'iso-8859-1', 'us-ascii'):
if has_long_lines(body):
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
],

setup(name='flanker',
version='0.9.8',
version='0.10.0',
description='Mailgun Parsing Tools',
long_description=open('README.rst').read(),
classifiers=[
Expand Down
154 changes: 154 additions & 0 deletions tests/mime/message/headers/part_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
# coding:utf-8

import flanker.mime.message.part as part
from nose.tools import eq_

STRINGS = (
# Some normal strings
(b'', ''),
(b'hello', 'hello'),
(b'''hello
there
world''', '''hello
there
world'''),
(b'''hello
there
world
''', '''hello
there
world
'''),
(b'\201\202\203', '=81=82=83'),
# Add some trailing MUST QUOTE strings
(b'hello ', 'hello=20'),
(b'hello\t', 'hello=09'),

# Some long lines. First, a single line of 108 characters
(b'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\xd8\xd9\xda\xdb\xdc\xdd\xde\xdfxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx',
'''xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx=D8=D9=DA=DB=DC=DD=DE=DFx=
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'''),

# A line of exactly 76 characters, no soft line break should be needed
(b'yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy',
'yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy'),

# A line of 77 characters, forcing a soft line break at position 75,
# and a second line of exactly 2 characters (because the soft line
# break `=' sign counts against the line length limit).
(b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz',
'''zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=
zz'''),

# A line of 151 characters, forcing a soft line break at position 75,
# with a second line of exactly 76 characters and no trailing =
(b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz',
'''zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=
zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'''),

# A string containing a hard line break, but which the first line is
# 151 characters and the second line is exactly 76 characters. This
# should leave us with three lines, the first which has a soft line
# break, and which the second and third do not.
(b'''yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz''',
'''yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy=
yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'''),

# Lines that end with space or tab should be quoted
(b'yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy ',
'''yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy=
=20'''),

# Lines that end with a partial quoted character
(b'yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy=y',
'''yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy=
=3Dy'''),

# Lines that lead with a dot '.' should have the dot quoted
(b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz.z',
'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' +
'=2Ez'),

# Lines that end with a dot '.' are not quoted
(b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz.zz',
'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz.=\n' +
'zz'),

# Lines that lead with a dot '.' should have the dot quoted and cut
# if the quoted line is longer than 76 characters.
(b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz.zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz',
'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' +
'=2Ezzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\nzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' +
'zz'),

# Respect quoted characters when considering leading '.'
(b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz' +
b'.\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f',
'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' +
'=2E=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=7F=\n' +
'=7F=7F=7F'),

# Should cut somewhere near the middle of the line
(b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz' +
b'.quick brown fox, quick brown cat, quick hot dog, quick read dog, quick white bird',
'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n'
'=2Equick brown fox, quick brown cat, qui=\n' +
'ck hot dog, quick read dog, quick whi=\n'
+ 'te bird'),

# Respect quoted character when considering where to cut
(b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz' +
b'.quick brown fox, quick brown cat\x7f\x7f\x7f\x7f\x7f, quick read dog, quick white bird',
'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' +
'=2Equick brown fox, quick brown cat=7F=7F=\n' +
'=7F=7F=7F, quick read dog, quick whi=\n' +
'te bird'),

# Avoid considering non quoted characters when cutting
(b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz' +
b'.quick brown fox, quick brown cat=20=================, quick read dog, quick white bird',
'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' +
'=2Equick brown fox, quick brown cat=3D20=\n' +
'=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=\n' +
'=3D=3D=3D=3D=3D, quick read dog, quick white bird'),

# Should quote leading '.' if the cut results in a '.' on the next line
(b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz' +
b'.quick brown fox, quick brown cat..................... quick read dog, quick white bird',
'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' +
'=2Equick brown fox, quick brown cat.....=\n' +
'=2E............... quick read dog, quic=\n' +
'k white bird'),

# Should quote :space if the cut results in a :space at the end of the next line
(b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz' +
b'.quick brown fox, quick brown cat quick read dog, quick white bird',
'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' +
'=2Equick brown fox, quick brown cat =20=\n' +
' quick read dog, quic=\n' +
'k white bird'),
# Should quote :tab if the cut results in a :tab at the end of the next line
(b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz' +
b'.quick brown fox, quick brown cat \t quick read dog, quick white bird',
'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' +
'=2Equick brown fox, quick brown cat =09=\n' +
' quick read dog, quic=\n' +
'k white bird'),
# Should avoid cutting in the middle of multiple quoted characters near the cut point
(b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz' +
b'.foo. \xF0\x9F\x99\x82 also there is \xF0\x9F\x99\x82 more in \xF0\x9F\x99\x82 ' +
b'this \xF0\x9F\x99\x82 message</body></html>',
'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz=\n' +
'=2Efoo. =F0=9F=99=82 also there is =F0=9F=\n' +
'=99=82 more in =F0=9F=99=82 this =F0=\n'
'=9F=99=82 message</body></html>'),
)


def test_encode():
    """Encode each STRINGS input as quoted-printable and compare to its expected text."""
    for raw, expected in STRINGS:
        encoded = part._encode_transfer_encoding('quoted-printable', raw)
        eq_(encoded, expected)

0 comments on commit ba8a898

Please sign in to comment.