From d93f6bc6e7affbaf3da726cc1b4f74831862085e Mon Sep 17 00:00:00 2001 From: Mark Hahnenberg Date: Sun, 20 May 2018 12:41:10 -0700 Subject: [PATCH] Force quopri to use CRLF for soft line breaks quopri tries to automatically detect which type of line ending it should use for soft line breaks which can lead it to making the wrong decision where it tries to use =LF rather than =CRLF. This diff fixes this by prepending a CRLF when encoding a part's body to force quopri to use CRLF and then removes that prefix afterwards. --- flanker/mime/message/part.py | 6 +++++- tests/__init__.py | 1 + .../text-long-line-no-line-breaks.eml | 12 +++++++++++ tests/mime/message/part_test.py | 20 +++++++++++++------ 4 files changed, 32 insertions(+), 7 deletions(-) create mode 100644 tests/fixtures/messages/text-long-line-no-line-breaks.eml diff --git a/flanker/mime/message/part.py b/flanker/mime/message/part.py index 3a795ebf..6726da5b 100644 --- a/flanker/mime/message/part.py +++ b/flanker/mime/message/part.py @@ -631,7 +631,9 @@ def _encode_charset(preferred_charset, text): def _encode_transfer_encoding(encoding, body): if six.PY3: if encoding == 'quoted-printable': + body = b''.join([b'\r\n', body]) body = quopri.encodestring(body, quotetabs=False) + body = body[2:] return body.decode('utf-8') if encoding == 'base64': @@ -647,7 +649,9 @@ def _encode_transfer_encoding(encoding, body): return body if encoding == 'quoted-printable': - return quopri.encodestring(body, quotetabs=False) + body = '\r\n{}'.format(body) + result = quopri.encodestring(body, quotetabs=False) + return result[2:] elif encoding == 'base64': return _email.encode_base64(body) else: diff --git a/tests/__init__.py b/tests/__init__.py index 2cf562f3..377e40f9 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -76,6 +76,7 @@ def read_fixture(path, binary=False): 'messages/russian-attachment-yahoo.eml', binary=True) QUOTED_PRINTABLE = read_fixture('messages/quoted-printable.eml') TEXT_ONLY = read_fixture('messages/text-only.eml') +TEXT_LONG_LINE_NO_LINE_BREAKS = read_fixture("messages/text-long-line-no-line-breaks.eml") MAILGUN_PIC = read_fixture('messages/mailgun-pic.eml') BZ2_ATTACHMENT = read_fixture('messages/bz2-attachment.eml') OUTLOOK_EXPRESS = read_fixture('messages/outlook-express.eml') diff --git a/tests/fixtures/messages/text-long-line-no-line-breaks.eml b/tests/fixtures/messages/text-long-line-no-line-breaks.eml new file mode 100644 index 00000000..42d54d9e --- /dev/null +++ b/tests/fixtures/messages/text-long-line-no-line-breaks.eml @@ -0,0 +1,12 @@ +MIME-Version: 1.0 +Received: by 10.68.60.193 with HTTP; Thu, 29 Dec 2011 02:06:53 -0800 (PST) +X-Originating-IP: [95.37.185.143] +Date: Thu, 29 Dec 2011 14:06:53 +0400 +Delivered-To: bob@marley.com +Message-ID: +Subject: Testing message parsing +From: Bob Marley +To: hello@there.com +Content-Type: text/plain; charset=ascii + +Lorem ipsum dolor sit amet, nec reque propriae et, vix labitur dignissim ad. Sit in lobortis comprehensam, posse omittam atomorum et eos. Saperet oporteat molestiae pro eu, mel at scripta dolores vivendum, at nam eros ornatus. An est putant lobortis repudiandae. Quaeque persequeris neglegentur in nam, semper albucius consectetuer at duo. Qui populo nominavi te, vis et decore invenire ullamcorper, ea pri assum evertitur. Eam dicant hendrerit no, eos an sint sapientem, ei nam saperet complectitur mediocritatem. Ad saepe blandit iracundia mel, per cu veri interpretaris conclusionemque. In pri quaeque imperdiet repudiandae, falli decore minimum mei ut. Natum vidisse ei sit, ut malorum denique usu. Quo ex explicari conceptam temporibus. Ut mei graeci persecuti interpretaris, perfecto repudiandae mei ex. Mea ex splendide reprimique. No ridens verear commune nec. Laudem conclusionemque te sea, congue scriptorem et his. Tollit persius vix eu. Exerci efficiendi ea mei, id mei reque phaedrum disputando, recteque qualisque at vix. Vix noluisse dissentias at. Nec sanctus facilis at, et quo blandit pertinacia maiestatis, eum an erroribus reprimique scribentur. Cu sed graeco maluisset assueverit, omnesque vulputate vix id. Cu quod timeam reprehendunt vim, te esse placerat vim. Duo in mazim quaerendum dissentiet. Qui ea tempor impetus adipisci, vix eu eripuit saperet detracto, idque aeterno ne vim. Vim ne modo option pertinacia, graecis voluptua an vim. At consul scripta oblique cum, eu iriure fastidii erroribus quo. Eu gloriatur incorrupte definitiones mel. Oblique voluptatum ad sed, ea nostro abhorreant signiferumque nam, mediocrem posidonium vix eu. \ No newline at end of file diff --git a/tests/mime/message/part_test.py b/tests/mime/message/part_test.py index c3dba5c4..f27d5cc1 100644 --- a/tests/mime/message/part_test.py +++ b/tests/mime/message/part_test.py @@ -8,14 +8,14 @@ from flanker.mime import recover from flanker.mime.create import multipart, text from flanker.mime.message.errors import EncodingError -from flanker.mime.message.part import _encode_transfer_encoding, _base64_decode +from flanker.mime.message.part import _encode_transfer_encoding, _base64_decode, _encode_body from flanker.mime.message.scanner import scan from tests import (BILINGUAL, BZ2_ATTACHMENT, ENCLOSED, TORTURE, TORTURE_PART, ENCLOSED_BROKEN_ENCODING, EIGHT_BIT, QUOTED_PRINTABLE, TEXT_ONLY, ENCLOSED_BROKEN_BODY, RUSSIAN_ATTACH_YAHOO, MAILGUN_PIC, MAILGUN_PNG, MULTIPART, IPHONE, SPAM_BROKEN_CTYPE, BOUNCE, NDN, NO_CTYPE, RELATIVE, - MULTI_RECEIVED_HEADERS, OUTLOOK_EXPRESS) + MULTI_RECEIVED_HEADERS, OUTLOOK_EXPRESS, TEXT_LONG_LINE_NO_LINE_BREAKS) from tests.mime.message.scanner_test import TORTURE_PARTS, tree_to_string @@ -247,7 +247,7 @@ def parse_then_serialize_malformed_message_test(): def ascii_to_quoted_printable_test(): # contains unicode chars message = scan(TEXT_ONLY) - unicode_value = u'☯Привет! Как дела? Что делаешь?,\n Что новенького?☯' + unicode_value = u'☯Привет! Как дела? Что делаешь?,\r\n Что новенького?☯' message.body = unicode_value message = scan(message.to_string()) eq_('quoted-printable', message.content_encoding.value) @@ -291,6 +291,14 @@ def ascii_to_quoted_printable_test_2(): eq_(value, message.body) +def long_line_no_line_break_quoted_printable_test(): + message = scan(TEXT_LONG_LINE_NO_LINE_BREAKS) + message._container._body_changed = True + assert message.was_changed(ignore_prepends=True) + charset, encoding, body = _encode_body(message) + eq_(len(body.split(b'\r\n')), 23) + + # Make sure we can't create a message without headers. def create_message_without_headers_test(): message = scan(TEXT_ONLY) @@ -312,7 +320,7 @@ def create_message_without_body_test(): # Alter the complex message, make sure that the structure remained the same. def torture_alter_test(): message = scan(TORTURE) - unicode_value = u'☯Привет! Как дела? Что делаешь?,\n Что новенького?☯' + unicode_value = u'☯Привет! Как дела? Что делаешь?,\r\n Что новенького?☯' message.parts[5].enclosed.parts[0].parts[0].body = unicode_value for p in message.walk(): if str(p.content_type) == 'text/plain': @@ -505,11 +513,11 @@ def message_alter_body_and_serialize_test(): parts = list(message1.walk()) eq_(3, len(parts)) - eq_(u'Привет, Danielle!\n\n', parts[2].body) + eq_(u'Привет, Danielle!\r\n\r\n', parts[2].body) parts = list(message2.walk()) eq_(3, len(parts)) - eq_(u'Привет, Danielle!\n\n', parts[2].body) + eq_(u'Привет, Danielle!\r\n\r\n', parts[2].body) def alter_message_test_size():