From 32ee63d34e3e582969f8264c26d6e472af5e6c99 Mon Sep 17 00:00:00 2001 From: Arnt Gulbrandsen Date: Wed, 26 Jul 2023 11:37:13 +0200 Subject: [PATCH] GH-78319: Stop sending the UTF8 marker when appending messages to a mailbox. The UTF8 marker is defined in RFC 6855 and tells the server that the message being appended contains UTF8 addresses, an unencoded UTF8 subject, etc. However, if a client appends a message containing UTF8 addresses but without that marker, the bytes can only be parsed as UTF8 because that's the only RFC-compliant way to parse those bytes. RFC 6855 says clients MUST send the UTF8 marker. Due to an accidental discrepancy, RFC 9051 (IMAP4rev2) does not contain that marker. IMAP4rev2 was intended to be upwardly compatible with RFC 6855, but this problem broke that. This has no ill effects, since the marker does not change the message's meaning. While investigating the problem, I noticed that Python uses the marker incorrectly: Python uses it to mark ALL messages if UTF8=ACCEPT support has been enabled, not just ones that contain UTF8 addresses. The best way forward appears to be using the syntax defined in RFC 9051 and publishing a revision to RFC 6855, so this change modifies imaplib to match RFC 9051. FWIW JMAP is like IMAP4rev2 in this case; UTF8 is just there, without any marker. Also, none of UTF8=ACCEPT, IMAP4rev2, or JMAP provides any way to learn whether a message was stored with or without the marker. This quasi-accidentally solves #78319 by removing the case that broke. 
--- Lib/imaplib.py | 2 -- Lib/test/test_imaplib.py | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/Lib/imaplib.py b/Lib/imaplib.py index 577b4b9b03a88de..d097a98488e0dac 100644 --- a/Lib/imaplib.py +++ b/Lib/imaplib.py @@ -411,8 +411,6 @@ def append(self, mailbox, flags, date_time, message): else: date_time = None literal = MapCRLF.sub(CRLF, message) - if self.utf8_enabled: - literal = b'UTF8 (' + literal + b')' self.literal = literal return self._simple_command(name, mailbox, flags, date_time) diff --git a/Lib/test/test_imaplib.py b/Lib/test/test_imaplib.py index 60f5b671b1da485..8de6ea90250a995 100644 --- a/Lib/test/test_imaplib.py +++ b/Lib/test/test_imaplib.py @@ -324,7 +324,7 @@ def cmd_APPEND(self, tag, args): typ, data = client.append(None, None, None, msg_string.encode('utf-8')) self.assertEqual(typ, 'OK') self.assertEqual(server.response, - ('UTF8 (%s)\r\n' % msg_string).encode('utf-8')) + ('%s\r\n' % msg_string).encode('utf-8')) def test_search_disallows_charset_in_utf8_mode(self): class UTF8Server(SimpleIMAPHandler): @@ -775,7 +775,7 @@ def cmd_APPEND(self, tag, args): self.assertEqual(typ, 'OK') self.assertEqual( server.response, - ('UTF8 (%s)\r\n' % msg_string).encode('utf-8') + ('%s\r\n' % msg_string).encode('utf-8') ) # XXX also need a test that makes sure that the Literal and Untagged_status