From f2bff9f05b45b5c0b09fdba98a0e7dce6304489b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cbhuvi27=E2=80=9D?= Date: Fri, 3 Jul 2026 17:04:17 +0530 Subject: [PATCH] [3.14] gh-150771: Fix email serialization for shift_jis and euc-jp (GH-151120) Encode the payload with output_charset instead of input_charset. (cherry picked from commit 0777a58d8012bbdd0d72654b56f9112686ae6ff0) --- Lib/email/contentmanager.py | 7 ++-- Lib/test/test_email/test_contentmanager.py | 40 +++++++++++++++++++ ...-06-09-12-00-00.gh-issue-150771.K7mNx2.rst | 4 ++ 3 files changed, 48 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2026-06-09-12-00-00.gh-issue-150771.K7mNx2.rst diff --git a/Lib/email/contentmanager.py b/Lib/email/contentmanager.py index 11d1536db27d79..dfda3519d585e8 100644 --- a/Lib/email/contentmanager.py +++ b/Lib/email/contentmanager.py @@ -186,11 +186,12 @@ def set_text_content(msg, string, subtype="plain", charset='utf-8', cte=None, disposition=None, filename=None, cid=None, params=None, headers=None): _prepare_set(msg, 'text', subtype, headers) + + cs = email.charset.Charset(charset) + charset = cs.output_charset cte, payload = _encode_text(string, charset, cte, msg.policy) msg.set_payload(payload) - msg.set_param('charset', - email.charset.ALIASES.get(charset, charset), - replace=True) + msg.set_param('charset', charset, replace=True) msg['Content-Transfer-Encoding'] = cte _finalize_set(msg, disposition, filename, cid, params) raw_data_manager.add_set_handler(str, set_text_content) diff --git a/Lib/test/test_email/test_contentmanager.py b/Lib/test/test_email/test_contentmanager.py index dceb54f15e48f4..5904ea66402368 100644 --- a/Lib/test/test_email/test_contentmanager.py +++ b/Lib/test/test_email/test_contentmanager.py @@ -342,6 +342,46 @@ def test_set_text_charset_latin_1(self): self.assertEqual(m.get_payload(decode=True).decode('utf-8'), content) self.assertEqual(m.get_content(), content) + def test_set_text_charset_shift_jis(self): + m = self._make_message() + content = "\u65e5\u672c\u8a9e\n" + raw_data_manager.set_content(m, content, charset='shift_jis') + self.assertEqual(m['Content-Type'], 'text/plain; charset="iso-2022-jp"') + self.assertEqual(m.get_payload(decode=True), content.encode('iso-2022-jp')) + self.assertEqual(m.get_content(), content) + self.assertEqual(str(m), textwrap.dedent("""\ + Content-Type: text/plain; charset="iso-2022-jp" + Content-Transfer-Encoding: 7bit + + \x1b$BF|K\\8l\x1b(B + """)) + self.assertEqual(bytes(m), textwrap.dedent("""\ + Content-Type: text/plain; charset="iso-2022-jp" + Content-Transfer-Encoding: 7bit + + \u65e5\u672c\u8a9e + """).encode('iso-2022-jp')) + + def test_set_text_charset_euc_jp(self): + m = self._make_message() + content = "\u65e5\u672c\u8a9e\n" + raw_data_manager.set_content(m, content, charset='euc-jp') + self.assertEqual(m['Content-Type'], 'text/plain; charset="iso-2022-jp"') + self.assertEqual(m.get_payload(decode=True), content.encode('iso-2022-jp')) + self.assertEqual(m.get_content(), content) + self.assertEqual(str(m), textwrap.dedent("""\ + Content-Type: text/plain; charset="iso-2022-jp" + Content-Transfer-Encoding: 7bit + + \x1b$BF|K\\8l\x1b(B + """)) + self.assertEqual(bytes(m), textwrap.dedent("""\ + Content-Type: text/plain; charset="iso-2022-jp" + Content-Transfer-Encoding: 7bit + + \u65e5\u672c\u8a9e + """).encode('iso-2022-jp')) + def test_set_text_plain_long_line_heuristics(self): m = self._make_message() content = ("Simple but long message that is over 78 characters" diff --git a/Misc/NEWS.d/next/Library/2026-06-09-12-00-00.gh-issue-150771.K7mNx2.rst b/Misc/NEWS.d/next/Library/2026-06-09-12-00-00.gh-issue-150771.K7mNx2.rst new file mode 100644 index 00000000000000..2965176d438172 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-06-09-12-00-00.gh-issue-150771.K7mNx2.rst @@ -0,0 +1,4 @@ +Fix serialization of :mod:`email` messages using the ``shift_jis`` or +``euc-jp`` charsets: ``set_content()`` now encodes the payload using +the output charset, so ``str(m)`` no longer raises +:exc:`UnicodeEncodeError`.