diff --git a/Lib/email/contentmanager.py b/Lib/email/contentmanager.py
index 11d1536db27d79c..dfda3519d585e82 100644
--- a/Lib/email/contentmanager.py
+++ b/Lib/email/contentmanager.py
@@ -186,11 +186,12 @@ def set_text_content(msg, string, subtype="plain", charset='utf-8', cte=None,
                      disposition=None, filename=None, cid=None,
                      params=None, headers=None):
     _prepare_set(msg, 'text', subtype, headers)
+
+    cs = email.charset.Charset(charset)
+    charset = cs.output_charset
     cte, payload = _encode_text(string, charset, cte, msg.policy)
     msg.set_payload(payload)
-    msg.set_param('charset',
-                  email.charset.ALIASES.get(charset, charset),
-                  replace=True)
+    msg.set_param('charset', charset, replace=True)
     msg['Content-Transfer-Encoding'] = cte
     _finalize_set(msg, disposition, filename, cid, params)
 raw_data_manager.add_set_handler(str, set_text_content)
diff --git a/Lib/test/test_email/test_contentmanager.py b/Lib/test/test_email/test_contentmanager.py
index dceb54f15e48f4e..5904ea664023689 100644
--- a/Lib/test/test_email/test_contentmanager.py
+++ b/Lib/test/test_email/test_contentmanager.py
@@ -342,6 +342,46 @@ def test_set_text_charset_latin_1(self):
         self.assertEqual(m.get_payload(decode=True).decode('utf-8'), content)
         self.assertEqual(m.get_content(), content)
 
+    def test_set_text_charset_shift_jis(self):
+        m = self._make_message()
+        content = "\u65e5\u672c\u8a9e\n"
+        raw_data_manager.set_content(m, content, charset='shift_jis')
+        self.assertEqual(m['Content-Type'], 'text/plain; charset="iso-2022-jp"')
+        self.assertEqual(m.get_payload(decode=True), content.encode('iso-2022-jp'))
+        self.assertEqual(m.get_content(), content)
+        self.assertEqual(str(m), textwrap.dedent("""\
+            Content-Type: text/plain; charset="iso-2022-jp"
+            Content-Transfer-Encoding: 7bit
+
+            \x1b$BF|K\\8l\x1b(B
+            """))
+        self.assertEqual(bytes(m), textwrap.dedent("""\
+            Content-Type: text/plain; charset="iso-2022-jp"
+            Content-Transfer-Encoding: 7bit
+
+            \u65e5\u672c\u8a9e
+            """).encode('iso-2022-jp'))
+
+    def test_set_text_charset_euc_jp(self):
+        m = self._make_message()
+        content = "\u65e5\u672c\u8a9e\n"
+        raw_data_manager.set_content(m, content, charset='euc-jp')
+        self.assertEqual(m['Content-Type'], 'text/plain; charset="iso-2022-jp"')
+        self.assertEqual(m.get_payload(decode=True), content.encode('iso-2022-jp'))
+        self.assertEqual(m.get_content(), content)
+        self.assertEqual(str(m), textwrap.dedent("""\
+            Content-Type: text/plain; charset="iso-2022-jp"
+            Content-Transfer-Encoding: 7bit
+
+            \x1b$BF|K\\8l\x1b(B
+            """))
+        self.assertEqual(bytes(m), textwrap.dedent("""\
+            Content-Type: text/plain; charset="iso-2022-jp"
+            Content-Transfer-Encoding: 7bit
+
+            \u65e5\u672c\u8a9e
+            """).encode('iso-2022-jp'))
+
     def test_set_text_plain_long_line_heuristics(self):
         m = self._make_message()
         content = ("Simple but long message that is over 78 characters"
diff --git a/Misc/NEWS.d/next/Library/2026-06-09-12-00-00.gh-issue-150771.K7mNx2.rst b/Misc/NEWS.d/next/Library/2026-06-09-12-00-00.gh-issue-150771.K7mNx2.rst
new file mode 100644
index 000000000000000..2965176d438172e
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-06-09-12-00-00.gh-issue-150771.K7mNx2.rst
@@ -0,0 +1,4 @@
+Fix serialization of :mod:`email` messages using the ``shift_jis`` or
+``euc-jp`` charsets: ``set_content()`` now encodes the payload using
+the output charset, so ``str(m)`` no longer raises
+:exc:`UnicodeEncodeError`.