Skip to content

Commit 4f61ee3

Browse files
committed
[3.13] gh-150771: Fix email serialization for shift_jis and euc-jp (GH-151120)
Encode the payload with output_charset instead of input_charset. (cherry picked from commit 0777a58)
1 parent 0ed6cf2 commit 4f61ee3

3 files changed

Lines changed: 48 additions & 3 deletions

File tree

Lib/email/contentmanager.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -186,11 +186,12 @@ def set_text_content(msg, string, subtype="plain", charset='utf-8', cte=None,
186186
disposition=None, filename=None, cid=None,
187187
params=None, headers=None):
188188
_prepare_set(msg, 'text', subtype, headers)
189+
190+
cs = email.charset.Charset(charset)
191+
charset = cs.output_charset
189192
cte, payload = _encode_text(string, charset, cte, msg.policy)
190193
msg.set_payload(payload)
191-
msg.set_param('charset',
192-
email.charset.ALIASES.get(charset, charset),
193-
replace=True)
194+
msg.set_param('charset', charset, replace=True)
194195
msg['Content-Transfer-Encoding'] = cte
195196
_finalize_set(msg, disposition, filename, cid, params)
196197
raw_data_manager.add_set_handler(str, set_text_content)

Lib/test/test_email/test_contentmanager.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,46 @@ def test_set_text_charset_latin_1(self):
342342
self.assertEqual(m.get_payload(decode=True).decode('utf-8'), content)
343343
self.assertEqual(m.get_content(), content)
344344

345+
def test_set_text_charset_shift_jis(self):
346+
m = self._make_message()
347+
content = "\u65e5\u672c\u8a9e\n"
348+
raw_data_manager.set_content(m, content, charset='shift_jis')
349+
self.assertEqual(m['Content-Type'], 'text/plain; charset="iso-2022-jp"')
350+
self.assertEqual(m.get_payload(decode=True), content.encode('iso-2022-jp'))
351+
self.assertEqual(m.get_content(), content)
352+
self.assertEqual(str(m), textwrap.dedent("""\
353+
Content-Type: text/plain; charset="iso-2022-jp"
354+
Content-Transfer-Encoding: 7bit
355+
356+
\x1b$BF|K\\8l\x1b(B
357+
"""))
358+
self.assertEqual(bytes(m), textwrap.dedent("""\
359+
Content-Type: text/plain; charset="iso-2022-jp"
360+
Content-Transfer-Encoding: 7bit
361+
362+
\u65e5\u672c\u8a9e
363+
""").encode('iso-2022-jp'))
364+
365+
def test_set_text_charset_euc_jp(self):
366+
m = self._make_message()
367+
content = "\u65e5\u672c\u8a9e\n"
368+
raw_data_manager.set_content(m, content, charset='euc-jp')
369+
self.assertEqual(m['Content-Type'], 'text/plain; charset="iso-2022-jp"')
370+
self.assertEqual(m.get_payload(decode=True), content.encode('iso-2022-jp'))
371+
self.assertEqual(m.get_content(), content)
372+
self.assertEqual(str(m), textwrap.dedent("""\
373+
Content-Type: text/plain; charset="iso-2022-jp"
374+
Content-Transfer-Encoding: 7bit
375+
376+
\x1b$BF|K\\8l\x1b(B
377+
"""))
378+
self.assertEqual(bytes(m), textwrap.dedent("""\
379+
Content-Type: text/plain; charset="iso-2022-jp"
380+
Content-Transfer-Encoding: 7bit
381+
382+
\u65e5\u672c\u8a9e
383+
""").encode('iso-2022-jp'))
384+
345385
def test_set_text_plain_long_line_heuristics(self):
346386
m = self._make_message()
347387
content = ("Simple but long message that is over 78 characters"
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Fix serialization of :mod:`email` messages using the ``shift_jis`` or
2+
``euc-jp`` charsets: ``set_content()`` now encodes the payload using
3+
the output charset, so ``str(m)`` no longer raises
4+
:exc:`UnicodeEncodeError`.

0 commit comments

Comments
 (0)