Skip to content

Commit 3010adc

Browse files
serhiy-storchaka and claude
authored and committed
gh-133031: Support non-ASCII characters in curses.textpad.Textbox (GH-152451)
Textbox mangled non-ASCII characters of an 8-bit locale encoding: it masked reads with curses.ascii.ascii(), which clears the 8th bit, and tested input with curses.ascii.isprint(), which rejects bytes above 127. Decode cells and input bytes with the window's encoding instead. Cells are read with instr() and the lower-right cell is written as text, since inch() and insch() with an int byte mishandle a non-ASCII character on a wide build. This uses only the byte-oriented curses API, so it works without wide-character support. (cherry picked from commit c253f0c) Co-authored-by: Serhiy Storchaka <storchaka@gmail.com> Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
1 parent 65d110d commit 3010adc

3 files changed

Lines changed: 107 additions & 7 deletions

File tree

Lib/curses/textpad.py

Lines changed: 37 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -56,13 +56,42 @@ def _update_max_yx(self):
5656
self.maxy = maxy - 1
5757
self.maxx = maxx - 1
5858

59+
def _decode(self, ch):
60+
# The text of a chtype cell or input byte, decoded with the window's
61+
# encoding. A_CHARTEXT keeps the character byte, dropping the attributes.
62+
return bytes([ch & curses.A_CHARTEXT]).decode(self.win.encoding, 'replace')
63+
64+
def _char_at(self, *yx):
65+
# The text of the cell at the given position (default: the cursor).
66+
# instr() re-encodes it to the window's encoding; inch() cannot
67+
# represent a non-ASCII 8-bit-locale character on a wide build.
68+
return self.win.instr(*yx, 1).decode(self.win.encoding, 'replace')
69+
70+
def _cell_at(self, *yx):
71+
# The cell at the given position (default: the cursor) as a chtype
72+
# addch() can write back with its rendition. inch() mangles a non-ASCII
73+
# character on a wide build, so take the byte from instr() and the
74+
# attributes from inch().
75+
return self.win.instr(*yx, 1)[0] | self.win.inch(*yx) & curses.A_ATTRIBUTES
76+
77+
def _isprint(self, cell):
78+
# Whether a chtype cell holds a printable character; _decode() drops the
79+
# attribute bits.
80+
return self._decode(cell).isprintable()
81+
82+
def _printable_key(self, ch):
83+
# Whether the integer keystroke is a printable character, not a key
84+
# code. 0..255 are character bytes (decoded with the window's encoding);
85+
# larger values are function and navigation keys.
86+
return ch <= 0xff and self._decode(ch).isprintable()
87+
5988
def _end_of_line(self, y):
6089
"""Go to the location of the first blank on the given line,
6190
returning the index of the last non-blank character."""
6291
self._update_max_yx()
6392
last = self.maxx
6493
while True:
65-
if curses.ascii.ascii(self.win.inch(y, last)) != curses.ascii.SP:
94+
if self._char_at(y, last) != ' ':
6695
last = min(self.maxx, last+1)
6796
break
6897
elif last == 0:
@@ -76,15 +105,16 @@ def _insert_printable_char(self, ch):
76105
backyx = None
77106
while True:
78107
if self.insert_mode:
79-
oldch = self.win.inch()
108+
oldch = self._cell_at()
80109
if y >= self.maxy and x >= self.maxx:
81110
# Use insch() in the lower-right cell: addch() there would move
82111
# the cursor out of the window, raising an error and scrolling
83-
# a scrollable window.
84-
self.win.insch(ch)
112+
# a scrollable window. Pass it as text: insch() does not decode
113+
# an int byte through the locale on a wide build.
114+
self.win.insch(self._decode(ch), ch & curses.A_ATTRIBUTES)
85115
break
86116
self.win.addch(ch)
87-
if not self.insert_mode or not curses.ascii.isprint(oldch):
117+
if not self.insert_mode or not self._isprint(oldch):
88118
break
89119
ch = oldch
90120
(y, x) = self.win.getyx()
@@ -100,7 +130,7 @@ def do_command(self, ch):
100130
self._update_max_yx()
101131
(y, x) = self.win.getyx()
102132
self.lastcmd = ch
103-
if curses.ascii.isprint(ch):
133+
if self._printable_key(ch):
104134
self._insert_printable_char(ch)
105135
elif ch == curses.ascii.SOH: # ^a
106136
self.win.move(y, 0)
@@ -174,7 +204,7 @@ def gather(self):
174204
for x in range(self.maxx+1):
175205
if self.stripspaces and x > stop:
176206
break
177-
result = result + chr(curses.ascii.ascii(self.win.inch(y, x)))
207+
result = result + self._char_at(y, x)
178208
if self.maxy > 0:
179209
result = result + "\n"
180210
return result

Lib/test/test_curses.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1524,6 +1524,68 @@ def test_textbox_fill_last_cell_scrollok(self):
15241524
self._type(box, 'def')
15251525
self.assertEqual(box.gather(), 'abc\ndef\n')
15261526

1527+
def test_textbox_8bit(self):
    # Characters of an 8-bit locale encoding are typed in byte by byte and
    # read back through the byte API.  That byte path also runs on a wide
    # build, so the test is not skipped there.  The non-ASCII cases are
    # only reached when the suite runs under an 8-bit locale (ISO-8859-1,
    # ISO-8859-15 or KOI8-U): a sample is used only if the encoding maps
    # it to single bytes.  'abc' is ASCII, 'café' is common to the Latin
    # encodings, and the remaining samples are distinctive (byte 0xA4 is
    # '¤'/'€'/'є' in ISO-8859-1/-15/KOI8-U).
    encoding = self.stdscr.encoding
    samples = ('abc', 'café', 'naïve ¤¦', 'café €Šž', 'дякую єі')
    for text in samples:
        try:
            data = text.encode(encoding)
        except UnicodeEncodeError:
            continue
        # One byte per character, or it is a multibyte encoding and not
        # the 8-bit byte path.
        if len(data) == len(text):
            with self.subTest(text=text):
                box, win = self._make_textbox(1, 16)
                for byte in data:
                    box.do_command(byte)
                self.assertEqual(box.gather(), text + ' ')
1548+
1549+
def test_textbox_8bit_insert(self):
    # In insert mode the rest of the line is shifted right by reading each
    # cell back and rewriting it.  A non-ASCII 8-bit-locale character has
    # to survive that shift, even on a wide build where inch() mangles it.
    # The characters are the distinctive ones chosen in test_textbox_8bit.
    encoding = self.stdscr.encoding
    for ch in ('é', '¤', '€', 'є'):
        try:
            encoded = ch.encode(encoding)
        except UnicodeEncodeError:
            continue
        if len(encoded) != 1:
            continue
        with self.subTest(ch=ch):
            box, win = self._make_textbox(1, 10, insert_mode=True)
            initial = 'a' + ch + 'c'
            for byte in initial.encode(encoding):
                box.do_command(byte)
            # Go back to just after 'a' and insert 'b' there, which
            # shifts ch and 'c' one cell to the right.
            win.move(0, 1)
            box.do_command(ord('b'))
            self.assertEqual(box.gather(), 'ab' + ch + 'c ')
1569+
1570+
def test_textbox_8bit_fill_last_cell(self):
    # The lower-right cell is written with insch() rather than addch(); a
    # non-ASCII 8-bit-locale character must survive that path too.  The
    # characters are the distinctive ones chosen in test_textbox_8bit.
    encoding = self.stdscr.encoding
    for ch in ('é', '¤', '€', 'є'):
        try:
            single = ch.encode(encoding)
        except UnicodeEncodeError:
            continue
        if len(single) == 1:
            with self.subTest(ch=ch):
                # The box is exactly as wide as the text, so the last
                # character fills the corner.
                text = 'ab' + ch
                box, win = self._make_textbox(1, len(text), stripspaces=0)
                for byte in text.encode(encoding):
                    box.do_command(byte)
                self.assertEqual(box.gather(), text)
1588+
15271589
def test_textbox_movement(self):
15281590
box, win = self._make_textbox(3, 10)
15291591
self._type(box, 'abc')
@@ -1879,6 +1941,11 @@ def setUp(self):
18791941
self.mock_win = MagicMock(spec=curses.window)
18801942
self.mock_win.getyx.return_value = (1, 1)
18811943
self.mock_win.getmaxyx.return_value = (10, 20)
1944+
self.mock_win.encoding = 'utf-8'
1945+
# A non-blank cell so that _end_of_line() reports a full line: instr()
1946+
# backs the text reads, inch() the insert-mode shift.
1947+
self.mock_win.instr.return_value = b'x'
1948+
self.mock_win.inch.return_value = ord('x')
18821949
self.textbox = curses.textpad.Textbox(self.mock_win)
18831950

18841951
def test_init(self):
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
:class:`curses.textpad.Textbox` now enters and reads back the non-ASCII
2+
characters of an 8-bit locale encoding, instead of mangling them with a 7-bit
3+
mask.

0 commit comments

Comments
 (0)