Patch: make curses.textpad.Textbox enter and read back the non-ASCII
characters of an 8-bit locale encoding (gh-133031).

Notes on this copy of the patch:
  * The line structure and indentation were reconstructed from a
    whitespace-collapsed copy; apply with whitespace tolerance
    (e.g. "git apply --ignore-whitespace") if a context line does not match.
  * Textbox._printable_key() checks "0 <= ch <= 0xff" rather than only
    "ch <= 0xff": getch() returns -1 when no input is available, and masking
    -1 with A_CHARTEXT yields byte 0xFF, which decodes to a printable
    character and would be inserted.  The "index" post-image hash of
    textpad.py therefore no longer describes the resulting blob.

diff --git a/Lib/curses/textpad.py b/Lib/curses/textpad.py
index 57b2f4a523c95bc..c58a7174d194cf3 100644
--- a/Lib/curses/textpad.py
+++ b/Lib/curses/textpad.py
@@ -56,13 +56,42 @@ def _update_max_yx(self):
         self.maxy = maxy - 1
         self.maxx = maxx - 1
 
+    def _decode(self, ch):
+        # The text of a chtype cell or input byte, decoded with the window's
+        # encoding. A_CHARTEXT keeps the character byte, dropping the attributes.
+        return bytes([ch & curses.A_CHARTEXT]).decode(self.win.encoding, 'replace')
+
+    def _char_at(self, *yx):
+        # The text of the cell at the given position (default: the cursor).
+        # instr() re-encodes it to the window's encoding; inch() cannot
+        # represent a non-ASCII 8-bit-locale character on a wide build.
+        return self.win.instr(*yx, 1).decode(self.win.encoding, 'replace')
+
+    def _cell_at(self, *yx):
+        # The cell at the given position (default: the cursor) as a chtype
+        # addch() can write back with its rendition. inch() mangles a non-ASCII
+        # character on a wide build, so take the byte from instr() and the
+        # attributes from inch().
+        return self.win.instr(*yx, 1)[0] | self.win.inch(*yx) & curses.A_ATTRIBUTES
+
+    def _isprint(self, cell):
+        # Whether a chtype cell holds a printable character; _decode() drops the
+        # attribute bits.
+        return self._decode(cell).isprintable()
+
+    def _printable_key(self, ch):
+        # Whether the integer keystroke is a printable character. 0..255 are
+        # character bytes (decoded with the window's encoding); larger values
+        # are key codes and -1 is getch()'s "no input", so neither is printable.
+        return 0 <= ch <= 0xff and self._decode(ch).isprintable()
+
     def _end_of_line(self, y):
         """Go to the location of the first blank on the given line,
         returning the index of the last non-blank character."""
         self._update_max_yx()
         last = self.maxx
         while True:
-            if curses.ascii.ascii(self.win.inch(y, last)) != curses.ascii.SP:
+            if self._char_at(y, last) != ' ':
                 last = min(self.maxx, last+1)
                 break
             elif last == 0:
@@ -76,15 +105,16 @@ def _insert_printable_char(self, ch):
         backyx = None
         while True:
             if self.insert_mode:
-                oldch = self.win.inch()
+                oldch = self._cell_at()
             if y >= self.maxy and x >= self.maxx:
                 # Use insch() in the lower-right cell: addch() there would move
                 # the cursor out of the window, raising an error and scrolling
-                # a scrollable window.
-                self.win.insch(ch)
+                # a scrollable window. Pass it as text: insch() does not decode
+                # an int byte through the locale on a wide build.
+                self.win.insch(self._decode(ch), ch & curses.A_ATTRIBUTES)
                 break
             self.win.addch(ch)
-            if not self.insert_mode or not curses.ascii.isprint(oldch):
+            if not self.insert_mode or not self._isprint(oldch):
                 break
             ch = oldch
             (y, x) = self.win.getyx()
@@ -100,7 +130,7 @@ def do_command(self, ch):
         self._update_max_yx()
         (y, x) = self.win.getyx()
         self.lastcmd = ch
-        if curses.ascii.isprint(ch):
+        if self._printable_key(ch):
             self._insert_printable_char(ch)
         elif ch == curses.ascii.SOH:                           # ^a
             self.win.move(y, 0)
@@ -174,7 +204,7 @@ def gather(self):
             for x in range(self.maxx+1):
                 if self.stripspaces and x > stop:
                     break
-                result = result + chr(curses.ascii.ascii(self.win.inch(y, x)))
+                result = result + self._char_at(y, x)
             if self.maxy > 0:
                 result = result + "\n"
         return result
diff --git a/Lib/test/test_curses.py b/Lib/test/test_curses.py
index c17f4c87705c46c..6a6ef6e0226ef62 100644
--- a/Lib/test/test_curses.py
+++ b/Lib/test/test_curses.py
@@ -1993,6 +1993,68 @@ def test_textbox_fill_last_cell_scrollok(self):
         self._type(box, 'def')
         self.assertEqual(box.gather(), 'abc\ndef\n')
 
+    def test_textbox_8bit(self):
+        # A character of an 8-bit locale encoding is entered and read back
+        # through the byte API. The byte path also runs on a wide build, so the
+        # test is not skipped there. Run the suite under an 8-bit locale
+        # (ISO-8859-1, ISO-8859-15 or KOI8-U) to reach the non-ASCII cases; each
+        # string is used only if the encoding maps it to single bytes. 'abc' is
+        # ASCII, 'café' is common to the Latin encodings, and the rest are
+        # distinctive (byte 0xA4 is '¤'/'€'/'є' in ISO-8859-1/-15/KOI8-U).
+        encoding = self.stdscr.encoding
+        for text in ['abc', 'café', 'naïve ¤¦', 'café €Šž', 'дякую єі']:
+            try:
+                data = text.encode(encoding)
+            except UnicodeEncodeError:
+                continue
+            if len(data) != len(text):
+                continue  # a multibyte encoding is not the 8-bit byte path
+            with self.subTest(text=text):
+                box, win = self._make_textbox(1, 16)
+                for byte in data:
+                    box.do_command(byte)
+                self.assertEqual(box.gather(), text + ' ')
+
+    def test_textbox_8bit_insert(self):
+        # Insert mode shifts the rest of the line right by reading each cell back
+        # and rewriting it; a non-ASCII 8-bit-locale character must survive the
+        # shift, even on a wide build where inch() mangles it. See
+        # test_textbox_8bit for the character choices.
+        encoding = self.stdscr.encoding
+        for ch in ['é', '¤', '€', 'є']:
+            try:
+                data = ch.encode(encoding)
+            except UnicodeEncodeError:
+                continue
+            if len(data) != 1:
+                continue
+            with self.subTest(ch=ch):
+                box, win = self._make_textbox(1, 10, insert_mode=True)
+                for byte in ('a' + ch + 'c').encode(encoding):
+                    box.do_command(byte)
+                win.move(0, 1)
+                box.do_command(ord('b'))  # insert 'b', shifting ch and 'c' right
+                self.assertEqual(box.gather(), 'ab' + ch + 'c ')
+
+    def test_textbox_8bit_fill_last_cell(self):
+        # A non-ASCII 8-bit-locale character must survive being written to the
+        # lower-right cell, which uses insch() rather than addch(). See
+        # test_textbox_8bit for the character choices.
+        encoding = self.stdscr.encoding
+        for ch in ['é', '¤', '€', 'є']:
+            try:
+                data = ch.encode(encoding)
+            except UnicodeEncodeError:
+                continue
+            if len(data) != 1:
+                continue
+            with self.subTest(ch=ch):
+                text = 'ab' + ch  # the last character fills the corner
+                box, win = self._make_textbox(1, len(text), stripspaces=0)
+                for byte in text.encode(encoding):
+                    box.do_command(byte)
+                self.assertEqual(box.gather(), text)
+
     def test_textbox_movement(self):
         box, win = self._make_textbox(3, 10)
         self._type(box, 'abc')
@@ -2369,6 +2431,11 @@ def setUp(self):
         self.mock_win = MagicMock(spec=curses.window)
         self.mock_win.getyx.return_value = (1, 1)
         self.mock_win.getmaxyx.return_value = (10, 20)
+        self.mock_win.encoding = 'utf-8'
+        # A non-blank cell so that _end_of_line() reports a full line: instr()
+        # backs the text reads, inch() the insert-mode shift.
+        self.mock_win.instr.return_value = b'x'
+        self.mock_win.inch.return_value = ord('x')
         self.textbox = curses.textpad.Textbox(self.mock_win)
 
     def test_init(self):
diff --git a/Misc/NEWS.d/next/Library/2026-06-27-12-30-00.gh-issue-133031.Na8Bit.rst b/Misc/NEWS.d/next/Library/2026-06-27-12-30-00.gh-issue-133031.Na8Bit.rst
new file mode 100644
index 000000000000000..96e9efe20e42f04
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-06-27-12-30-00.gh-issue-133031.Na8Bit.rst
@@ -0,0 +1,3 @@
+:class:`curses.textpad.Textbox` now enters and reads back the non-ASCII
+characters of an 8-bit locale encoding, instead of mangling them with a 7-bit
+mask.