Skip to content

Commit 3f93eb5

Browse files
gh-133031: Support the full Unicode range in curses.textpad.Textbox
Read input with get_wch() and read the window contents back with in_wch(), so that combining characters and characters outside the locale encoding now work where curses has wide-character support.

edit() passes non-ASCII characters to validate() as strings, while ASCII characters and key codes are still passed as integers so that existing validators keep working.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
1 parent 286b512 commit 3f93eb5

5 files changed

Lines changed: 107 additions & 43 deletions

File tree

Doc/library/curses.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2645,6 +2645,11 @@ You can instantiate a :class:`Textbox` object as follows:
26452645
upper-left corner of the containing window, with coordinates ``(0, 0)``.
26462646
The instance's :attr:`stripspaces` flag is initially on.
26472647

2648+
.. versionchanged:: next
2649+
Entering and reading back the full Unicode range, including combining
2650+
characters, is now supported when curses is built with wide-character
2651+
support.
2652+
26482653
:class:`Textbox` objects have the following methods:
26492654

26502655

@@ -2659,6 +2664,10 @@ You can instantiate a :class:`Textbox` object as follows:
26592664
string; whether blanks in the window are included is affected by the
26602665
:attr:`stripspaces` attribute.
26612666

2667+
.. versionchanged:: next
2668+
*validate* is now called with a non-ASCII character as a string;
2669+
other keystrokes are still passed as an integer.
2670+
26622671

26632672
.. method:: do_command(ch)
26642673

Doc/whatsnew/3.16.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,11 @@ curses
192192
against an ncurses with ``NCURSES_EXT_FUNCS``.
193193
(Contributed by Serhiy Storchaka in :gh:`152334`.)
194194

195+
* :class:`curses.textpad.Textbox` now supports entering and reading back the
196+
full Unicode range, including combining characters, when curses is built with
197+
wide-character support.
198+
(Contributed by Serhiy Storchaka in :gh:`133031`.)
199+
195200
gzip
196201
----
197202

Lib/curses/textpad.py

Lines changed: 36 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -57,32 +57,13 @@ def _update_max_yx(self):
5757
self.maxx = maxx - 1
5858

5959
def _decode(self, ch):
60-
# The text of a chtype cell or input byte, decoded with the window's
61-
# encoding. A_CHARTEXT keeps the character byte, dropping the attributes.
60+
# Decode an integer keystroke or byte to text with the window's encoding.
61+
# A_CHARTEXT drops any attribute bits.
6262
return bytes([ch & curses.A_CHARTEXT]).decode(self.win.encoding, 'replace')
6363

64-
def _char_at(self, *yx):
65-
# The text of the cell at the given position (default: the cursor).
66-
# instr() re-encodes it to the window's encoding; inch() cannot
67-
# represent a non-ASCII 8-bit-locale character on a wide build.
68-
return self.win.instr(*yx, 1).decode(self.win.encoding, 'replace')
69-
70-
def _cell_at(self, *yx):
71-
# The cell at the given position (default: the cursor) as a chtype
72-
# addch() can write back with its rendition. inch() mangles a non-ASCII
73-
# character on a wide build, so take the byte from instr() and the
74-
# attributes from inch().
75-
return self.win.instr(*yx, 1)[0] | self.win.inch(*yx) & curses.A_ATTRIBUTES
76-
77-
def _isprint(self, cell):
78-
# Whether a chtype cell holds a printable character; _decode() drops the
79-
# attribute bits.
80-
return self._decode(cell).isprintable()
81-
8264
def _printable_key(self, ch):
83-
# Whether the integer keystroke is a printable character, not a key
84-
# code. 0..255 are character bytes (decoded with the window's encoding);
85-
# larger values are function and navigation keys.
65+
# Whether the integer keystroke is a printable character, not a key code:
66+
# 0..255 are character bytes, larger values are function keys.
8667
return ch <= 0xff and self._decode(ch).isprintable()
8768

8869
def _end_of_line(self, y):
@@ -91,7 +72,8 @@ def _end_of_line(self, y):
9172
self._update_max_yx()
9273
last = self.maxx
9374
while True:
94-
if self._char_at(y, last) != ' ':
75+
# The text of the cell at (y, last).
76+
if str(self.win.in_wch(y, last)) != ' ':
9577
last = min(self.maxx, last+1)
9678
break
9779
elif last == 0:
@@ -105,16 +87,22 @@ def _insert_printable_char(self, ch):
10587
backyx = None
10688
while True:
10789
if self.insert_mode:
108-
oldch = self._cell_at()
90+
# The displaced cell, as a complexchar so addch() can rewrite it
91+
# with its rendition.
92+
oldch = self.win.in_wch()
10993
if y >= self.maxy and x >= self.maxx:
110-
# Use insch() in the lower-right cell: addch() there would move
111-
# the cursor out of the window, raising an error and scrolling
112-
# a scrollable window. Pass it as text: insch() does not decode
113-
# an int byte through the locale on a wide build.
114-
self.win.insch(self._decode(ch), ch & curses.A_ATTRIBUTES)
94+
# Use insch() in the lower-right cell; addch() there would push
95+
# the cursor out of the window (an error, and it scrolls a
96+
# scrollable window). insch() does not decode an int byte
97+
# through the locale on a wide build, so pass it as text.
98+
if isinstance(ch, int):
99+
self.win.insch(self._decode(ch), ch & curses.A_ATTRIBUTES)
100+
else:
101+
self.win.insch(ch)
115102
break
116103
self.win.addch(ch)
117-
if not self.insert_mode or not self._isprint(oldch):
104+
# In insert mode keep shifting cells right until a blank one.
105+
if not self.insert_mode or not str(oldch).isprintable():
118106
break
119107
ch = oldch
120108
(y, x) = self.win.getyx()
@@ -130,9 +118,17 @@ def do_command(self, ch):
130118
self._update_max_yx()
131119
(y, x) = self.win.getyx()
132120
self.lastcmd = ch
133-
if self._printable_key(ch):
121+
if isinstance(ch, str):
122+
# A character from get_wch(); a control character is dispatched
123+
# below by its code point.
124+
if ch.isprintable():
125+
self._insert_printable_char(ch)
126+
return 1
127+
ch = ord(ch)
128+
elif self._printable_key(ch):
134129
self._insert_printable_char(ch)
135-
elif ch == curses.ascii.SOH: # ^a
130+
return 1
131+
if ch == curses.ascii.SOH: # ^a
136132
self.win.move(y, 0)
137133
elif ch in (curses.ascii.STX,curses.KEY_LEFT,
138134
curses.ascii.BS,
@@ -204,15 +200,20 @@ def gather(self):
204200
for x in range(self.maxx+1):
205201
if self.stripspaces and x > stop:
206202
break
207-
result = result + self._char_at(y, x)
203+
result = result + str(self.win.in_wch(y, x))
208204
if self.maxy > 0:
209205
result = result + "\n"
210206
return result
211207

212208
def edit(self, validate=None):
213209
"Edit in the widget window and collect the results."
214210
while 1:
215-
ch = self.win.getch()
211+
ch = self.win.get_wch()
212+
# Represent an ASCII keystroke by its code point, the way getch()
213+
# always has, so that existing validators and the command dispatch
214+
# keep working; only non-ASCII characters are passed as strings.
215+
if isinstance(ch, str) and ch.isascii():
216+
ch = ord(ch)
216217
if validate:
217218
ch = validate(ch)
218219
if not ch:

Lib/test/test_curses.py

Lines changed: 54 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2247,9 +2247,9 @@ def test_textbox_fill_last_cell_scrollok(self):
22472247
self.assertEqual(box.gather(), 'abc\ndef\n')
22482248

22492249
def test_textbox_8bit(self):
2250-
# A character of an 8-bit locale encoding is entered and read back
2251-
# through the byte API. The byte path also runs on a wide build, so the
2252-
# test is not skipped there. Run the suite under an 8-bit locale
2250+
# An 8-bit-locale character is entered as integer bytes -- the way
2251+
# do_command() receives getch() input -- and read back; runs on both
2252+
# builds. Run the suite under an 8-bit locale
22532253
# (ISO-8859-1, ISO-8859-15 or KOI8-U) to reach the non-ASCII cases; each
22542254
# string is used only if the encoding maps it to single bytes. 'abc' is
22552255
# ASCII, 'café' is common to the Latin encodings, and the rest are
@@ -2270,9 +2270,8 @@ def test_textbox_8bit(self):
22702270

22712271
def test_textbox_8bit_insert(self):
22722272
# Insert mode shifts the rest of the line right by reading each cell back
2273-
# and rewriting it; a non-ASCII 8-bit-locale character must survive the
2274-
# shift, even on a wide build where inch() mangles it. See
2275-
# test_textbox_8bit for the character choices.
2273+
# and rewriting it; an 8-bit-locale character entered as bytes must
2274+
# survive the shift. See test_textbox_8bit for the character choices.
22762275
encoding = self.stdscr.encoding
22772276
for ch in ['é', '¤', '€', 'є']:
22782277
try:
@@ -2290,8 +2289,8 @@ def test_textbox_8bit_insert(self):
22902289
self.assertEqual(box.gather(), 'ab' + ch + 'c ')
22912290

22922291
def test_textbox_8bit_fill_last_cell(self):
2293-
# A non-ASCII 8-bit-locale character must survive being written to the
2294-
# lower-right cell, which uses insch() rather than addch(). See
2292+
# An 8-bit-locale character entered as bytes must survive being written
2293+
# to the lower-right cell, which uses insch() rather than addch(). See
22952294
# test_textbox_8bit for the character choices.
22962295
encoding = self.stdscr.encoding
22972296
for ch in ['é', '¤', '€', 'є']:
@@ -2308,6 +2307,53 @@ def test_textbox_8bit_fill_last_cell(self):
23082307
box.do_command(byte)
23092308
self.assertEqual(box.gather(), text)
23102309

2310+
def test_textbox_unicode(self):
2311+
# Like test_textbox_8bit, but characters are entered as strings -- the
2312+
# way do_command() receives get_wch() input -- rather than integer
2313+
# bytes. Each string is used only if encodable in the current locale.
2314+
for text in ['abc', 'héšλ', 'café', 'naïve ¤', 'soupçon €Š', 'дякую єі']:
2315+
if self._encodable(text):
2316+
with self.subTest(text=text):
2317+
box, win = self._make_textbox(1, 12)
2318+
for ch in text:
2319+
box.do_command(ch)
2320+
self.assertEqual(box.gather(), text + ' ')
2321+
2322+
def test_textbox_unicode_insert_mode(self):
2323+
# Like test_textbox_8bit_insert, but the character is entered as a string
2324+
# (get_wch() input). Each string is used only if encodable.
2325+
for text in ['abcd', 'aβλc', 'aéàc', 'a¤½c', 'a€Šc', 'aдві']:
2326+
if self._encodable(text):
2327+
with self.subTest(text=text):
2328+
box, win = self._make_textbox(1, 10, insert_mode=True)
2329+
for ch in text[0] + text[2:]: # all but the 2nd character
2330+
box.do_command(ch)
2331+
win.move(0, 1)
2332+
box.do_command(text[1]) # insert it at position 1
2333+
self.assertEqual(box.gather(), text + ' ')
2334+
2335+
@requires_wide_build
2336+
def test_textbox_combining(self):
2337+
# A spacing character plus a combining mark is a single cell, which
2338+
# needs the wide build (a narrow build stores one byte per cell).
2339+
text = 'e\u0301' # 'e' + COMBINING ACUTE ACCENT
2340+
if self._encodable(text):
2341+
box, win = self._make_textbox(1, 10)
2342+
for ch in text:
2343+
box.do_command(ch)
2344+
self.assertEqual(box.gather(), text + ' ')
2345+
2346+
def test_textbox_edit_wide(self):
2347+
# edit() reads characters through get_wch(). Each is used only if
2348+
# encodable in the current locale.
2349+
for ch in ['A', 'é', '¤', '€', 'д']:
2350+
if self._encodable(ch):
2351+
with self.subTest(ch=ch):
2352+
box, win = self._make_textbox(1, 10)
2353+
for c in reversed(['a', ch, chr(curses.ascii.BEL)]):
2354+
curses.unget_wch(c)
2355+
self.assertEqual(box.edit(), 'a' + ch + ' ')
2356+
23112357
def test_textbox_movement(self):
23122358
box, win = self._make_textbox(3, 10)
23132359
self._type(box, 'abc')
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
:class:`curses.textpad.Textbox` now supports entering and reading back the full
2+
Unicode range, including combining characters, when curses is built with
3+
wide-character support.

0 commit comments

Comments
 (0)