Skip to content

Commit 36a27ac

Browse files
committed
gh-142035: Make TextWrapper ANSI-aware (#152702)
# Conflicts: # Doc/whatsnew/3.16.rst
1 parent f1c5363 commit 36a27ac

9 files changed

Lines changed: 199 additions & 20 deletions

File tree

Doc/library/textwrap.rst

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ functions should be good enough; otherwise, you should use an instance of
1919
replace_whitespace=True, fix_sentence_endings=False, \
2020
break_long_words=True, drop_whitespace=True, \
2121
break_on_hyphens=True, tabsize=8, max_lines=None, \
22-
placeholder=' [...]')
22+
placeholder=' [...]', text_len=len)
2323

2424
Wraps the single paragraph in *text* (a string) so every line is at most
2525
*width* characters long. Returns a list of output lines, without final
@@ -37,7 +37,7 @@ functions should be good enough; otherwise, you should use an instance of
3737
replace_whitespace=True, fix_sentence_endings=False, \
3838
break_long_words=True, drop_whitespace=True, \
3939
break_on_hyphens=True, tabsize=8, \
40-
max_lines=None, placeholder=' [...]')
40+
max_lines=None, placeholder=' [...]', text_len=len)
4141

4242
Wraps the single paragraph in *text*, and returns a single string containing the
4343
wrapped paragraph. :func:`fill` is shorthand for ::
@@ -50,7 +50,7 @@ functions should be good enough; otherwise, you should use an instance of
5050

5151
.. function:: shorten(text, width, *, fix_sentence_endings=False, \
5252
break_long_words=True, break_on_hyphens=True, \
53-
placeholder=' [...]')
53+
placeholder=' [...]', text_len=len)
5454

5555
Collapse and truncate the given *text* to fit in the given *width*.
5656

@@ -293,6 +293,27 @@ hyphenated words; only then will long words be broken if necessary, unless
293293
.. versionadded:: 3.4
294294

295295

296+
.. attribute:: text_len
297+
298+
(default: :func:`len`) Callable used to measure the visible width of a
299+
string when deciding where to wrap. Override the default to account for
300+
characters that are not a single column wide, such as zero-width or
301+
double-width characters, or invisible ANSI escape sequences::
302+
303+
>>> import re, textwrap
304+
>>> visible_len = lambda s: len(re.sub(r'\x1b\[[0-9;]*m', '', s))
305+
>>> colored = 'normal \x1b[31mcolored\x1b[0m words here'
306+
>>> lines = textwrap.wrap(colored, width=14, text_len=visible_len)
307+
>>> [re.sub(r'\x1b\[[0-9;]*m', '', line) for line in lines]
308+
['normal colored', 'words here']
309+
310+
The callable must return a non-negative integer. It is assumed to be
311+
additive over the whitespace- and hyphen-delimited chunks that wrapping
312+
produces; a chunk that is too long to fit is split by visible width.
313+
314+
.. versionadded:: 3.16
315+
316+
296317
:class:`TextWrapper` also provides some public methods, analogous to the
297318
module-level convenience functions:
298319

Doc/whatsnew/3.16.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,17 @@ shlex
297297
(Contributed by Jay Berry in :gh:`148846`.)
298298

299299

300+
textwrap
301+
--------
302+
303+
* Add a *text_len* parameter to :func:`textwrap.wrap`, :func:`textwrap.fill`,
304+
:func:`textwrap.shorten`, and :class:`textwrap.TextWrapper`. It customizes
305+
how the visible width of a string is measured, so text that contains
306+
zero-width or double-width characters, or invisible ANSI escape sequences,
307+
can be wrapped correctly.
308+
(Contributed by Kevin Deldycke in :gh:`152702`.)
309+
310+
300311
tkinter
301312
-------
302313

Lib/argparse.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -771,19 +771,25 @@ def _iter_indented_subactions(self, action):
771771
yield from get_subactions()
772772
self._dedent()
773773

774+
def _text_len(self, text):
775+
# Measure the visible width of *text*, ignoring any ANSI color escape
776+
# sequences that may have been inserted for colored help output.
777+
return len(self._decolor(text))
778+
774779
def _split_lines(self, text, width):
775780
text = self._whitespace_matcher.sub(' ', text).strip()
776781
# The textwrap module is used only for formatting help.
777782
# Delay its import for speeding up the common usage of argparse.
778783
import textwrap
779-
return textwrap.wrap(text, width)
784+
return textwrap.wrap(text, width, text_len=self._text_len)
780785

781786
def _fill_text(self, text, width, indent):
782787
text = self._whitespace_matcher.sub(' ', text).strip()
783788
import textwrap
784789
return textwrap.fill(text, width,
785790
initial_indent=indent,
786-
subsequent_indent=indent)
791+
subsequent_indent=indent,
792+
text_len=self._text_len)
787793

788794
def _get_help_string(self, action):
789795
return action.help

Lib/idlelib/idle_test/test_calltip.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ def test_signature_wrap(self):
105105
(width=70, initial_indent='', subsequent_indent='', expand_tabs=True,
106106
replace_whitespace=True, fix_sentence_endings=False, break_long_words=True,
107107
drop_whitespace=True, break_on_hyphens=True, tabsize=8, *, max_lines=None,
108-
placeholder=' [...]')
108+
placeholder=' [...]', text_len=<built-in function len>)
109109
Object for wrapping/filling text. The public interface consists of
110110
the wrap() and fill() methods; the other methods are just there for
111111
subclasses to override in order to tweak the default behaviour.

Lib/test/test_argparse.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7869,6 +7869,32 @@ def test_help_with_format_specifiers(self):
78697869
self.assertIn(f'type: {interp}int{reset}', help_text)
78707870
self.assertIn(f'choices: {interp}a, b{reset}', help_text)
78717871

7872+
def test_colored_help_wraps_like_plain_help(self):
7873+
# gh-142035: ANSI color escapes in the help text (around the
7874+
# interpolated "(default: ...)" value) must not change where lines
7875+
# wrap. Stripping the colors must yield exactly the plain layout.
7876+
env = self.enterContext(os_helper.EnvironmentVarGuard())
7877+
env["COLUMNS"] = "70"
7878+
7879+
def build(color):
7880+
parser = argparse.ArgumentParser(
7881+
prog="PROG",
7882+
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
7883+
color=color,
7884+
)
7885+
parser.add_argument(
7886+
"--verbose",
7887+
action="store_true",
7888+
help="A l o n g d e s c r i p t i o n f o r t h e v e r b "
7889+
"o s e f l a g t o d e m o n s t r a t e w r a p p i n g",
7890+
)
7891+
parser.add_argument("--input", default="input.txt", help="Input file path")
7892+
return parser
7893+
7894+
colored = build(color=True).format_help()
7895+
plain = build(color=False).format_help()
7896+
self.assertEqual(_colorize.decolor(colored), plain)
7897+
78727898
def test_print_help_uses_target_file_for_color_decision(self):
78737899
parser = argparse.ArgumentParser(prog='PROG', color=True)
78747900
parser.add_argument('--opt')

Lib/test/test_textwrap.py

Lines changed: 71 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
# $Id$
99
#
1010

11+
import re
1112
import unittest
1213

1314
from textwrap import TextWrapper, wrap, fill, dedent, indent, shorten
@@ -1133,5 +1134,74 @@ def test_first_word_too_long_but_placeholder_fits(self):
11331134
self.check_shorten("Helloo", 5, "[...]")
11341135

11351136

1136-
if __name__ == '__main__':
1137+
class TextLenTestCase(BaseTestCase):
1138+
# The text_len option customizes how the visible width of a string is
1139+
# measured. The motivating case is colored output, where invisible ANSI
1140+
# escape sequences must not count towards the line width (gh-142035).
1141+
1142+
_ansi = re.compile(r"\x1b\[[0-9;]*m")
1143+
1144+
@classmethod
1145+
def visible_len(cls, text):
1146+
return len(cls._ansi.sub("", text))
1147+
1148+
@classmethod
1149+
def decolor(cls, lines):
1150+
return [cls._ansi.sub("", line) for line in lines]
1151+
1152+
@staticmethod
1153+
def color(text):
1154+
# Wrap every word in a pair of (zero visible width) escape sequences.
1155+
return " ".join(f"\x1b[31m{word}\x1b[0m" for word in text.split())
1156+
1157+
def check_shorten(self, text, width, expect, **kwargs):
1158+
self.check(shorten(text, width, **kwargs), expect)
1159+
1160+
def test_default_text_len_is_len(self):
1161+
self.assertIs(TextWrapper().text_len, len)
1162+
1163+
def test_explicit_len_matches_default(self):
1164+
text = "Hello there, how are you this fine day? I'm glad to hear it!"
1165+
self.check_wrap(text, 12, wrap(text, 12), text_len=len)
1166+
1167+
def test_color_does_not_change_breaks(self):
1168+
text = "These are several short words to be wrapped and colored here"
1169+
for width in (10, 15, 20, 30):
1170+
with self.subTest(width=width):
1171+
lines = wrap(self.color(text), width, text_len=self.visible_len)
1172+
self.assertEqual(self.decolor(lines), wrap(text, width))
1173+
1174+
def test_color_respects_width(self):
1175+
lines = wrap(
1176+
self.color("one two three four five six seven"),
1177+
9,
1178+
text_len=self.visible_len,
1179+
)
1180+
for line in lines:
1181+
self.assertLessEqual(self.visible_len(line), 9)
1182+
1183+
def test_break_long_word_by_visible_width(self):
1184+
word = "\x1b[31m" + "x" * 20 + "\x1b[0m"
1185+
lines = wrap(word, 8, text_len=self.visible_len)
1186+
self.assertEqual(self.decolor(lines), ["xxxxxxxx", "xxxxxxxx", "xxxx"])
1187+
1188+
def test_break_on_hyphens_with_color(self):
1189+
lines = wrap(self.color("spam-egg-ham-bacon"), 9, text_len=self.visible_len)
1190+
self.assertEqual(self.decolor(lines), ["spam-egg-", "ham-bacon"])
1191+
1192+
def test_shorten_with_text_len(self):
1193+
result = shorten(
1194+
self.color("one two three four five"), 12, text_len=self.visible_len
1195+
)
1196+
self.assertLessEqual(self.visible_len(result), 12)
1197+
self.assertEqual(self._ansi.sub("", result), "one [...]")
1198+
1199+
def test_measure_is_not_limited_to_ansi(self):
1200+
# Any width measure works, e.g. counting every character as two columns.
1201+
double = lambda s: 2 * len(s)
1202+
self.check_wrap("aa bb cc dd", 4, ["aa", "bb", "cc", "dd"], text_len=double)
1203+
self.check_wrap("aa bb cc dd", 5, ["aa", "bb", "cc", "dd"], text_len=double)
1204+
1205+
1206+
if __name__ == "__main__":
11371207
unittest.main()

Lib/textwrap.py

Lines changed: 49 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,11 @@ class TextWrapper:
6161
Truncate wrapped lines.
6262
placeholder (default: ' [...]')
6363
Append to the last line of truncated text.
64+
text_len (default: len)
65+
Callable returning the visible width of a string. Override the
66+
default to account for characters that are not one column wide,
67+
such as zero-width or double-width characters, or invisible ANSI
68+
escape sequences. It should return a non-negative integer.
6469
"""
6570

6671
unicode_whitespace_trans = dict.fromkeys(map(ord, _whitespace), ord(' '))
@@ -122,7 +127,8 @@ def __init__(self,
122127
tabsize=8,
123128
*,
124129
max_lines=None,
125-
placeholder=' [...]'):
130+
placeholder=' [...]',
131+
text_len=len):
126132
self.width = width
127133
self.initial_indent = initial_indent
128134
self.subsequent_indent = subsequent_indent
@@ -135,6 +141,7 @@ def __init__(self,
135141
self.tabsize = tabsize
136142
self.max_lines = max_lines
137143
self.placeholder = placeholder
144+
self.text_len = text_len
138145

139146

140147
# -- Private methods -----------------------------------------------
@@ -194,6 +201,28 @@ def _fix_sentence_endings(self, chunks):
194201
else:
195202
i += 1
196203

204+
def _truncate_to_width(self, text, width):
205+
"""_truncate_to_width(text : string, width : int) -> string
206+
207+
Return the longest prefix of *text* whose visible width, as measured
208+
by ``self.text_len``, does not exceed *width*. With a custom text_len the
209+
number of characters that fit need not equal *width*, so an over-long
210+
word cannot be broken by slicing at the column count. At least one
211+
character is always kept so that wrapping makes progress.
212+
"""
213+
# Fast path for the default len(): the width is the number of
214+
# characters, so the prefix can be sliced directly.
215+
if self.text_len is len:
216+
return text[: max(width, 1)]
217+
if self.text_len(text) <= width:
218+
return text
219+
cut = 1
220+
for i in range(1, len(text) + 1):
221+
if self.text_len(text[:i]) > width:
222+
break
223+
cut = i
224+
return text[:cut]
225+
197226
def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
198227
"""_handle_long_word(chunks : [string],
199228
cur_line : [string],
@@ -212,9 +241,10 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
212241
# If we're allowed to break long words, then do so: put as much
213242
# of the next chunk onto the current line as will fit.
214243
if self.break_long_words and space_left > 0:
215-
end = space_left
216244
chunk = reversed_chunks[-1]
217-
if self.break_on_hyphens and len(chunk) > space_left:
245+
# Keep as many leading characters as fit in the visible width.
246+
end = len(self._truncate_to_width(chunk, space_left))
247+
if self.break_on_hyphens and self.text_len(chunk) > space_left:
218248
# break after last hyphen, but only if there are
219249
# non-hyphens before it
220250
hyphen = chunk.rfind('-', 0, space_left)
@@ -256,7 +286,10 @@ def _wrap_chunks(self, chunks):
256286
indent = self.subsequent_indent
257287
else:
258288
indent = self.initial_indent
259-
if len(indent) + len(self.placeholder.lstrip()) > self.width:
289+
if (
290+
self.text_len(indent) + self.text_len(self.placeholder.lstrip())
291+
> self.width
292+
):
260293
raise ValueError("placeholder too large for max width")
261294

262295
# Arrange in reverse order so items can be efficiently popped
@@ -277,15 +310,15 @@ def _wrap_chunks(self, chunks):
277310
indent = self.initial_indent
278311

279312
# Maximum width for this line.
280-
width = self.width - len(indent)
313+
width = self.width - self.text_len(indent)
281314

282315
# First chunk on line is whitespace -- drop it, unless this
283316
# is the very beginning of the text (ie. no lines started yet).
284317
if self.drop_whitespace and chunks[-1].strip() == '' and lines:
285318
del chunks[-1]
286319

287320
while chunks:
288-
l = len(chunks[-1])
321+
l = self.text_len(chunks[-1])
289322

290323
# Can at least squeeze this chunk onto the current line.
291324
if cur_len + l <= width:
@@ -298,13 +331,13 @@ def _wrap_chunks(self, chunks):
298331

299332
# The current line is full, and the next chunk is too big to
300333
# fit on *any* line (not just this one).
301-
if chunks and len(chunks[-1]) > width:
334+
if chunks and self.text_len(chunks[-1]) > width:
302335
self._handle_long_word(chunks, cur_line, cur_len, width)
303-
cur_len = sum(map(len, cur_line))
336+
cur_len = sum(map(self.text_len, cur_line))
304337

305338
# If the last chunk on this line is all whitespace, drop it.
306339
if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':
307-
cur_len -= len(cur_line[-1])
340+
cur_len -= self.text_len(cur_line[-1])
308341
del cur_line[-1]
309342

310343
if cur_line:
@@ -320,17 +353,20 @@ def _wrap_chunks(self, chunks):
320353
else:
321354
while cur_line:
322355
if (cur_line[-1].strip() and
323-
cur_len + len(self.placeholder) <= width):
356+
cur_len + self.text_len(self.placeholder) <= width):
324357
cur_line.append(self.placeholder)
325358
lines.append(indent + ''.join(cur_line))
326359
break
327-
cur_len -= len(cur_line[-1])
360+
cur_len -= self.text_len(cur_line[-1])
328361
del cur_line[-1]
329362
else:
330363
if lines:
331364
prev_line = lines[-1].rstrip()
332-
if (len(prev_line) + len(self.placeholder) <=
333-
self.width):
365+
if (
366+
self.text_len(prev_line)
367+
+ self.text_len(self.placeholder)
368+
<= self.width
369+
):
334370
lines[-1] = prev_line + self.placeholder
335371
break
336372
lines.append(indent + self.placeholder.lstrip())
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Add a *text_len* parameter to :func:`textwrap.wrap`, :func:`textwrap.fill`,
2+
:func:`textwrap.shorten`, and :class:`textwrap.TextWrapper`. It customizes how
3+
the visible width of a string is measured, allowing text that contains
4+
zero-width or double-width characters, or invisible ANSI escape sequences, to
5+
be wrapped correctly.
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Fix :mod:`argparse` help text wrapping when colors are enabled. ANSI escape
2+
sequences inserted around interpolated values such as the ``(default: ...)``
3+
suffix no longer count towards the line width, so colored help wraps at the
4+
same place as the equivalent uncolored help.

0 commit comments

Comments
 (0)