From d946074a8fd4bd743dcde12e08e1561f062a2a68 Mon Sep 17 00:00:00 2001 From: Kevin Deldycke Date: Tue, 12 May 2026 17:00:35 +0200 Subject: [PATCH] Rewrite `_wrap_chunks` in `TextWrapper` to be ANSI-aware --- CHANGES.rst | 3 + src/click/_textwrap.py | 141 ++++++++++++++++++++++++++++++++++++++- src/click/formatting.py | 6 ++ tests/test_formatting.py | 66 ++++++++++++++++++ 4 files changed, 214 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 77f4b4b4b1..168488911c 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -53,6 +53,9 @@ Unreleased fail. :issue:`3105` :pr:`3211` - Add ``click.get_pager_file`` for file-like access to an output pager. :pr:`1572` +- :class:`~click.formatting.TextWrapper` and + :func:`~click.formatting.wrap_text` now measure line width in visible + characters, ignoring ANSI escape sequences. :pr:`3420` Version 8.3.3 ------------- diff --git a/src/click/_textwrap.py b/src/click/_textwrap.py index 97fbee3dc6..82840f2dff 100644 --- a/src/click/_textwrap.py +++ b/src/click/_textwrap.py @@ -4,8 +4,47 @@ import textwrap from contextlib import contextmanager +from ._compat import _ansi_re +from ._compat import term_len + + +def _truncate_visible(text: str, n: int) -> str: + """Return the longest prefix of ``text`` containing at most ``n`` visible + characters. + + ANSI escape sequences inside the prefix are kept intact and do not count + toward the visible width. A cut is never placed inside an escape sequence. + """ + if n <= 0: + return "" + + visible = 0 + i = 0 + cut = 0 + end = len(text) + while i < end: + m = _ansi_re.match(text, i) + if m is not None: + i = m.end() + continue + visible += 1 + i += 1 + cut = i + if visible >= n: + break + return text[:cut] + class TextWrapper(textwrap.TextWrapper): + """``textwrap.TextWrapper`` variant that measures widths by visible + character count. + + ANSI escape sequences embedded in chunks, indents, or the placeholder are + excluded from the width budget. Without this, styled help text (a styled + ``Usage:`` prefix, a colorized option name, ...) would be wrapped earlier + than its visible length warrants and tokens would split mid-word. + """ + def _handle_long_word( self, reversed_chunks: list[str], @@ -17,13 +56,111 @@ def _handle_long_word( if self.break_long_words: last = reversed_chunks[-1] - cut = last[:space_left] - res = last[space_left:] + cut = _truncate_visible(last, space_left) + res = last[len(cut) :] cur_line.append(cut) reversed_chunks[-1] = res elif not cur_line: cur_line.append(reversed_chunks.pop()) + def _wrap_chunks(self, chunks: list[str]) -> list[str]: + """Wrap chunks counting widths in visible characters. + + Mirrors the algorithm of :meth:`textwrap.TextWrapper._wrap_chunks` + with every width measurement routed through + :func:`click._compat.term_len` instead of :func:`len`, so ANSI escape + bytes in chunks, indents, or the placeholder do not inflate the count. + + .. seealso:: + :class:`textwrap.TextWrapper` in the Python standard library documentation: + https://docs.python.org/3/library/textwrap.html#textwrap.TextWrapper + + Reference implementation in CPython: + https://github.com/python/cpython/blob/main/Lib/textwrap.py + """ + lines: list[str] = [] + if self.width <= 0: + raise ValueError(f"invalid width {self.width!r} (must be > 0)") + if self.max_lines is not None: + if self.max_lines > 1: + indent = self.subsequent_indent + else: + indent = self.initial_indent + if term_len(indent) + term_len(self.placeholder.lstrip()) > self.width: + raise ValueError("placeholder too large for max width") + + chunks.reverse() + + while chunks: + cur_line: list[str] = [] + cur_len = 0 + + if lines: + indent = self.subsequent_indent + else: + indent = self.initial_indent + + width = self.width - term_len(indent) + + if self.drop_whitespace and chunks[-1].strip() == "" and lines: + del chunks[-1] + + while chunks: + n = term_len(chunks[-1]) + + if cur_len + n <= width: + cur_line.append(chunks.pop()) + cur_len += n + + else: + break + + if chunks and term_len(chunks[-1]) > width: + self._handle_long_word(chunks, cur_line, cur_len, width) + cur_len = sum(map(term_len, cur_line)) + + if self.drop_whitespace and cur_line and cur_line[-1].strip() == "": + cur_len -= term_len(cur_line[-1]) + del cur_line[-1] + + if cur_line: + if ( + self.max_lines is None + or len(lines) + 1 < self.max_lines + or ( + not chunks + or self.drop_whitespace + and len(chunks) == 1 + and not chunks[0].strip() + ) + and cur_len <= width + ): + lines.append(indent + "".join(cur_line)) + else: + while cur_line: + if ( + cur_line[-1].strip() + and cur_len + term_len(self.placeholder) <= width + ): + cur_line.append(self.placeholder) + lines.append(indent + "".join(cur_line)) + break + cur_len -= term_len(cur_line[-1]) + del cur_line[-1] + else: + if lines: + prev_line = lines[-1].rstrip() + if ( + term_len(prev_line) + term_len(self.placeholder) + <= self.width + ): + lines[-1] = prev_line + self.placeholder + break + lines.append(indent + self.placeholder.lstrip()) + break + + return lines + @contextmanager def extra_indent(self, indent: str) -> cabc.Iterator[None]: old_initial_indent = self.initial_indent diff --git a/src/click/formatting.py b/src/click/formatting.py index de2ca47117..d9075ca1f0 100644 --- a/src/click/formatting.py +++ b/src/click/formatting.py @@ -52,6 +52,12 @@ def wrap_text( each consecutive line. :param preserve_paragraphs: if this flag is set then the wrapping will intelligently handle paragraphs. + + .. versionchanged:: 8.4 + Width is measured in visible characters. ANSI escape sequences in + ``text``, ``initial_indent``, or ``subsequent_indent`` no longer + count toward the width budget, so styled input wraps based on what + the user sees instead of raw byte length. """ from ._textwrap import TextWrapper diff --git a/tests/test_formatting.py b/tests/test_formatting.py index c74b53a3df..3e85688560 100644 --- a/tests/test_formatting.py +++ b/tests/test_formatting.py @@ -1,6 +1,7 @@ import pytest import click +from click._compat import strip_ansi def test_basic_functionality(runner): @@ -433,3 +434,68 @@ def test_help_formatter_write_text(): actual = formatter.getvalue() expected = " Lorem ipsum dolor sit amet,\n consectetur adipiscing elit\n" assert actual == expected + + +@pytest.mark.parametrize( + ("body", "width", "initial_indent"), + [ + # Styled ``initial_indent`` must be measured by visible width, so the + # ``Usage:`` prefix shouldn't push ``[OPTIONS]`` to the second line. + # Regression for the asymmetry between ``HelpFormatter.write_usage`` + # (which sized the prefix with ``term_len``) and ``wrap_text`` + # (which previously used raw ``len``). + pytest.param( + "[OPTIONS]", + 30, + "\x1b[38;2;38;139;210m\x1b[1mUsage:\x1b[0m ", + id="styled-initial-indent-does-not-break-body", + ), + # Styled chunks in the body itself wrap on visible width. + pytest.param( + "\x1b[31malpha\x1b[0m \x1b[31mbeta\x1b[0m" + " \x1b[31mgamma\x1b[0m \x1b[31mdelta\x1b[0m", + 15, + "", + id="styled-body-wraps-on-visible-width", + ), + # ``_handle_long_word`` cuts a styled token between visible + # characters; the ANSI escape sequence must not be split. + pytest.param( + "\x1b[31mabcdefghij\x1b[0m", + 5, + "", + id="styled-long-word-breaks-on-visible-width", + ), + ], +) +def test_wrap_text_visible_width(body, width, initial_indent): + """``wrap_text`` of styled input produces the same line layout as + ``wrap_text`` of the ANSI-stripped input. + + ANSI escape bytes must not count toward the width budget, regardless + of whether they appear in the body, in ``initial_indent``, or when a + styled token has to be broken in the middle. + """ + styled = click.formatting.wrap_text( + body, width=width, initial_indent=initial_indent + ) + plain = click.formatting.wrap_text( + strip_ansi(body), width=width, initial_indent=strip_ansi(initial_indent) + ) + + styled_visible = [strip_ansi(line) for line in styled.splitlines()] + assert styled_visible == plain.splitlines() + + +def test_write_usage_styled_prefix_keeps_options_on_one_line(): + """End-to-end: a downstream-styled ``Usage:`` prefix should not split + ``[OPTIONS]`` across two lines. + """ + styled_prefix = "\x1b[38;2;38;139;210m\x1b[1mUsage:\x1b[0m " + + formatter = click.HelpFormatter(width=40) + formatter.write_usage("cli", "[OPTIONS]", prefix=styled_prefix) + rendered = formatter.getvalue() + + visible = strip_ansi(rendered) + assert visible == "Usage: cli [OPTIONS]\n"