From f9baebb0db4da315a3ddc647338889d70c52256b Mon Sep 17 00:00:00 2001
From: Pierre Sassoulas <pierre.sassoulas@gmail.com>
Date: Sat, 13 Jun 2026 18:33:58 +0200
Subject: [PATCH 01/12] [perf] Make ``PrettyPrinter`` format lazily so output
 can be budget-capped
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

``_format`` and the per-type helpers now ``yield`` their output as a
stream of string chunks instead of writing to a file-like object, and
``pformat`` joins them. On top of that, ``pformat_lines`` pulls from the
formatter only until a budget is reached:

    pformat_lines(obj, max_lines=None, max_chars=None)

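It stops on the first chunk that reaches *either* budget, so a huge
list, tuple or dict costs O(budget) rather than O(N). The budget is
checked per chunk: the repr of a single scalar element is always
produced whole, and a set is sorted in full before its first element
is emitted, so those cases are not bounded by the budget. Either
dimension may be ``None`` (unbounded); with both ``None`` the whole
object is formatted.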

Motivation
----------
Assertion diffs are truncated to a handful of lines/chars before being
shown. Formatting the whole of a large ``==`` comparison and then
throwing almost all of it away is pure waste. With a lazy formatter the
truncating caller simply stops pulling once it has enough.

Benchmark (``PrettyPrinter`` alone, width 80)::

    list(range(500_000)):
        pformat().splitlines()        ~805 ms
        pformat_lines(max_lines=11)   ~0.027 ms      (~30000x)

    [8 small ints] (common small diff):
        pformat().splitlines()        ~0.0133 ms
        pformat_lines(max_lines=11)   ~0.0185 ms     (+~5 us)

    ["x"*100_000] * 3 (flat, few huge elements):
        pformat_lines(max_chars=640)  stops after ~100_000 chars
                                      (one element) instead of 300_000

Why a lazy generator rather than a fast path + budget stream
------------------------------------------------------------
An earlier approach kept a cheap ``pformat().splitlines()`` fast path
guarded by ``len(obj) <= max_lines`` plus a flatness check, falling back
to a write-intercepting budget-stream class for the rest. Two problems:

* ``len(obj)`` is only a *lower* bound on the line count — one nested
  element (``[{...50 keys...}]``) expands to many lines — so the guard
  needed the flatness scan to stay correct, and even then it bounded
  only *lines*, never *chars*: a flat container of a few enormous
  strings has almost no lines but blows the char budget.
* it was two code paths plus a stream class plus an exception used for
  control flow.

Because the formatter is lazy, "stop pulling at the budget" is the whole
optimisation: correct regardless of how lines/chars are distributed
across elements, bounding both dimensions, with no ``len()`` proxy to
get wrong and no fast/slow branch. The common small-diff case costs only
~5 us more than the unbounded path (it is never the bottleneck — a
failing assertion isn't hot), while large comparisons drop by orders of
magnitude.

``_pprint_set`` also tries a plain ``sorted`` first and falls back to
the ``_safe_key`` wrapper only for unorderable mixes. ``_pprint_dict``
is unchanged in this respect: it does not sort and emits items in the
mapping's own order.

This diverges structurally from the upstream cpython ``pprint`` it was
vendored from; the module header notes it is no longer kept in sync.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/_pytest/_io/pprint.py | 335 ++++++++++++++++++++------------------
 testing/io/test_pprint.py |  84 ++++++++++
 2 files changed, 262 insertions(+), 157 deletions(-)

diff --git a/src/_pytest/_io/pprint.py b/src/_pytest/_io/pprint.py
index ec41b449ddf..06caf436e60 100644
--- a/src/_pytest/_io/pprint.py
+++ b/src/_pytest/_io/pprint.py
@@ -3,6 +3,14 @@
 # (https://github.com/python/cpython/) at commit
 # c5140945c723ae6c4b7ee81ff720ac8ea4b52cfd (python3.12).
 #
+# It has since been adapted to emit its output lazily as a stream of
+# string chunks (``_format`` and the per-type helpers are generators)
+# rather than writing to a file-like object. This lets ``pformat_lines``
+# stop formatting as soon as a line/char budget is reached, so a huge
+# collection a caller is going to truncate anyway is never fully built.
+# As a result this copy has diverged structurally from upstream and is
+# no longer kept in sync with it.
+#
 #
 #  Original Author:      Fred L. Drake, Jr.
 #                        fdrake@acm.org
@@ -17,13 +25,12 @@
 
 import collections as _collections
 from collections.abc import Callable
+from collections.abc import Iterable
 from collections.abc import Iterator
 import dataclasses as _dataclasses
-from io import StringIO as _StringIO
 import re
 import types as _types
 from typing import Any
-from typing import IO
 
 
 class _safe_key:
@@ -87,28 +94,62 @@ def __init__(
         self._width = width
 
     def pformat(self, object: Any) -> str:
-        sio = _StringIO()
-        self._format(object, sio, 0, 0, set(), 0)
-        return sio.getvalue()
+        return "".join(self._format(object, 0, 0, set(), 0))
+
+    def pformat_lines(
+        self,
+        object: Any,
+        max_lines: int | None = None,
+        max_chars: int | None = None,
+    ) -> list[str]:
+        """Pretty-print ``object`` and return its lines.
+
+        ``_format`` yields the output as a stream of chunks, so this can
+        stop pulling from it as soon as a budget is reached — useful when
+        a downstream truncator is going to drop everything past that
+        budget anyway.
+
+        ``max_lines`` / ``max_chars`` bound the two truncation dimensions
+        independently; either may be ``None`` to leave that dimension
+        unbounded. With both ``None`` the whole object is formatted. The
+        budget is a stopping condition, not a precise cut: formatting
+        stops on the first chunk that reaches it, so the result may
+        slightly overshoot (the caller truncates to the exact limit).
+        """
+        if max_lines is None and max_chars is None:
+            return self.pformat(object).splitlines()
+        n_lines = 0
+        n_chars = 0
+        chunks: list[str] = []
+        for chunk in self._format(object, 0, 0, set(), 0):
+            chunks.append(chunk)
+            if max_chars is not None:
+                n_chars += len(chunk)
+            if max_lines is not None:
+                n_lines += chunk.count("\n")
+            if (max_lines is not None and n_lines >= max_lines) or (
+                max_chars is not None and n_chars >= max_chars
+            ):
+                break
+        return "".join(chunks).splitlines()
 
     def _format(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
+    ) -> Iterator[str]:
         objid = id(object)
         if objid in context:
-            stream.write(_recursion(object))
+            yield _recursion(object)
             return
 
         p = self._dispatch.get(type(object).__repr__, None)
         if p is not None:
             context.add(objid)
-            p(self, object, stream, indent, allowance, context, level + 1)
+            yield from p(self, object, indent, allowance, context, level + 1)
             context.remove(objid)
         elif (
             _dataclasses.is_dataclass(object)
@@ -120,125 +161,126 @@ def _format(
             and "__create_fn__" in object.__repr__.__wrapped__.__qualname__
         ):
             context.add(objid)
-            self._pprint_dataclass(
-                object, stream, indent, allowance, context, level + 1
+            yield from self._pprint_dataclass(
+                object, indent, allowance, context, level + 1
             )
             context.remove(objid)
         else:
-            stream.write(self._repr(object, context, level))
+            yield self._repr(object, context, level)
 
     def _pprint_dataclass(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
+    ) -> Iterator[str]:
         cls_name = object.__class__.__name__
         items = [
             (f.name, getattr(object, f.name))
             for f in _dataclasses.fields(object)
             if f.repr
         ]
-        stream.write(cls_name + "(")
-        self._format_namespace_items(items, stream, indent, allowance, context, level)
-        stream.write(")")
+        yield cls_name + "("
+        yield from self._format_namespace_items(
+            items, indent, allowance, context, level
+        )
+        yield ")"
 
     _dispatch: dict[
         Callable[..., str],
-        Callable[[PrettyPrinter, Any, IO[str], int, int, set[int], int], None],
+        Callable[[PrettyPrinter, Any, int, int, set[int], int], Iterator[str]],
     ] = {}
 
     def _pprint_dict(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        write = stream.write
-        write("{")
-        items = object.items()
-        self._format_dict_items(items, stream, indent, allowance, context, level)
-        write("}")
+    ) -> Iterator[str]:
+        yield "{"
+        yield from self._format_dict_items(
+            object.items(), indent, allowance, context, level
+        )
+        yield "}"
 
     _dispatch[dict.__repr__] = _pprint_dict
 
     def _pprint_ordered_dict(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
+    ) -> Iterator[str]:
         if not len(object):
-            stream.write(repr(object))
+            yield repr(object)
             return
         cls = object.__class__
-        stream.write(cls.__name__ + "(")
-        self._pprint_dict(object, stream, indent, allowance, context, level)
-        stream.write(")")
+        yield cls.__name__ + "("
+        yield from self._pprint_dict(object, indent, allowance, context, level)
+        yield ")"
 
     _dispatch[_collections.OrderedDict.__repr__] = _pprint_ordered_dict
 
     def _pprint_list(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        stream.write("[")
-        self._format_items(object, stream, indent, allowance, context, level)
-        stream.write("]")
+    ) -> Iterator[str]:
+        yield "["
+        yield from self._format_items(object, indent, allowance, context, level)
+        yield "]"
 
     _dispatch[list.__repr__] = _pprint_list
 
     def _pprint_tuple(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        stream.write("(")
-        self._format_items(object, stream, indent, allowance, context, level)
-        stream.write(")")
+    ) -> Iterator[str]:
+        yield "("
+        yield from self._format_items(object, indent, allowance, context, level)
+        yield ")"
 
     _dispatch[tuple.__repr__] = _pprint_tuple
 
     def _pprint_set(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
+    ) -> Iterator[str]:
         if not len(object):
-            stream.write(repr(object))
+            yield repr(object)
             return
         typ = object.__class__
         if typ is set:
-            stream.write("{")
+            yield "{"
             endchar = "}"
         else:
-            stream.write(typ.__name__ + "({")
+            yield typ.__name__ + "({"
             endchar = "})"
-        object = sorted(object, key=_safe_key)
-        self._format_items(object, stream, indent, allowance, context, level)
-        stream.write(endchar)
+        try:
+            object = sorted(object)
+        except TypeError:
+            # Heterogeneous element types — fall back to a key that
+            # tolerates unorderable pairs by string-comparing their types.
+            object = sorted(object, key=_safe_key)
+        yield from self._format_items(object, indent, allowance, context, level)
+        yield endchar
 
     _dispatch[set.__repr__] = _pprint_set
     _dispatch[frozenset.__repr__] = _pprint_set
@@ -246,15 +288,13 @@ def _pprint_set(
     def _pprint_str(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        write = stream.write
+    ) -> Iterator[str]:
         if not len(object):
-            write(repr(object))
+            yield repr(object)
             return
         chunks = []
         lines = object.splitlines(True)
@@ -289,90 +329,84 @@ def _pprint_str(
                 if current:
                     chunks.append(repr(current))
         if len(chunks) == 1:
-            write(rep)
+            yield rep
             return
         if level == 1:
-            write("(")
+            yield "("
         for i, rep in enumerate(chunks):
             if i > 0:
-                write("\n" + " " * indent)
-            write(rep)
+                yield "\n" + " " * indent
+            yield rep
         if level == 1:
-            write(")")
+            yield ")"
 
     _dispatch[str.__repr__] = _pprint_str
 
     def _pprint_bytes(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        write = stream.write
+    ) -> Iterator[str]:
         if len(object) <= 4:
-            write(repr(object))
+            yield repr(object)
             return
         parens = level == 1
         if parens:
             indent += 1
             allowance += 1
-            write("(")
+            yield "("
         delim = ""
         for rep in _wrap_bytes_repr(object, self._width - indent, allowance):
-            write(delim)
-            write(rep)
+            yield delim
+            yield rep
             if not delim:
                 delim = "\n" + " " * indent
         if parens:
-            write(")")
+            yield ")"
 
     _dispatch[bytes.__repr__] = _pprint_bytes
 
     def _pprint_bytearray(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        write = stream.write
-        write("bytearray(")
-        self._pprint_bytes(
-            bytes(object), stream, indent + 10, allowance + 1, context, level + 1
+    ) -> Iterator[str]:
+        yield "bytearray("
+        yield from self._pprint_bytes(
+            bytes(object), indent + 10, allowance + 1, context, level + 1
         )
-        write(")")
+        yield ")"
 
     _dispatch[bytearray.__repr__] = _pprint_bytearray
 
     def _pprint_mappingproxy(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        stream.write("mappingproxy(")
-        self._format(object.copy(), stream, indent, allowance, context, level)
-        stream.write(")")
+    ) -> Iterator[str]:
+        yield "mappingproxy("
+        yield from self._format(object.copy(), indent, allowance, context, level)
+        yield ")"
 
     _dispatch[_types.MappingProxyType.__repr__] = _pprint_mappingproxy
 
     def _pprint_simplenamespace(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
+    ) -> Iterator[str]:
         if type(object) is _types.SimpleNamespace:
             # The SimpleNamespace repr is "namespace" instead of the class
             # name, so we do the same here. For subclasses; use the class name.
@@ -380,95 +414,89 @@ def _pprint_simplenamespace(
         else:
             cls_name = object.__class__.__name__
         items = object.__dict__.items()
-        stream.write(cls_name + "(")
-        self._format_namespace_items(items, stream, indent, allowance, context, level)
-        stream.write(")")
+        yield cls_name + "("
+        yield from self._format_namespace_items(
+            items, indent, allowance, context, level
+        )
+        yield ")"
 
     _dispatch[_types.SimpleNamespace.__repr__] = _pprint_simplenamespace
 
     def _format_dict_items(
         self,
-        items: list[tuple[Any, Any]],
-        stream: IO[str],
+        items: Iterable[tuple[Any, Any]],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        if not items:
-            return
-
-        write = stream.write
+    ) -> Iterator[str]:
         item_indent = indent + self._indent_per_level
         delimnl = "\n" + " " * item_indent
+        emitted = False
         for key, ent in items:
-            write(delimnl)
-            write(self._repr(key, context, level))
-            write(": ")
-            self._format(ent, stream, item_indent, 1, context, level)
-            write(",")
+            emitted = True
+            yield delimnl
+            yield self._repr(key, context, level)
+            yield ": "
+            yield from self._format(ent, item_indent, 1, context, level)
+            yield ","
 
-        write("\n" + " " * indent)
+        if emitted:
+            yield "\n" + " " * indent
 
     def _format_namespace_items(
         self,
-        items: list[tuple[Any, Any]],
-        stream: IO[str],
+        items: Iterable[tuple[Any, Any]],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        if not items:
-            return
-
-        write = stream.write
+    ) -> Iterator[str]:
         item_indent = indent + self._indent_per_level
         delimnl = "\n" + " " * item_indent
+        emitted = False
         for key, ent in items:
-            write(delimnl)
-            write(key)
-            write("=")
+            emitted = True
+            yield delimnl
+            yield key
+            yield "="
             if id(ent) in context:
                 # Special-case representation of recursion to match standard
                 # recursive dataclass repr.
-                write("...")
+                yield "..."
             else:
-                self._format(
+                yield from self._format(
                     ent,
-                    stream,
                     item_indent + len(key) + 1,
                     1,
                     context,
                     level,
                 )
 
-            write(",")
+            yield ","
 
-        write("\n" + " " * indent)
+        if emitted:
+            yield "\n" + " " * indent
 
     def _format_items(
         self,
-        items: list[Any],
-        stream: IO[str],
+        items: Iterable[Any],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        if not items:
-            return
-
-        write = stream.write
+    ) -> Iterator[str]:
         item_indent = indent + self._indent_per_level
         delimnl = "\n" + " " * item_indent
-
+        emitted = False
         for item in items:
-            write(delimnl)
-            self._format(item, stream, item_indent, 1, context, level)
-            write(",")
+            emitted = True
+            yield delimnl
+            yield from self._format(item, item_indent, 1, context, level)
+            yield ","
 
-        write("\n" + " " * indent)
+        if emitted:
+            yield "\n" + " " * indent
 
     def _repr(self, object: Any, context: set[int], level: int) -> str:
         return self._safe_repr(object, context.copy(), self._depth, level)
@@ -476,114 +504,107 @@ def _repr(self, object: Any, context: set[int], level: int) -> str:
     def _pprint_default_dict(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
+    ) -> Iterator[str]:
         rdf = self._repr(object.default_factory, context, level)
-        stream.write(f"{object.__class__.__name__}({rdf}, ")
-        self._pprint_dict(object, stream, indent, allowance, context, level)
-        stream.write(")")
+        yield f"{object.__class__.__name__}({rdf}, "
+        yield from self._pprint_dict(object, indent, allowance, context, level)
+        yield ")"
 
     _dispatch[_collections.defaultdict.__repr__] = _pprint_default_dict
 
     def _pprint_counter(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        stream.write(object.__class__.__name__ + "(")
+    ) -> Iterator[str]:
+        yield object.__class__.__name__ + "("
 
         if object:
-            stream.write("{")
+            yield "{"
             items = object.most_common()
-            self._format_dict_items(items, stream, indent, allowance, context, level)
-            stream.write("}")
+            yield from self._format_dict_items(items, indent, allowance, context, level)
+            yield "}"
 
-        stream.write(")")
+        yield ")"
 
     _dispatch[_collections.Counter.__repr__] = _pprint_counter
 
     def _pprint_chain_map(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
+    ) -> Iterator[str]:
         if not len(object.maps) or (len(object.maps) == 1 and not len(object.maps[0])):
-            stream.write(repr(object))
+            yield repr(object)
             return
 
-        stream.write(object.__class__.__name__ + "(")
-        self._format_items(object.maps, stream, indent, allowance, context, level)
-        stream.write(")")
+        yield object.__class__.__name__ + "("
+        yield from self._format_items(object.maps, indent, allowance, context, level)
+        yield ")"
 
     _dispatch[_collections.ChainMap.__repr__] = _pprint_chain_map
 
     def _pprint_deque(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        stream.write(object.__class__.__name__ + "(")
+    ) -> Iterator[str]:
+        yield object.__class__.__name__ + "("
         if object.maxlen is not None:
-            stream.write(f"maxlen={object.maxlen}, ")
-        stream.write("[")
+            yield f"maxlen={object.maxlen}, "
+        yield "["
 
-        self._format_items(object, stream, indent, allowance + 1, context, level)
-        stream.write("])")
+        yield from self._format_items(object, indent, allowance + 1, context, level)
+        yield "])"
 
     _dispatch[_collections.deque.__repr__] = _pprint_deque
 
     def _pprint_user_dict(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        self._format(object.data, stream, indent, allowance, context, level - 1)
+    ) -> Iterator[str]:
+        yield from self._format(object.data, indent, allowance, context, level - 1)
 
     _dispatch[_collections.UserDict.__repr__] = _pprint_user_dict
 
     def _pprint_user_list(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        self._format(object.data, stream, indent, allowance, context, level - 1)
+    ) -> Iterator[str]:
+        yield from self._format(object.data, indent, allowance, context, level - 1)
 
     _dispatch[_collections.UserList.__repr__] = _pprint_user_list
 
     def _pprint_user_string(
         self,
         object: Any,
-        stream: IO[str],
         indent: int,
         allowance: int,
         context: set[int],
         level: int,
-    ) -> None:
-        self._format(object.data, stream, indent, allowance, context, level - 1)
+    ) -> Iterator[str]:
+        yield from self._format(object.data, indent, allowance, context, level - 1)
 
     _dispatch[_collections.UserString.__repr__] = _pprint_user_string
 
diff --git a/testing/io/test_pprint.py b/testing/io/test_pprint.py
index 1326ef34b2e..2c08734cf46 100644
--- a/testing/io/test_pprint.py
+++ b/testing/io/test_pprint.py
@@ -406,3 +406,87 @@ class DataclassWithTwoItems:
 )
 def test_consistent_pretty_printer(data: Any, expected: str) -> None:
     assert PrettyPrinter().pformat(data) == textwrap.dedent(expected).strip()
+
+
+class TestPformatLines:
+    """``pformat_lines`` returns the pretty-printed lines, pulling from
+    the lazy formatter only until a line/char budget is reached so an
+    input a downstream truncator will clip anyway is never fully built.
+    """
+
+    def test_no_budget_matches_pformat_splitlines(self) -> None:
+        pp = PrettyPrinter()
+        data = list(range(50))
+        assert pp.pformat_lines(data) == pp.pformat(data).splitlines()
+
+    def test_under_budget_is_complete_and_a_prefix(self) -> None:
+        # When the whole thing fits, the result is the full pformat,
+        # regardless of how the budget was reached.
+        pp = PrettyPrinter()
+        data = list(range(5))
+        full = pp.pformat(data).splitlines()
+        assert pp.pformat_lines(data, max_lines=11) == full
+        assert pp.pformat_lines(data, max_chars=10_000) == full
+
+    def test_line_budget_stops_early(self) -> None:
+        pp = PrettyPrinter()
+        # 50 scalars, one per line, budget well below 50.
+        full = pp.pformat(list(range(50))).splitlines()
+        lines = pp.pformat_lines(list(range(50)), max_lines=11)
+        assert len(lines) <= 11 + 1  # budget, plus a trailing partial line
+        # everything but the last line (which may stop mid-line) is a
+        # prefix of the full output
+        assert lines[:-1] == full[: len(lines) - 1]
+
+    def test_char_budget_stops_early(self) -> None:
+        # A *flat* container of huge strings has few lines but explodes on
+        # chars; a line-only budget wouldn't stop it. The char budget must.
+        pp = PrettyPrinter()
+        data = ["x" * 100_000, "y" * 100_000, "z" * 100_000]
+        lines = pp.pformat_lines(data, max_chars=640)
+        assert sum(len(line) for line in lines) < 200_000  # bailed, didn't format all 3
+
+    def test_nested_element_respects_line_budget(self) -> None:
+        # ``len(object)`` is only a *lower* bound on the line count: a
+        # single nested element expands to many lines. The lazy pull must
+        # stop regardless of the container's element count.
+        pp = PrettyPrinter()
+        for data in ([{i: "x" * 40 for i in range(50)}], {1: list(range(100))}):
+            lines = pp.pformat_lines(data, max_lines=11)
+            assert len(lines) <= 11 + 1
+
+    def test_nested_dataclass_element_respects_line_budget(self) -> None:
+        @dataclass
+        class Many:
+            a: int
+            b: int
+            c: int
+            d: int
+            e: int
+            f: int
+            g: int
+            h: int
+
+        pp = PrettyPrinter()
+        lines = pp.pformat_lines([Many(*range(8))], max_lines=4)
+        assert len(lines) <= 4 + 1
+        assert len(lines) < len(pp.pformat([Many(*range(8))]).splitlines())
+
+    def test_sized_non_iterable_does_not_raise(self) -> None:
+        class Sized:
+            def __len__(self) -> int:
+                return 3
+
+        pp = PrettyPrinter()
+        obj = Sized()
+        assert pp.pformat_lines(obj, max_lines=5) == pp.pformat(obj).splitlines()
+
+
+def test_pformat_sorts_heterogeneous_set() -> None:
+    # The set sort tries a natural sort first and falls back to a key
+    # that compares the element types' names only for unorderable
+    # mixes; both must succeed.
+    pp = PrettyPrinter()
+    assert pp.pformat({3, 1, 2}) == "{\n    1,\n    2,\n    3,\n}"
+    # Mixed unorderable types must not raise.
+    pp.pformat({1, "a", 2, "b"})

From b541e20cb2897694b033175ad58b0ed2f7cebdf2 Mon Sep 17 00:00:00 2001
From: Pierre Sassoulas <pierre.sassoulas@gmail.com>
Date: Sat, 13 Jun 2026 18:34:16 +0200
Subject: [PATCH 02/12] [perf] Skip the newline count on chunks without a
 newline
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In ``pformat_lines``'s budget loop, ``chunk.count("\n")`` ran on every
chunk, but most chunks (brackets, indentation, item reprs) contain no
newline. Guarding the call with ``"\n" in chunk`` skips it on those and
recovers part of the per-chunk budget-tracking overhead: formatting an
8-element list under a budget drops from ~0.0185 ms to ~0.0163 ms
(versus ~0.0132 ms for an uncapped ``pformat().splitlines()``, so the
budget overhead shrinks by roughly 40%, from ~+5 us to ~+3 us).

The win is small and only matters on the ``-v`` truncating path of a
failing assertion (the default path doesn't format the diff at all), so
this is kept as a separate commit — easy to drop if the extra branch
isn't judged worth it.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/_pytest/_io/pprint.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/_pytest/_io/pprint.py b/src/_pytest/_io/pprint.py
index 06caf436e60..d9fd6955032 100644
--- a/src/_pytest/_io/pprint.py
+++ b/src/_pytest/_io/pprint.py
@@ -125,7 +125,10 @@ def pformat_lines(
             chunks.append(chunk)
             if max_chars is not None:
                 n_chars += len(chunk)
-            if max_lines is not None:
+            if max_lines is not None and "\n" in chunk:
+                # Guard the count: most chunks (brackets, indents, item
+                # reprs) have no newline, and skipping the call on them
+                # is meaningfully cheaper than counting every chunk.
                 n_lines += chunk.count("\n")
             if (max_lines is not None and n_lines >= max_lines) or (
                 max_chars is not None and n_chars >= max_chars

From 77343a81f7a53fb4369c24c0e728473db3ae6135 Mon Sep 17 00:00:00 2001
From: Pierre Sassoulas <pierre.sassoulas@gmail.com>
Date: Sun, 14 Jun 2026 11:00:40 +0200
Subject: [PATCH 03/12] [perf] pprint: apply review feedback on
 ``pformat_lines``

Addresses review on #14588:

* make ``max_lines`` / ``max_chars`` keyword-only so they can't be
  confused at the call site.
* drop the implementation detail (``_format``) and the "what the caller
  does" note from the docstring; describe the behaviour instead.
* comment the set-sort fast path ("try a direct sort first, faster than
  the fallback").
* assert the heterogeneous-set output in the test rather than only
  checking it does not raise.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/_pytest/_io/pprint.py | 21 +++++++++++----------
 testing/io/test_pprint.py |  5 +++--
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/src/_pytest/_io/pprint.py b/src/_pytest/_io/pprint.py
index d9fd6955032..2685d838b68 100644
--- a/src/_pytest/_io/pprint.py
+++ b/src/_pytest/_io/pprint.py
@@ -99,22 +99,21 @@ def pformat(self, object: Any) -> str:
     def pformat_lines(
         self,
         object: Any,
+        *,
         max_lines: int | None = None,
         max_chars: int | None = None,
     ) -> list[str]:
         """Pretty-print ``object`` and return its lines.
 
-        ``_format`` yields the output as a stream of chunks, so this can
-        stop pulling from it as soon as a budget is reached — useful when
-        a downstream truncator is going to drop everything past that
-        budget anyway.
-
-        ``max_lines`` / ``max_chars`` bound the two truncation dimensions
+        ``max_lines`` / ``max_chars`` bound the two output dimensions
         independently; either may be ``None`` to leave that dimension
-        unbounded. With both ``None`` the whole object is formatted. The
-        budget is a stopping condition, not a precise cut: formatting
-        stops on the first chunk that reaches it, so the result may
-        slightly overshoot (the caller truncates to the exact limit).
+        unbounded, and with both ``None`` the whole object is formatted.
+        When a bound is given the object is only formatted far enough to
+        reach it, so a huge object costs O(budget) rather than O(N).
+
+        The budget is a stopping condition, not a precise cut: formatting
+        stops on the first piece of output that reaches it, so the result
+        may slightly overshoot the bound.
         """
         if max_lines is None and max_chars is None:
             return self.pformat(object).splitlines()
@@ -277,6 +276,8 @@ def _pprint_set(
             yield typ.__name__ + "({"
             endchar = "})"
         try:
+            # Try a direct sort first; it is faster than the fallback and
+            # works for the common homogeneous, orderable case.
             object = sorted(object)
         except TypeError:
             # Heterogeneous element types — fall back to a key that
diff --git a/testing/io/test_pprint.py b/testing/io/test_pprint.py
index 2c08734cf46..805809b3778 100644
--- a/testing/io/test_pprint.py
+++ b/testing/io/test_pprint.py
@@ -488,5 +488,6 @@ def test_pformat_sorts_heterogeneous_set() -> None:
     # mixes; both must succeed.
     pp = PrettyPrinter()
     assert pp.pformat({3, 1, 2}) == "{\n    1,\n    2,\n    3,\n}"
-    # Mixed unorderable types must not raise.
-    pp.pformat({1, "a", 2, "b"})
+    # Mixed unorderable types must not raise; the fallback orders by type
+    # name (ints before strs), then by value.
+    assert pp.pformat({1, "a", 2, "b"}) == "{\n    1,\n    2,\n    'a',\n    'b',\n}"

From 5d94ad3439b34e4aa96f99632f77fe7959efa713 Mon Sep 17 00:00:00 2001
From: Pierre Sassoulas <pierre.sassoulas@gmail.com>
Date: Mon, 15 Jun 2026 12:09:32 +0200
Subject: [PATCH 04/12] [perf] test: mark Sized.__len__ as no cover

The body exists only to make the test type ``Sized``; the lazy budget
code never calls ``len`` on a non-dispatched object, so the line is
intentionally unreachable.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 testing/io/test_pprint.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/testing/io/test_pprint.py b/testing/io/test_pprint.py
index 805809b3778..f25f703c7cd 100644
--- a/testing/io/test_pprint.py
+++ b/testing/io/test_pprint.py
@@ -475,7 +475,7 @@ class Many:
     def test_sized_non_iterable_does_not_raise(self) -> None:
         class Sized:
             def __len__(self) -> int:
-                return 3
+                return 3  # pragma: no cover - exists only to make the type Sized
 
         pp = PrettyPrinter()
         obj = Sized()

From e4478724b243f3ae9fa8cb575e24c9daa9b909d4 Mon Sep 17 00:00:00 2001
From: Pierre Sassoulas <pierre.sassoulas@gmail.com>
Date: Mon, 15 Jun 2026 12:13:52 +0200
Subject: [PATCH 05/12] [perf] test: cover vendored pprint
 UserDict/UserList/UserString, frozenset, bytes/str wrap

The vendored ``pprint`` copy shipped without upstream's test suite, so
several per-type helpers had no coverage. Add ``pformat`` cases for
``UserDict``/``UserList``/``UserString``, ``frozenset`` (empty and the
frozenset-prefix branch of ``_pprint_set``), short and line-wrapped
``bytes``/``bytearray``, and the multi-line / wrapped ``str`` paths.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/_pytest/_io/pprint.py |   2 +-
 testing/io/test_pprint.py | 220 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 221 insertions(+), 1 deletion(-)

diff --git a/src/_pytest/_io/pprint.py b/src/_pytest/_io/pprint.py
index 2685d838b68..a550b65b0c8 100644
--- a/src/_pytest/_io/pprint.py
+++ b/src/_pytest/_io/pprint.py
@@ -330,7 +330,7 @@ def _pprint_str(
                         current = part
                     else:
                         current = candidate
-                if current:
+                if current:  # pragma: no branch - a wrapped line always ends with a pending chunk
                     chunks.append(repr(current))
         if len(chunks) == 1:
             yield rep
diff --git a/testing/io/test_pprint.py b/testing/io/test_pprint.py
index f25f703c7cd..28bcf193ee1 100644
--- a/testing/io/test_pprint.py
+++ b/testing/io/test_pprint.py
@@ -5,12 +5,17 @@
 from collections import defaultdict
 from collections import deque
 from collections import OrderedDict
+from collections import UserDict
+from collections import UserList
+from collections import UserString
 from dataclasses import dataclass
 import textwrap
 from types import MappingProxyType
 from types import SimpleNamespace
 from typing import Any
 
+from _pytest._io.pprint import _safe_tuple
+from _pytest._io.pprint import _wrap_bytes_repr
 from _pytest._io.pprint import PrettyPrinter
 import pytest
 
@@ -329,6 +334,112 @@ class DataclassWithTwoItems:
             """,
             id="deque-maxlen",
         ),
+        pytest.param(frozenset(), "frozenset()", id="frozenset-empty"),
+        pytest.param(
+            frozenset({1, 2, 3}),
+            """
+            frozenset({
+                1,
+                2,
+                3,
+            })
+            """,
+            id="frozenset-items",
+        ),
+        pytest.param(UserDict(), "{}", id="userdict-empty"),
+        pytest.param(
+            UserDict({"one": 1, "two": 2}),
+            """
+            {
+                'one': 1,
+                'two': 2,
+            }
+            """,
+            id="userdict-items",
+        ),
+        pytest.param(UserList(), "[]", id="userlist-empty"),
+        pytest.param(
+            UserList([1, 2]),
+            """
+            [
+                1,
+                2,
+            ]
+            """,
+            id="userlist-items",
+        ),
+        pytest.param(UserString("hello world"), "'hello world'", id="userstring"),
+        pytest.param(b"short", "(b'short')", id="bytes-short"),
+        pytest.param(
+            b"x" * 100,
+            "(b'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'\n"
+            " b'xxxxxxxxxxxxxxxxxxxxxxxx')",
+            id="bytes-long",
+        ),
+        pytest.param(
+            # Length not a multiple of 4 so the final (short) group lands
+            # exactly on ``last`` and exercises the allowance trim.
+            b"z" * 102,
+            "(b'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz'\n"
+            " b'zzzzzzzzzzzzzzzzzzzzzzzzzz')",
+            id="bytes-long-unaligned",
+        ),
+        pytest.param(bytearray(b"short"), "bytearray(b'short')", id="bytearray-short"),
+        pytest.param(
+            bytearray(b"y" * 100),
+            "bytearray(b'yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy'\n"
+            "          b'yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy')",
+            id="bytearray-long",
+        ),
+        pytest.param(
+            "word " * 30,
+            "('word word word word word word word word word word word word word word word '\n"
+            " 'word word word word word word word word word word word word word word word ')",
+            id="str-long-wrap",
+        ),
+        pytest.param(
+            "line1\nline2\nline3",
+            "('line1\\n'\n 'line2\\n'\n 'line3')",
+            id="str-multiline",
+        ),
+        pytest.param("", "''", id="str-empty"),
+        pytest.param("hello", "'hello'", id="str-single-chunk"),
+        pytest.param(
+            ["word " * 30],
+            "[\n"
+            "    'word word word word word word word word word word word word word word '\n"
+            "    'word word word word word word word word word word word word word word '\n"
+            "    'word word ',\n"
+            "]",
+            id="str-nested-wrap",
+        ),
+        pytest.param(b"abc", "b'abc'", id="bytes-le-4"),
+        pytest.param(
+            "word " * 30 + "\nshort",
+            "('word word word word word word word word word word word word word word word '\n"
+            " 'word word word word word word word word word word word word word word word \\n'\n"
+            " 'short')",
+            id="str-wrap-then-line",
+        ),
+        pytest.param({(): 0}, "{\n    (): 0,\n}", id="dict-empty-tuple-key"),
+        pytest.param(
+            {(1, 2): 0},
+            """
+            {
+                (1, 2): 0,
+            }
+            """,
+            id="dict-tuple-key",
+        ),
+        pytest.param(
+            {(1,): 0},
+            """
+            {
+                (1,): 0,
+            }
+            """,
+            id="dict-singleton-tuple-key",
+        ),
         pytest.param(
             {
                 "chainmap": ChainMap({"one": 1}, {"two": 2}),
@@ -491,3 +602,112 @@ def test_pformat_sorts_heterogeneous_set() -> None:
     # Mixed unorderable types must not raise; the fallback orders by type
     # name (ints before strs), then by value.
     assert pp.pformat({1, "a", 2, "b"}) == "{\n    1,\n    2,\n    'a',\n    'b',\n}"
+
+
+@pytest.mark.parametrize(
+    "kwargs",
+    [
+        pytest.param({"indent": -1}, id="indent-negative"),
+        pytest.param({"depth": 0}, id="depth-zero"),
+        pytest.param({"width": 0}, id="width-zero"),
+    ],
+)
+def test_invalid_constructor_args_raise(kwargs: dict[str, int]) -> None:
+    with pytest.raises(ValueError):
+        PrettyPrinter(**kwargs)
+
+
+def test_recursive_list_shows_recursion_marker() -> None:
+    pp = PrettyPrinter()
+    a: list[Any] = [1]
+    a.append(a)
+    out = pp.pformat(a)
+    assert f"<Recursion on list with id={id(a)}>" in out
+
+
+def test_recursive_namespace_shows_ellipsis() -> None:
+    # A self-referential namespace must render the cycle as ``...`` rather
+    # than recursing forever.
+    ns = SimpleNamespace(x=1)
+    ns.self = ns
+    out = PrettyPrinter().pformat(ns)
+    assert "self=..." in out
+
+
+def test_depth_limit_truncates_nested_container() -> None:
+    # ``depth`` caps nesting in the ``_safe_repr`` fallback: containers
+    # past the limit collapse to ``...``.
+    pp = PrettyPrinter(depth=1)
+    assert pp.pformat({((1, 2),): 0}) == "{\n    (...,): 0,\n}"
+
+
+def test_simplenamespace_subclass_uses_class_name() -> None:
+    # Plain ``SimpleNamespace`` prints as ``namespace(...)``; a subclass
+    # uses its own class name instead.
+    class MyNamespace(SimpleNamespace):
+        pass
+
+    pp = PrettyPrinter()
+    assert pp.pformat(MyNamespace(one=1)) == "MyNamespace(\n    one=1,\n)"
+
+
+def test_safe_tuple_sorts_unorderable_pairs() -> None:
+    # ``_safe_tuple`` wraps each element of a 2-tuple in ``_safe_key`` so a
+    # list of pairs with unorderable elements can be sorted without raising.
+    pairs = [(2, "b"), (1, "a"), ("z", 3)]
+    assert sorted(pairs, key=_safe_tuple)  # does not raise
+
+
+class _HashableDict(dict):
+    # ``dict`` subclasses that are hashable can be used as dict keys, which
+    # is the only way the ``_safe_repr`` ``dict`` branch is reached.
+    def __hash__(self) -> int:
+        return id(self)
+
+
+class _HashableList(list):
+    # Likewise for ``list`` and the ``_safe_repr`` ``list`` branch.
+    def __hash__(self) -> int:
+        return id(self)
+
+
+@pytest.mark.parametrize(
+    ("key", "expected"),
+    [
+        pytest.param(_HashableDict(), "{\n    {}: 0,\n}", id="empty-dict-key"),
+        pytest.param(
+            _HashableDict({"a": 1}), "{\n    {'a': 1}: 0,\n}", id="dict-key"
+        ),
+        pytest.param(_HashableList(), "{\n    []: 0,\n}", id="empty-list-key"),
+        pytest.param(_HashableList([1, 2]), "{\n    [1, 2]: 0,\n}", id="list-key"),
+    ],
+)
+def test_hashable_container_subclass_as_key(key: Any, expected: str) -> None:
+    # A hashable ``dict``/``list`` subclass key is rendered via the
+    # ``_safe_repr`` fallback rather than a per-type dispatcher.
+    assert PrettyPrinter().pformat({key: 0}) == expected
+
+
+def test_safe_repr_depth_limit_on_dict_key() -> None:
+    pp = PrettyPrinter(depth=1)
+    assert pp.pformat({_HashableDict({"a": 1}): 0}) == "{\n    {...}: 0,\n}"
+
+
+def test_safe_repr_recursion_marker() -> None:
+    # Self-referential containers reached through ``_safe_repr`` (as dict
+    # keys) must terminate with a recursion marker, for both the ``dict``
+    # branch and the ``tuple``/``list`` branch.
+    hd = _HashableDict()
+    hd["self"] = hd
+    assert "<Recursion on _HashableDict" in PrettyPrinter().pformat({hd: 0})
+
+    hl = _HashableList()
+    hl.append(hl)
+    assert "<Recursion on _HashableList" in PrettyPrinter().pformat({(hl,): 0})
+
+
+def test_wrap_bytes_repr_edges() -> None:
+    # Empty input yields nothing; a width too small for a group still
+    # emits each group rather than dropping bytes.
+    assert list(_wrap_bytes_repr(b"", 80, 0)) == []
+    assert list(_wrap_bytes_repr(b"abcdefgh", 6, 0)) == ["b'abcd'", "b'efgh'"]

From 98281ea734e5da77f2a4eb26c1b696b8d4360787 Mon Sep 17 00:00:00 2001
From: Pierre Sassoulas <pierre.sassoulas@gmail.com>
Date: Sat, 13 Jun 2026 15:45:49 +0200
Subject: [PATCH 06/12] [refactor] Stream assertion explanations through
 truncation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Make the assertion-comparison explanation lazy end-to-end so a huge
comparison short-circuits at the truncation threshold instead of
building (then discarding) megabytes of diff text.

* add ``materialize_with_truncation`` — pull from the explanation
  iterator only until the truncation budget is reached, then drop the
  rest unconsumed.
* feed ``util.assertrepr_compare``'s iterator through it from both the
  ``pytest_assertrepr_compare`` hook and ``callbinrepr``, while keeping
  the hook's spec'd ``list[str] | None`` return type intact (the
  iterator is still consumed lazily).
* flatten ``callbinrepr``'s two ``continue``s into nested truthiness
  checks so codecov stops reporting a sticky partial branch.
* drop the exact hidden-line count from the truncation footer
  ("...Full output truncated, use '-vv' to show") — the streaming
  truncator can't know how many lines it never pulled, and maintainers
  agreed the count isn't worth materialising the whole diff for.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 doc/en/example/reportingdemo.rst  |   2 +-
 doc/en/how-to/output.rst          |   4 +-
 src/_pytest/assertion/__init__.py |  46 ++++++---
 src/_pytest/assertion/truncate.py |  87 +++++++++-------
 testing/python/approx.py          |   2 +-
 testing/test_assertion.py         | 162 +++++++++++++++++++++++++++---
 6 files changed, 232 insertions(+), 71 deletions(-)

diff --git a/doc/en/example/reportingdemo.rst b/doc/en/example/reportingdemo.rst
index 1eef1322927..65e99a14a1f 100644
--- a/doc/en/example/reportingdemo.rst
+++ b/doc/en/example/reportingdemo.rst
@@ -148,7 +148,7 @@ Here is a nice run of several failures and how ``pytest`` presents things:
     E           1
     E           1...
     E
-    E         ...Full output truncated (7 lines hidden), use '-vv' to show
+    E         ...Full output truncated, use '-vv' to show
 
     failure_demo.py:62: AssertionError
     _________________ TestSpecialisedExplanations.test_eq_list _________________
diff --git a/doc/en/how-to/output.rst b/doc/en/how-to/output.rst
index db36a5a7206..752d0206526 100644
--- a/doc/en/how-to/output.rst
+++ b/doc/en/how-to/output.rst
@@ -172,7 +172,7 @@ Now we can increase pytest's verbosity:
     E               'banana',
     E               'apple',...
     E
-    E         ...Full output truncated (7 lines hidden), use '-vv' to show
+    E         ...Full output truncated, use '-vv' to show
 
     test_verbosity_example.py:8: AssertionError
     ____________________________ test_numbers_fail _____________________________
@@ -190,7 +190,7 @@ Now we can increase pytest's verbosity:
     E         {'10': 10, '20': 20, '30': 30, '40': 40}
     E         ...
     E
-    E         ...Full output truncated (16 lines hidden), use '-vv' to show
+    E         ...Full output truncated, use '-vv' to show
 
     test_verbosity_example.py:14: AssertionError
     ___________________________ test_long_text_fail ____________________________
diff --git a/src/_pytest/assertion/__init__.py b/src/_pytest/assertion/__init__.py
index e33f8b29609..7968b056b02 100644
--- a/src/_pytest/assertion/__init__.py
+++ b/src/_pytest/assertion/__init__.py
@@ -181,13 +181,21 @@ def callbinrepr(op, left: object, right: object) -> str | None:
             config=item.config, op=op, left=left, right=right
         )
         for new_expl in hook_result:
+            # Plugin-supplied lists are truncated here; the built-in impl
+            # already truncates as it streams, so re-applying truncation
+            # to its output is a near no-op (the body fits the budget,
+            # only the footer line is re-emitted with the same wording).
+            # ``materialize_with_truncation`` can return ``[]`` when the
+            # input was a truthy-but-empty iterable, so re-check after
+            # materialising.
             if new_expl:
-                new_expl = truncate.truncate_if_required(new_expl, item)
-                new_expl = [line.replace("\n", "\\n") for line in new_expl]
-                res = "\n~".join(new_expl)
-                if item.config.getvalue("assertmode") == "rewrite":
-                    res = res.replace("%", "%%")
-                return res
+                new_expl = truncate.materialize_with_truncation(new_expl, item.config)
+                if new_expl:
+                    new_expl = [line.replace("\n", "\\n") for line in new_expl]
+                    res = "\n~".join(new_expl)
+                    if item.config.getvalue("assertmode") == "rewrite":
+                        res = res.replace("%", "%%")
+                    return res
         return None
 
     saved_assert_hooks = util._reprcompare, util._assertion_pass
@@ -218,19 +226,25 @@ def pytest_sessionfinish(session: Session) -> None:
 def pytest_assertrepr_compare(
     config: Config, op: str, left: Any, right: Any
 ) -> list[str] | None:
+    """Return an explanation for ``left op right``.
+
+    Internally ``util.assertrepr_compare`` is a generator; we feed it
+    through ``materialize_with_truncation`` so a huge comparison
+    short-circuits at the truncation threshold without building the
+    full diff, while still returning the ``list[str] | None`` shape
+    the hook spec advertises.
+    """
     if config.pluginmanager.has_plugin("terminalreporter"):
         highlighter = config.get_terminal_writer()._highlight
     else:
         # Keep it plaintext when not using terminalrepoterer (#14377).
         highlighter = util.dummy_highlighter
-    explanation = list(
-        util.assertrepr_compare(
-            op=op,
-            left=left,
-            right=right,
-            verbose=config.get_verbosity(Config.VERBOSITY_ASSERTIONS),
-            highlighter=highlighter,
-            assertion_text_diff_style=util.get_assertion_text_diff_style(config),
-        )
+    lines = util.assertrepr_compare(
+        op=op,
+        left=left,
+        right=right,
+        verbose=config.get_verbosity(Config.VERBOSITY_ASSERTIONS),
+        highlighter=highlighter,
+        assertion_text_diff_style=util.get_assertion_text_diff_style(config),
     )
-    return explanation or None
+    return truncate.materialize_with_truncation(lines, config) or None
diff --git a/src/_pytest/assertion/truncate.py b/src/_pytest/assertion/truncate.py
index d62ca33cc4b..2f0c9fe2df0 100644
--- a/src/_pytest/assertion/truncate.py
+++ b/src/_pytest/assertion/truncate.py
@@ -6,9 +6,10 @@
 
 from __future__ import annotations
 
+from collections.abc import Iterable
+
 from _pytest.compat import running_on_ci
 from _pytest.config import Config
-from _pytest.nodes import Item
 
 
 DEFAULT_MAX_LINES = 8
@@ -16,32 +17,52 @@
 USAGE_MSG = "use '-vv' to show"
 
 
-def truncate_if_required(explanation: list[str], item: Item) -> list[str]:
-    """Truncate this assertion explanation if the given test item is eligible."""
-    should_truncate, max_lines, max_chars = _get_truncation_parameters(item)
-    if should_truncate:
-        return _truncate_explanation(
-            explanation,
-            max_lines=max_lines,
-            max_chars=max_chars,
-        )
-    return explanation
+def materialize_with_truncation(lines: Iterable[str], config: Config) -> list[str]:
+    """Materialise a streaming explanation, applying truncation lazily.
+
+    Pulls from ``lines`` only until the truncation threshold is reached;
+    once exceeded, the rest of the iterator is dropped without being
+    consumed. This lets a huge comparison short-circuit instead of
+    building (and immediately discarding) megabytes of explanation text.
+    """
+    should_truncate, max_lines, max_chars = _get_truncation_parameters(config)
+    if not should_truncate:
+        return list(lines)
+
+    tolerable_max_chars = max_chars + 70
+    # Pull just past max_lines so ``_truncate_explanation`` can detect the
+    # overflow without us materialising more than we need.
+    line_cap = max_lines + 3 if max_lines > 0 else None
+    buffered: list[str] = []
+    char_count = 0
+    for line in lines:
+        buffered.append(line)
+        char_count += len(line)
+        if line_cap is not None and len(buffered) >= line_cap:
+            break
+        if max_chars > 0 and char_count > tolerable_max_chars:
+            break
+    else:
+        # Iterator exhausted within limits — nothing to truncate.
+        return buffered
+
+    return _truncate_explanation(buffered, max_lines=max_lines, max_chars=max_chars)
 
 
-def _get_truncation_parameters(item: Item) -> tuple[bool, int, int]:
-    """Return the truncation parameters related to the given item, as (should truncate, max lines, max chars)."""
+def _get_truncation_parameters(config: Config) -> tuple[bool, int, int]:
+    """Return the truncation parameters from the given config, as (should truncate, max lines, max chars)."""
     # We do not need to truncate if one of conditions is met:
     # 1. Verbosity level is 2 or more;
     # 2. Test is being run in CI environment;
     # 3. Both truncation_limit_lines and truncation_limit_chars
     #    .ini parameters are set to 0 explicitly.
-    max_lines = item.config.getini("truncation_limit_lines")
+    max_lines = config.getini("truncation_limit_lines")
     max_lines = int(max_lines if max_lines is not None else DEFAULT_MAX_LINES)
 
-    max_chars = item.config.getini("truncation_limit_chars")
+    max_chars = config.getini("truncation_limit_chars")
     max_chars = int(max_chars if max_chars is not None else DEFAULT_MAX_CHARS)
 
-    verbose = item.config.get_verbosity(Config.VERBOSITY_ASSERTIONS)
+    verbose = config.get_verbosity(Config.VERBOSITY_ASSERTIONS)
 
     should_truncate = verbose < 2 and not running_on_ci()
     should_truncate = should_truncate and (max_lines > 0 or max_chars > 0)
@@ -66,20 +87,15 @@ def _truncate_explanation(
     When this function is launched we know max_lines > 0 or max_chars > 0
     because _get_truncation_parameters was called first.
     """
-    # The length of the truncation explanation depends on the number of lines
-    # removed but is at least 68 characters:
-    # The real value is
-    # 64 (for the base message:
-    # '...\n...Full output truncated (1 line hidden), use '-vv' to show")'
-    # )
-    # + 1 (for plural)
-    # + int(math.log10(len(input_lines) - max_lines)) (number of hidden line, at least 1)
-    # + 3 for the '...' added to the truncated line
-    # But if there's more than 100 lines it's very likely that we're going to
-    # truncate, so we don't need the exact value using log10.
-    tolerable_max_chars = (
-        max_chars + 70  # 64 + 1 (for plural) + 2 (for '99') + 3 for '...'
-    )
+    # ``max_chars`` bounds the body only; the truncation footer is added on
+    # top of it. We allow some slack so a body that nearly fits the budget is
+    # not truncated solely to make room for that footer. The footer costs:
+    #   + 3   the "..." appended to the last kept line
+    #   + 43  the "...Full output truncated, use '-vv' to show" line
+    # i.e. ~46 characters. We round up to 70 to keep a comfortable margin
+    # (this was historically larger, to also fit the now-removed
+    # "(N lines hidden)" count, which streaming can no longer compute).
+    tolerable_max_chars = max_chars + 70
     # The truncation explanation add two lines to the output
     if max_lines == 0 or len(input_lines) <= max_lines + 2:
         if max_chars == 0 or sum(len(s) for s in input_lines) <= tolerable_max_chars:
@@ -89,24 +105,19 @@ def _truncate_explanation(
         # Truncate first to max_lines, and then truncate to max_chars if necessary
         truncated_explanation = input_lines[:max_lines]
     # We reevaluate the need to truncate chars following removal of some lines
-    need_to_truncate_char = (
+    if (
         max_chars > 0
         and sum(len(e) for e in truncated_explanation) > tolerable_max_chars
-    )
-    if need_to_truncate_char:
+    ):
         truncated_explanation = _truncate_by_char_count(
             truncated_explanation, max_chars
         )
     # Something was truncated, adding '...' at the end to show that
     truncated_explanation[-1] += "..."
-    truncated_line_count = (
-        len(input_lines) - len(truncated_explanation) + int(need_to_truncate_char)
-    )
     return [
         *truncated_explanation,
         "",
-        f"...Full output truncated ({truncated_line_count} line"
-        f"{'' if truncated_line_count == 1 else 's'} hidden), {USAGE_MSG}",
+        f"...Full output truncated, {USAGE_MSG}",
     ]
 
 
diff --git a/testing/python/approx.py b/testing/python/approx.py
index 88d46cbb755..c5ca03fe823 100644
--- a/testing/python/approx.py
+++ b/testing/python/approx.py
@@ -313,7 +313,7 @@ def test_error_messages_with_different_verbosity(self, assert_approx_raises_rege
                 rf"^  \(0,\)\s+\| {SOME_FLOAT} \| {SOME_FLOAT} ± {SOME_FLOAT}e-{SOME_INT}$",
                 rf"^  \(1,\)\s+\| {SOME_FLOAT} \| {SOME_FLOAT} ± {SOME_FLOAT}e-{SOME_INT}\.\.\.$",
                 "^  $",
-                rf"^  ...Full output truncated \({SOME_INT} lines hidden\), use '-vv' to show$",
+                r"^  ...Full output truncated, use '-vv' to show$",
             ],
             verbosity_level=0,
         )
diff --git a/testing/test_assertion.py b/testing/test_assertion.py
index 492834ba9de..ac20a172a8c 100644
--- a/testing/test_assertion.py
+++ b/testing/test_assertion.py
@@ -56,6 +56,11 @@ def get_verbosity(self, verbosity_type: str | None = None) -> int:
         def getini(self, name: str) -> str:
             if name == util.ASSERTION_TEXT_DIFF_STYLE_INI:
                 return assertion_text_diff_style
+            # Disable truncation so ``callop``-style tests can compare
+            # against the full explanation. Dedicated truncation tests
+            # use their own config in :class:`TestMaterializeWithTruncation`.
+            if name in ("truncation_limit_lines", "truncation_limit_chars"):
+                return "0"
             raise KeyError(f"Not mocked out: {name}")
 
     return Config()
@@ -1154,7 +1159,7 @@ def test_recursive_dataclasses(self, pytester: Pytester) -> None:
                 "E         Drill down into differing attribute g:",
                 "E           g: S(a=10, b='ten') != S(a=20, b='xxx')...",
                 "E         ",
-                "E         ...Full output truncated (51 lines hidden), use '-vv' to show",
+                "E         ...Full output truncated, use '-vv' to show",
             ],
             consecutive=True,
         )
@@ -1527,7 +1532,6 @@ def test_truncates_at_8_lines_when_given_list_of_empty_strings(self) -> None:
         assert result != expl
         assert len(result) == 8 + self.LINES_IN_TRUNCATION_MSG
         assert "Full output truncated" in result[-1]
-        assert "42 lines hidden" in result[-1]
         last_line_before_trunc_msg = result[-self.LINES_IN_TRUNCATION_MSG - 1]
         assert last_line_before_trunc_msg.endswith("...")
 
@@ -1538,7 +1542,6 @@ def test_truncates_at_8_lines_when_first_8_lines_are_LT_max_chars(self) -> None:
         assert result != expl
         assert len(result) == 8 + self.LINES_IN_TRUNCATION_MSG
         assert "Full output truncated" in result[-1]
-        assert f"{total_lines - 8} lines hidden" in result[-1]
         last_line_before_trunc_msg = result[-self.LINES_IN_TRUNCATION_MSG - 1]
         assert last_line_before_trunc_msg.endswith("...")
 
@@ -1557,7 +1560,7 @@ def test_truncates_full_line_because_of_max_chars(self) -> None:
             "a" * 10,
             "...",
             "",
-            "...Full output truncated (1 line hidden), use '-vv' to show",
+            "...Full output truncated, use '-vv' to show",
         ]
 
     def test_truncates_edgecase_when_truncation_message_makes_the_result_longer_for_chars(
@@ -1582,7 +1585,6 @@ def test_truncates_at_8_lines_when_first_8_lines_are_EQ_max_chars(self) -> None:
         assert result != expl
         assert len(result) == 16 - 8 + self.LINES_IN_TRUNCATION_MSG
         assert "Full output truncated" in result[-1]
-        assert "8 lines hidden" in result[-1]
         last_line_before_trunc_msg = result[-self.LINES_IN_TRUNCATION_MSG - 1]
         assert last_line_before_trunc_msg.endswith("...")
 
@@ -1592,7 +1594,6 @@ def test_truncates_at_4_lines_when_first_4_lines_are_GT_max_chars(self) -> None:
         assert result != expl
         assert len(result) == 4 + self.LINES_IN_TRUNCATION_MSG
         assert "Full output truncated" in result[-1]
-        assert "7 lines hidden" in result[-1]
         last_line_before_trunc_msg = result[-self.LINES_IN_TRUNCATION_MSG - 1]
         assert last_line_before_trunc_msg.endswith("...")
 
@@ -1602,7 +1603,6 @@ def test_truncates_at_1_line_when_first_line_is_GT_max_chars(self) -> None:
         assert result != expl
         assert len(result) == 1 + self.LINES_IN_TRUNCATION_MSG
         assert "Full output truncated" in result[-1]
-        assert "1000 lines hidden" in result[-1]
         last_line_before_trunc_msg = result[-self.LINES_IN_TRUNCATION_MSG - 1]
         assert last_line_before_trunc_msg.endswith("...")
 
@@ -1610,7 +1610,6 @@ def test_full_output_truncated(self, monkeypatch, pytester: Pytester) -> None:
         """Test against full runpytest() output."""
         line_count = 7
         line_len = 100
-        expected_truncated_lines = 2
         pytester.makepyfile(
             rf"""
             def test_many_lines():
@@ -1629,7 +1628,7 @@ def test_many_lines():
             [
                 "*+ 1*",
                 "*+ 3*",
-                f"*truncated ({expected_truncated_lines} lines hidden)*use*-vv*",
+                "*Full output truncated*use*-vv*",
             ]
         )
 
@@ -1643,7 +1642,7 @@ def test_many_lines():
             [
                 "*+ 1*",
                 "*+ 3*",
-                f"*truncated ({expected_truncated_lines} lines hidden)*use*-vv*",
+                "*Full output truncated*use*-vv*",
             ]
         )
 
@@ -1699,9 +1698,7 @@ def test():
         result = pytester.runpytest()
 
         if expected_lines_hidden != 0:
-            result.stdout.fnmatch_lines(
-                [f"*truncated ({expected_lines_hidden} lines hidden)*"]
-            )
+            result.stdout.fnmatch_lines(["*Full output truncated*"])
         else:
             result.stdout.no_fnmatch_line("*truncated*")
             result.stdout.fnmatch_lines(
@@ -1712,6 +1709,92 @@ def test():
             )
 
 
+class TestMaterializeWithTruncation:
+    """Tests for ``truncate.materialize_with_truncation``.
+
+    Assertions check *behaviour* — that truncation kicks in / doesn't,
+    that the original lines are preserved, that the iterator's contract
+    is honoured — and never the literal footer wording. That way the
+    tests survive any future change to the truncation message format.
+    """
+
+    @staticmethod
+    def _config_with_limits(verbose: int = 0):
+        # Minimal stand-in for ``Config`` that ``materialize_with_truncation``
+        # uses through ``_get_truncation_parameters``.
+        class C:
+            def getini(self, name: str) -> object:
+                return None  # use defaults (8 lines / 640 chars)
+
+            def get_verbosity(self, _verbosity_type: str | None = None) -> int:
+                return verbose
+
+        return C()
+
+    def test_iterator_within_limits_returns_all_lines(self) -> None:
+        lines = iter(["one", "two", "three"])
+        result = truncate.materialize_with_truncation(lines, self._config_with_limits())
+        assert result == ["one", "two", "three"]
+
+    def test_iterator_exceeding_limits_is_truncated(self) -> None:
+        lines = (f"line {i}" for i in range(1000))
+        result = truncate.materialize_with_truncation(lines, self._config_with_limits())
+        # Bounded length — we kept the truncation footer plus at most a few
+        # lines past the cap; we never collect the full 1000-line stream.
+        assert len(result) < 20
+        # The first lines we kept are the first lines of the input.
+        assert result[0] == "line 0"
+        # Some truncation marker is present (wording deliberately not asserted).
+        assert any("truncated" in line for line in result)
+
+    def test_sized_input_returns_same_shape_as_iterator_input(self) -> None:
+        # When the input is already a sized container, the function still
+        # returns the truncated form; behaviour is the same as for an
+        # iterator over the same content.
+        content = [f"line {i}" for i in range(50)]
+        sized = truncate.materialize_with_truncation(
+            content, self._config_with_limits()
+        )
+        unsized = truncate.materialize_with_truncation(
+            iter(content), self._config_with_limits()
+        )
+        assert sized[0] == unsized[0] == "line 0"
+        assert any("truncated" in line for line in sized)
+        assert any("truncated" in line for line in unsized)
+
+    def test_truncation_disabled_returns_full_input(self) -> None:
+        # verbose >= 2 disables truncation; the iterator is fully drained.
+        lines = (f"line {i}" for i in range(50))
+        result = truncate.materialize_with_truncation(
+            lines, self._config_with_limits(verbose=2)
+        )
+        assert result == [f"line {i}" for i in range(50)]
+        assert not any("truncated" in line for line in result)
+
+    def test_first_lines_are_preserved_verbatim(self) -> None:
+        lines = (f"line {i}" for i in range(200))
+        result = truncate.materialize_with_truncation(lines, self._config_with_limits())
+        # The first kept lines should match the start of the input exactly
+        # (modulo the "..." appended to the last surviving line by the
+        # truncator, which we strip before comparing).
+        kept = [line.rstrip(".") for line in result if "truncated" not in line]
+        for i, line in enumerate(kept):
+            if line == "":
+                # Blank line separating content from the footer.
+                continue
+            assert line.startswith(f"line {i}")
+
+    def test_idempotent_on_already_truncated_list(self) -> None:
+        # The dispatcher applies ``materialize_with_truncation`` after the
+        # built-in hook impl already truncated. Re-applying it must not
+        # corrupt the footer count or chop further lines.
+        once = truncate.materialize_with_truncation(
+            (f"line {i}" for i in range(200)), self._config_with_limits()
+        )
+        twice = truncate.materialize_with_truncation(once, self._config_with_limits())
+        assert twice == once
+
+
 def test_python25_compile_issue257(pytester: Pytester) -> None:
     pytester.makepyfile(
         """
@@ -2205,6 +2288,59 @@ def raise_exit(obj):
         callequal(1, 1)
 
 
+def test_plugin_hook_returning_none_is_skipped(pytester: Pytester) -> None:
+    """A ``pytest_assertrepr_compare`` impl returning ``None`` is skipped
+    so the next impl (or the built-in) can produce the explanation.
+    Covers the ``if new_expl is None: continue`` branch in
+    ``callbinrepr``.
+    """
+    pytester.makeconftest(
+        """
+        def pytest_assertrepr_compare(op, left, right):
+            # Always defer to the next plugin / the built-in.
+            return None
+        """
+    )
+    pytester.makepyfile(
+        """
+        def test_diff():
+            assert {1, 2} == {1, 3}
+        """
+    )
+    result = pytester.runpytest()
+    # The built-in set-comparison explanation still reaches the user
+    # (so the None-returning hook did not swallow it).
+    result.stdout.fnmatch_lines(
+        ["*Extra items in the left set:*", "*Extra items in the right set:*"]
+    )
+
+
+def test_exception_before_first_yield_emits_summary_and_notice(monkeypatch) -> None:
+    """When the comparator raises *before* any explanation line has been
+    yielded, ``assertrepr_compare`` should still produce the summary so
+    the reader sees what was being compared, then append the failure
+    notice. Covers the ``summary_yielded is False`` branch of the
+    exception handler.
+    """
+    from _pytest.assertion import _compare_any
+
+    def raise_value_error(obj):
+        raise ValueError("synthetic repr failure")
+
+    # ``istext`` is called inside ``_compare_eq_any`` before the first
+    # yield, so this triggers the failure path on the very first
+    # ``next()`` call from ``assertrepr_compare``.
+    monkeypatch.setattr(_compare_any, "istext", raise_value_error)
+
+    expl = callequal(1, 1)
+    assert expl is not None
+    # Summary line still produced.
+    assert expl[0] == "1 == 1"
+    # The failure notice survives in the output; wording deliberately not
+    # asserted, only the underlying error's signature.
+    assert any("ValueError" in line or "synthetic" in line for line in expl)
+
+
 def test_assertion_location_with_coverage(pytester: Pytester) -> None:
     """This used to report the wrong location when run with coverage (#5754)."""
     p = pytester.makepyfile(

From d9ab9995af90983de024e2c497b10a2707ee1fbc Mon Sep 17 00:00:00 2001
From: Pierre Sassoulas <pierre.sassoulas@gmail.com>
Date: Sat, 13 Jun 2026 18:11:11 +0200
Subject: [PATCH 07/12] [perf] Wire the pformat budget and streamed ndiff into
 the diff
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use the lazy ``PrettyPrinter`` from the assertion comparison so a big
``==`` diff is never fully built when truncation will clip it.

* compute a per-side pformat budget ``(max_lines, max_chars)`` in
  ``pytest_assertrepr_compare`` from the truncator's
  ``truncation_limit_lines`` / ``truncation_limit_chars`` and thread it
  through ``util.assertrepr_compare`` → ``_compare_eq_any`` →
  ``_compare_eq_iterable``, which passes it to ``pformat_lines``. Both
  dimensions are bounded, so a flat container of a few enormous strings
  (huge chars, few lines) stops as early as a many-element collection
  (many lines) — and a char-only truncation config
  (``truncation_limit_lines=0``) now caps formatting too, where before
  it fell back to no cap. With truncation disabled (``-vv``/CI) the
  budget stays ``(None, None)`` and the full diff is produced.
* stream ``difflib.ndiff`` output line-by-line, highlighting each line
  individually (the diff lexer is line-oriented), so the truncator can
  stop pulling as soon as its budget is full instead of joining the
  whole diff first.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/_pytest/assertion/__init__.py          | 28 ++++++++++++++++++++++
 src/_pytest/assertion/_compare_any.py      |  5 +++-
 src/_pytest/assertion/_compare_sequence.py | 28 +++++++++++++++-------
 src/_pytest/assertion/util.py              |  2 ++
 testing/test_assertion.py                  |  8 +++----
 5 files changed, 58 insertions(+), 13 deletions(-)

diff --git a/src/_pytest/assertion/__init__.py b/src/_pytest/assertion/__init__.py
index 7968b056b02..f1db4347f36 100644
--- a/src/_pytest/assertion/__init__.py
+++ b/src/_pytest/assertion/__init__.py
@@ -239,6 +239,33 @@ def pytest_assertrepr_compare(
     else:
         # Keep it plaintext when not using terminalrepoterer (#14377).
         highlighter = util.dummy_highlighter
+    # When truncation is going to clip the explanation downstream, tell
+    # the comparison helpers to cap their pformat output at the same
+    # budget so they don't spend O(N) formatting lines/chars we're about
+    # to drop. The cap is ``(max_lines, max_chars)`` per side, mirroring
+    # the truncator's own slack so a side is never under-formatted:
+    #
+    # * ``trunc_lines + 3``: 2 lines for the truncation footer it appends
+    #   (blank + message) plus 1 for overshoot detection.
+    # * ``trunc_chars + 70``: the truncator's own ``tolerable_max_chars``
+    #   slack (footer length).
+    #
+    # ``difflib.ndiff`` over two K-line/char pformat outputs produces at
+    # least K output lines/chars (more when the sides differ), and the
+    # truncator pulls at most that much from the whole explanation, so a
+    # per-side budget covers the worst case. A dimension whose limit is 0
+    # (disabled) stays ``None`` so it isn't bounded; with truncation off
+    # both stay ``None`` and the user gets the full diff.
+    should_truncate, trunc_lines, trunc_chars = truncate._get_truncation_parameters(
+        config
+    )
+    if should_truncate:
+        pformat_cap = (
+            trunc_lines + 3 if trunc_lines > 0 else None,
+            trunc_chars + 70 if trunc_chars > 0 else None,
+        )
+    else:
+        pformat_cap = (None, None)
     lines = util.assertrepr_compare(
         op=op,
         left=left,
@@ -246,5 +273,6 @@ def pytest_assertrepr_compare(
         verbose=config.get_verbosity(Config.VERBOSITY_ASSERTIONS),
         highlighter=highlighter,
         assertion_text_diff_style=util.get_assertion_text_diff_style(config),
+        pformat_cap=pformat_cap,
     )
     return truncate.materialize_with_truncation(lines, config) or None
diff --git a/src/_pytest/assertion/_compare_any.py b/src/_pytest/assertion/_compare_any.py
index 9e577683736..d005580ea45 100644
--- a/src/_pytest/assertion/_compare_any.py
+++ b/src/_pytest/assertion/_compare_any.py
@@ -28,6 +28,7 @@ def _compare_eq_any(
     highlighter: _HighlightFunc,
     verbose: int,
     assertion_text_diff_style: _AssertionTextDiffStyle,
+    pformat_cap: tuple[int | None, int | None] = (None, None),
 ) -> Iterator[str]:
     """Yield the per-line explanation for ``left == right`` (without summary).
 
@@ -73,7 +74,9 @@ def _compare_eq_any(
             yield from _compare_eq_mapping(left, right, highlighter, verbose)
 
         if isiterable(left) and isiterable(right):
-            yield from _compare_eq_iterable(left, right, highlighter, verbose)
+            yield from _compare_eq_iterable(
+                left, right, highlighter, verbose, pformat_cap
+            )
 
 
 def _compare_eq_cls(
diff --git a/src/_pytest/assertion/_compare_sequence.py b/src/_pytest/assertion/_compare_sequence.py
index cd0043bf7ce..c81ca68f9ea 100644
--- a/src/_pytest/assertion/_compare_sequence.py
+++ b/src/_pytest/assertion/_compare_sequence.py
@@ -15,6 +15,7 @@ def _compare_eq_iterable(
     right: Iterable[object],
     highlighter: _HighlightFunc,
     verbose: int = 0,
+    pformat_cap: tuple[int | None, int | None] = (None, None),
 ) -> Iterator[str]:
     if verbose <= 0 and not running_on_ci():
         yield "Use -v to get more diff"
@@ -22,19 +23,30 @@ def _compare_eq_iterable(
     # dynamic import to speedup pytest
     import difflib
 
-    left_formatting = PrettyPrinter().pformat(left).splitlines()
-    right_formatting = PrettyPrinter().pformat(right).splitlines()
+    # ``pformat_cap`` is ``(max_lines, max_chars)``, computed by the
+    # dispatcher from the truncator's ``truncation_limit_lines`` /
+    # ``truncation_limit_chars``: when truncation is going to drop
+    # everything past those budgets anyway, we don't bother formatting
+    # more. ``(None, None)`` means no cap (``-vv`` or CI: the user wants
+    # the full diff).
+    pp = PrettyPrinter()
+    max_lines, max_chars = pformat_cap
+    left_formatting = pp.pformat_lines(left, max_lines=max_lines, max_chars=max_chars)
+    right_formatting = pp.pformat_lines(right, max_lines=max_lines, max_chars=max_chars)
 
     yield ""
     yield "Full diff:"
     # "right" is the expected base against which we compare "left",
     # see https://github.com/pytest-dev/pytest/issues/3333
-    yield from highlighter(
-        "\n".join(
-            line.rstrip() for line in difflib.ndiff(right_formatting, left_formatting)
-        ),
-        lexer="diff",
-    ).splitlines()
+    #
+    # Yield each ndiff line through the highlighter individually so the
+    # streaming truncator can stop pulling from ``difflib.ndiff`` as
+    # soon as its budget is full. The diff lexer is line-oriented, so
+    # per-line highlighting is equivalent — it just adds a redundant
+    # ``\x1b[0m`` reset at the start of each line (invisible to the
+    # terminal).
+    for line in difflib.ndiff(right_formatting, left_formatting):
+        yield highlighter(line.rstrip(), lexer="diff")
 
 
 def _compare_eq_sequence(
diff --git a/src/_pytest/assertion/util.py b/src/_pytest/assertion/util.py
index 5e5ef543c13..986e3231b93 100644
--- a/src/_pytest/assertion/util.py
+++ b/src/_pytest/assertion/util.py
@@ -140,6 +140,7 @@ def assertrepr_compare(
     verbose: int,
     highlighter: _HighlightFunc,
     assertion_text_diff_style: _AssertionTextDiffStyle,
+    pformat_cap: tuple[int | None, int | None] = (None, None),
 ) -> Iterator[str]:
     """Yield specialised explanations for some operators/operands.
 
@@ -183,6 +184,7 @@ def assertrepr_compare(
                 highlighter,
                 verbose,
                 assertion_text_diff_style,
+                pformat_cap,
             )
         elif op == "not in" and istext(left) and istext(right):
             source = _notin_text(left, right, verbose)
diff --git a/testing/test_assertion.py b/testing/test_assertion.py
index ac20a172a8c..50705800aae 100644
--- a/testing/test_assertion.py
+++ b/testing/test_assertion.py
@@ -2387,8 +2387,8 @@ def test():
             """,
             [
                 "{bold}{red}E         At index 1 diff: {reset}{number}1{hl-reset}{endline} != {reset}{number}2*",
-                "{bold}{red}E         {light-red}-     2,{hl-reset}{endline}{reset}",
-                "{bold}{red}E         {light-green}+     1,{hl-reset}{endline}{reset}",
+                "{bold}{red}E         {reset}{light-red}-     2,{hl-reset}{endline}{reset}",
+                "{bold}{red}E         {reset}{light-green}+     1,{hl-reset}{endline}{reset}",
             ],
         ),
         (
@@ -2406,8 +2406,8 @@ def test():
                 "{bold}{red}E         Right contains 1 more item:{reset}",
                 "{bold}{red}E         {reset}{{{str}'{hl-reset}{str}number-is-0{hl-reset}{str}'{hl-reset}: {number}0*",
                 "{bold}{red}E         {reset}{light-gray} {hl-reset} {{{endline}{reset}",
-                "{bold}{red}E         {light-gray} {hl-reset}     'number-is-1': 1,{endline}{reset}",
-                "{bold}{red}E         {light-green}+     'number-is-5': 5,{hl-reset}{endline}{reset}",
+                "{bold}{red}E         {reset}{light-gray} {hl-reset}     'number-is-1': 1,{endline}{reset}",
+                "{bold}{red}E         {reset}{light-green}+     'number-is-5': 5,{hl-reset}{endline}{reset}",
             ],
         ),
         (

From b064967e23edc364bc55d2e4ebbb9600c4e29519 Mon Sep 17 00:00:00 2001
From: Pierre Sassoulas <pierre.sassoulas@gmail.com>
Date: Sat, 13 Jun 2026 15:46:49 +0200
Subject: [PATCH 08/12] [test] Cover the streaming truncation coverage gaps
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Close the patch-coverage gaps codecov flagged on the streaming refactor:

* a plugin returning a truthy-but-empty iterator (``iter([])``), which
  slips past the first falsy check but is empty once materialised
  through truncation — exercises the second skip in ``callbinrepr``.
* a ``--assert=plain`` run, exercising the false branch of the
  ``assertmode == "rewrite"`` guard.
* every hook returning ``None`` (``assert 1 == 2``), so the dispatcher
  falls through the loop and returns ``None``.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 testing/test_assertion.py | 73 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 71 insertions(+), 2 deletions(-)

diff --git a/testing/test_assertion.py b/testing/test_assertion.py
index 50705800aae..95e31332c0c 100644
--- a/testing/test_assertion.py
+++ b/testing/test_assertion.py
@@ -2291,8 +2291,7 @@ def raise_exit(obj):
 def test_plugin_hook_returning_none_is_skipped(pytester: Pytester) -> None:
     """A ``pytest_assertrepr_compare`` impl returning ``None`` is skipped
     so the next impl (or the built-in) can produce the explanation.
-    Covers the ``if new_expl is None: continue`` branch in
-    ``callbinrepr``.
+    Covers the ``if not new_expl: continue`` branch in ``callbinrepr``.
     """
     pytester.makeconftest(
         """
@@ -2315,6 +2314,76 @@ def test_diff():
     )
 
 
+def test_plugin_hook_returning_empty_iterator_is_skipped(pytester: Pytester) -> None:
+    """A plugin returning a truthy but ultimately empty iterable is
+    skipped after materialisation. Covers the second
+    ``if not new_expl: continue`` branch in ``callbinrepr``.
+    """
+    pytester.makeconftest(
+        """
+        def pytest_assertrepr_compare(op, left, right):
+            # An iterator object is truthy, so it slips past the first
+            # falsy check; once materialised through truncation it is
+            # empty and the dispatcher must move on.
+            return iter([])
+        """
+    )
+    pytester.makepyfile(
+        """
+        def test_diff():
+            assert {1, 2} == {1, 3}
+        """
+    )
+    result = pytester.runpytest()
+    # The built-in set-comparison explanation still reaches the user.
+    result.stdout.fnmatch_lines(
+        ["*Extra items in the left set:*", "*Extra items in the right set:*"]
+    )
+
+
+def test_callbinrepr_falls_through_when_all_hooks_return_none(
+    pytester: Pytester,
+) -> None:
+    """When every ``pytest_assertrepr_compare`` impl returns ``None``
+    (no specialised explanation applies, e.g. ``assert 1 == 2``), the
+    dispatcher exhausts ``hook_result``, exits the loop, and returns
+    ``None``. Covers the ``continue → loop exit`` branch on the first
+    ``if not new_expl: continue`` line.
+    """
+    pytester.makepyfile(
+        """
+        def test_trivial():
+            assert 1 == 2
+        """
+    )
+    result = pytester.runpytest()
+    # Just the plain ``assert 1 == 2`` rewrite, with no specialised
+    # comparator explanation appended (because the dispatcher fell
+    # through to ``return None``).
+    result.stdout.fnmatch_lines(["*assert 1 == 2*"])
+    result.assert_outcomes(failed=1)
+
+
+def test_callbinrepr_plain_assert_mode(pytester: Pytester) -> None:
+    """In ``--assert=plain`` mode ``callbinrepr`` skips the ``%`` escape.
+    Covers the false branch of ``if item.config.getvalue("assertmode")
+    == "rewrite"``.
+    """
+    pytester.makepyfile(
+        """
+        def test_diff():
+            assert {1, 2} == {1, 3}
+        """
+    )
+    result = pytester.runpytest("--assert=plain")
+    # In plain mode the comparator still runs via ``callbinrepr`` (it
+    # is the rewrite escaping that's skipped), so the explanation is
+    # still produced.
+    result.stdout.fnmatch_lines(
+        ["*Extra items in the left set:*", "*Extra items in the right set:*"]
+    )
+
+
 def test_exception_before_first_yield_emits_summary_and_notice(monkeypatch) -> None:
     """When the comparator raises *before* any explanation line has been
     yielded, ``assertrepr_compare`` should still produce the summary so

From 38a1b9d18f2508c121d2cdf4d6c7ccd155aca791 Mon Sep 17 00:00:00 2001
From: Pierre Sassoulas <pierre.sassoulas@gmail.com>
Date: Sat, 13 Jun 2026 15:46:58 +0200
Subject: [PATCH 09/12] [doc] Add changelog for streaming assertion comparisons
 (#14523)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 changelog/14523.improvement.rst | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100644 changelog/14523.improvement.rst

diff --git a/changelog/14523.improvement.rst b/changelog/14523.improvement.rst
new file mode 100644
index 00000000000..d53374c62df
--- /dev/null
+++ b/changelog/14523.improvement.rst
@@ -0,0 +1,13 @@
+Assertion explanations are now built lazily and the truncator stops
+the comparison helpers as soon as it has enough output, so comparing
+two large collections no longer builds the full diff in order to
+discard it. A focused micro-benchmark of the worst-case scenario
+(``set(range(500_000)) == set(range(1, 500_001))``) drops from ~2,200 ms
+to ~43 ms; but realistic test suite with mostly small diffs should be
+unchanged.
+
+The truncation footer no longer reports the hidden-line count
+(``...Full output truncated (N lines hidden), ...`` becomes
+``...Full output truncated, ...``); diff lines now carry a redundant
+``\x1b[0m`` reset prefix (invisible to terminals) so that each line can
+be highlighted one by one.

From d29ea7b9377c7a0633d05a164213c7085e4f9d3e Mon Sep 17 00:00:00 2001
From: Pierre Sassoulas <pierre.sassoulas@gmail.com>
Date: Sat, 13 Jun 2026 23:08:41 +0200
Subject: [PATCH 10/12] [perf] Cap the text (``ndiff``) diff by the truncation
 budget
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

``str == str`` mismatches went through ``difflib.ndiff`` on the full
inputs, with no bound. ``ndiff`` is not lazy — its intraline
``_fancy_replace`` refinement (the ``? ^^^`` hint markers) runs in full
before the first line is yielded — so streaming its output, as the
other helpers do, saves nothing. A comparison of large or wholly-different
multi-line strings (or of a few huge lines, whose intraline diff is
O(len^2)) could spend seconds building a diff that truncation
immediately clips.

Thread the truncation budget ``(max_lines, max_chars)`` already computed
for ``pformat`` into ``_compare_eq_text`` → ``_diff_text`` and cap the
``ndiff`` inputs to it: a char slice first (bounds huge lines), then a
line slice (bounds many lines). With truncation off (``-vv``/CI) the
budget is ``(None, None)`` and the full diff is produced.

The truncated diff now reflects a *local* alignment of the bounded
prefix rather than the global one, so for wholly-dissimilar inputs the
truncated head can differ from truncating the full diff; for the common
case of two mostly-similar texts it is identical. (The iterable path
already caps its ``pformat`` inputs to ``ndiff`` the same way.)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 changelog/14523.improvement.rst       |  9 +++++++
 src/_pytest/assertion/_compare_any.py |  1 +
 src/_pytest/assertion/compare_text.py | 38 ++++++++++++++++++++++-----
 testing/test_assertion.py             | 31 ++++++++++++++++++++++
 4 files changed, 73 insertions(+), 6 deletions(-)

diff --git a/changelog/14523.improvement.rst b/changelog/14523.improvement.rst
index d53374c62df..388038375aa 100644
--- a/changelog/14523.improvement.rst
+++ b/changelog/14523.improvement.rst
@@ -6,6 +6,15 @@ discard it. A focused micro-benchmark the worst case scenario
 to ~43 ms; but realistic test suite with mostly small diffs should be
 unchanged.
 
+String comparisons (the ``ndiff`` diff style) are bounded the same way:
+``difflib.ndiff`` computes the whole diff up front and cannot be
+short-circuited by streaming, so its inputs are capped to the truncation
+budget instead. A large ``str == str`` mismatch that previously spent
+seconds in ``ndiff`` is now near-instant. When truncating, the diff
+reflects a local alignment of the bounded prefix, which can differ from
+the full diff for wholly-dissimilar inputs; use ``-vv`` for the exact
+full diff.
+
 The truncation footer no longer reports the hidden-line count
 (``...Full output truncated (N lines hidden), ...`` becomes
 ``...Full output truncated, ...``); diff lines now carry a redundant
diff --git a/src/_pytest/assertion/_compare_any.py b/src/_pytest/assertion/_compare_any.py
index d005580ea45..89a3ca9e172 100644
--- a/src/_pytest/assertion/_compare_any.py
+++ b/src/_pytest/assertion/_compare_any.py
@@ -43,6 +43,7 @@ def _compare_eq_any(
             highlighter,
             verbose,
             assertion_text_diff_style,
+            pformat_cap,
         )
     else:
         from _pytest.python_api import ApproxBase
diff --git a/src/_pytest/assertion/compare_text.py b/src/_pytest/assertion/compare_text.py
index 31096444ba6..00c221ec8b8 100644
--- a/src/_pytest/assertion/compare_text.py
+++ b/src/_pytest/assertion/compare_text.py
@@ -15,12 +15,13 @@ def _compare_eq_text(
     highlighter: _HighlightFunc,
     verbose: int,
     assertion_text_diff_style: _AssertionTextDiffStyle,
+    pformat_cap: tuple[int | None, int | None] = (None, None),
 ) -> Iterator[str]:
     match assertion_text_diff_style:
         case "block":
             yield from _diff_text_block(left, right)
         case "ndiff":
-            yield from _diff_text(left, right, highlighter, verbose)
+            yield from _diff_text(left, right, highlighter, verbose, pformat_cap)
         case unreachable:
             assert_never(unreachable)
 
@@ -39,12 +40,27 @@ def _format_text_block_lines(text: str) -> Iterator[str]:
 
 
 def _diff_text(
-    left: str, right: str, highlighter: _HighlightFunc, verbose: int = 0
+    left: str,
+    right: str,
+    highlighter: _HighlightFunc,
+    verbose: int = 0,
+    pformat_cap: tuple[int | None, int | None] = (None, None),
 ) -> Iterator[str]:
     """Yield the explanation for the diff between text.
 
     Unless --verbose is used this will skip leading and trailing
     characters which are identical to keep the diff minimal.
+
+    ``pformat_cap`` is ``(max_lines, max_chars)`` from the truncator:
+    when truncation will clip the diff anyway, the inputs to ``ndiff``
+    are capped to that budget first. ``ndiff`` is not lazy — its
+    intraline ``_fancy_replace`` refinement runs in full before the
+    first line is yielded — so an enormous comparison can't be
+    short-circuited by streaming; the only lever is feeding it less.
+    The resulting diff reflects a *local* alignment of the bounded
+    prefix rather than the global one, so the truncated head may differ
+    from truncating the full diff (use ``-vv`` for the exact diff); for
+    the common case of two mostly-similar texts the head is identical.
     """
     from difflib import ndiff
 
@@ -75,13 +91,23 @@ def _diff_text(
         left = repr(str(left))
         right = repr(str(right))
         yield "Strings contain only whitespace, escaping them using repr()"
+    # Cap the inputs to ndiff to the truncation budget: a char slice
+    # first (bounds a few huge lines, whose intraline diff is O(len^2)),
+    # then a line slice (bounds many lines). ``None`` leaves a dimension
+    # unbounded (``-vv``/CI: the full diff is wanted).
+    max_lines, max_chars = pformat_cap
+    if max_chars is not None:
+        left = left[:max_chars]
+        right = right[:max_chars]
+    left_lines = left.splitlines(keepends)
+    right_lines = right.splitlines(keepends)
+    if max_lines is not None:
+        left_lines = left_lines[:max_lines]
+        right_lines = right_lines[:max_lines]
     # "right" is the expected base against which we compare "left",
     # see https://github.com/pytest-dev/pytest/issues/3333
     yield from highlighter(
-        "\n".join(
-            line.strip("\n")
-            for line in ndiff(right.splitlines(keepends), left.splitlines(keepends))
-        ),
+        "\n".join(line.strip("\n") for line in ndiff(right_lines, left_lines)),
         lexer="diff",
     ).splitlines()
 
diff --git a/testing/test_assertion.py b/testing/test_assertion.py
index 95e31332c0c..0711d4f9d89 100644
--- a/testing/test_assertion.py
+++ b/testing/test_assertion.py
@@ -496,6 +496,37 @@ def test_text_diff_ndiff_style(self) -> None:
             "+ spam",
         ]
 
+    def test_text_diff_budget_caps_ndiff_input(self) -> None:
+        # A large text diff fed a truncation budget caps the inputs to
+        # ndiff, so the result is bounded instead of growing with N.
+        left = "\n".join(f"left {i}" for i in range(1000))
+        right = "\n".join(f"right {i}" for i in range(1000))
+        ndiff_style = util.ASSERTION_TEXT_DIFF_STYLE_NDIFF
+        capped = list(
+            _compare_eq_text(
+                left, right, util.dummy_highlighter, 1, ndiff_style, (11, 710)
+            )
+        )
+        full = list(
+            _compare_eq_text(
+                left, right, util.dummy_highlighter, 1, ndiff_style, (None, None)
+            )
+        )
+        assert len(capped) < 80
+        assert len(full) > 1500
+        # a few huge lines: the char budget bounds each emitted line.
+        capped_chars = list(
+            _compare_eq_text(
+                "x" * 100_000,
+                "y" * 100_000,
+                util.dummy_highlighter,
+                1,
+                ndiff_style,
+                (11, 710),
+            )
+        )
+        assert all(len(line) < 1000 for line in capped_chars)
+
     def test_text_skipping(self) -> None:
         lines = callequal("a" * 50 + "spam", "a" * 50 + "eggs")
         assert lines is not None

From 2d22a3e35aff5b997f57c094e56bb68ec65851e9 Mon Sep 17 00:00:00 2001
From: Pierre Sassoulas <pierre.sassoulas@gmail.com>
Date: Sat, 13 Jun 2026 23:23:40 +0200
Subject: [PATCH 11/12] [perf] Bound the dict extra-items diff by the
 truncation budget
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

``_compare_eq_mapping``'s "Left/Right contains N more items" rendered the
whole extra-keys subdict with ``pprint.pformat`` and let the truncator
clip it — O(N) formatting for output that is about to be dropped. It was
the last comparison path still formatting in full (pre-existing, not
touched by the streaming work).

Thread the truncation budget into ``_compare_eq_mapping`` and, when there
are more extra keys than the line budget, emit only the smallest
``max_lines`` keys, one per line, instead of pretty-printing all of them.
The keys are selected with ``heapq.nsmallest`` using the same safe sort
key ``pprint`` uses (so the choice is deterministic) and rendered with
``saferepr`` (so each line is char-bounded). A 500k-key ``{...} == {}``
drops from ~4.9 s to ~0.2 ms; the residual cost is the unavoidable O(N)
scan to pick the smallest keys deterministically.

Small comparisons (at or under the budget, or ``-vv``) keep the compact,
key-sorted ``pprint`` block unchanged. Only the truncated tail of a large
extra-items diff changes (one-per-line, consistent with the per-key
"Differing items" rendering just above it); the leading keys shown are
the ones ``pprint`` would have led with.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 changelog/14523.improvement.rst           |  5 ++++
 src/_pytest/assertion/_compare_any.py     |  4 ++-
 src/_pytest/assertion/_compare_mapping.py | 35 ++++++++++++++++++++---
 testing/test_assertion.py                 | 34 ++++++++++++++++++++++
 4 files changed, 73 insertions(+), 5 deletions(-)

diff --git a/changelog/14523.improvement.rst b/changelog/14523.improvement.rst
index 388038375aa..9b837310893 100644
--- a/changelog/14523.improvement.rst
+++ b/changelog/14523.improvement.rst
@@ -15,6 +15,11 @@ reflects a local alignment of the bounded prefix, which can differ from
 the full diff for wholly-dissimilar inputs; use ``-vv`` for the exact
 full diff.
 
+A dict comparison with many keys only on one side ("Left/Right contains
+N more items") no longer pretty-prints the whole extra subdict when it is
+going to be truncated: past the budget it shows the smallest few keys one
+per line instead. Small comparisons are unchanged (the compact block).
+
 The truncation footer no longer reports the hidden-line count
 (``...Full output truncated (N lines hidden), ...`` becomes
 ``...Full output truncated, ...``); diff lines now carry a redundant
diff --git a/src/_pytest/assertion/_compare_any.py b/src/_pytest/assertion/_compare_any.py
index 89a3ca9e172..ff44dec8bb5 100644
--- a/src/_pytest/assertion/_compare_any.py
+++ b/src/_pytest/assertion/_compare_any.py
@@ -72,7 +72,9 @@ def _compare_eq_any(
         elif isset(left) and isset(right):
             yield from _compare_eq_set(left, right, highlighter, verbose)
         elif ismapping(left) and ismapping(right):
-            yield from _compare_eq_mapping(left, right, highlighter, verbose)
+            yield from _compare_eq_mapping(
+                left, right, highlighter, verbose, pformat_cap
+            )
 
         if isiterable(left) and isiterable(right):
             yield from _compare_eq_iterable(
diff --git a/src/_pytest/assertion/_compare_mapping.py b/src/_pytest/assertion/_compare_mapping.py
index 4edb47026c6..1a80f9a5632 100644
--- a/src/_pytest/assertion/_compare_mapping.py
+++ b/src/_pytest/assertion/_compare_mapping.py
@@ -1,9 +1,12 @@
 from __future__ import annotations
 
+from collections.abc import Collection
 from collections.abc import Iterator
 from collections.abc import Mapping
+import heapq
 import pprint
 
+from _pytest._io.pprint import _safe_key
 from _pytest._io.saferepr import saferepr
 from _pytest.assertion._typing import _HighlightFunc
 
@@ -13,7 +16,9 @@ def _compare_eq_mapping(
     right: Mapping[object, object],
     highlighter: _HighlightFunc,
     verbose: int = 0,
+    pformat_cap: tuple[int | None, int | None] = (None, None),
 ) -> Iterator[str]:
+    max_lines, _ = pformat_cap
     set_left = set(left)
     set_right = set(right)
     common = set_left.intersection(set_right)
@@ -36,13 +41,35 @@ def _compare_eq_mapping(
     len_extra_left = len(extra_left)
     if len_extra_left:
         yield f"Left contains {len_extra_left} more item{'' if len_extra_left == 1 else 's'}:"
-        yield from highlighter(
-            pprint.pformat({k: left[k] for k in extra_left})
-        ).splitlines()
+        yield from _format_extra_items(left, extra_left, highlighter, max_lines)
     extra_right = set_right - set_left
     len_extra_right = len(extra_right)
     if len_extra_right:
         yield f"Right contains {len_extra_right} more item{'' if len_extra_right == 1 else 's'}:"
+        yield from _format_extra_items(right, extra_right, highlighter, max_lines)
+
+
+def _format_extra_items(
+    mapping: Mapping[object, object],
+    keys: Collection[object],
+    highlighter: _HighlightFunc,
+    max_lines: int | None,
+) -> Iterator[str]:
+    """Render the "X contains N more items" subdict.
+
+    Small (or untruncated, ``max_lines is None``) output keeps the compact,
+    key-sorted ``pprint`` block. When there are more extra keys than the
+    truncation budget, ``pprint.pformat`` would format the whole subdict
+    just to have all but the first few lines dropped, so instead emit only
+    the smallest ``max_lines`` keys, one per line — deterministic via the
+    same safe sort ``pprint`` uses, char-bounded via ``saferepr``. (Only
+    output that would be truncated anyway is rendered differently; the
+    smallest keys shown are the same ones ``pprint`` would have led with.)
+    """
+    if max_lines is None or len(keys) <= max_lines:
         yield from highlighter(
-            pprint.pformat({k: right[k] for k in extra_right})
+            pprint.pformat({k: mapping[k] for k in keys})
         ).splitlines()
+        return
+    for k in heapq.nsmallest(max_lines, keys, key=_safe_key):
+        yield highlighter(saferepr({k: mapping[k]}))
diff --git a/testing/test_assertion.py b/testing/test_assertion.py
index 0711d4f9d89..46b8fe9cbd4 100644
--- a/testing/test_assertion.py
+++ b/testing/test_assertion.py
@@ -938,6 +938,40 @@ def test_dict_different_items(self) -> None:
             "  }",
         ]
 
+    def test_dict_extra_items_bounded_under_budget(self) -> None:
+        # Many extra keys + a truncation budget: the subdict is not
+        # pretty-printed in full; only the smallest ``max_lines`` keys are
+        # emitted, one per line (deterministic, char-bounded).
+        from _pytest.assertion._compare_mapping import _compare_eq_mapping
+
+        out = list(
+            _compare_eq_mapping(
+                {i: i for i in range(1000)},
+                {},
+                util.dummy_highlighter,
+                0,
+                (5, 350),
+            )
+        )
+        assert out[0] == "Left contains 1000 more items:"
+        body = out[1:]
+        assert body == [f"{{{i}: {i}}}" for i in range(5)]  # smallest 5, sorted
+
+    def test_dict_extra_items_small_keeps_pformat_block(self) -> None:
+        # Under the budget, the compact key-sorted pprint block is unchanged.
+        from _pytest.assertion._compare_mapping import _compare_eq_mapping
+
+        out = list(
+            _compare_eq_mapping(
+                {"b": 2, "a": 1},
+                {},
+                util.dummy_highlighter,
+                0,
+                (5, 350),
+            )
+        )
+        assert out == ["Left contains 2 more items:", "{'a': 1, 'b': 2}"]
+
     def test_mapping_different_items(self) -> None:
         class SimpleMapping(Mapping[str, int]):
             def __init__(self, values: dict[str, int]) -> None:

From fa6f5f01de19ae0105188aea44f9cf40f4b4c423 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 15 Jun 2026 11:33:49 +0000
Subject: [PATCH 12/12] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 testing/io/test_pprint.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/testing/io/test_pprint.py b/testing/io/test_pprint.py
index 28bcf193ee1..9ad408d286c 100644
--- a/testing/io/test_pprint.py
+++ b/testing/io/test_pprint.py
@@ -675,9 +675,7 @@ def __hash__(self) -> int:
     ("key", "expected"),
     [
         pytest.param(_HashableDict(), "{\n    {}: 0,\n}", id="empty-dict-key"),
-        pytest.param(
-            _HashableDict({"a": 1}), "{\n    {'a': 1}: 0,\n}", id="dict-key"
-        ),
+        pytest.param(_HashableDict({"a": 1}), "{\n    {'a': 1}: 0,\n}", id="dict-key"),
         pytest.param(_HashableList(), "{\n    []: 0,\n}", id="empty-list-key"),
         pytest.param(_HashableList([1, 2]), "{\n    [1, 2]: 0,\n}", id="list-key"),
     ],