Skip to content

Commit 3cd7c6e

Browse files
ogrisel authored and serhiy-storchaka committed
bpo-31993: Do not allocate large temporary buffers in pickle dump. (#4353)
The picklers no longer allocate temporary memory when dumping large bytes and str objects into a file object. Instead the data is streamed directly into the underlying file object. Previously the C implementation would buffer all content and issue a single call to file.write() at the end of the dump. With protocol 4 this behavior has changed to issue one call to file.write() per frame. The Python pickler with protocol 4 now dumps each frame's content as a memoryview of an io.BytesIO instance that is never reused, and the memoryview is no longer released after the call to write. This makes it possible for the file object to delay access to the memoryview of previous frames without forcing any additional memory copy, as was already possible with the C pickler.
1 parent 85ac726 commit 3cd7c6e

6 files changed

Lines changed: 297 additions & 50 deletions

File tree

Lib/pickle.py

Lines changed: 40 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -201,21 +201,46 @@ def commit_frame(self, force=False):
201201
if self.current_frame:
202202
f = self.current_frame
203203
if f.tell() >= self._FRAME_SIZE_TARGET or force:
204-
with f.getbuffer() as data:
205-
n = len(data)
206-
write = self.file_write
207-
write(FRAME)
208-
write(pack("<Q", n))
209-
write(data)
210-
f.seek(0)
211-
f.truncate()
204+
data = f.getbuffer()
205+
write = self.file_write
206+
# Issue a single call to the write method of the underlying
207+
# file object for the frame opcode with the size of the
208+
# frame. The concatenation is expected to be less expensive
209+
# than issuing an additional call to write.
210+
write(FRAME + pack("<Q", len(data)))
211+
212+
# Issue a separate call to write to append the frame
213+
# contents without concatenation to the above to avoid a
214+
# memory copy.
215+
write(data)
216+
217+
# Start the new frame with a new io.BytesIO instance so that
218+
# the file object can have delayed access to the previous frame
219+
# contents via an unreleased memoryview of the previous
220+
# io.BytesIO instance.
221+
self.current_frame = io.BytesIO()
212222

213223
def write(self, data):
214224
if self.current_frame:
215225
return self.current_frame.write(data)
216226
else:
217227
return self.file_write(data)
218228

229+
def write_large_bytes(self, header, payload):
230+
write = self.file_write
231+
if self.current_frame:
232+
# Terminate the current frame and flush it to the file.
233+
self.commit_frame(force=True)
234+
235+
# Perform direct write of the header and payload of the large binary
236+
# object. Be careful not to concatenate the header and the payload
237+
# prior to calling 'write' as we do not want to allocate a large
238+
# temporary bytes object.
239+
# We intentionally do not insert a protocol 4 frame opcode to make
240+
# it possible to optimize file.read calls in the loader.
241+
write(header)
242+
write(payload)
243+
219244

220245
class _Unframer:
221246

@@ -379,6 +404,7 @@ def __init__(self, file, protocol=None, *, fix_imports=True):
379404
raise TypeError("file must have a 'write' attribute")
380405
self.framer = _Framer(self._file_write)
381406
self.write = self.framer.write
407+
self._write_large_bytes = self.framer.write_large_bytes
382408
self.memo = {}
383409
self.proto = int(protocol)
384410
self.bin = protocol >= 1
@@ -699,7 +725,9 @@ def save_bytes(self, obj):
699725
if n <= 0xff:
700726
self.write(SHORT_BINBYTES + pack("<B", n) + obj)
701727
elif n > 0xffffffff and self.proto >= 4:
702-
self.write(BINBYTES8 + pack("<Q", n) + obj)
728+
self._write_large_bytes(BINBYTES8 + pack("<Q", n), obj)
729+
elif n >= self.framer._FRAME_SIZE_TARGET:
730+
self._write_large_bytes(BINBYTES + pack("<I", n), obj)
703731
else:
704732
self.write(BINBYTES + pack("<I", n) + obj)
705733
self.memoize(obj)
@@ -712,7 +740,9 @@ def save_str(self, obj):
712740
if n <= 0xff and self.proto >= 4:
713741
self.write(SHORT_BINUNICODE + pack("<B", n) + encoded)
714742
elif n > 0xffffffff and self.proto >= 4:
715-
self.write(BINUNICODE8 + pack("<Q", n) + encoded)
743+
self._write_large_bytes(BINUNICODE8 + pack("<Q", n), encoded)
744+
elif n >= self.framer._FRAME_SIZE_TARGET:
745+
self._write_large_bytes(BINUNICODE + pack("<I", n), encoded)
716746
else:
717747
self.write(BINUNICODE + pack("<I", n) + encoded)
718748
else:

Lib/pickletools.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2279,7 +2279,7 @@ def optimize(p):
22792279
if arg > proto:
22802280
proto = arg
22812281
if pos == 0:
2282-
protoheader = p[pos: end_pos]
2282+
protoheader = p[pos:end_pos]
22832283
else:
22842284
opcodes.append((pos, end_pos))
22852285
else:
@@ -2295,6 +2295,7 @@ def optimize(p):
22952295
pickler.framer.start_framing()
22962296
idx = 0
22972297
for op, arg in opcodes:
2298+
frameless = False
22982299
if op is put:
22992300
if arg not in newids:
23002301
continue
@@ -2305,8 +2306,12 @@ def optimize(p):
23052306
data = pickler.get(newids[arg])
23062307
else:
23072308
data = p[op:arg]
2308-
pickler.framer.commit_frame()
2309-
pickler.write(data)
2309+
frameless = len(data) > pickler.framer._FRAME_SIZE_TARGET
2310+
pickler.framer.commit_frame(force=frameless)
2311+
if frameless:
2312+
pickler.framer.file_write(data)
2313+
else:
2314+
pickler.write(data)
23102315
pickler.framer.end_framing()
23112316
return out.getvalue()
23122317

Lib/test/pickletester.py

Lines changed: 118 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2042,21 +2042,40 @@ def test_setitems_on_non_dicts(self):
20422042
def check_frame_opcodes(self, pickled):
20432043
"""
20442044
Check the arguments of FRAME opcodes in a protocol 4+ pickle.
2045+
2046+
Note that binary objects that are larger than FRAME_SIZE_TARGET are not
2047+
framed by default and are therefore considered a frame by themselves in
2048+
the following consistency check.
20452049
"""
2046-
frame_opcode_size = 9
2047-
last_arg = last_pos = None
2050+
last_arg = last_pos = last_frame_opcode_size = None
2051+
frameless_opcode_sizes = {
2052+
'BINBYTES': 5,
2053+
'BINUNICODE': 5,
2054+
'BINBYTES8': 9,
2055+
'BINUNICODE8': 9,
2056+
}
20482057
for op, arg, pos in pickletools.genops(pickled):
2049-
if op.name != 'FRAME':
2058+
if op.name in frameless_opcode_sizes:
2059+
if len(arg) > self.FRAME_SIZE_TARGET:
2060+
frame_opcode_size = frameless_opcode_sizes[op.name]
2061+
arg = len(arg)
2062+
else:
2063+
continue
2064+
elif op.name == 'FRAME':
2065+
frame_opcode_size = 9
2066+
else:
20502067
continue
2068+
20512069
if last_pos is not None:
20522070
# The previous frame's size should be equal to the number
20532071
# of bytes up to the current frame.
2054-
frame_size = pos - last_pos - frame_opcode_size
2072+
frame_size = pos - last_pos - last_frame_opcode_size
20552073
self.assertEqual(frame_size, last_arg)
20562074
last_arg, last_pos = arg, pos
2075+
last_frame_opcode_size = frame_opcode_size
20572076
# The last frame's size should be equal to the number of bytes up
20582077
# to the pickle's end.
2059-
frame_size = len(pickled) - last_pos - frame_opcode_size
2078+
frame_size = len(pickled) - last_pos - last_frame_opcode_size
20602079
self.assertEqual(frame_size, last_arg)
20612080

20622081
def test_framing_many_objects(self):
@@ -2076,15 +2095,36 @@ def test_framing_many_objects(self):
20762095

20772096
def test_framing_large_objects(self):
20782097
N = 1024 * 1024
2079-
obj = [b'x' * N, b'y' * N, b'z' * N]
2098+
obj = [b'x' * N, b'y' * N, 'z' * N]
20802099
for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
2081-
with self.subTest(proto=proto):
2082-
pickled = self.dumps(obj, proto)
2083-
unpickled = self.loads(pickled)
2084-
self.assertEqual(obj, unpickled)
2085-
n_frames = count_opcode(pickle.FRAME, pickled)
2086-
self.assertGreaterEqual(n_frames, len(obj))
2087-
self.check_frame_opcodes(pickled)
2100+
for fast in [True, False]:
2101+
with self.subTest(proto=proto, fast=fast):
2102+
if hasattr(self, 'pickler'):
2103+
buf = io.BytesIO()
2104+
pickler = self.pickler(buf, protocol=proto)
2105+
pickler.fast = fast
2106+
pickler.dump(obj)
2107+
pickled = buf.getvalue()
2108+
elif fast:
2109+
continue
2110+
else:
2111+
# Fallback to self.dumps when fast=False and
2112+
# self.pickler is not available.
2113+
pickled = self.dumps(obj, proto)
2114+
unpickled = self.loads(pickled)
2115+
# More informative error message in case of failure.
2116+
self.assertEqual([len(x) for x in obj],
2117+
[len(x) for x in unpickled])
2118+
# Perform full equality check if the lengths match.
2119+
self.assertEqual(obj, unpickled)
2120+
n_frames = count_opcode(pickle.FRAME, pickled)
2121+
if not fast:
2122+
# One frame per memoize for each large object.
2123+
self.assertGreaterEqual(n_frames, len(obj))
2124+
else:
2125+
# One frame at the beginning and one at the end.
2126+
self.assertGreaterEqual(n_frames, 2)
2127+
self.check_frame_opcodes(pickled)
20882128

20892129
def test_optional_frames(self):
20902130
if pickle.HIGHEST_PROTOCOL < 4:
@@ -2125,6 +2165,71 @@ def remove_frames(pickled, keep_frame=None):
21252165
count_opcode(pickle.FRAME, pickled))
21262166
self.assertEqual(obj, self.loads(some_frames_pickle))
21272167

2168+
def test_framed_write_sizes_with_delayed_writer(self):
2169+
class ChunkAccumulator:
2170+
"""Accumulate pickler output in a list of raw chunks."""
2171+
2172+
def __init__(self):
2173+
self.chunks = []
2174+
2175+
def write(self, chunk):
2176+
self.chunks.append(chunk)
2177+
2178+
def concatenate_chunks(self):
2179+
# Some chunks can be memoryview instances, we need to convert
2180+
# them to bytes to be able to call join
2181+
return b"".join([c.tobytes() if hasattr(c, 'tobytes') else c
2182+
for c in self.chunks])
2183+
2184+
small_objects = [(str(i).encode('ascii'), i % 42, {'i': str(i)})
2185+
for i in range(int(1e4))]
2186+
2187+
for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
2188+
# Protocol 4 packs groups of small objects into frames and issues
2189+
# calls to write only once or twice per frame:
2190+
# The C pickler issues one call to write per-frame (header and
2191+
# contents) while Python pickler issues two calls to write: one for
2192+
# the frame header and one for the frame binary contents.
2193+
writer = ChunkAccumulator()
2194+
self.pickler(writer, proto).dump(small_objects)
2195+
2196+
# Actually read the binary content of the chunks after the end
2197+
# of the call to dump: any memoryview passed to write should not
2198+
# be released otherwise this delayed access would not be possible.
2199+
pickled = writer.concatenate_chunks()
2200+
reconstructed = self.loads(pickled)
2201+
self.assertEqual(reconstructed, small_objects)
2202+
self.assertGreater(len(writer.chunks), 1)
2203+
2204+
n_frames, remainder = divmod(len(pickled), self.FRAME_SIZE_TARGET)
2205+
if remainder > 0:
2206+
n_frames += 1
2207+
2208+
# There should be at least one call to write per frame
2209+
self.assertGreaterEqual(len(writer.chunks), n_frames)
2210+
2211+
# but not too many either: there can be one for the proto,
2212+
# one per-frame header and one per frame for the actual contents.
2213+
self.assertGreaterEqual(2 * n_frames + 1, len(writer.chunks))
2214+
2215+
chunk_sizes = [len(c) for c in writer.chunks[:-1]]
2216+
large_sizes = [s for s in chunk_sizes
2217+
if s >= self.FRAME_SIZE_TARGET]
2218+
small_sizes = [s for s in chunk_sizes
2219+
if s < self.FRAME_SIZE_TARGET]
2220+
2221+
# Large chunks should not be too large:
2222+
for chunk_size in large_sizes:
2223+
self.assertGreater(2 * self.FRAME_SIZE_TARGET, chunk_size)
2224+
2225+
last_chunk_size = len(writer.chunks[-1])
2226+
self.assertGreater(2 * self.FRAME_SIZE_TARGET, last_chunk_size)
2227+
2228+
# Small chunks (if any) should be very small
2229+
# (only proto and frame headers)
2230+
for chunk_size in small_sizes:
2231+
self.assertGreaterEqual(9, chunk_size)
2232+
21282233
def test_nested_names(self):
21292234
global Nested
21302235
class Nested:

Lib/test/test_pickletools.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@ def loads(self, buf, **kwds):
1515
# Test relies on precise output of dumps()
1616
test_pickle_to_2x = None
1717

18+
# Test relies on writing by chunks into a file object.
19+
test_framed_write_sizes_with_delayed_writer = None
20+
1821
def test_optimize_long_binget(self):
1922
data = [str(i) for i in range(257)]
2023
data.append(data[-1])
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
The picklers no longer allocate temporary memory when dumping large
2+
``bytes`` and ``str`` objects into a file object. Instead the data is
3+
directly streamed into the underlying file object.
4+
5+
Previously the C implementation would buffer all content and issue a
6+
single call to ``file.write`` at the end of the dump. With protocol 4
7+
this behavior has changed to issue one call to ``file.write`` per frame.
8+
9+
The Python pickler with protocol 4 now dumps each frame content as a
10+
memoryview of an io.BytesIO instance that is never reused and the
11+
memoryview is no longer released after the call to write. This makes it
12+
possible for the file object to delay access to the memoryview of
13+
previous frames without forcing any additional memory copy as was
14+
already possible with the C pickler.

0 commit comments

Comments
 (0)