From 76371940296ebc4a5c2e3d4a5c366e1a1dfc66a0 Mon Sep 17 00:00:00 2001 From: Prashanth Badari <102688956+prashanthbdremio@users.noreply.github.com> Date: Thu, 18 Jun 2026 13:12:27 -0400 Subject: [PATCH 1/6] Fix IOOBE in ListVector/LargeListVector.setReaderAndWriterIndex() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GH-343 (Jan 2026) changed setReaderAndWriterIndex() to unconditionally set offsetBuffer.writerIndex((valueCount + 1) * OFFSET_WIDTH) so that even empty lists export the trailing-zero offset required by the Arrow IPC spec. However, unlike the analogous fix in BaseVariableWidthVector, this change did not include a realloc guard — when the offset buffer has never been allocated (capacity == 0), the writerIndex is set to 4 on a zero-capacity buffer, producing the inconsistent state: readerIndex: 0, writerIndex: 4, capacity(0) Netty's AbstractByteBuf.writerIndex() bounds-checks against capacity and throws IndexOutOfBoundsException. This crashes Dremio's FragmentWritableBatch serialization path (SingleSenderOperator, PartitionSenderOperator, etc.) when an empty ListVector reaches the VectorUnloader -> NettyArrowBuf.unwrapBuffer() chain. The fix adds the same realloc guard that BaseVariableWidthVector already uses: grow the offset buffer to the required size before setting writerIndex. This mirrors what exportCDataBuffers() already does for the capacity == 0 case. Affects both ListVector (OFFSET_WIDTH = 4) and LargeListVector (OFFSET_WIDTH = 8). 
Co-Authored-By: Claude Opus 4.6 (1M context) Change-Id: Ib5ca53064ea5bc69c43fd6cc09692b07199c586c --- .../apache/arrow/vector/complex/LargeListVector.java | 11 ++++++++++- .../org/apache/arrow/vector/complex/ListVector.java | 11 ++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java index 92dd3eaef7..d8ac81a721 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java @@ -316,7 +316,16 @@ private void setReaderAndWriterIndex() { // Both are set to 0 means 0 bytes are written to the IPC stream which will crash IPC readers // in other libraries. According to Arrow spec, we should still output the offset buffer which // is [0]. - offsetBuffer.writerIndex((long) (valueCount + 1) * OFFSET_WIDTH); + final long requiredOffsetBufferSize = (long) (valueCount + 1) * OFFSET_WIDTH; + if (offsetBuffer.capacity() < requiredOffsetBufferSize) { + ArrowBuf newOffsetBuffer = allocateOffsetBuffer(requiredOffsetBufferSize); + if (offsetBuffer.capacity() > 0) { + newOffsetBuffer.setBytes(0, offsetBuffer, 0, offsetBuffer.capacity()); + } + offsetBuffer.getReferenceManager().release(); + offsetBuffer = newOffsetBuffer; + } + offsetBuffer.writerIndex(requiredOffsetBufferSize); } /** diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index 6c3993df63..dd7cde8d7d 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -274,7 +274,16 @@ private void setReaderAndWriterIndex() { // Both are set to 0 means 0 bytes are written to the IPC stream which will crash IPC readers // in other libraries. 
According to Arrow spec, we should still output the offset buffer which // is [0]. - offsetBuffer.writerIndex((long) (valueCount + 1) * OFFSET_WIDTH); + final long requiredOffsetBufferSize = (long) (valueCount + 1) * OFFSET_WIDTH; + if (offsetBuffer.capacity() < requiredOffsetBufferSize) { + ArrowBuf newOffsetBuffer = allocateOffsetBuffer(requiredOffsetBufferSize); + if (offsetBuffer.capacity() > 0) { + newOffsetBuffer.setBytes(0, offsetBuffer, 0, offsetBuffer.capacity()); + } + offsetBuffer.getReferenceManager().release(); + offsetBuffer = newOffsetBuffer; + } + offsetBuffer.writerIndex(requiredOffsetBufferSize); } /** From 81ac711c9d0ec72d201bb83a020c47d650514e80 Mon Sep 17 00:00:00 2001 From: Prashanth Badari <102688956+prashanthbdremio@users.noreply.github.com> Date: Thu, 18 Jun 2026 13:27:18 -0400 Subject: [PATCH 2/6] Add regression tests for never-allocated ListVector/LargeListVector Tests exercise the exact bug path: setValueCount(0) on a vector where allocateNew() was never called, leaving the offset buffer at capacity 0. - testEmptyListOffsetBufferWithoutAllocate: verifies getFieldBuffers() produces a valid offset buffer after the realloc guard fires - testEmptyListGetBuffersWithoutAllocate: exercises getBuffers(false), the IPC serialization entry point that produced the original Netty IOOBE via VectorUnloader -> NettyArrowBuf.unwrapBuffer() - Analogous tests for LargeListVector The existing testEmptyListOffsetBuffer / testEmptyLargeListOffsetBuffer tests call allocateNew() before setValueCount(0), so the offset buffer always has nonzero capacity and the realloc guard is never entered. 
Co-Authored-By: Claude Opus 4.6 (1M context) Change-Id: I1972ad3d80f405ed9b41103e88902571b63defb0 --- .../arrow/vector/TestLargeListVector.java | 35 +++++++++++++++++ .../apache/arrow/vector/TestListVector.java | 38 +++++++++++++++++++ 2 files changed, 73 insertions(+) diff --git a/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java index bf9bba9c78..295aef1b63 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java @@ -1120,6 +1120,41 @@ public void testEmptyLargeListOffsetBuffer() { } } + @Test + public void testEmptyLargeListOffsetBufferWithoutAllocate() { + // Regression test for the Arrow 19 IOOBE: a never-allocated LargeListVector must still produce + // a valid offset buffer after setValueCount(0). Without the realloc guard in + // setReaderAndWriterIndex(), this sets writerIndex=8 on a capacity-0 buffer. + try (LargeListVector list = LargeListVector.empty("list", allocator)) { + list.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); + list.setValueCount(0); // no allocateNew() — offset buffer starts at capacity 0 + + List buffers = list.getFieldBuffers(); + assertTrue( + buffers.get(1).readableBytes() >= LargeListVector.OFFSET_WIDTH, + "Offset buffer should have at least " + + LargeListVector.OFFSET_WIDTH + + " bytes for offset[0]"); + assertEquals(0L, list.getOffsetBuffer().getLong(0)); + } + } + + @Test + public void testEmptyLargeListGetBuffersWithoutAllocate() { + // Exercises the getBuffers(false) entry point — the IPC serialization path. 
+ try (LargeListVector list = LargeListVector.empty("list", allocator)) { + list.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); + list.setValueCount(0); + + ArrowBuf[] bufs = list.getBuffers(false); + assertTrue( + list.getOffsetBuffer().capacity() >= LargeListVector.OFFSET_WIDTH, + "Offset buffer capacity should be >= " + + LargeListVector.OFFSET_WIDTH + + " after setReaderAndWriterIndex"); + } + } + private void writeIntValues(UnionLargeListWriter writer, int[] values) { writer.startList(); for (int v : values) { diff --git a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java index 0c90b32abc..cf227102d5 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java @@ -1399,6 +1399,44 @@ public void testEmptyListOffsetBuffer() { } } + @Test + public void testEmptyListOffsetBufferWithoutAllocate() { + // Regression test for the Arrow 19 IOOBE: a never-allocated ListVector must still produce + // a valid offset buffer after setValueCount(0). Without the realloc guard in + // setReaderAndWriterIndex(), this sets writerIndex=4 on a capacity-0 buffer. 
+ try (ListVector list = ListVector.empty("list", allocator)) { + list.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); + list.setValueCount(0); // no allocateNew() — offset buffer starts at capacity 0 + + List buffers = list.getFieldBuffers(); + assertTrue( + buffers.get(1).readableBytes() >= BaseRepeatedValueVector.OFFSET_WIDTH, + "Offset buffer should have at least " + + BaseRepeatedValueVector.OFFSET_WIDTH + + " bytes for offset[0]"); + assertEquals(0, list.getOffsetBuffer().getInt(0)); + } + } + + @Test + public void testEmptyListGetBuffersWithoutAllocate() { + // Exercises the getBuffers(false) entry point — the IPC serialization path that produced the + // original Netty IOOBE via VectorUnloader -> NettyArrowBuf.unwrapBuffer(). + try (ListVector list = ListVector.empty("list", allocator)) { + list.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); + list.setValueCount(0); + + ArrowBuf[] bufs = list.getBuffers(false); + // getBufferSize() returns 0 for valueCount==0, so getBuffers returns empty array. + // But the offset buffer on the vector itself must have been grown to valid capacity. + assertTrue( + list.getOffsetBuffer().capacity() >= BaseRepeatedValueVector.OFFSET_WIDTH, + "Offset buffer capacity should be >= " + + BaseRepeatedValueVector.OFFSET_WIDTH + + " after setReaderAndWriterIndex"); + } + } + private void writeIntValues(UnionListWriter writer, int[] values) { writer.startList(); for (int v : values) { From 3c64cb2b36da9d7912cea21cfaec13c132ba5b23 Mon Sep 17 00:00:00 2001 From: Prashanth Badari <102688956+prashanthbdremio@users.noreply.github.com> Date: Thu, 18 Jun 2026 14:16:23 -0400 Subject: [PATCH 3/6] Narrow realloc guard to capacity==0 only The previous guard (capacity < required) could replace an already-allocated offset buffer with a smaller one when setReaderAndWriterIndex() was called before the vector was populated (e.g., during validateFull() on an empty vector). 
The replacement buffer was too small for subsequent writes. Narrowing to capacity==0 targets only the never-allocated empty singleton from allocator.getEmpty(), which is the exact state that causes the IOOBE. Buffers with capacity>0 were properly allocated and should not be replaced. Also removes the dead copy branch (capacity>0 is always false when we enter the guard). Co-Authored-By: Claude Opus 4.6 (1M context) Change-Id: I28ee94d2a2b431f7699ce8c3ce133f184ede5458 --- .../org/apache/arrow/vector/complex/LargeListVector.java | 5 +---- .../java/org/apache/arrow/vector/complex/ListVector.java | 5 +---- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java index d8ac81a721..15bd6b3ce0 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java @@ -317,11 +317,8 @@ private void setReaderAndWriterIndex() { // in other libraries. According to Arrow spec, we should still output the offset buffer which // is [0]. 
final long requiredOffsetBufferSize = (long) (valueCount + 1) * OFFSET_WIDTH; - if (offsetBuffer.capacity() < requiredOffsetBufferSize) { + if (offsetBuffer.capacity() == 0) { ArrowBuf newOffsetBuffer = allocateOffsetBuffer(requiredOffsetBufferSize); - if (offsetBuffer.capacity() > 0) { - newOffsetBuffer.setBytes(0, offsetBuffer, 0, offsetBuffer.capacity()); - } offsetBuffer.getReferenceManager().release(); offsetBuffer = newOffsetBuffer; } diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index dd7cde8d7d..e0b3705241 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -275,11 +275,8 @@ private void setReaderAndWriterIndex() { // in other libraries. According to Arrow spec, we should still output the offset buffer which // is [0]. final long requiredOffsetBufferSize = (long) (valueCount + 1) * OFFSET_WIDTH; - if (offsetBuffer.capacity() < requiredOffsetBufferSize) { + if (offsetBuffer.capacity() == 0) { ArrowBuf newOffsetBuffer = allocateOffsetBuffer(requiredOffsetBufferSize); - if (offsetBuffer.capacity() > 0) { - newOffsetBuffer.setBytes(0, offsetBuffer, 0, offsetBuffer.capacity()); - } offsetBuffer.getReferenceManager().release(); offsetBuffer = newOffsetBuffer; } From 1a009d1bbd0a6569b7201eb4fccbfb158c539b95 Mon Sep 17 00:00:00 2001 From: Prashanth Badari <102688956+prashanthbdremio@users.noreply.github.com> Date: Thu, 18 Jun 2026 14:25:02 -0400 Subject: [PATCH 4/6] Move fix from setReaderAndWriterIndex() to getFieldBuffers() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous approach mutated this.offsetBuffer inside setReaderAndWriterIndex(), which is called from getFieldBuffers(), getBuffers(), and indirectly from validation. 
Replacing the offset buffer during validation broke subsequent writes — tests that called validateFull() on an empty vector before populating data got a 4-byte buffer that was too small for the actual data. The fix now mirrors exportCDataBuffers() exactly: getFieldBuffers() detects the inconsistent state (capacity==0 but writerIndex>0, produced by setReaderAndWriterIndex on a never-allocated buffer) and substitutes a properly-sized temporary buffer for serialization. The vector's own offsetBuffer is never mutated, so subsequent allocateNew/setValueCount calls work normally. setReaderAndWriterIndex() is reverted to upstream Arrow 19 behavior. Co-Authored-By: Claude Opus 4.6 (1M context) Change-Id: I696163fac4fcf5ada1af0edb5da2f4e8ba4e3e84 --- .../arrow/vector/complex/LargeListVector.java | 17 ++++++++--------- .../apache/arrow/vector/complex/ListVector.java | 17 ++++++++--------- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java index 15bd6b3ce0..ae8f8916de 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java @@ -277,8 +277,13 @@ public List getFieldBuffers() { List result = new ArrayList<>(2); setReaderAndWriterIndex(); result.add(validityBuffer); - result.add(offsetBuffer); - + if (offsetBuffer.capacity() == 0 && offsetBuffer.writerIndex() > 0) { + ArrowBuf tempOffset = allocateOffsetBuffer(offsetBuffer.writerIndex()); + tempOffset.writerIndex(offsetBuffer.writerIndex()); + result.add(tempOffset); + } else { + result.add(offsetBuffer); + } return result; } @@ -316,13 +321,7 @@ private void setReaderAndWriterIndex() { // Both are set to 0 means 0 bytes are written to the IPC stream which will crash IPC readers // in other libraries. 
According to Arrow spec, we should still output the offset buffer which // is [0]. - final long requiredOffsetBufferSize = (long) (valueCount + 1) * OFFSET_WIDTH; - if (offsetBuffer.capacity() == 0) { - ArrowBuf newOffsetBuffer = allocateOffsetBuffer(requiredOffsetBufferSize); - offsetBuffer.getReferenceManager().release(); - offsetBuffer = newOffsetBuffer; - } - offsetBuffer.writerIndex(requiredOffsetBufferSize); + offsetBuffer.writerIndex((long) (valueCount + 1) * OFFSET_WIDTH); } /** diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index e0b3705241..9e10d80671 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -235,8 +235,13 @@ public List getFieldBuffers() { List result = new ArrayList<>(2); setReaderAndWriterIndex(); result.add(validityBuffer); - result.add(offsetBuffer); - + if (offsetBuffer.capacity() == 0 && offsetBuffer.writerIndex() > 0) { + ArrowBuf tempOffset = allocateOffsetBuffer(offsetBuffer.writerIndex()); + tempOffset.writerIndex(offsetBuffer.writerIndex()); + result.add(tempOffset); + } else { + result.add(offsetBuffer); + } return result; } @@ -274,13 +279,7 @@ private void setReaderAndWriterIndex() { // Both are set to 0 means 0 bytes are written to the IPC stream which will crash IPC readers // in other libraries. According to Arrow spec, we should still output the offset buffer which // is [0]. 
- final long requiredOffsetBufferSize = (long) (valueCount + 1) * OFFSET_WIDTH; - if (offsetBuffer.capacity() == 0) { - ArrowBuf newOffsetBuffer = allocateOffsetBuffer(requiredOffsetBufferSize); - offsetBuffer.getReferenceManager().release(); - offsetBuffer = newOffsetBuffer; - } - offsetBuffer.writerIndex(requiredOffsetBufferSize); + offsetBuffer.writerIndex((long) (valueCount + 1) * OFFSET_WIDTH); } /** From 974a9a7bbb4067bac25fdcd17ce5b482a8285e9a Mon Sep 17 00:00:00 2001 From: "logan.riggs@gmail.com" Date: Thu, 18 Jun 2026 21:20:16 +0000 Subject: [PATCH 5/6] Own the temporary serialization offset buffer in the vector; fix tests --- .../arrow/vector/complex/LargeListVector.java | 28 ++++++++++++++-- .../arrow/vector/complex/ListVector.java | 28 ++++++++++++++-- .../arrow/vector/TestLargeListVector.java | 28 +++++++++++----- .../apache/arrow/vector/TestListVector.java | 32 ++++++++++++------- 4 files changed, 91 insertions(+), 25 deletions(-) diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java index ae8f8916de..fd2545a9eb 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java @@ -105,6 +105,13 @@ public static LargeListVector empty(String name, BufferAllocator allocator) { /** The maximum index that is actually set. */ private int lastSet; + /** + * Temporary offset buffer used only for serialization of a never-allocated vector (see {@link + * #getFieldBuffers()}). Owned by this vector so it is released in {@link #clear()} rather than + * leaked. + */ + private ArrowBuf serializationOffsetBuffer; + /** * Constructs a new instance. 
* @@ -134,6 +141,7 @@ public LargeListVector(Field field, BufferAllocator allocator, CallBack callBack BitVectorHelper.getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION); this.lastSet = -1; this.offsetBuffer = allocator.getEmpty(); + this.serializationOffsetBuffer = allocator.getEmpty(); this.vector = vector == null ? DEFAULT_DATA_VECTOR : vector; this.valueCount = 0; } @@ -277,10 +285,23 @@ public List getFieldBuffers() { List result = new ArrayList<>(2); setReaderAndWriterIndex(); result.add(validityBuffer); + // A never-allocated vector has an empty (capacity 0) offset buffer, yet setReaderAndWriterIndex + // marks OFFSET_WIDTH bytes as written so that serializers still emit offset[0] = 0 (an empty + // offset buffer would crash IPC readers in other libraries). Serializers read `writerIndex` + // bytes, so we must hand them a properly sized buffer. Mirror exportCDataBuffers() by + // substituting a temporary buffer instead of mutating this.offsetBuffer, which validation and + // subsequent writes still rely on being empty. The temporary is owned by this vector and + // released in clear()/close(), so it is not leaked. if (offsetBuffer.capacity() == 0 && offsetBuffer.writerIndex() > 0) { - ArrowBuf tempOffset = allocateOffsetBuffer(offsetBuffer.writerIndex()); - tempOffset.writerIndex(offsetBuffer.writerIndex()); - result.add(tempOffset); + // Allocate directly rather than via allocateOffsetBuffer(), which would overwrite + // offsetAllocationSizeInBytes and shrink a later allocateNew()'s offset buffer. 
+ final long size = offsetBuffer.writerIndex(); + serializationOffsetBuffer = releaseBuffer(serializationOffsetBuffer); + serializationOffsetBuffer = allocator.buffer(size); + serializationOffsetBuffer.readerIndex(0); + serializationOffsetBuffer.setZero(0, serializationOffsetBuffer.capacity()); + serializationOffsetBuffer.writerIndex(size); + result.add(serializationOffsetBuffer); } else { result.add(offsetBuffer); } @@ -810,6 +831,7 @@ public void clear() { valueCount = 0; super.clear(); validityBuffer = releaseBuffer(validityBuffer); + serializationOffsetBuffer = releaseBuffer(serializationOffsetBuffer); lastSet = -1; } diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index 9e10d80671..81071d8636 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -82,6 +82,13 @@ public static ListVector empty(String name, BufferAllocator allocator) { /** The maximum index that is actually set. */ protected int lastSet; + /** + * Temporary offset buffer used only for serialization of a never-allocated vector (see {@link + * #getFieldBuffers()}). Owned by this vector so it is released in {@link #clear()} rather than + * leaked. + */ + private ArrowBuf serializationOffsetBuffer; + /** * Constructs a new instance. 
* @@ -110,6 +117,7 @@ public ListVector(Field field, BufferAllocator allocator, CallBack callBack) { this.validityAllocationSizeInBytes = BitVectorHelper.getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION); this.lastSet = -1; + this.serializationOffsetBuffer = allocator.getEmpty(); } @Override @@ -235,10 +243,23 @@ public List getFieldBuffers() { List result = new ArrayList<>(2); setReaderAndWriterIndex(); result.add(validityBuffer); + // A never-allocated vector has an empty (capacity 0) offset buffer, yet setReaderAndWriterIndex + // marks OFFSET_WIDTH bytes as written so that serializers still emit offset[0] = 0 (an empty + // offset buffer would crash IPC readers in other libraries). Serializers read `writerIndex` + // bytes, so we must hand them a properly sized buffer. Mirror exportCDataBuffers() by + // substituting a temporary buffer instead of mutating this.offsetBuffer, which validation and + // subsequent writes still rely on being empty. The temporary is owned by this vector and + // released in clear()/close(), so it is not leaked. if (offsetBuffer.capacity() == 0 && offsetBuffer.writerIndex() > 0) { - ArrowBuf tempOffset = allocateOffsetBuffer(offsetBuffer.writerIndex()); - tempOffset.writerIndex(offsetBuffer.writerIndex()); - result.add(tempOffset); + // Allocate directly rather than via allocateOffsetBuffer(), which would overwrite + // offsetAllocationSizeInBytes and shrink a later allocateNew()'s offset buffer. 
+ final long size = offsetBuffer.writerIndex(); + serializationOffsetBuffer = releaseBuffer(serializationOffsetBuffer); + serializationOffsetBuffer = allocator.buffer(size); + serializationOffsetBuffer.readerIndex(0); + serializationOffsetBuffer.setZero(0, serializationOffsetBuffer.capacity()); + serializationOffsetBuffer.writerIndex(size); + result.add(serializationOffsetBuffer); } else { result.add(offsetBuffer); } @@ -657,6 +678,7 @@ public MinorType getMinorType() { public void clear() { super.clear(); validityBuffer = releaseBuffer(validityBuffer); + serializationOffsetBuffer = releaseBuffer(serializationOffsetBuffer); lastSet = -1; } diff --git a/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java index 295aef1b63..cc98d72207 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java @@ -1123,35 +1123,47 @@ public void testEmptyLargeListOffsetBuffer() { @Test public void testEmptyLargeListOffsetBufferWithoutAllocate() { // Regression test for the Arrow 19 IOOBE: a never-allocated LargeListVector must still produce - // a valid offset buffer after setValueCount(0). Without the realloc guard in - // setReaderAndWriterIndex(), this sets writerIndex=8 on a capacity-0 buffer. + // a valid offset buffer for serialization after setValueCount(0). getFieldBuffers() substitutes + // a properly sized temporary offset buffer (holding offset[0] = 0) without mutating the + // vector's own capacity-0 offset buffer. 
try (LargeListVector list = LargeListVector.empty("list", allocator)) { list.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); list.setValueCount(0); // no allocateNew() — offset buffer starts at capacity 0 List buffers = list.getFieldBuffers(); + ArrowBuf offsetBuffer = buffers.get(1); assertTrue( - buffers.get(1).readableBytes() >= LargeListVector.OFFSET_WIDTH, + offsetBuffer.readableBytes() >= LargeListVector.OFFSET_WIDTH, "Offset buffer should have at least " + LargeListVector.OFFSET_WIDTH + " bytes for offset[0]"); - assertEquals(0L, list.getOffsetBuffer().getLong(0)); + assertEquals(0L, offsetBuffer.getLong(0)); + // The vector's own offset buffer is left untouched so subsequent writes still work. + assertEquals(0, list.getOffsetBuffer().capacity()); } } @Test public void testEmptyLargeListGetBuffersWithoutAllocate() { - // Exercises the getBuffers(false) entry point — the IPC serialization path. + // Exercises the IPC serialization entry points — getBuffers(false) and getFieldBuffers(), the + // latter being the path that produced the original Netty IOOBE. try (LargeListVector list = LargeListVector.empty("list", allocator)) { list.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); list.setValueCount(0); + // getBufferSize() returns 0 for valueCount==0, so getBuffers returns an empty array and must + // not crash on the never-allocated offset buffer. ArrowBuf[] bufs = list.getBuffers(false); + assertEquals(0, bufs.length); + + // getFieldBuffers() must hand serializers a readable offset buffer holding offset[0] = 0. 
+ List fieldBuffers = list.getFieldBuffers(); assertTrue( - list.getOffsetBuffer().capacity() >= LargeListVector.OFFSET_WIDTH, - "Offset buffer capacity should be >= " + fieldBuffers.get(1).readableBytes() >= LargeListVector.OFFSET_WIDTH, + "Offset buffer should be readable for >= " + LargeListVector.OFFSET_WIDTH - + " after setReaderAndWriterIndex"); + + " bytes"); + assertEquals(0L, fieldBuffers.get(1).getLong(0)); } } diff --git a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java index cf227102d5..2712c199f0 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java @@ -1402,38 +1402,48 @@ public void testEmptyListOffsetBuffer() { @Test public void testEmptyListOffsetBufferWithoutAllocate() { // Regression test for the Arrow 19 IOOBE: a never-allocated ListVector must still produce - // a valid offset buffer after setValueCount(0). Without the realloc guard in - // setReaderAndWriterIndex(), this sets writerIndex=4 on a capacity-0 buffer. + // a valid offset buffer for serialization after setValueCount(0). getFieldBuffers() substitutes + // a properly sized temporary offset buffer (holding offset[0] = 0) without mutating the + // vector's own capacity-0 offset buffer. 
try (ListVector list = ListVector.empty("list", allocator)) { list.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); list.setValueCount(0); // no allocateNew() — offset buffer starts at capacity 0 List buffers = list.getFieldBuffers(); + ArrowBuf offsetBuffer = buffers.get(1); assertTrue( - buffers.get(1).readableBytes() >= BaseRepeatedValueVector.OFFSET_WIDTH, + offsetBuffer.readableBytes() >= BaseRepeatedValueVector.OFFSET_WIDTH, "Offset buffer should have at least " + BaseRepeatedValueVector.OFFSET_WIDTH + " bytes for offset[0]"); - assertEquals(0, list.getOffsetBuffer().getInt(0)); + assertEquals(0, offsetBuffer.getInt(0)); + // The vector's own offset buffer is left untouched so subsequent writes still work. + assertEquals(0, list.getOffsetBuffer().capacity()); } } @Test public void testEmptyListGetBuffersWithoutAllocate() { - // Exercises the getBuffers(false) entry point — the IPC serialization path that produced the - // original Netty IOOBE via VectorUnloader -> NettyArrowBuf.unwrapBuffer(). + // Exercises the IPC serialization entry points — getBuffers(false) and getFieldBuffers(), the + // latter being the path that produced the original Netty IOOBE via + // VectorUnloader -> NettyArrowBuf.unwrapBuffer(). try (ListVector list = ListVector.empty("list", allocator)) { list.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); list.setValueCount(0); + // getBufferSize() returns 0 for valueCount==0, so getBuffers returns an empty array and must + // not crash on the never-allocated offset buffer. ArrowBuf[] bufs = list.getBuffers(false); - // getBufferSize() returns 0 for valueCount==0, so getBuffers returns empty array. - // But the offset buffer on the vector itself must have been grown to valid capacity. + assertEquals(0, bufs.length); + + // getFieldBuffers() must hand serializers a readable offset buffer holding offset[0] = 0. 
+ List fieldBuffers = list.getFieldBuffers(); assertTrue( - list.getOffsetBuffer().capacity() >= BaseRepeatedValueVector.OFFSET_WIDTH, - "Offset buffer capacity should be >= " + fieldBuffers.get(1).readableBytes() >= BaseRepeatedValueVector.OFFSET_WIDTH, + "Offset buffer should be readable for >= " + BaseRepeatedValueVector.OFFSET_WIDTH - + " after setReaderAndWriterIndex"); + + " bytes"); + assertEquals(0, fieldBuffers.get(1).getInt(0)); } } From e3cfac401a1a8dbc1be993cb695c62fb791644dd Mon Sep 17 00:00:00 2001 From: "logan.riggs@gmail.com" Date: Thu, 18 Jun 2026 21:26:02 +0000 Subject: [PATCH 6/6] spotless --- .../java/org/apache/arrow/vector/TestLargeListVector.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java index cc98d72207..41681d3c62 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java @@ -1160,9 +1160,7 @@ public void testEmptyLargeListGetBuffersWithoutAllocate() { List fieldBuffers = list.getFieldBuffers(); assertTrue( fieldBuffers.get(1).readableBytes() >= LargeListVector.OFFSET_WIDTH, - "Offset buffer should be readable for >= " - + LargeListVector.OFFSET_WIDTH - + " bytes"); + "Offset buffer should be readable for >= " + LargeListVector.OFFSET_WIDTH + " bytes"); assertEquals(0L, fieldBuffers.get(1).getLong(0)); } }