diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java index 92dd3eaef7..23d0380df7 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java @@ -277,8 +277,23 @@ public List getFieldBuffers() { List result = new ArrayList<>(2); setReaderAndWriterIndex(); result.add(validityBuffer); + if (valueCount == 0 && offsetBuffer.capacity() == 0 && offsetBuffer.writerIndex() > 0) { + long writerIdx = offsetBuffer.writerIndex(); + ArrowBuf oldOffsetBuffer = offsetBuffer; + if (validityBuffer == oldOffsetBuffer) { + validityBuffer.readerIndex(0); + validityBuffer.writerIndex(0); + } else { + oldOffsetBuffer.getReferenceManager().release(); + } + long allocSize = + Math.max((long) (valueCount + 1) * OFFSET_WIDTH, INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH); + offsetBuffer = allocator.buffer(allocSize); + offsetBuffer.setZero(0, offsetBuffer.capacity()); + offsetBuffer.readerIndex(0); + offsetBuffer.writerIndex(writerIdx); + } result.add(offsetBuffer); - return result; } diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index 6c3993df63..5f3518ce1d 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -235,8 +235,23 @@ public List getFieldBuffers() { List result = new ArrayList<>(2); setReaderAndWriterIndex(); result.add(validityBuffer); + if (valueCount == 0 && offsetBuffer.capacity() == 0 && offsetBuffer.writerIndex() > 0) { + long writerIdx = offsetBuffer.writerIndex(); + ArrowBuf oldOffsetBuffer = offsetBuffer; + if (validityBuffer == oldOffsetBuffer) { + validityBuffer.readerIndex(0); + validityBuffer.writerIndex(0); + } else { + oldOffsetBuffer.getReferenceManager().release(); + } + long allocSize = + Math.max((long) (valueCount + 1) * OFFSET_WIDTH, INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH); + offsetBuffer = allocator.buffer(allocSize); + offsetBuffer.setZero(0, offsetBuffer.capacity()); + offsetBuffer.readerIndex(0); + offsetBuffer.writerIndex(writerIdx); + } result.add(offsetBuffer); - return result; } diff --git a/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java index bf9bba9c78..ade8478e06 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java @@ -1120,6 +1120,32 @@ public void testEmptyLargeListOffsetBuffer() { } } + @Test + public void testEmptyLargeListOffsetBufferWithoutAllocate() { + // Regression test for the Arrow 19 IOOBE: a never-allocated LargeListVector must produce a + // valid offset buffer from getFieldBuffers() even when allocateNew() was never called. + try (LargeListVector list = LargeListVector.empty("list", allocator)) { + list.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); + list.setValueCount(0); + + List buffers = list.getFieldBuffers(); + ArrowBuf validityBuf = buffers.get(0); + assertEquals(0, validityBuf.readerIndex()); + assertEquals(0, validityBuf.writerIndex()); + assertEquals(0, validityBuf.readableBytes()); + + ArrowBuf offsetBuf = buffers.get(1); + assertTrue( + offsetBuf.capacity() >= LargeListVector.OFFSET_WIDTH, + "Returned offset buffer should have capacity >= " + LargeListVector.OFFSET_WIDTH); + assertTrue( + offsetBuf.readableBytes() >= LargeListVector.OFFSET_WIDTH, + "Returned offset buffer should have readableBytes >= " + LargeListVector.OFFSET_WIDTH); + assertEquals(0L, offsetBuf.getLong(0)); + // Vector owns the buffer — no manual close needed + } + } + private void writeIntValues(UnionLargeListWriter writer, int[] values) { writer.startList(); for (int v : values) { diff --git a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java index 0c90b32abc..6dcd8437f0 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java @@ -1399,6 +1399,35 @@ public void testEmptyListOffsetBuffer() { } } + @Test + public void testEmptyListOffsetBufferWithoutAllocate() { + // Regression test for the Arrow 19 IOOBE: a never-allocated ListVector must produce a valid + // offset buffer from getFieldBuffers() even when allocateNew() was never called. + // getFieldBuffers() allocates a real offset buffer for an empty vector whose own offset buffer + // has capacity 0 but writerIndex > 0 (the inconsistent state from Arrow 19). + try (ListVector list = ListVector.empty("list", allocator)) { + list.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); + list.setValueCount(0); + + List buffers = list.getFieldBuffers(); + ArrowBuf validityBuf = buffers.get(0); + assertEquals(0, validityBuf.readerIndex()); + assertEquals(0, validityBuf.writerIndex()); + assertEquals(0, validityBuf.readableBytes()); + + ArrowBuf offsetBuf = buffers.get(1); + assertTrue( + offsetBuf.capacity() >= BaseRepeatedValueVector.OFFSET_WIDTH, + "Returned offset buffer should have capacity >= " + BaseRepeatedValueVector.OFFSET_WIDTH); + assertTrue( + offsetBuf.readableBytes() >= BaseRepeatedValueVector.OFFSET_WIDTH, + "Returned offset buffer should have readableBytes >= " + + BaseRepeatedValueVector.OFFSET_WIDTH); + assertEquals(0, offsetBuf.getInt(0)); + // Vector owns the buffer — no manual close needed + } + } + private void writeIntValues(UnionListWriter writer, int[] values) { writer.startList(); for (int v : values) {