From ea9008a17079e7de0242517239742580e37a3665 Mon Sep 17 00:00:00 2001 From: Logan Riggs Date: Thu, 18 Jun 2026 12:11:12 -0700 Subject: [PATCH] Revert "GH-343: Fix ListVector offset buffer not properly serialized for nested empty arrays (#967)" This reverts commit 0f8a0808fd9cf0bd22d3c6b40a2016ee724ce185. --- .../arrow/vector/complex/LargeListVector.java | 7 ++----- .../arrow/vector/complex/ListVector.java | 7 ++----- .../arrow/vector/TestLargeListVector.java | 20 ------------------- .../apache/arrow/vector/TestListVector.java | 20 ------------------- 4 files changed, 4 insertions(+), 50 deletions(-) diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java index 92dd3eaef7..997b5a8b78 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java @@ -309,14 +309,11 @@ private void setReaderAndWriterIndex() { offsetBuffer.readerIndex(0); if (valueCount == 0) { validityBuffer.writerIndex(0); + offsetBuffer.writerIndex(0); } else { validityBuffer.writerIndex(BitVectorHelper.getValidityBufferSizeFromCount(valueCount)); + offsetBuffer.writerIndex((valueCount + 1) * OFFSET_WIDTH); } - // IPC serializer will determine readable bytes based on `readerIndex` and `writerIndex`. - // Both are set to 0 means 0 bytes are written to the IPC stream which will crash IPC readers - // in other libraries. According to Arrow spec, we should still output the offset buffer which - // is [0]. - offsetBuffer.writerIndex((long) (valueCount + 1) * OFFSET_WIDTH); } /** diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index 6c3993df63..93a313ef4f 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -267,14 +267,11 @@ private void setReaderAndWriterIndex() { offsetBuffer.readerIndex(0); if (valueCount == 0) { validityBuffer.writerIndex(0); + offsetBuffer.writerIndex(0); } else { validityBuffer.writerIndex(BitVectorHelper.getValidityBufferSizeFromCount(valueCount)); + offsetBuffer.writerIndex((valueCount + 1) * OFFSET_WIDTH); } - // IPC serializer will determine readable bytes based on `readerIndex` and `writerIndex`. - // Both are set to 0 means 0 bytes are written to the IPC stream which will crash IPC readers - // in other libraries. According to Arrow spec, we should still output the offset buffer which - // is [0]. - offsetBuffer.writerIndex((long) (valueCount + 1) * OFFSET_WIDTH); } /** diff --git a/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java index bf9bba9c78..ccc0d3e176 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java @@ -1100,26 +1100,6 @@ public void testCopyValueSafeForExtensionType() throws Exception { } } - @Test - public void testEmptyLargeListOffsetBuffer() { - // Test that LargeListVector has correct readableBytes after allocation. - // According to Arrow spec, offset buffer must have N+1 entries. - // Even when N=0, it should contain [0]. - try (LargeListVector list = LargeListVector.empty("list", allocator)) { - list.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); - list.allocateNew(); - list.setValueCount(0); - - List buffers = list.getFieldBuffers(); - assertTrue( - buffers.get(1).readableBytes() >= LargeListVector.OFFSET_WIDTH, - "Offset buffer should have at least " - + LargeListVector.OFFSET_WIDTH - + " bytes for offset[0]"); - assertEquals(0L, list.getOffsetBuffer().getLong(0)); - } - } - private void writeIntValues(UnionLargeListWriter writer, int[] values) { writer.startList(); for (int v : values) { diff --git a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java index 0c90b32abc..1fe4c59f63 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java @@ -1379,26 +1379,6 @@ public void testCopyValueSafeForExtensionType() throws Exception { } } - @Test - public void testEmptyListOffsetBuffer() { - // Test that ListVector has correct readableBytes after allocation. - // According to Arrow spec, offset buffer must have N+1 entries. - // Even when N=0, it should contain [0]. - try (ListVector list = ListVector.empty("list", allocator)) { - list.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); - list.allocateNew(); - list.setValueCount(0); - - List buffers = list.getFieldBuffers(); - assertTrue( - buffers.get(1).readableBytes() >= BaseRepeatedValueVector.OFFSET_WIDTH, - "Offset buffer should have at least " - + BaseRepeatedValueVector.OFFSET_WIDTH - + " bytes for offset[0]"); - assertEquals(0, list.getOffsetBuffer().getInt(0)); - } - } - private void writeIntValues(UnionListWriter writer, int[] values) { writer.startList(); for (int v : values) {