From 81c306329d8a59aa6a76a91a210ea98bcb4ff192 Mon Sep 17 00:00:00 2001
From: "Chen, Junjie" <cjjnjust@gmail.com>
Date: Sat, 8 Sep 2018 21:59:17 +0800
Subject: [PATCH 1/9] PARQUET-1328: Add Bloom filter reader and writer

---
 .../apache/parquet/cli/util/Expressions.java  |   4 +-
 parquet-column/pom.xml                        |   1 -
 .../parquet/column/ParquetProperties.java     |  63 ++-
 .../column/impl/ColumnWriteStoreV1.java       |  20 +-
 .../column/impl/ColumnWriteStoreV2.java       |  26 ++
 .../parquet/column/impl/ColumnWriterV1.java   |  51 ++-
 .../parquet/column/impl/ColumnWriterV2.java   |  56 ++-
 .../values/bloomfilter/BloomFilter.java       | 373 ++++++++++++++++++
 .../bloomfilter/BloomFilterReadStore.java     |  37 ++
 .../values/bloomfilter/BloomFilterReader.java |  31 ++
 .../bloomfilter/BloomFilterWriteStore.java    |  34 ++
 .../values/bloomfilter/BloomFilterWriter.java |  29 ++
 .../converter/ParquetMetadataConverter.java   |   2 +
 .../parquet/hadoop/BloomFilterDataReader.java |  80 ++++
 .../hadoop/ColumnChunkPageWriteStore.java     |  24 +-
 .../parquet/hadoop/ParquetFileReader.java     |  41 +-
 .../parquet/hadoop/ParquetFileWriter.java     |  14 +
 .../parquet/hadoop/ParquetInputFormat.java    |   6 +
 .../parquet/hadoop/ParquetOutputFormat.java   |  29 ++
 .../hadoop/metadata/ColumnChunkMetaData.java  |  70 ++++
 .../parquet/hadoop/TestParquetFileWriter.java |  39 ++
 pom.xml                                       |   2 +-
 22 files changed, 1006 insertions(+), 26 deletions(-)
 create mode 100644 parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilter.java
 create mode 100644 parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilterReadStore.java
 create mode 100644 parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilterReader.java
 create mode 100644 parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilterWriteStore.java
 create mode 100644 parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilterWriter.java
 create mode 100644 parquet-hadoop/src/main/java/org/apache/parquet/hadoop/BloomFilterDataReader.java
diff --git a/parquet-cli/src/main/java/org/apache/parquet/cli/util/Expressions.java b/parquet-cli/src/main/java/org/apache/parquet/cli/util/Expressions.java
index 06b28b46ae..d18ef559f2 100644
--- a/parquet-cli/src/main/java/org/apache/parquet/cli/util/Expressions.java
+++ b/parquet-cli/src/main/java/org/apache/parquet/cli/util/Expressions.java
@@ -19,7 +19,7 @@
 
 package org.apache.parquet.cli.util;
 
-import com.google.common.base.Objects;
+import com.google.common.base.MoreObjects;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
 import org.apache.avro.Schema;
@@ -385,7 +385,7 @@ public int hashCode() {
 
     @Override
     public String toString() {
-      return Objects.toStringHelper(this)
+      return MoreObjects.toStringHelper(this)
           .add("type", type)
           .add("value", value)
           .add("children", children)
diff --git a/parquet-column/pom.xml b/parquet-column/pom.xml
index f85c8b908b..2c40029137 100644
--- a/parquet-column/pom.xml
+++ b/parquet-column/pom.xml
@@ -93,7 +93,6 @@
       <groupId>com.google.guava</groupId>
       <artifactId>guava</artifactId>
       <version>${guava.version}</version>
-      <scope>test</scope>
     </dependency>
   </dependencies>
 
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java b/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java
index 39b65da9fa..94f1978f68 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java
@@ -18,21 +18,23 @@
  */
 package org.apache.parquet.column;
 
+import static org.apache.parquet.bytes.BytesUtils.getWidthFromMaxInt;
+
+import java.util.HashMap;
+
 import org.apache.parquet.Preconditions;
 import org.apache.parquet.bytes.ByteBufferAllocator;
 import org.apache.parquet.bytes.CapacityByteArrayOutputStream;
 import org.apache.parquet.bytes.HeapByteBufferAllocator;
-
-import static org.apache.parquet.bytes.BytesUtils.getWidthFromMaxInt;
 import org.apache.parquet.column.impl.ColumnWriteStoreV1;
 import org.apache.parquet.column.impl.ColumnWriteStoreV2;
 import org.apache.parquet.column.page.PageWriteStore;
 import org.apache.parquet.column.values.ValuesWriter;
 import org.apache.parquet.column.values.bitpacking.DevNullValuesWriter;
 import org.apache.parquet.column.values.factory.DefaultValuesWriterFactory;
+import org.apache.parquet.column.values.factory.ValuesWriterFactory;
 import org.apache.parquet.column.values.rle.RunLengthBitPackingHybridEncoder;
 import org.apache.parquet.column.values.rle.RunLengthBitPackingHybridValuesWriter;
-import org.apache.parquet.column.values.factory.ValuesWriterFactory;
 import org.apache.parquet.schema.MessageType;
 
 /**
@@ -47,6 +49,7 @@ public class ParquetProperties {
   public static final boolean DEFAULT_ESTIMATE_ROW_COUNT_FOR_PAGE_SIZE_CHECK = true;
   public static final int DEFAULT_MINIMUM_RECORD_COUNT_FOR_CHECK = 100;
   public static final int DEFAULT_MAXIMUM_RECORD_COUNT_FOR_CHECK = 10000;
+  public static final boolean DEFAULT_BLOOM_FILTER_ENABLED = false;
 
   public static final ValuesWriterFactory DEFAULT_VALUES_WRITER_FACTORY = new DefaultValuesWriterFactory();
 
@@ -83,10 +86,12 @@ public static WriterVersion fromString(String name) {
   private final boolean estimateNextSizeCheck;
   private final ByteBufferAllocator allocator;
   private final ValuesWriterFactory valuesWriterFactory;
+  private final boolean enableBloomFilter;
+  private final HashMap<String, Long> bloomFilterInfo;
 
   private ParquetProperties(WriterVersion writerVersion, int pageSize, int dictPageSize, boolean enableDict, int minRowCountForPageSizeCheck,
                             int maxRowCountForPageSizeCheck, boolean estimateNextSizeCheck, ByteBufferAllocator allocator,
-                            ValuesWriterFactory writerFactory) {
+                            ValuesWriterFactory writerFactory, boolean enableBloomFilter, HashMap<String, Long> bloomFilterInfo) {
     this.pageSizeThreshold = pageSize;
     this.initialSlabSize = CapacityByteArrayOutputStream
       .initialSlabSizeHeuristic(MIN_SLAB_SIZE, pageSizeThreshold, 10);
@@ -97,7 +102,8 @@ private ParquetProperties(WriterVersion writerVersion, int pageSize, int dictPag
     this.maxRowCountForPageSizeCheck = maxRowCountForPageSizeCheck;
     this.estimateNextSizeCheck = estimateNextSizeCheck;
     this.allocator = allocator;
-
+    this.enableBloomFilter = enableBloomFilter;
+    this.bloomFilterInfo = bloomFilterInfo;
     this.valuesWriterFactory = writerFactory;
   }
 
@@ -159,6 +165,14 @@ public ByteBufferAllocator getAllocator() {
     return allocator;
   }
 
+  public boolean isBloomFilterEnabled() {
+    return enableBloomFilter;
+  }
+
+  public HashMap<String, Long> getBloomFilterInfo() {
+    return bloomFilterInfo;
+  }
+
   public ColumnWriteStore newColumnWriteStore(MessageType schema,
                                               PageWriteStore pageStore) {
     switch (writerVersion) {
@@ -199,6 +213,8 @@ public static class Builder {
     private int pageSize = DEFAULT_PAGE_SIZE;
     private int dictPageSize = DEFAULT_DICTIONARY_PAGE_SIZE;
     private boolean enableDict = DEFAULT_IS_DICTIONARY_ENABLED;
+    private boolean enableBloomFilter = DEFAULT_BLOOM_FILTER_ENABLED;
+    private HashMap<String, Long> bloomFilterInfo = new HashMap<>();
     private WriterVersion writerVersion = DEFAULT_WRITER_VERSION;
     private int minRowCountForPageSizeCheck = DEFAULT_MINIMUM_RECORD_COUNT_FOR_CHECK;
     private int maxRowCountForPageSizeCheck = DEFAULT_MAXIMUM_RECORD_COUNT_FOR_CHECK;
@@ -217,6 +233,8 @@ private Builder(ParquetProperties toCopy) {
       this.maxRowCountForPageSizeCheck = toCopy.maxRowCountForPageSizeCheck;
       this.estimateNextSizeCheck = toCopy.estimateNextSizeCheck;
       this.allocator = toCopy.allocator;
+      this.enableBloomFilter = toCopy.enableBloomFilter;
+      this.bloomFilterInfo = toCopy.bloomFilterInfo;
     }
 
     /**
@@ -256,6 +274,38 @@ public Builder withDictionaryPageSize(int dictionaryPageSize) {
       return this;
     }
 
+    /**
+     * Set to enable Bloom filter.
+     *
+     * @param enableBloomFilter a boolean to indicate whether to enable Bloom filter.
+     * @return this builder for method chaining.
+     */
+    public Builder withBloomFilterEnabled(boolean enableBloomFilter) {
+      this.enableBloomFilter = enableBloomFilter;
+      return this;
+    }
+
+    /**
+     * Set Bloom filter info for columns from two parallel comma-separated lists.
+     *
+     * @param names comma-separated names of the columns to enable Bloom filter for
+     * @param sizes comma-separated Bloom filter sizes, one per entry in {@code names}
+     * @return this builder for method chaining
+     */
+    public Builder withBloomFilterInfo(String names, String sizes) {
+      String[] bloomFilterColumns = names.split(",");
+      String[] bloomFilterSizes = sizes.split(",");
+      // The two lists are positional, so they must have the same number of entries.
+      Preconditions.checkArgument(bloomFilterColumns.length == bloomFilterSizes.length,
+          "Column names are not matched to sizes");
+      // Long.parseLong parses the text; Long.getLong would look up a system property instead.
+      for (int i = 0; i < bloomFilterColumns.length; i++) {
+        bloomFilterInfo.put(bloomFilterColumns[i].trim(), Long.parseLong(bloomFilterSizes[i].trim()));
+      }
+
+      return this;
+    }
+
     /**
      * Set the {@link WriterVersion format version}.
      *
@@ -303,7 +353,8 @@ public ParquetProperties build() {
       ParquetProperties properties =
         new ParquetProperties(writerVersion, pageSize, dictPageSize,
           enableDict, minRowCountForPageSizeCheck, maxRowCountForPageSizeCheck,
-          estimateNextSizeCheck, allocator, valuesWriterFactory);
+          estimateNextSizeCheck, allocator, valuesWriterFactory,
+          enableBloomFilter, bloomFilterInfo);
       // we pass a constructed but uninitialized factory to ParquetProperties above as currently
       // creation of ValuesWriters is invoked from within ParquetProperties. In the future
       // we'd like to decouple that and won't need to pass an object to properties and then pass the
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreV1.java b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreV1.java
index 93a497fad8..bd401430ad 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreV1.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreV1.java
@@ -25,20 +25,21 @@
 import java.util.Set;
 import java.util.TreeMap;
 
-import org.apache.parquet.bytes.ByteBufferAllocator;
 import org.apache.parquet.column.ColumnDescriptor;
 import org.apache.parquet.column.ColumnWriteStore;
 import org.apache.parquet.column.ColumnWriter;
 import org.apache.parquet.column.ParquetProperties;
-import org.apache.parquet.column.ParquetProperties.WriterVersion;
 import org.apache.parquet.column.page.PageWriteStore;
 import org.apache.parquet.column.page.PageWriter;
+import org.apache.parquet.column.values.bloomfilter.BloomFilterWriteStore;
+import org.apache.parquet.column.values.bloomfilter.BloomFilterWriter;
 
 public class ColumnWriteStoreV1 implements ColumnWriteStore {
 
   private final Map<ColumnDescriptor, ColumnWriterV1> columns = new TreeMap<ColumnDescriptor, ColumnWriterV1>();
   private final PageWriteStore pageWriteStore;
   private final ParquetProperties props;
+  private BloomFilterWriteStore bloomFilterWriteStore;
 
   public ColumnWriteStoreV1(PageWriteStore pageWriteStore,
                             ParquetProperties props) {
@@ -46,6 +47,13 @@ public ColumnWriteStoreV1(PageWriteStore pageWriteStore,
     this.props = props;
   }
 
+  /** Creates a store that keeps the given Bloom filter write store for its column writers. */
+  public ColumnWriteStoreV1(PageWriteStore pageWriteStore,
+                            BloomFilterWriteStore bloomFilterWriteStore, ParquetProperties props) {
+    this(pageWriteStore, props);
+    this.bloomFilterWriteStore = bloomFilterWriteStore;
+  }
+
   public ColumnWriter getColumnWriter(ColumnDescriptor path) {
     ColumnWriterV1 column = columns.get(path);
     if (column == null) {
@@ -61,7 +69,13 @@ public Set<ColumnDescriptor> getColumnDescriptors() {
 
   private ColumnWriterV1 newMemColumn(ColumnDescriptor path) {
     PageWriter pageWriter = pageWriteStore.getPageWriter(path);
-    return new ColumnWriterV1(path, pageWriter, props);
+    // bloomFilterWriteStore stays null when the two-argument constructor was used; guard against it.
+    if (bloomFilterWriteStore != null && props.isBloomFilterEnabled()
+        && props.getBloomFilterInfo() != null) {
+      BloomFilterWriter bloomFilterWriter = bloomFilterWriteStore.getBloomFilterWriter(path);
+      return new ColumnWriterV1(path, pageWriter, bloomFilterWriter, props);
+    }
+    return new ColumnWriterV1(path, pageWriter, props);
   }
 
   @Override
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreV2.java b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreV2.java
index 7574cedf75..057660ff5d 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreV2.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreV2.java
@@ -35,6 +35,8 @@
 import org.apache.parquet.column.ParquetProperties;
 import org.apache.parquet.column.page.PageWriteStore;
 import org.apache.parquet.column.page.PageWriter;
+import org.apache.parquet.column.values.bloomfilter.BloomFilterWriteStore;
+import org.apache.parquet.column.values.bloomfilter.BloomFilterWriter;
 import org.apache.parquet.schema.MessageType;
 
 public class ColumnWriteStoreV2 implements ColumnWriteStore {
@@ -66,6 +68,30 @@ public ColumnWriteStoreV2(
     this.rowCountForNextSizeCheck = props.getMinRowCountForPageSizeCheck();
   }
 
+  /**
+   * Creates a store whose column writers also fill Bloom filters when {@code props} enables them.
+   *
+   * @param bloomFilterWriteStore supplies per-column Bloom filter writers; if null, none are used
+   */
+  public ColumnWriteStoreV2(MessageType schema, PageWriteStore pageWriteStore,
+                            BloomFilterWriteStore bloomFilterWriteStore, ParquetProperties props) {
+    this.props = props;
+    this.thresholdTolerance = (long)(props.getPageSizeThreshold() * THRESHOLD_TOLERANCE_RATIO);
+    // Loop-invariant: Bloom filters need the feature enabled, per-column info and a writer store.
+    boolean useBloomFilter = bloomFilterWriteStore != null
+        && props.isBloomFilterEnabled() && props.getBloomFilterInfo() != null;
+    Map<ColumnDescriptor, ColumnWriterV2> mcolumns = new TreeMap<ColumnDescriptor, ColumnWriterV2>();
+    for (ColumnDescriptor path : schema.getColumns()) {
+      PageWriter pageWriter = pageWriteStore.getPageWriter(path);
+      mcolumns.put(path, useBloomFilter
+          ? new ColumnWriterV2(path, pageWriter, bloomFilterWriteStore.getBloomFilterWriter(path), props)
+          : new ColumnWriterV2(path, pageWriter, props));
+    }
+    this.columns = unmodifiableMap(mcolumns);
+    this.writers = this.columns.values();
+    this.rowCountForNextSizeCheck = props.getMinRowCountForPageSizeCheck();
+  }
+
   public ColumnWriter getColumnWriter(ColumnDescriptor path) {
     return columns.get(path);
   }
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterV1.java b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterV1.java
index c1f5d67b01..daf51cfa0d 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterV1.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterV1.java
@@ -21,6 +21,7 @@
 import static org.apache.parquet.bytes.BytesInput.concat;
 
 import java.io.IOException;
+import java.util.HashMap;
 
 import org.apache.parquet.column.ColumnDescriptor;
 import org.apache.parquet.column.ColumnWriter;
@@ -29,6 +30,8 @@
 import org.apache.parquet.column.page.PageWriter;
 import org.apache.parquet.column.statistics.Statistics;
 import org.apache.parquet.column.values.ValuesWriter;
+import org.apache.parquet.column.values.bloomfilter.BloomFilter;
+import org.apache.parquet.column.values.bloomfilter.BloomFilterWriter;
 import org.apache.parquet.io.ParquetEncodingException;
 import org.apache.parquet.io.api.Binary;
 import org.slf4j.Logger;
@@ -55,6 +58,23 @@ final class ColumnWriterV1 implements ColumnWriter {
   private int valueCountForNextSizeCheck;
 
   private Statistics statistics;
+  private BloomFilterWriter bloomFilterWriter;
+  private BloomFilter bloomFilter;
+
+  /** Creates a writer that also fills a Bloom filter when a size is configured for its column. */
+  public ColumnWriterV1(ColumnDescriptor path, PageWriter pageWriter,
+                        BloomFilterWriter bloomFilterWriter, ParquetProperties props) {
+    this(path, pageWriter, props);
+    // Nested columns are not supported yet, so only single-element paths get a Bloom filter.
+    if (path.getPath().length == 1) {
+      this.bloomFilterWriter = bloomFilterWriter;
+      HashMap<String, Long> bloomFilterInfo = props.getBloomFilterInfo();
+      Long size = bloomFilterInfo.get(path.getPath()[0]); // null: no size configured for column
+      if (size != null) {
+        this.bloomFilter = new BloomFilter(size.intValue());
+      }
+    }
+  }
 
   public ColumnWriterV1(ColumnDescriptor path, PageWriter pageWriter,
                         ParquetProperties props) {
@@ -177,6 +197,9 @@ public void write(double value, int repetitionLevel, int definitionLevel) {
     definitionLevelColumn.writeInteger(definitionLevel);
     dataColumn.writeDouble(value);
     updateStatistics(value);
+    if (bloomFilter != null) {
+      bloomFilter.insert(bloomFilter.hash(value));
+    }
     accountForValueWritten();
   }
 
@@ -187,6 +210,9 @@ public void write(float value, int repetitionLevel, int definitionLevel) {
     definitionLevelColumn.writeInteger(definitionLevel);
     dataColumn.writeFloat(value);
     updateStatistics(value);
+    if (bloomFilter != null) {
+      bloomFilter.insert(bloomFilter.hash(value));
+    }
     accountForValueWritten();
   }
 
@@ -197,6 +223,9 @@ public void write(Binary value, int repetitionLevel, int definitionLevel) {
     definitionLevelColumn.writeInteger(definitionLevel);
     dataColumn.writeBytes(value);
     updateStatistics(value);
+    if (bloomFilter != null) {
+      bloomFilter.insert(bloomFilter.hash(value));
+    }
     accountForValueWritten();
   }
 
@@ -217,6 +246,9 @@ public void write(int value, int repetitionLevel, int definitionLevel) {
     definitionLevelColumn.writeInteger(definitionLevel);
     dataColumn.writeInteger(value);
     updateStatistics(value);
+    if (bloomFilter != null) {
+      bloomFilter.insert(bloomFilter.hash(value));
+    }
     accountForValueWritten();
   }
 
@@ -227,6 +259,9 @@ public void write(long value, int repetitionLevel, int definitionLevel) {
     definitionLevelColumn.writeInteger(definitionLevel);
     dataColumn.writeLong(value);
     updateStatistics(value);
+    if (bloomFilter != null) {
+      bloomFilter.insert(bloomFilter.hash(value));
+    }
     accountForValueWritten();
   }
 
@@ -244,6 +279,10 @@ public void flush() {
       }
       dataColumn.resetDictionary();
     }
+
+    if (bloomFilterWriter != null && bloomFilter != null) {
+      bloomFilterWriter.writeBloomFilter(bloomFilter);
+    }
   }
 
   @Override
@@ -257,17 +296,21 @@ public void close() {
 
   @Override
   public long getBufferedSizeInMemory() {
+    long bloomBufferSize = bloomFilter == null ? 0 : bloomFilter.getBufferedSize();
     return repetitionLevelColumn.getBufferedSize()
         + definitionLevelColumn.getBufferedSize()
         + dataColumn.getBufferedSize()
-        + pageWriter.getMemSize();
+        + pageWriter.getMemSize()
+        + bloomBufferSize;
   }
 
   public long allocatedSize() {
+    long bloomAllocatedSize = bloomFilter == null ? 0 : bloomFilter.getBufferedSize();
     return repetitionLevelColumn.getAllocatedSize()
-    + definitionLevelColumn.getAllocatedSize()
-    + dataColumn.getAllocatedSize()
-    + pageWriter.allocatedSize();
+        + definitionLevelColumn.getAllocatedSize()
+        + dataColumn.getAllocatedSize()
+        + pageWriter.allocatedSize()
+        + bloomAllocatedSize;
   }
 
   public String memUsageString(String indent) {
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterV2.java b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterV2.java
index 9abdee8a52..e041e97b57 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterV2.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterV2.java
@@ -19,6 +19,7 @@
 package org.apache.parquet.column.impl;
 
 import java.io.IOException;
+import java.util.HashMap;
 
 import org.apache.parquet.Ints;
 import org.apache.parquet.bytes.BytesInput;
@@ -30,6 +31,8 @@
 import org.apache.parquet.column.page.PageWriter;
 import org.apache.parquet.column.statistics.Statistics;
 import org.apache.parquet.column.values.ValuesWriter;
+import org.apache.parquet.column.values.bloomfilter.BloomFilter;
+import org.apache.parquet.column.values.bloomfilter.BloomFilterWriter;
 import org.apache.parquet.column.values.rle.RunLengthBitPackingHybridEncoder;
 import org.apache.parquet.io.ParquetEncodingException;
 import org.apache.parquet.io.api.Binary;
@@ -53,6 +56,9 @@ final class ColumnWriterV2 implements ColumnWriter {
   private ValuesWriter dataColumn;
   private int valueCount;
 
+  private BloomFilterWriter bloomFilterWriter;
+  private BloomFilter bloomFilter;
+
   private Statistics<?> statistics;
   private long rowsWrittenSoFar = 0;
 
@@ -69,6 +75,25 @@ public ColumnWriterV2(
     this.dataColumn = props.newValuesWriter(path);
   }
 
+  /** Creates a writer that also fills a Bloom filter when a size is configured for its column. */
+  public ColumnWriterV2(
+    ColumnDescriptor path,
+    PageWriter pageWriter,
+    BloomFilterWriter bloomFilterWriter,
+    ParquetProperties props) {
+    this(path, pageWriter, props);
+
+    this.bloomFilterWriter = bloomFilterWriter;
+    HashMap<String, Long> bloomFilterInfo = props.getBloomFilterInfo();
+    // Nested columns are not supported yet, so only single-element paths get a Bloom filter.
+    if (path.getPath().length == 1) {
+      Long size = bloomFilterInfo.get(path.getPath()[0]); // null: no size configured for column
+      if (size != null) {
+        this.bloomFilter = new BloomFilter(size.intValue());
+      }
+    }
+  }
+
   private void log(Object value, int r, int d) {
     LOG.debug("{} {} r:{} d:{}", path, value, r, d);
   }
@@ -134,6 +159,9 @@ public void write(double value, int repetitionLevel, int definitionLevel) {
     definitionLevel(definitionLevel);
     dataColumn.writeDouble(value);
     statistics.updateStats(value);
+    if (bloomFilter != null) {
+      bloomFilter.insert(bloomFilter.hash(value));
+    }
     ++ valueCount;
   }
 
@@ -149,6 +177,9 @@ public void write(float value, int repetitionLevel, int definitionLevel) {
     definitionLevel(definitionLevel);
     dataColumn.writeFloat(value);
     statistics.updateStats(value);
+    if (bloomFilter != null) {
+      bloomFilter.insert(bloomFilter.hash(value));
+    }
     ++ valueCount;
   }
 
@@ -164,6 +195,9 @@ public void write(Binary value, int repetitionLevel, int definitionLevel) {
     definitionLevel(definitionLevel);
     dataColumn.writeBytes(value);
     statistics.updateStats(value);
+    if (bloomFilter != null) {
+      bloomFilter.insert(bloomFilter.hash(value));
+    }
     ++ valueCount;
   }
 
@@ -194,6 +228,9 @@ public void write(int value, int repetitionLevel, int definitionLevel) {
     definitionLevel(definitionLevel);
     dataColumn.writeInteger(value);
     statistics.updateStats(value);
+    if (bloomFilter != null) {
+      bloomFilter.insert(bloomFilter.hash(value));
+    }
     ++ valueCount;
   }
 
@@ -209,6 +246,9 @@ public void write(long value, int repetitionLevel, int definitionLevel) {
     definitionLevel(definitionLevel);
     dataColumn.writeLong(value);
     statistics.updateStats(value);
+    if (bloomFilter != null) {
+      bloomFilter.insert(bloomFilter.hash(value));
+    }
     ++ valueCount;
   }
 
@@ -227,6 +267,10 @@ public void finalizeColumnChunk() {
       }
       dataColumn.resetDictionary();
     }
+
+    if (bloomFilterWriter != null && bloomFilter != null) {
+      bloomFilterWriter.writeBloomFilter(bloomFilter);
+    }
   }
 
   /**
@@ -234,9 +278,11 @@ public void finalizeColumnChunk() {
    * @return the number of bytes of memory used to buffer the current data
    */
   public long getCurrentPageBufferedSize() {
+    long bloomBufferSize = bloomFilter == null ? 0 : bloomFilter.getBufferedSize();
     return repetitionLevelColumn.getBufferedSize()
         + definitionLevelColumn.getBufferedSize()
-        + dataColumn.getBufferedSize();
+        + dataColumn.getBufferedSize()
+        + bloomBufferSize;
   }
 
   /**
@@ -244,20 +290,24 @@ public long getCurrentPageBufferedSize() {
    * @return the number of bytes of memory used to buffer the current data and the previously written pages
    */
   public long getTotalBufferedSize() {
+    long bloomBufferSize = bloomFilter == null ? 0 : bloomFilter.getBufferedSize();
     return repetitionLevelColumn.getBufferedSize()
         + definitionLevelColumn.getBufferedSize()
         + dataColumn.getBufferedSize()
-        + pageWriter.getMemSize();
+        + pageWriter.getMemSize()
+        + bloomBufferSize;
   }
 
   /**
    * @return actual memory used
    */
   public long allocatedSize() {
+    long bloomFilterSize = bloomFilter == null ? 0 : bloomFilter.getBufferedSize();
     return repetitionLevelColumn.getAllocatedSize()
     + definitionLevelColumn.getAllocatedSize()
     + dataColumn.getAllocatedSize()
-    + pageWriter.allocatedSize();
+    + pageWriter.allocatedSize()
+    + bloomFilterSize;
   }
 
   /**
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilter.java b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilter.java
new file mode 100644
index 0000000000..4548617b1b
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilter.java
@@ -0,0 +1,373 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.column.values.bloomfilter;
+
+import com.google.common.hash.HashFunction;
+import com.google.common.hash.Hashing;
+import org.apache.parquet.Preconditions;
+import org.apache.parquet.bytes.BytesUtils;
+import org.apache.parquet.io.api.Binary;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.IntBuffer;
+
+/**
+ * A Bloom filter is a compact structure that indicates whether an item is definitely not in a
+ * set or is probably in it. This class stores the bit set that represents the set of elements,
+ * together with the hash strategy and the Bloom filter algorithm in use.
+ *
+ * This Bloom filter is implemented using the block-based Bloom filter algorithm from Putze et
+ * al.'s "Cache-, Hash- and Space-Efficient Bloom filters". The basic idea is to hash each item to
+ * a tiny Bloom filter whose size fits in a single cache line or less. This implementation sets 8
+ * bits in each tiny Bloom filter. Each tiny Bloom filter is 32 bytes, to take advantage of 32-byte
+ * SIMD instructions.
+ */
+
+public class BloomFilter {
+  // Bloom filter hash strategy.
+  public enum HashStrategy {
+    MURMUR3_X64_128,
+  }
+
+  // Bloom filter algorithm.
+  public enum Algorithm {
+    BLOCK,
+  }
+
+  // Bytes in a tiny Bloom filter block.
+  private static final int BYTES_PER_FILTER_BLOCK = 32;
+
+  // Default seed for the hash function; the value was originally taken from System.nanoTime().
+  private static final int DEFAULT_SEED = 1361930890;
+
+  // Minimum Bloom filter size, set to the size of one tiny Bloom filter block.
+  public static final int MINIMUM_BLOOM_FILTER_BYTES = 32;
+
+  // Maximum Bloom filter size; set to the default HDFS block size as an upper bound.
+  // This should be reconsidered when implementing the write-side logic.
+  public static final int MAXIMUM_BLOOM_FILTER_BYTES = 128 * 1024 * 1024;
+
+  // The number of bits to set in a tiny Bloom filter
+  private static final int BITS_SET_PER_BLOCK = 8;
+
+  // Header size in bytes: three 4-byte ints (bitset length, hash strategy, algorithm).
+  public static final int HEADER_SIZE = 12;
+
+  // Hash strategy used in this Bloom filter.
+  public final HashStrategy hashStrategy;
+
+  // Algorithm used in this Bloom filter.
+  public final Algorithm algorithm;
+
+  // The underlying byte array for Bloom filter bitset.
+  private byte[] bitset;
+
+  // A little-endian int view over the underlying bitset, used to read and set bits.
+  private IntBuffer intBuffer;
+
+  // Hash function used to compute the hash of column values.
+  private final HashFunction hashFunction;
+
+  // The block-based algorithm needs 8 odd SALT values to calculate the eight bit indexes
+  // to set, one bit in each 32-bit word of a block.
+  private static final int[] SALT = {0x47b6137b, 0x44974d91, 0x8824ad5b, 0xa2b7289d,
+    0x705495c7, 0x2df1424b, 0x9efc4947, 0x5c6bfb31};
+
+  /**
+   * Constructor of Bloom filter.
+   *
+   * @param numBytes The number of bytes for Bloom filter bitset. The range of numBytes should be
+   *                 within [MINIMUM_BLOOM_FILTER_BYTES, MAXIMUM_BLOOM_FILTER_BYTES]; it is moved
+   *                 to the lower/upper bound if it is out of range, and is also rounded up to a
+   *                 power of 2. The filter uses murmur3_x64_128 as its hash function and the
+   *                 block-based algorithm.
+   */
+  public BloomFilter(int numBytes) {
+    this(numBytes, HashStrategy.MURMUR3_X64_128, Algorithm.BLOCK);
+  }
+
+  /**
+   * Constructor of Bloom filter with an explicit hash strategy and algorithm.
+   * Only the MURMUR3_X64_128 hash strategy is supported.
+   *
+   * @param numBytes The number of bytes for Bloom filter bitset
+   * @param hashStrategy The hash strategy of Bloom filter.
+   * @param algorithm The algorithm of Bloom filter.
+   */
+  private BloomFilter(int numBytes, HashStrategy hashStrategy, Algorithm algorithm) {
+    initBitset(numBytes);
+
+    switch (hashStrategy) {
+      case MURMUR3_X64_128:
+        this.hashStrategy = hashStrategy;
+        hashFunction = Hashing.murmur3_128(DEFAULT_SEED);
+        break;
+      default:
+        throw new RuntimeException("Not supported hash strategy");
+    }
+
+    this.algorithm = algorithm;
+  }
+
+
+  /**
+   * Construct the Bloom filter with given bitset, it is used when reconstructing
+   * Bloom filter from parquet file. It uses murmur3_x64_128 as its default hash
+   * function and block-based algorithm as default algorithm.
+   *
+   * @param bitset The given bitset to construct Bloom filter; it is used directly, not copied.
+   */
+  public BloomFilter(byte[] bitset) {
+    this(bitset, HashStrategy.MURMUR3_X64_128, Algorithm.BLOCK);
+  }
+
+  /**
+   * Construct the Bloom filter with given bitset, it is used when reconstructing
+   * Bloom filter from parquet file. The bitset length is not validated here; insert
+   * and find assume it holds a power-of-two number of 32-byte blocks.
+   * @param bitset The given bitset to construct Bloom filter.
+   * @param hashStrategy The hash strategy the Bloom filter applies.
+   * @param algorithm The algorithm of Bloom filter.
+   */
+  private BloomFilter(byte[] bitset, HashStrategy hashStrategy, Algorithm algorithm) {
+    if (bitset == null) {
+      throw new RuntimeException("Given bitset is null");
+    }
+    this.bitset = bitset;
+    this.intBuffer = ByteBuffer.wrap(bitset).order(ByteOrder.LITTLE_ENDIAN).asIntBuffer();
+
+    switch (hashStrategy) {
+      case MURMUR3_X64_128:
+        this.hashStrategy = hashStrategy;
+        hashFunction = Hashing.murmur3_128(DEFAULT_SEED);
+        break;
+      default:
+        throw new RuntimeException("Not supported hash strategy");
+    }
+    this.algorithm = algorithm;
+  }
+
+  /**
+   * Create a new bitset for Bloom filter.
+   *
+   * @param numBytes The number of bytes for Bloom filter bitset. Values below
+   *                 MINIMUM_BLOOM_FILTER_BYTES are raised to that minimum, the result is rounded
+   *                 up to the next power of 2 if it is not one already, and anything that ends up
+   *                 above MAXIMUM_BLOOM_FILTER_BYTES (or overflows to a negative value) is set to
+   *                 MAXIMUM_BLOOM_FILTER_BYTES.
+   */
+  private void initBitset(int numBytes) {
+    if (numBytes < MINIMUM_BLOOM_FILTER_BYTES) {
+      numBytes = MINIMUM_BLOOM_FILTER_BYTES;
+    }
+
+    // Get next power of 2 if it is not power of 2.
+    if ((numBytes & (numBytes - 1)) != 0) {
+      numBytes = Integer.highestOneBit(numBytes) << 1;
+    }
+
+    if (numBytes > MAXIMUM_BLOOM_FILTER_BYTES || numBytes < 0) {
+      numBytes = MAXIMUM_BLOOM_FILTER_BYTES;
+    }
+
+    this.bitset = new byte[numBytes];
+    this.intBuffer = ByteBuffer.wrap(bitset).order(ByteOrder.LITTLE_ENDIAN).asIntBuffer();
+  }
+
+  /**
+   * Write the header (bitset length in bytes, hash strategy, algorithm) followed by the bitset.
+   *
+   * @param out the output stream to write
+   * @throws IOException if writing to the stream fails
+   */
+  public void writeTo(OutputStream out) throws IOException {
+    // Write number of bytes of bitset.
+    out.write(BytesUtils.intToBytes(bitset.length));
+
+    // Write hash strategy
+    out.write(BytesUtils.intToBytes(this.hashStrategy.ordinal()));
+
+    // Write algorithm
+    out.write(BytesUtils.intToBytes(this.algorithm.ordinal()));
+
+    // Write bitset
+    out.write(bitset);
+  }
+
+  /**
+   * Compute the per-word bit masks for a key: word i of a block gets the single bit selected
+   * by the top 5 bits of key * SALT[i].
+   *
+   * @param key the low 32 bits of the element hash
+   * @return one single-bit mask per 32-bit word of a block
+   */
+  private int[] setMask(int key) {
+    int[] mask = new int[BITS_SET_PER_BLOCK];
+
+    for (int i = 0; i < BITS_SET_PER_BLOCK; ++i) {
+      // Multiply by the salt, keep the top 5 bits as a bit index (0..31), then set that bit.
+      mask[i] = 0x1 << ((key * SALT[i]) >>> 27);
+    }
+
+    return mask;
+  }
+
+  /**
+   * Add an element to Bloom filter, the element content is represented by
+   * the hash value of its plain encoding result.
+   *
+   * @param hash the hash result of element.
+   */
+  public void insert(long hash) {
+    // High 32 bits pick the block (assumes a power-of-two block count), low 32 bits pick the bits.
+    int bucketIndex = (int)(hash >> 32) & (bitset.length / BYTES_PER_FILTER_BLOCK - 1);
+    int key = (int)hash;
+    int wordOffset = bucketIndex * (BYTES_PER_FILTER_BLOCK / 4);
+
+    // Calculate mask for bucket.
+    int[] mask = setMask(key);
+    for (int i = 0; i < BITS_SET_PER_BLOCK; i++) {
+      int index = wordOffset + i;
+      intBuffer.put(index, intBuffer.get(index) | mask[i]);
+    }
+  }
+
+  /**
+   * Determine whether an element is in set or not.
+   *
+   * @param hash the hash value of element plain encoding result.
+   * @return false if the element is definitely not in the set, true if it is probably in the set.
+   */
+  public boolean find(long hash) {
+    int bucketIndex = (int)(hash >> 32) & (bitset.length / BYTES_PER_FILTER_BLOCK - 1);
+    int key = (int)hash;
+    int wordOffset = bucketIndex * (BYTES_PER_FILTER_BLOCK / 4);
+
+    // Calculate mask for the tiny Bloom filter.
+    int[] mask = setMask(key);
+    for (int i = 0; i < BITS_SET_PER_BLOCK; i++) {
+      if (0 == (intBuffer.get(wordOffset + i) & mask[i])) {
+        return false;
+      }
+    }
+
+    return true;
+  }
+
+  /**
+   * Calculate optimal size according to the number of distinct values and false positive probability.
+   *
+   * @param n The number of distinct values.
+   * @param p The false positive probability.
+   * @return optimal number of bits for the given n and p, a power of 2 within the allowed range.
+   */
+  public static int optimalNumOfBits(long n, double p) {
+    Preconditions.checkArgument((p > 0.0 && p < 1.0),
+      "FPP should be less than 1.0 and great than 0.0");
+    // Multiply in double arithmetic: -8 * n would silently wrap around in long for very large n.
+    final double m = -8.0 * n / Math.log(1 - Math.pow(p, 1.0 / 8));
+    final double MAX = MAXIMUM_BLOOM_FILTER_BYTES << 3;
+    int numBits = (int)m;
+
+    // Handle overflow.
+    if (m > MAX || m < 0) {
+      numBits = (int)MAX;
+    }
+
+    // Get next power of 2 if bits is not power of 2.
+    if ((numBits & (numBits - 1)) != 0) {
+      numBits = Integer.highestOneBit(numBits) << 1;
+    }
+
+    if (numBits < (MINIMUM_BLOOM_FILTER_BYTES << 3)) {
+      numBits = MINIMUM_BLOOM_FILTER_BYTES << 3;
+    }
+
+    return numBits;
+  }
+
+  /**
+   * Compute hash for int value by using its plain encoding result.
+   *
+   * @param value the value to hash
+   * @return hash result
+   */
+  public long hash(int value) {
+    ByteBuffer plain = ByteBuffer.allocate(Integer.SIZE/Byte.SIZE);
+    plain.order(ByteOrder.LITTLE_ENDIAN).putInt(value);
+    return hashFunction.hashBytes(plain.array()).asLong();
+  }
+
+  /**
+   * Compute hash for long value by using its plain encoding result.
+   *
+   * @param value the value to hash
+   * @return hash result
+   */
+  public long hash(long value) {
+    ByteBuffer plain = ByteBuffer.allocate(Long.SIZE/Byte.SIZE);
+    plain.order(ByteOrder.LITTLE_ENDIAN).putLong(value);
+    return hashFunction.hashBytes(plain.array()).asLong();
+  }
+
+  /**
+   * Compute hash for double value by using its plain encoding result.
+   *
+   * @param value the value to hash
+   * @return hash result
+   */
+  public long hash(double value) {
+    ByteBuffer plain = ByteBuffer.allocate(Double.SIZE/Byte.SIZE);
+    plain.order(ByteOrder.LITTLE_ENDIAN).putDouble(value);
+    return hashFunction.hashBytes(plain.array()).asLong();
+  }
+
+  /**
+   * Compute hash for float value by using its plain encoding result.
+   *
+   * @param value the value to hash
+   * @return hash result
+   */
+  public long hash(float value) {
+    ByteBuffer plain = ByteBuffer.allocate(Float.SIZE/Byte.SIZE);
+    plain.order(ByteOrder.LITTLE_ENDIAN).putFloat(value);
+    return hashFunction.hashBytes(plain.array()).asLong();
+  }
+
+  /**
+   * Compute hash for Binary value by using its plain encoding result.
+   *
+   * @param value the value to hash
+   * @return hash result
+   */
+  public long hash(Binary value) {
+    return hashFunction.hashBytes(value.toByteBuffer()).asLong();
+  }
+
+  /**
+   * Get the size of the underlying bitset.
+   * @return size in bytes.
+   */
+  public long getBufferedSize() {
+    return this.bitset.length;
+  }
+}
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilterReadStore.java b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilterReadStore.java
new file mode 100644
index 0000000000..bdc51755b0
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilterReadStore.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.column.values.bloomfilter;
+
+import org.apache.parquet.column.ColumnDescriptor;
+
+/**
+ * Contains the Bloom filter readers for all columns of a row group.
+ */
+
+public interface BloomFilterReadStore {
+  /**
+   * Get the Bloom filter reader of a column.
+   *
+   * @param path the descriptor of the column
+   * @return the corresponding Bloom filter reader
+   */
+  BloomFilterReader getBloomFilterReader(ColumnDescriptor path);
+}
+
+
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilterReader.java b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilterReader.java
new file mode 100644
index 0000000000..39b25e2a49
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilterReader.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.column.values.bloomfilter;
+
+import org.apache.parquet.column.ColumnDescriptor;
+
+public interface BloomFilterReader {
+  /**
+   * Returns a {@link BloomFilter} for the given column descriptor.
+   *
+   * @param path the descriptor of the column
+   * @return the Bloom filter data for that column, or null if there isn't one
+   */
+  BloomFilter readBloomFilter(ColumnDescriptor path);
+}
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilterWriteStore.java b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilterWriteStore.java
new file mode 100644
index 0000000000..f472104daa
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilterWriteStore.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.column.values.bloomfilter;
+
+import org.apache.parquet.column.ColumnDescriptor;
+
+/**
+ * Contains the Bloom filter writers for all columns of a row group.
+ */
+public interface BloomFilterWriteStore {
+  /**
+   * Get the Bloom filter writer of a column.
+   *
+   * @param path the descriptor for the column
+   * @return the corresponding Bloom filter writer
+   */
+  BloomFilterWriter getBloomFilterWriter(ColumnDescriptor path);
+}
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilterWriter.java b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilterWriter.java
new file mode 100644
index 0000000000..388e779968
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilterWriter.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.column.values.bloomfilter;
+
+public interface BloomFilterWriter {
+  /**
+   * Write a Bloom filter.
+   * An implementation may hold on to the filter and write it out later instead of immediately.
+   *
+   * @param bloomFilter the Bloom filter to write
+   */
+  void writeBloomFilter(BloomFilter bloomFilter);
+}
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
index 1442910c8c..7f2a766a47 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
@@ -439,6 +439,7 @@ private void addRowGroup(ParquetMetadata parquetMetadata, List<RowGroup> rowGrou
           columnMetaData.getTotalSize(),
           columnMetaData.getFirstDataPageOffset());
       columnChunk.meta_data.dictionary_page_offset = columnMetaData.getDictionaryPageOffset();
+      columnChunk.meta_data.setBloom_filter_offset(columnMetaData.getBloomFilterOffset());
       if (!columnMetaData.getStatistics().isEmpty()) {
         columnChunk.meta_data.setStatistics(toParquetStatistics(columnMetaData.getStatistics()));
       }
@@ -1112,6 +1113,7 @@ public ParquetMetadata fromParquetMetadata(FileMetaData parquetMetadata) throws
                   messageType.getType(path.toArray()).asPrimitiveType()),
               metaData.data_page_offset,
               metaData.dictionary_page_offset,
+              metaData.bloom_filter_offset,
               metaData.num_values,
               metaData.total_compressed_size,
               metaData.total_uncompressed_size);
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/BloomFilterDataReader.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/BloomFilterDataReader.java
new file mode 100644
index 0000000000..6b861e55c5
--- /dev/null
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/BloomFilterDataReader.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.hadoop;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.parquet.Strings;
+import org.apache.parquet.column.ColumnDescriptor;
+import org.apache.parquet.column.values.bloomfilter.BloomFilter;
+import org.apache.parquet.column.values.bloomfilter.BloomFilterReader;
+import org.apache.parquet.hadoop.metadata.BlockMetaData;
+import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
+import org.apache.parquet.io.ParquetDecodingException;
+
+/**
+ * A {@link BloomFilterReader} implementation that reads Bloom filter data from
+ * an open {@link ParquetFileReader}.
+ * Filters are loaded on first request and cached per column; cache access is synchronized.
+ */
+
+public class BloomFilterDataReader implements BloomFilterReader {
+  private final ParquetFileReader reader;
+  private final Map<String, ColumnChunkMetaData> columns;
+  private final Map<String, BloomFilter> cache = new HashMap<>();
+
+  public BloomFilterDataReader(ParquetFileReader fileReader, BlockMetaData block) {
+    this.reader = fileReader;
+    this.columns = new HashMap<>();
+    for (ColumnChunkMetaData column : block.getColumns()) {
+      columns.put(column.getPath().toDotString(), column);
+    }
+  }
+
+  @Override
+  public BloomFilter readBloomFilter(ColumnDescriptor descriptor) {
+    String dotPath = Strings.join(descriptor.getPath(), ".");
+    ColumnChunkMetaData column = columns.get(dotPath);
+    if (column == null) {
+      throw new ParquetDecodingException(
+        "Cannot load Bloom filter data, unknown column: " + dotPath);
+    }
+
+    // Every cache access happens under the lock: a plain HashMap must not be
+    // read while another thread may be inserting into it.
+    synchronized (cache) {
+      BloomFilter bloomFilter = cache.get(dotPath);
+      if (bloomFilter != null) {
+        return bloomFilter;
+      }
+      try {
+        bloomFilter = reader.readBloomFilter(column);
+      } catch (IOException e) {
+        throw new ParquetDecodingException(
+          "Failed to read Bloom data", e);
+      }
+      if (bloomFilter != null) {
+        cache.put(dotPath, bloomFilter);
+      }
+      return bloomFilter;
+    }
+  }
+}
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageWriteStore.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageWriteStore.java
index 82c288fe43..58b1450dfb 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageWriteStore.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageWriteStore.java
@@ -28,6 +28,7 @@
 import java.util.Set;
 
 import org.apache.parquet.bytes.BytesInput;
+import org.apache.parquet.bytes.ByteBufferAllocator;
 import org.apache.parquet.bytes.ConcatenatingByteArrayCollector;
 import org.apache.parquet.column.ColumnDescriptor;
 import org.apache.parquet.column.Encoding;
@@ -35,20 +36,22 @@
 import org.apache.parquet.column.page.PageWriteStore;
 import org.apache.parquet.column.page.PageWriter;
 import org.apache.parquet.column.statistics.Statistics;
+import org.apache.parquet.column.values.bloomfilter.BloomFilter;
+import org.apache.parquet.column.values.bloomfilter.BloomFilterWriteStore;
+import org.apache.parquet.column.values.bloomfilter.BloomFilterWriter;
 import org.apache.parquet.format.converter.ParquetMetadataConverter;
 import org.apache.parquet.hadoop.CodecFactory.BytesCompressor;
 import org.apache.parquet.io.ParquetEncodingException;
 import org.apache.parquet.schema.MessageType;
-import org.apache.parquet.bytes.ByteBufferAllocator;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-class ColumnChunkPageWriteStore implements PageWriteStore {
+class ColumnChunkPageWriteStore implements PageWriteStore, BloomFilterWriteStore {
   private static final Logger LOG = LoggerFactory.getLogger(ColumnChunkPageWriteStore.class);
 
   private static ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter();
 
-  private static final class ColumnChunkPageWriter implements PageWriter {
+  private static final class ColumnChunkPageWriter implements PageWriter, BloomFilterWriter {
 
     private final ColumnDescriptor path;
     private final BytesCompressor compressor;
@@ -56,6 +59,7 @@ private static final class ColumnChunkPageWriter implements PageWriter {
     private final ByteArrayOutputStream tempOutputStream = new ByteArrayOutputStream();
     private final ConcatenatingByteArrayCollector buf;
     private DictionaryPage dictionaryPage;
+    private BloomFilter bloomFilter;
 
     private long uncompressedLength;
     private long compressedLength;
@@ -194,6 +198,10 @@ public long getMemSize() {
 
     public void writeToFileWriter(ParquetFileWriter writer) throws IOException {
       writer.startColumn(path, totalValueCount, compressor.getCodecName());
+      if (bloomFilter != null) {
+        writer.writeBloomFilter(bloomFilter);
+      }
+
       if (dictionaryPage != null) {
         writer.writeDictionaryPage(dictionaryPage);
         // tracking the dictionary encoding is handled in writeDictionaryPage
@@ -238,6 +246,11 @@ public String memUsageString(String prefix) {
       return buf.memUsageString(prefix + " ColumnChunkPageWriter");
     }
 
+    @Override
+    public void writeBloomFilter(BloomFilter bloomFilter) {
+      this.bloomFilter = bloomFilter;
+    }
+
   }
 
   private final Map<ColumnDescriptor, ColumnChunkPageWriter> writers = new HashMap<ColumnDescriptor, ColumnChunkPageWriter>();
@@ -255,6 +268,11 @@ public PageWriter getPageWriter(ColumnDescriptor path) {
     return writers.get(path);
   }
 
+  @Override
+  public BloomFilterWriter getBloomFilterWriter(ColumnDescriptor path) {
+    return writers.get(path);
+  }
+
   public void flushToFileWriter(ParquetFileWriter writer) throws IOException {
     for (ColumnDescriptor path : schema.getColumns()) {
       ColumnChunkPageWriter pageWriter = writers.get(path);
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java
index 15fe592dbe..a7d07ba007 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java
@@ -31,6 +31,8 @@
 import java.io.IOException;
 import java.io.SequenceInputStream;
 import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.IntBuffer;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
@@ -54,19 +56,20 @@
 
 import org.apache.parquet.ParquetReadOptions;
 import org.apache.parquet.bytes.ByteBufferInputStream;
+import org.apache.parquet.bytes.BytesInput;
+import org.apache.parquet.column.ColumnDescriptor;
 import org.apache.parquet.column.Encoding;
 import org.apache.parquet.column.page.DictionaryPageReadStore;
 import org.apache.parquet.compression.CompressionCodecFactory.BytesInputDecompressor;
 import org.apache.parquet.filter2.compat.FilterCompat;
 import org.apache.parquet.filter2.compat.RowGroupFilter;
 
-import org.apache.parquet.bytes.BytesInput;
-import org.apache.parquet.column.ColumnDescriptor;
 import org.apache.parquet.column.page.DataPage;
 import org.apache.parquet.column.page.DataPageV1;
 import org.apache.parquet.column.page.DataPageV2;
 import org.apache.parquet.column.page.DictionaryPage;
 import org.apache.parquet.column.page.PageReadStore;
+import org.apache.parquet.column.values.bloomfilter.BloomFilter;
 import org.apache.parquet.hadoop.metadata.ColumnPath;
 import org.apache.parquet.format.DataPageHeader;
 import org.apache.parquet.format.DataPageHeaderV2;
@@ -83,10 +86,10 @@
 import org.apache.parquet.hadoop.util.HadoopInputFile;
 import org.apache.parquet.HadoopReadOptions;
 import org.apache.parquet.hadoop.util.HiddenFileFilter;
-import org.apache.parquet.io.SeekableInputStream;
 import org.apache.parquet.hadoop.util.counters.BenchmarkCounter;
 import org.apache.parquet.io.ParquetDecodingException;
 import org.apache.parquet.io.InputFile;
+import org.apache.parquet.io.SeekableInputStream;
 import org.apache.parquet.schema.MessageType;
 import org.apache.parquet.schema.PrimitiveType;
 import org.slf4j.Logger;
@@ -859,6 +862,9 @@ public DictionaryPageReader getDictionaryReader(BlockMetaData block) {
     return new DictionaryPageReader(this, block);
   }
 
+  public BloomFilterDataReader getBloomFilterDataReader(BlockMetaData block) {
+    return new BloomFilterDataReader(this, block);
+  }
   /**
    * Reads and decompresses a dictionary page for the given column chunk.
    *
@@ -910,6 +916,35 @@ private DictionaryPage readCompressedDictionary(
         converter.getEncoding(dictHeader.getEncoding()));
   }
 
+  /**
+   * Reads Bloom filter data for the given column chunk.
+   *
+   * @param meta a column's ColumnChunkMetaData to read the Bloom filter from
+   * @return a BloomFilter object, or null if the chunk's Bloom filter offset is Long.MAX_VALUE
+   * @throws IOException if there is an error while reading the Bloom filter.
+   */
+  public BloomFilter readBloomFilter(ColumnChunkMetaData meta) throws IOException {
+    long bloomFilterOffset = meta.getBloomFilterOffset();
+
+    if (bloomFilterOffset == Long.MAX_VALUE) return null;
+    f.seek(bloomFilterOffset);
+
+    // Read Bloom filter data header; readFully because read() may return fewer bytes.
+    byte[] bytes = new byte[BloomFilter.HEADER_SIZE];
+    f.readFully(bytes);
+    ByteBuffer bloomHeader = ByteBuffer.wrap(bytes);
+    IntBuffer headerBuffer = bloomHeader.order(ByteOrder.LITTLE_ENDIAN).asIntBuffer();
+    int numBytes = headerBuffer.get();
+    // Not used below; indexing values() fails fast on an unknown ordinal in the header.
+    BloomFilter.HashStrategy hash = BloomFilter.HashStrategy.values()[headerBuffer.get()];
+    BloomFilter.Algorithm algorithm = BloomFilter.Algorithm.values()[headerBuffer.get()];
+
+    byte[] bitset = new byte[numBytes];
+    f.readFully(bitset);
+
+    return new BloomFilter(bitset);
+  }
+
   @Override
   public void close() throws IOException {
     try {
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java
index c98c247965..7c52b1b93f 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java
@@ -48,6 +48,7 @@
 import org.apache.parquet.column.EncodingStats;
 import org.apache.parquet.column.page.DictionaryPage;
 import org.apache.parquet.column.statistics.Statistics;
+import org.apache.parquet.column.values.bloomfilter.BloomFilter;
 import org.apache.parquet.hadoop.ParquetOutputFormat.JobSummaryLevel;
 import org.apache.parquet.hadoop.metadata.ColumnPath;
 import org.apache.parquet.format.converter.ParquetMetadataConverter;
@@ -117,6 +118,7 @@ public static enum Mode {
   private long currentChunkValueCount;            // set in startColumn
   private long currentChunkFirstDataPage;         // set in startColumn (out.pos())
   private long currentChunkDictionaryPageOffset;  // set in writeDictionaryPage
+  private long currentChunkBloomFilterDataOffset; // set in writeBloomFilter
 
   // set when end is called
   private ParquetMetadata footer = null;
@@ -348,6 +350,16 @@ public void writeDictionaryPage(DictionaryPage dictionaryPage) throws IOExceptio
     currentEncodings.add(dictionaryPage.getEncoding());
   }
 
+  /**
+   * Writes a Bloom filter at the current output position and records that position as the chunk's Bloom filter offset
+   * @param bloomFilter the bloom filter of column values
+   * @throws IOException if there is an error while writing
+   */
+  public void writeBloomFilter(BloomFilter bloomFilter) throws IOException {
+    state = state.write();
+    currentChunkBloomFilterDataOffset = out.getPos();
+    bloomFilter.writeTo(out);
+  }
 
   /**
    * writes a single page
@@ -484,6 +496,7 @@ public void endColumn() throws IOException {
         currentStatistics,
         currentChunkFirstDataPage,
         currentChunkDictionaryPageOffset,
+        currentChunkBloomFilterDataOffset,
         currentChunkValueCount,
         compressedLength,
         uncompressedLength));
@@ -622,6 +635,7 @@ public void appendRowGroup(SeekableInputStream from, BlockMetaData rowGroup,
           chunk.getStatistics(),
           newChunkStart,
           newChunkStart,
+          chunk.getBloomFilterOffset(),
           chunk.getValueCount(),
           chunk.getTotalSize(),
           chunk.getTotalUncompressedSize()));
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetInputFormat.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetInputFormat.java
index 2c21e52035..3348ed8eb2 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetInputFormat.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetInputFormat.java
@@ -129,6 +129,12 @@ public class ParquetInputFormat<T> extends FileInputFormat<Void, T> {
    */
   public static final String DICTIONARY_FILTERING_ENABLED = "parquet.filter.dictionary.enabled";
 
+  /**
+   * key to configure whether row group bloom filtering is enabled
+   */
+  public static final String BLOOM_FILTERING_ENABLED = "parquet.filter.bloom.enabled";
+  public static final boolean BLOOM_FILTER_ENABLED_DEFAULT = false;
+
   /**
    * key to turn on or off task side metadata loading (default true)
    * if true then metadata is read on the task side and some tasks may finish immediately.
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetOutputFormat.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetOutputFormat.java
index ff5bab397d..6e191b005c 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetOutputFormat.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetOutputFormat.java
@@ -143,6 +143,9 @@ public static enum JobSummaryLevel {
   public static final String MIN_ROW_COUNT_FOR_PAGE_SIZE_CHECK = "parquet.page.size.row.check.min";
   public static final String MAX_ROW_COUNT_FOR_PAGE_SIZE_CHECK = "parquet.page.size.row.check.max";
   public static final String ESTIMATE_PAGE_SIZE_CHECK = "parquet.page.size.check.estimate";
+  public static final String BLOOM_FILTER_COLUMN_NAMES = "parquet.bloom.filter.column.names";
+  public static final String BLOOM_FILTER_SIZES = "parquet.bloom.filter.size";
+  public static final String ENABLE_BLOOM_FILTER = "parquet.enable.bloom.filter";
 
   public static JobSummaryLevel getJobSummaryLevel(Configuration conf) {
     String level = conf.get(JOB_SUMMARY_LEVEL);
@@ -208,6 +211,14 @@ public static boolean getEnableDictionary(JobContext jobContext) {
     return getEnableDictionary(getConfiguration(jobContext));
   }
 
+  public static void setBloomFilterColumnNames(Job job, String names) {
+    getConfiguration(job).set(BLOOM_FILTER_COLUMN_NAMES, names);
+  }
+
+  public static String getBloomFilterColumnNames(JobContext jobContext) {
+    return getBloomFilterColumnNames(getConfiguration(jobContext));
+  }
+
   public static int getBlockSize(JobContext jobContext) {
     return getBlockSize(getConfiguration(jobContext));
   }
@@ -241,6 +252,19 @@ public static boolean getEnableDictionary(Configuration configuration) {
         ENABLE_DICTIONARY, ParquetProperties.DEFAULT_IS_DICTIONARY_ENABLED);
   }
 
+  public static String getBloomFilterColumnNames(Configuration conf) {
+    return conf.get(BLOOM_FILTER_COLUMN_NAMES);
+  }
+
+  public static boolean getEnableBloomFilter(Configuration configuration) {
+    return configuration.getBoolean(ENABLE_BLOOM_FILTER,
+        ParquetProperties.DEFAULT_BLOOM_FILTER_ENABLED);
+  }
+
+  public static String getBloomFilterSizes(Configuration configuration) {
+    return configuration.get(BLOOM_FILTER_SIZES);
+  }
+
   public static int getMinRowCountForPageSizeCheck(Configuration configuration) {
     return configuration.getInt(MIN_ROW_COUNT_FOR_PAGE_SIZE_CHECK,
         ParquetProperties.DEFAULT_MINIMUM_RECORD_COUNT_FOR_CHECK);
@@ -361,6 +385,8 @@ public RecordWriter<Void, T> getRecordWriter(Configuration conf, Path file, Comp
     ParquetProperties props = ParquetProperties.builder()
         .withPageSize(getPageSize(conf))
         .withDictionaryPageSize(getDictionaryPageSize(conf))
+        .withBloomFilterEnabled(getEnableBloomFilter(conf))
+        .withBloomFilterInfo(getBloomFilterColumnNames(conf), getBloomFilterSizes(conf))
         .withDictionaryEncoding(getEnableDictionary(conf))
         .withWriterVersion(getWriterVersion(conf))
         .estimateRowCountForPageSizeCheck(getEstimatePageSizeCheck(conf))
@@ -383,6 +409,9 @@ public RecordWriter<Void, T> getRecordWriter(Configuration conf, Path file, Comp
       LOG.info("Page size checking is: {}", (props.estimateNextSizeCheck() ? "estimated" : "constant"));
       LOG.info("Min row count for page size check is: {}", props.getMinRowCountForPageSizeCheck());
       LOG.info("Max row count for page size check is: {}", props.getMaxRowCountForPageSizeCheck());
+      LOG.info("Parquet Bloom Filter is {}", props.isBloomFilterEnabled()? "on": "off");
+      LOG.info("Parquet Bloom filter column names are: {}", props.getBloomFilterInfo().keySet());
+      LOG.info("Parquet Bloom filter column sizes are: {}", props.getBloomFilterInfo().values());
     }
 
     WriteContext init = writeSupport.init(conf);
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java
index fb94247ed7..9f476f6d07 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java
@@ -124,6 +124,7 @@ && positiveLongFitsInAnInt(totalUncompressedSize)) {
           statistics,
           firstDataPage,
           dictionaryPageOffset,
+          Long.MAX_VALUE,
           valueCount,
           totalSize,
           totalUncompressedSize);
@@ -134,6 +135,50 @@ && positiveLongFitsInAnInt(totalUncompressedSize)) {
           statistics,
           firstDataPage,
           dictionaryPageOffset,
+          Long.MAX_VALUE,
+          valueCount,
+          totalSize,
+          totalUncompressedSize);
+    }
+  }
+
+  public static ColumnChunkMetaData get(
+      ColumnPath path,
+      PrimitiveType type,
+      CompressionCodecName codec,
+      EncodingStats encodingStats,
+      Set<Encoding> encodings,
+      Statistics statistics,
+      long firstDataPage,
+      long dictionaryPageOffset,
+      long bloomFilterDataOffset,
+      long valueCount,
+      long totalSize,
+      long totalUncompressedSize) {
+    // to save space we store those always positive longs in ints when they fit.
+    // bloomFilterDataOffset is checked too: the int variant narrows it like the other fields.
+    if (positiveLongFitsInAnInt(firstDataPage)
+      && positiveLongFitsInAnInt(dictionaryPageOffset) && positiveLongFitsInAnInt(bloomFilterDataOffset)
+      && positiveLongFitsInAnInt(valueCount)
+      && positiveLongFitsInAnInt(totalSize) && positiveLongFitsInAnInt(totalUncompressedSize)) {
+      return new IntColumnChunkMetaData(
+          path, type, codec,
+          encodingStats, encodings,
+          statistics,
+          firstDataPage,
+          dictionaryPageOffset,
+          bloomFilterDataOffset,
+          valueCount,
+          totalSize,
+          totalUncompressedSize);
+    } else {
+      return new LongColumnChunkMetaData(
+          path, type, codec,
+          encodingStats, encodings,
+          statistics,
+          firstDataPage,
+          dictionaryPageOffset,
+          bloomFilterDataOffset,
           valueCount,
           totalSize,
           totalUncompressedSize);
@@ -217,6 +262,11 @@ public PrimitiveType getPrimitiveType() {
    */
   abstract public long getDictionaryPageOffset();
 
+  /**
+   * @return the location of the Bloom filter data if any
+   */
+  abstract public long getBloomFilterOffset();
+
   /**
    * @return count of values in this block of the column
    */
@@ -258,6 +308,7 @@ class IntColumnChunkMetaData extends ColumnChunkMetaData {
 
   private final int firstDataPage;
   private final int dictionaryPageOffset;
+  private final int bloomFilterDataOffset;
   private final int valueCount;
   private final int totalSize;
   private final int totalUncompressedSize;
@@ -284,12 +335,14 @@ class IntColumnChunkMetaData extends ColumnChunkMetaData {
       Statistics statistics,
       long firstDataPage,
       long dictionaryPageOffset,
+      long bloomFilterDataOffset,
       long valueCount,
       long totalSize,
       long totalUncompressedSize) {
     super(encodingStats, ColumnChunkProperties.get(path, type, codec, encodings));
     this.firstDataPage = positiveLongToInt(firstDataPage);
     this.dictionaryPageOffset = positiveLongToInt(dictionaryPageOffset);
+    this.bloomFilterDataOffset = positiveLongToInt(bloomFilterDataOffset);
     this.valueCount = positiveLongToInt(valueCount);
     this.totalSize = positiveLongToInt(totalSize);
     this.totalUncompressedSize = positiveLongToInt(totalUncompressedSize);
@@ -331,6 +384,13 @@ public long getDictionaryPageOffset() {
     return intToPositiveLong(dictionaryPageOffset);
   }
 
+  /**
+   * @return the location of bloom filter if any
+   */
+  public long getBloomFilterOffset() {
+    return intToPositiveLong(bloomFilterDataOffset);
+  }
+
   /**
    * @return count of values in this block of the column
    */
@@ -363,6 +423,7 @@ class LongColumnChunkMetaData extends ColumnChunkMetaData {
 
   private final long firstDataPageOffset;
   private final long dictionaryPageOffset;
+  private final long bloomFilterDataOffset;
   private final long valueCount;
   private final long totalSize;
   private final long totalUncompressedSize;
@@ -389,12 +450,14 @@ class LongColumnChunkMetaData extends ColumnChunkMetaData {
       Statistics statistics,
       long firstDataPageOffset,
       long dictionaryPageOffset,
+      long bloomFilterDataOffset,
       long valueCount,
       long totalSize,
       long totalUncompressedSize) {
     super(encodingStats, ColumnChunkProperties.get(path, type, codec, encodings));
     this.firstDataPageOffset = firstDataPageOffset;
     this.dictionaryPageOffset = dictionaryPageOffset;
+    this.bloomFilterDataOffset = bloomFilterDataOffset;
     this.valueCount = valueCount;
     this.totalSize = totalSize;
     this.totalUncompressedSize = totalUncompressedSize;
@@ -415,6 +478,13 @@ public long getDictionaryPageOffset() {
     return dictionaryPageOffset;
   }
 
+  /**
+   * @return the location of the bloom filter if any
+   */
+  public long getBloomFilterOffset() {
+    return bloomFilterDataOffset;
+  }
+
   /**
    * @return count of values in this block of the column
    */
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileWriter.java b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileWriter.java
index 095b575c80..636515dc8f 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileWriter.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileWriter.java
@@ -39,6 +39,7 @@
 import org.apache.parquet.column.page.PageReader;
 import org.apache.parquet.column.statistics.BinaryStatistics;
 import org.apache.parquet.column.statistics.LongStatistics;
+import org.apache.parquet.column.values.bloomfilter.*;
 import org.apache.parquet.format.Statistics;
 import org.apache.parquet.hadoop.metadata.*;
 import org.apache.parquet.hadoop.util.HiddenFileFilter;
@@ -132,6 +133,44 @@ public void testWriteMode() throws Exception {
     testFile.delete();
   }
 
+  @Test
+  public void testBloomWriteRead() throws Exception {
+    MessageType schema = MessageTypeParser.parseMessageType("message test { required binary foo; }");
+    File testFile = temp.newFile();
+    testFile.delete();
+
+    Path path = new Path(testFile.toURI());
+    Configuration configuration = new Configuration();
+    configuration.set("parquet.bloomFilter.filter.column.names", "foo");
+    String colPath[] = {"foo"};
+    ColumnDescriptor col = schema.getColumnDescription(colPath);
+
+    BinaryStatistics stats1 = new BinaryStatistics();
+
+    ParquetFileWriter w = new ParquetFileWriter(configuration, schema, path);
+    w.start();
+    w.startBlock(3);
+    w.startColumn(col, 5, CODEC);
+    w.writeDataPage(2, 4, BytesInput.from(BYTES1),stats1, BIT_PACKED, BIT_PACKED, PLAIN);
+    w.writeDataPage(3, 4, BytesInput.from(BYTES1),stats1, BIT_PACKED, BIT_PACKED, PLAIN);
+    BloomFilter bloomData = new BloomFilter(0);
+    bloomData.insert(bloomData.hash(Binary.fromString("hello")));
+    bloomData.insert(bloomData.hash(Binary.fromString("world")));
+    long blStarts = w.getPos();
+    w.writeBloomFilter(bloomData);
+    w.endColumn();
+    w.endBlock();
+    w.end(new HashMap<String, String>());
+    ParquetMetadata readFooter = ParquetFileReader.readFooter(configuration, path);
+    assertEquals("bloomFilter offset", blStarts, readFooter.getBlocks().get(0).getColumns().get(0).getBloomFilterOffset());
+    ParquetFileReader r = new ParquetFileReader(configuration, readFooter.getFileMetaData(), path,
+      Arrays.asList(readFooter.getBlocks().get(0)), Arrays.asList(schema.getColumnDescription(colPath)));
+    BloomFilterReader bloomFilterReader =  r.getBloomFilterDataReader(readFooter.getBlocks().get(0));
+    BloomFilter bloomDataRead = bloomFilterReader.readBloomFilter(col);
+    assertTrue(bloomDataRead.find(bloomData.hash(Binary.fromString("hello"))));
+    assertTrue(bloomDataRead.find(bloomData.hash(Binary.fromString("world"))));
+  }
+
   @Test
   public void testWriteRead() throws Exception {
     File testFile = temp.newFile();
diff --git a/pom.xml b/pom.xml
index 7b3f36fe5b..ee8ae94fb7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -96,7 +96,7 @@
     <semver.api.version>0.9.33</semver.api.version>
     <slf4j.version>1.7.22</slf4j.version>
     <avro.version>1.8.2</avro.version>
-    <guava.version>20.0</guava.version>
+    <guava.version>24.0-jre</guava.version>
     <brotli-codec.version>0.1.1</brotli-codec.version>
     <mockito.version>1.10.19</mockito.version>
 

From 1a0875beddd46d2d226fd1e9b4e1f356a1f5212a Mon Sep 17 00:00:00 2001
From: Junjie Chen <jimmyjchen@tencent.com>
Date: Sun, 21 Oct 2018 01:04:41 +0800
Subject: [PATCH 2/9] Align to parquet-cpp side code and address comments

---
 .../apache/parquet/cli/util/Expressions.java  |   4 +-
 .../parquet/column/ParquetProperties.java     |  38 +--
 .../column/impl/ColumnWriteStoreV1.java       |   8 +-
 .../column/impl/ColumnWriteStoreV2.java       |   8 +-
 .../parquet/column/impl/ColumnWriterV1.java   |  80 +++--
 .../parquet/column/impl/ColumnWriterV2.java   |  32 +-
 .../bloomfilter/BlockSplitBloomFilter.java    | 318 ++++++++++++++++++
 .../values/bloomfilter/BloomFilter.java       | 296 +---------------
 .../TestBlockSplitBloomFilter.java            | 129 +++++++
 .../hadoop/ColumnChunkPageWriteStore.java     |   1 +
 .../parquet/hadoop/ParquetFileReader.java     |   5 +-
 .../parquet/hadoop/ParquetOutputFormat.java   |  18 +-
 .../parquet/hadoop/TestParquetFileWriter.java |   8 +-
 pom.xml                                       |   4 +-
 14 files changed, 583 insertions(+), 366 deletions(-)
 create mode 100644 parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BlockSplitBloomFilter.java
 create mode 100644 parquet-column/src/test/java/org/apache/parquet/column/values/bloomfilter/TestBlockSplitBloomFilter.java

diff --git a/parquet-cli/src/main/java/org/apache/parquet/cli/util/Expressions.java b/parquet-cli/src/main/java/org/apache/parquet/cli/util/Expressions.java
index d18ef559f2..06b28b46ae 100644
--- a/parquet-cli/src/main/java/org/apache/parquet/cli/util/Expressions.java
+++ b/parquet-cli/src/main/java/org/apache/parquet/cli/util/Expressions.java
@@ -19,7 +19,7 @@
 
 package org.apache.parquet.cli.util;
 
-import com.google.common.base.MoreObjects;
+import com.google.common.base.Objects;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
 import org.apache.avro.Schema;
@@ -385,7 +385,7 @@ public int hashCode() {
 
     @Override
     public String toString() {
-      return MoreObjects.toStringHelper(this)
+      return Objects.toStringHelper(this)
           .add("type", type)
           .add("value", value)
           .add("children", children)
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java b/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java
index 94f1978f68..f01888aed8 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java
@@ -1,4 +1,4 @@
-/* 
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -87,11 +87,11 @@ public static WriterVersion fromString(String name) {
   private final ByteBufferAllocator allocator;
   private final ValuesWriterFactory valuesWriterFactory;
   private final boolean enableBloomFilter;
-  private final HashMap<String, Long> bloomFilterInfo;
+  private final HashMap<String, Long> bloomFilterExpectValues;
 
   private ParquetProperties(WriterVersion writerVersion, int pageSize, int dictPageSize, boolean enableDict, int minRowCountForPageSizeCheck,
                             int maxRowCountForPageSizeCheck, boolean estimateNextSizeCheck, ByteBufferAllocator allocator,
-                            ValuesWriterFactory writerFactory, boolean enableBloomFilter, HashMap<String, Long> bloomFilterInfo) {
+                            ValuesWriterFactory writerFactory, boolean enableBloomFilter, HashMap<String, Long> bloomFilterExpectValues) {
     this.pageSizeThreshold = pageSize;
     this.initialSlabSize = CapacityByteArrayOutputStream
       .initialSlabSizeHeuristic(MIN_SLAB_SIZE, pageSizeThreshold, 10);
@@ -103,7 +103,7 @@ private ParquetProperties(WriterVersion writerVersion, int pageSize, int dictPag
     this.estimateNextSizeCheck = estimateNextSizeCheck;
     this.allocator = allocator;
     this.enableBloomFilter = enableBloomFilter;
-    this.bloomFilterInfo = bloomFilterInfo;
+    this.bloomFilterExpectValues = bloomFilterExpectValues;
     this.valuesWriterFactory = writerFactory;
   }
 
@@ -169,8 +169,8 @@ public boolean isBloomFilterEnabled() {
     return enableBloomFilter;
   }
 
-  public HashMap<String, Long> getBloomFilterInfo() {
-    return bloomFilterInfo;
+  public HashMap<String, Long> getBloomFilterExpectValues() {
+    return bloomFilterExpectValues;
   }
 
   public ColumnWriteStore newColumnWriteStore(MessageType schema,
@@ -214,7 +214,7 @@ public static class Builder {
     private int dictPageSize = DEFAULT_DICTIONARY_PAGE_SIZE;
     private boolean enableDict = DEFAULT_IS_DICTIONARY_ENABLED;
     private boolean enableBloomFilter = DEFAULT_BLOOM_FILTER_ENABLED;
-    private HashMap<String, Long> bloomFilterInfo = new HashMap<>();
+    private HashMap<String, Long> bloomFilterExpectValues = new HashMap<>();
     private WriterVersion writerVersion = DEFAULT_WRITER_VERSION;
     private int minRowCountForPageSizeCheck = DEFAULT_MINIMUM_RECORD_COUNT_FOR_CHECK;
     private int maxRowCountForPageSizeCheck = DEFAULT_MAXIMUM_RECORD_COUNT_FOR_CHECK;
@@ -234,7 +234,7 @@ private Builder(ParquetProperties toCopy) {
       this.estimateNextSizeCheck = toCopy.estimateNextSizeCheck;
       this.allocator = toCopy.allocator;
       this.enableBloomFilter = toCopy.enableBloomFilter;
-      this.bloomFilterInfo = toCopy.bloomFilterInfo;
+      this.bloomFilterExpectValues = toCopy.bloomFilterExpectValues;
     }
 
     /**
@@ -288,19 +288,19 @@ public Builder withBloomFilterEnabled(boolean enableBloomFilter) {
     /**
      * Set Bloom filter info for columns.
      *
-     * @param names the columns to be enable for Bloom filter
-     * @param sizes the sizes corresponding to columns
+     * @param bloomFilterColumnNames comma-separated names of the columns to be enabled for Bloom filter
+     * @param bloomFilterDistinctNumbers comma-separated expected distinct value counts, one per column name
      * @return this builder for method chaining
      */
-    public Builder withBloomFilterInfo(String names, String sizes) {
-      String[] bloomFilterColumns = names.split(",");
-      String[] bloomFilterSizes = sizes.split(",");
+    public Builder withBloomFilterInfo(String bloomFilterColumnNames, String bloomFilterDistinctNumbers) {
+      String[] columnNames = bloomFilterColumnNames.split(",");
+      String[] expectedDistinctNumber = bloomFilterDistinctNumbers.split(",");
 
-      Preconditions.checkArgument(bloomFilterColumns.length == bloomFilterSizes.length,
+      Preconditions.checkArgument(columnNames.length == expectedDistinctNumber.length,
           "Column names are not matched to sizes");
 
-      for (int i = 0; i < bloomFilterColumns.length; i++) {
-        bloomFilterInfo.put(bloomFilterColumns[i], Long.getLong(bloomFilterSizes[i]));
+      for (int i = 0; i < columnNames.length; i++) {
+        this.bloomFilterExpectValues.put(columnNames[i].trim(), Long.parseLong(expectedDistinctNumber[i].trim()));
       }
 
       return this;
@@ -354,7 +354,7 @@ public ParquetProperties build() {
         new ParquetProperties(writerVersion, pageSize, dictPageSize,
           enableDict, minRowCountForPageSizeCheck, maxRowCountForPageSizeCheck,
           estimateNextSizeCheck, allocator, valuesWriterFactory,
-          enableBloomFilter, bloomFilterInfo);
+          enableBloomFilter, bloomFilterExpectValues);
       // we pass a constructed but uninitialized factory to ParquetProperties above as currently
       // creation of ValuesWriters is invoked from within ParquetProperties. In the future
       // we'd like to decouple that and won't need to pass an object to properties and then pass the
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreV1.java b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreV1.java
index bd401430ad..7e2876077a 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreV1.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreV1.java
@@ -1,4 +1,4 @@
-/* 
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -70,7 +70,7 @@ public Set<ColumnDescriptor> getColumnDescriptors() {
   private ColumnWriterV1 newMemColumn(ColumnDescriptor path) {
     PageWriter pageWriter = pageWriteStore.getPageWriter(path);
 
-    if (props.isBloomFilterEnabled() && props.getBloomFilterInfo() != null) {
+    if (props.isBloomFilterEnabled() && props.getBloomFilterExpectValues() != null) {
       BloomFilterWriter bloomFilterWriter = bloomFilterWriteStore.getBloomFilterWriter(path);
       return new ColumnWriterV1(path, pageWriter, bloomFilterWriter, props);
     } else {
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreV2.java b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreV2.java
index 057660ff5d..6c20b8bb87 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreV2.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreV2.java
@@ -1,4 +1,4 @@
-/* 
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -79,7 +79,7 @@ public ColumnWriteStoreV2(
 
     for (ColumnDescriptor path : schema.getColumns()) {
       PageWriter pageWriter = pageWriteStore.getPageWriter(path);
-      if (props.isBloomFilterEnabled() && props.getBloomFilterInfo() != null) {
+      if (props.isBloomFilterEnabled() && props.getBloomFilterExpectValues() != null) {
         BloomFilterWriter bloomFilterWriter = bloomFilterWriteStore.getBloomFilterWriter(path);
         mcolumns.put(path, new ColumnWriterV2(path, pageWriter, bloomFilterWriter, props));
       } else {
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterV1.java b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterV1.java
index daf51cfa0d..c5fc9dc549 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterV1.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterV1.java
@@ -1,4 +1,4 @@
-/* 
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -30,6 +30,7 @@
 import org.apache.parquet.column.page.PageWriter;
 import org.apache.parquet.column.statistics.Statistics;
 import org.apache.parquet.column.values.ValuesWriter;
+import org.apache.parquet.column.values.bloomfilter.BlockSplitBloomFilter;
 import org.apache.parquet.column.values.bloomfilter.BloomFilter;
 import org.apache.parquet.column.values.bloomfilter.BloomFilterWriter;
 import org.apache.parquet.io.ParquetEncodingException;
@@ -66,14 +67,19 @@ public ColumnWriterV1(ColumnDescriptor path, PageWriter pageWriter,
     this(path, pageWriter, props);
 
     // Current not support nested column.
-    if (path.getPath().length == 1) {
-      this.bloomFilterWriter = bloomFilterWriter;
-      HashMap<String, Long> bloomFilterInfo = props.getBloomFilterInfo();
-      String column = path.getPath()[0];
-      if (bloomFilterInfo.keySet().contains(column)) {
-        this.bloomFilter = new BloomFilter(bloomFilterInfo.get(column).intValue());
-      }
+    if (path.getPath().length != 1 || bloomFilterWriter == null) {
+      return;
+    }
+
+    this.bloomFilterWriter = bloomFilterWriter;
+    HashMap<String, Long> bloomFilterExpectValues = props.getBloomFilterExpectValues();
+    String column = path.getPath()[0];
+    if (bloomFilterExpectValues.keySet().contains(column)) {
+      int optimalNumOfBits = BlockSplitBloomFilter.optimalNumOfBits(bloomFilterExpectValues.get(column).intValue(),
+        BlockSplitBloomFilter.DEFAULT_FPP);
+      this.bloomFilter = new BlockSplitBloomFilter(optimalNumOfBits/8);
     }
+
   }
 
   public ColumnWriterV1(ColumnDescriptor path, PageWriter pageWriter,
@@ -161,6 +167,36 @@ private void updateStatistics(boolean value) {
    statistics.updateStats(value);
   }
 
+  private void updateBloomFilter(int value) {
+    if (bloomFilter != null) {
+      bloomFilter.insert(bloomFilter.hash(value));
+    }
+  }
+
+  private void updateBloomFilter(long value) {
+    if (bloomFilter != null) {
+      bloomFilter.insert(bloomFilter.hash(value));
+    }
+  }
+
+  private void updateBloomFilter(double value) {
+    if (bloomFilter != null) {
+      bloomFilter.insert(bloomFilter.hash(value));
+    }
+  }
+
+  private void updateBloomFilter(float value) {
+    if (bloomFilter != null) {
+      bloomFilter.insert(bloomFilter.hash(value));
+    }
+  }
+
+  private void updateBloomFilter(Binary value) {
+    if (bloomFilter != null) {
+      bloomFilter.insert(bloomFilter.hash(value));
+    }
+  }
+
   private void writePage() {
     if (DEBUG) LOG.debug("write page");
     try {
@@ -197,9 +233,7 @@ public void write(double value, int repetitionLevel, int definitionLevel) {
     definitionLevelColumn.writeInteger(definitionLevel);
     dataColumn.writeDouble(value);
     updateStatistics(value);
-    if (bloomFilter != null) {
-      bloomFilter.insert(bloomFilter.hash(value));
-    }
+    updateBloomFilter(value);
     accountForValueWritten();
   }
 
@@ -210,9 +244,7 @@ public void write(float value, int repetitionLevel, int definitionLevel) {
     definitionLevelColumn.writeInteger(definitionLevel);
     dataColumn.writeFloat(value);
     updateStatistics(value);
-    if (bloomFilter != null) {
-      bloomFilter.insert(bloomFilter.hash(value));
-    }
+    updateBloomFilter(value);
     accountForValueWritten();
   }
 
@@ -223,9 +255,7 @@ public void write(Binary value, int repetitionLevel, int definitionLevel) {
     definitionLevelColumn.writeInteger(definitionLevel);
     dataColumn.writeBytes(value);
     updateStatistics(value);
-    if (bloomFilter != null) {
-      bloomFilter.insert(bloomFilter.hash(value));
-    }
+    updateBloomFilter(value);
     accountForValueWritten();
   }
 
@@ -246,9 +276,7 @@ public void write(int value, int repetitionLevel, int definitionLevel) {
     definitionLevelColumn.writeInteger(definitionLevel);
     dataColumn.writeInteger(value);
     updateStatistics(value);
-    if (bloomFilter != null) {
-      bloomFilter.insert(bloomFilter.hash(value));
-    }
+    updateBloomFilter(value);
     accountForValueWritten();
   }
 
@@ -259,9 +287,7 @@ public void write(long value, int repetitionLevel, int definitionLevel) {
     definitionLevelColumn.writeInteger(definitionLevel);
     dataColumn.writeLong(value);
     updateStatistics(value);
-    if (bloomFilter != null) {
-      bloomFilter.insert(bloomFilter.hash(value));
-    }
+    updateBloomFilter(value);
     accountForValueWritten();
   }
 
@@ -296,7 +322,7 @@ public void close() {
 
   @Override
   public long getBufferedSizeInMemory() {
-    long bloomBufferSize = bloomFilter == null ? 0 : bloomFilter.getBufferedSize();
+    long bloomBufferSize = bloomFilter == null ? 0 : bloomFilter.getBitsetSize();
     return repetitionLevelColumn.getBufferedSize()
         + definitionLevelColumn.getBufferedSize()
         + dataColumn.getBufferedSize()
@@ -305,7 +331,7 @@ public long getBufferedSizeInMemory() {
   }
 
   public long allocatedSize() {
-    long bloomAllocatedSize = bloomFilter == null ? 0 : bloomFilter.getBufferedSize();
+    long bloomAllocatedSize = bloomFilter == null ? 0 : bloomFilter.getBitsetSize();
     return repetitionLevelColumn.getAllocatedSize()
         + definitionLevelColumn.getAllocatedSize()
         + dataColumn.getAllocatedSize()
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterV2.java b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterV2.java
index e041e97b57..7b1671407a 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterV2.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterV2.java
@@ -1,4 +1,4 @@
-/* 
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -31,6 +31,7 @@
 import org.apache.parquet.column.page.PageWriter;
 import org.apache.parquet.column.statistics.Statistics;
 import org.apache.parquet.column.values.ValuesWriter;
+import org.apache.parquet.column.values.bloomfilter.BlockSplitBloomFilter;
 import org.apache.parquet.column.values.bloomfilter.BloomFilter;
 import org.apache.parquet.column.values.bloomfilter.BloomFilterWriter;
 import org.apache.parquet.column.values.rle.RunLengthBitPackingHybridEncoder;
@@ -82,15 +83,18 @@ public ColumnWriterV2(
     ParquetProperties props) {
     this(path, pageWriter, props);
 
-    this.bloomFilterWriter = bloomFilterWriter;
-    HashMap<String, Long> bloomFilterInfo = props.getBloomFilterInfo();
-
     // Current not support nested column.
-    if (path.getPath().length == 1) {
-      String column = path.getPath()[0];
-      if (bloomFilterInfo.keySet().contains(column)) {
-        this.bloomFilter = new BloomFilter(bloomFilterInfo.get(column).intValue());
-      }
+    if (path.getPath().length != 1 || bloomFilterWriter == null) {
+      return;
+    }
+
+    this.bloomFilterWriter = bloomFilterWriter;
+    HashMap<String, Long> bloomFilterExpectValues = props.getBloomFilterExpectValues();
+    String column = path.getPath()[0];
+    if (bloomFilterExpectValues.keySet().contains(column)) {
+      int optimalNumOfBits = BlockSplitBloomFilter.optimalNumOfBits(bloomFilterExpectValues.get(column).intValue(),
+        BlockSplitBloomFilter.DEFAULT_FPP);
+      this.bloomFilter = new BlockSplitBloomFilter(optimalNumOfBits/8);
     }
   }
 
@@ -278,7 +282,7 @@ public void finalizeColumnChunk() {
    * @return the number of bytes of memory used to buffer the current data
    */
   public long getCurrentPageBufferedSize() {
-    long bloomBufferSize = bloomFilter == null ? 0 : bloomFilter.getBufferedSize();
+    long bloomBufferSize = bloomFilter == null ? 0 : bloomFilter.getBitsetSize();
     return repetitionLevelColumn.getBufferedSize()
         + definitionLevelColumn.getBufferedSize()
         + dataColumn.getBufferedSize()
@@ -290,7 +294,7 @@ public long getCurrentPageBufferedSize() {
    * @return the number of bytes of memory used to buffer the current data and the previously written pages
    */
   public long getTotalBufferedSize() {
-    long bloomBufferSize = bloomFilter == null ? 0 : bloomFilter.getBufferedSize();
+    long bloomBufferSize = bloomFilter == null ? 0 : bloomFilter.getBitsetSize();
     return repetitionLevelColumn.getBufferedSize()
         + definitionLevelColumn.getBufferedSize()
         + dataColumn.getBufferedSize()
@@ -302,7 +306,7 @@ public long getTotalBufferedSize() {
    * @return actual memory used
    */
   public long allocatedSize() {
-    long bloomFilterSize = bloomFilter == null ? 0 : bloomFilter.getBufferedSize();
+    long bloomFilterSize = bloomFilter == null ? 0 : bloomFilter.getBitsetSize();
     return repetitionLevelColumn.getAllocatedSize()
     + definitionLevelColumn.getAllocatedSize()
     + dataColumn.getAllocatedSize()
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BlockSplitBloomFilter.java b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BlockSplitBloomFilter.java
new file mode 100644
index 0000000000..d2cf4d692c
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BlockSplitBloomFilter.java
@@ -0,0 +1,318 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.column.values.bloomfilter;
+
+import com.google.common.hash.HashFunction;
+import com.google.common.hash.Hashing;
+import org.apache.parquet.Preconditions;
+import org.apache.parquet.bytes.BytesUtils;
+import org.apache.parquet.io.api.Binary;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.IntBuffer;
+
+/**
+ * This Bloom filter is implemented using the block-based Bloom filter algorithm from Putze et al.'s
+ * "Cache-, Hash- and Space-Efficient Bloom filters". The basic idea is to hash each item to a tiny
+ * Bloom filter whose size fits in a single cache line or less. This implementation sets 8 bits in
+ * each tiny Bloom filter. Each tiny Bloom filter is 32 bytes, to take advantage of 32-byte SIMD
+ * instructions.
+ */
+public class BlockSplitBloomFilter extends BloomFilter {
+  // Bytes in a tiny Bloom filter block.
+  private static final int BYTES_PER_FILTER_BLOCK = 32;
+
+  // Default seed for hash function, it comes from System.nanoTime().
+  private static final int DEFAULT_SEED = 1361930890;
+
+  // Minimum Bloom filter size, set to size of a tiny Bloom filter block
+  public static final int MINIMUM_BLOOM_FILTER_BYTES = 32;
+
+  // Maximum Bloom filter size; it is set to the default HDFS block size as an upper bound.
+  // This should be reconsidered when implementing the write-side logic.
+  public static final int MAXIMUM_BLOOM_FILTER_BYTES = 128 * 1024 * 1024;
+
+  // The number of bits to set in a tiny Bloom filter
+  private static final int BITS_SET_PER_BLOCK = 8;
+
+  // The Bloom filter header size in bytes; it covers the bitset length, hash strategy and algorithm.
+  public static final int HEADER_SIZE = 12;
+
+  // The default false positive probability value
+  public static final double DEFAULT_FPP = 0.01;
+
+  // Hash strategy used in this Bloom filter.
+  public final HashStrategy hashStrategy;
+
+  // Algorithm used in this Bloom filter.
+  public final Algorithm algorithm;
+
+  // The underlying byte array for Bloom filter bitset.
+  private byte[] bitset;
+
+  // An integer buffer view of the underlying bitset to help set bits.
+  private IntBuffer intBuffer;
+
+  // Hash function used to compute the hash of a column value.
+  private HashFunction hashFunction;
+
+  // The block-based algorithm needs 8 odd SALT values to calculate the eight indexes
+  // of the bits to set, one bit in each 32-bit word.
+  private static final int SALT[] = {0x47b6137b, 0x44974d91, 0x8824ad5b, 0xa2b7289d,
+    0x705495c7, 0x2df1424b, 0x9efc4947, 0x5c6bfb31};
+
+  /**
+   * Constructor of Bloom filter.
+   *
+   * @param numBytes The number of bytes for the Bloom filter bitset. The value should be within
+   *                 [MINIMUM_BLOOM_FILTER_BYTES, MAXIMUM_BLOOM_FILTER_BYTES]; it is clamped to the
+   *                 lower/upper bound if it is out of range, and is also rounded up to a power
+   *                 of 2. The filter uses murmur3_x64_128 as its hash function and the block-based
+   *                 algorithm.
+   */
+  public BlockSplitBloomFilter(int numBytes) {
+    this(numBytes, HashStrategy.MURMUR3_X64_128, Algorithm.BLOCK);
+  }
+
+  /**
+   * Constructor of Bloom filter with an explicit hash strategy and algorithm. Only the
+   * murmur3_x64_128 hash strategy is supported; any other one raises a RuntimeException.
+   *
+   * @param numBytes The number of bytes for Bloom filter bitset
+   * @param hashStrategy The hash strategy of Bloom filter.
+   * @param algorithm The algorithm of Bloom filter.
+   */
+  private BlockSplitBloomFilter(int numBytes, HashStrategy hashStrategy, Algorithm algorithm) {
+    initBitset(numBytes);
+
+    switch (hashStrategy) {
+      case MURMUR3_X64_128:
+        this.hashStrategy = hashStrategy;
+        hashFunction = Hashing.murmur3_128(DEFAULT_SEED);
+        break;
+      default:
+        throw new RuntimeException("Not supported hash strategy");
+    }
+
+    this.algorithm = algorithm;
+  }
+
+
+  /**
+   * Construct the Bloom filter with the given bitset; it is used when reconstructing a
+   * Bloom filter from a parquet file. It uses murmur3_x64_128 as its default hash
+   * function and the block-based algorithm as its default algorithm.
+   *
+   * @param bitset The given bitset to construct Bloom filter.
+   */
+  public BlockSplitBloomFilter(byte[] bitset) {
+    this(bitset, HashStrategy.MURMUR3_X64_128, Algorithm.BLOCK);
+  }
+
+  /**
+   * Construct the Bloom filter with given bitset, it is used when reconstructing
+   * Bloom filter from parquet file.
+   *
+   * @param bitset The given bitset to construct Bloom filter.
+   * @param hashStrategy The hash strategy Bloom filter apply.
+   * @param algorithm The algorithm of Bloom filter.
+   */
+  private BlockSplitBloomFilter(byte[] bitset, HashStrategy hashStrategy, Algorithm algorithm) {
+    if (bitset == null) {
+      throw new RuntimeException("Given bitset is null");
+    }
+    this.bitset = bitset;
+    this.intBuffer = ByteBuffer.wrap(bitset).order(ByteOrder.LITTLE_ENDIAN).asIntBuffer();
+
+    switch (hashStrategy) {
+      case MURMUR3_X64_128:
+        this.hashStrategy = hashStrategy;
+        hashFunction = Hashing.murmur3_128(DEFAULT_SEED);
+        break;
+      default:
+        throw new RuntimeException("Not supported hash strategy");
+    }
+    this.algorithm = algorithm;
+  }
+
+  /**
+   * Create a new bitset for Bloom filter.
+   *
+   * @param numBytes The number of bytes for the Bloom filter bitset. The value should be within
+   *                 [MINIMUM_BLOOM_FILTER_BYTES, MAXIMUM_BLOOM_FILTER_BYTES]; it is raised to the
+   *                 lower bound if smaller, then rounded up to a power of 2 if it is not one
+   *                 already, and finally capped at the upper bound if the result exceeds it
+   *                 (or has overflowed to a negative value).
+   */
+  private void initBitset(int numBytes) {
+    if (numBytes < MINIMUM_BLOOM_FILTER_BYTES) {
+      numBytes = MINIMUM_BLOOM_FILTER_BYTES;
+    }
+
+    // Get next power of 2 if it is not power of 2.
+    if ((numBytes & (numBytes - 1)) != 0) {
+      numBytes = Integer.highestOneBit(numBytes) << 1;
+    }
+
+    if (numBytes > MAXIMUM_BLOOM_FILTER_BYTES || numBytes < 0) {
+      numBytes = MAXIMUM_BLOOM_FILTER_BYTES;
+    }
+
+    this.bitset = new byte[numBytes];
+    this.intBuffer = ByteBuffer.wrap(bitset).order(ByteOrder.LITTLE_ENDIAN).asIntBuffer();
+  }
+
+  @Override
+  public void writeTo(OutputStream out) throws IOException {
+    // Write number of bytes of bitset.
+    out.write(BytesUtils.intToBytes(bitset.length));
+
+    // Write hash strategy
+    out.write(BytesUtils.intToBytes(this.hashStrategy.ordinal()));
+
+    // Write algorithm
+    out.write(BytesUtils.intToBytes(this.algorithm.ordinal()));
+
+    // Write bitset
+    out.write(bitset);
+  }
+
+  private int[] setMask(int key) {
+    int mask[] = new int[BITS_SET_PER_BLOCK];
+
+    for (int i = 0; i < BITS_SET_PER_BLOCK; ++i) {
+      mask[i] = key * SALT[i];
+    }
+
+    for (int i = 0; i < BITS_SET_PER_BLOCK; ++i) {
+      mask[i] = mask[i] >>> 27;
+    }
+
+    for (int i = 0; i < BITS_SET_PER_BLOCK; ++i) {
+      mask[i] = 0x1 << mask[i];
+    }
+
+    return mask;
+  }
+
+  @Override
+  public void insert(long hash) {
+    int bucketIndex = (int)(hash >> 32) & (bitset.length / BYTES_PER_FILTER_BLOCK - 1);
+    int key = (int)hash;
+
+    // Calculate mask for bucket.
+    int mask[] = setMask(key);
+
+    for (int i = 0; i < BITS_SET_PER_BLOCK; i++) {
+      int value = intBuffer.get(bucketIndex * (BYTES_PER_FILTER_BLOCK / 4) + i);
+      value |= mask[i];
+      intBuffer.put(bucketIndex * (BYTES_PER_FILTER_BLOCK / 4) + i, value);
+    }
+  }
+
+  @Override
+  public boolean find(long hash) {
+    int bucketIndex = (int)(hash >> 32) & (bitset.length / BYTES_PER_FILTER_BLOCK - 1);
+    int key = (int)hash;
+
+    // Calculate mask for the tiny Bloom filter.
+    int mask[] = setMask(key);
+
+    for (int i = 0; i < BITS_SET_PER_BLOCK; i++) {
+      if (0 == (intBuffer.get(bucketIndex * (BYTES_PER_FILTER_BLOCK / 4) + i) & mask[i])) {
+        return false;
+      }
+    }
+
+    return true;
+  }
+
+  /**
+   * Calculate the optimal size from the number of distinct values and the false positive probability.
+   *
+   * @param n The number of distinct values.
+   * @param p The false positive probability.
+   * @return the optimal number of bits for the given n and p.
+   */
+  public static int optimalNumOfBits(long n, double p) {
+    Preconditions.checkArgument((p > 0.0 && p < 1.0),
+      "FPP should be less than 1.0 and great than 0.0");
+
+    final double m = -8 * n / Math.log(1 - Math.pow(p, 1.0 / 8));
+    final double MAX = MAXIMUM_BLOOM_FILTER_BYTES << 3;
+    int numBits = (int)m;
+
+    // Handle overflow.
+    if (m > MAX || m < 0) {
+      numBits = (int)MAX;
+    }
+
+    // Get next power of 2 if bits is not power of 2.
+    if ((numBits & (numBits - 1)) != 0) {
+      numBits = Integer.highestOneBit(numBits) << 1;
+    }
+
+    if (numBits < (MINIMUM_BLOOM_FILTER_BYTES << 3)) {
+      numBits = MINIMUM_BLOOM_FILTER_BYTES << 3;
+    }
+
+    return numBits;
+  }
+
+  @Override
+  public long hash(int value) {
+    ByteBuffer plain = ByteBuffer.allocate(Integer.SIZE/Byte.SIZE);
+    plain.order(ByteOrder.LITTLE_ENDIAN).putInt(value);
+    return hashFunction.hashBytes(plain.array()).asLong();
+  }
+
+  @Override
+  public long hash(long value) {
+    ByteBuffer plain = ByteBuffer.allocate(Long.SIZE/Byte.SIZE);
+    plain.order(ByteOrder.LITTLE_ENDIAN).putLong(value);
+    return hashFunction.hashBytes(plain.array()).asLong();
+  }
+
+  @Override
+  public long hash(double value) {
+    ByteBuffer plain = ByteBuffer.allocate(Double.SIZE/Byte.SIZE);
+    plain.order(ByteOrder.LITTLE_ENDIAN).putDouble(value);
+    return hashFunction.hashBytes(plain.array()).asLong();
+  }
+
+  @Override
+  public long hash(float value) {
+    ByteBuffer plain = ByteBuffer.allocate(Float.SIZE/Byte.SIZE);
+    plain.order(ByteOrder.LITTLE_ENDIAN).putFloat(value);
+    return hashFunction.hashBytes(plain.array()).asLong();
+  }
+
+  @Override
+  public long hash(Binary value) {
+    return hashFunction.hashBytes(value.getBytes()).asLong();
+  }
+
+  @Override
+  public long getBitsetSize() {
+    return this.bitset.length;
+  }
+}
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilter.java b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilter.java
index 4548617b1b..430fab8d61 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilter.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilter.java
@@ -18,32 +18,18 @@
  */
 package org.apache.parquet.column.values.bloomfilter;
 
-import com.google.common.hash.HashFunction;
-import com.google.common.hash.Hashing;
-import org.apache.parquet.Preconditions;
-import org.apache.parquet.bytes.BytesUtils;
 import org.apache.parquet.io.api.Binary;
 
 import java.io.IOException;
 import java.io.OutputStream;
-import java.nio.ByteBuffer;
-import java.nio.ByteOrder;
-import java.nio.IntBuffer;
 
 /**
  * A Bloom filter is a compact structure to indicate whether an item is not in a set or probably
- * in a set. BloomFilter class stores a bit set represents a elements set, a hash strategy and a
- * Bloom filter algorithm.
- *
- * This Bloom filter is implemented using block-based Bloom filter algorithm from Putze et al.'s
- * "Cache-, Hash- and Space-Efficient Bloom filters". The basic idea is to hash the item to a tiny
- * Bloom filter which size fit a single cache line or smaller. This implementation sets 8 bits in
- * each tiny Bloom filter. Each tiny Bloom filter is 32 bytes to take advantage of 32-byte SIMD
- * instruction.
+ * in a set. The Bloom filter usually consists of a bit set that represents a set of elements,
+ * a hash strategy and a Bloom filter algorithm.
  */
-
-public class BloomFilter {
-  // Bloom filter Hash strategy .
+public abstract class BloomFilter {
+  // Bloom filter Hash strategy.
   public enum HashStrategy {
     MURMUR3_X64_128,
   }
@@ -53,203 +39,21 @@ public enum Algorithm {
     BLOCK,
   }
 
-  // Bytes in a tiny Bloom filter block.
-  private static final int BYTES_PER_FILTER_BLOCK = 32;
-
-  // Default seed for hash function, it comes from System.nanoTime().
-  private static final int DEFAULT_SEED = 1361930890;
-
-  // Minimum Bloom filter size, set to size of a tiny Bloom filter block
-  public static final int MINIMUM_BLOOM_FILTER_BYTES = 32;
-
-  // Maximum Bloom filter size, it sets to default HDFS block size for upper boundary check
-  // This should be re-consider when implementing write side logic.
-  public static final int MAXIMUM_BLOOM_FILTER_BYTES = 128 * 1024 * 1024;
-
-  // The number of bits to set in a tiny Bloom filter
-  private static final int BITS_SET_PER_BLOCK = 8;
-
-  // The header of Bloom filter, it includes number of bytes, algorithm and hash enumeration.
-  public static final int HEADER_SIZE = 12;
-
-  // Hash strategy used in this Bloom filter.
-  public final HashStrategy hashStrategy;
-
-  // Algorithm used in this Bloom filter.
-  public final Algorithm algorithm;
-
-  // The underlying byte array for Bloom filter bitset.
-  private byte[] bitset;
-
-  // A integer array buffer of underlying bitset to help setting bits.
-  private IntBuffer intBuffer;
-
-  // Hash function use to compute hash for column value.
-  private HashFunction hashFunction;
-
-  // The block-based algorithm needs 8 odd SALT values to calculate eight index
-  // of bit to set, one bit in 32-bit word.
-  private static final int SALT[] = {0x47b6137b, 0x44974d91, 0x8824ad5b, 0xa2b7289d,
-    0x705495c7, 0x2df1424b, 0x9efc4947, 0x5c6bfb31};
-
-  /**
-   * Constructor of Bloom filter.
-   *
-   * @param numBytes The number of bytes for Bloom filter bitset. The range of num_bytes should be within
-   *                 [MINIMUM_BLOOM_FILTER_BYTES, MAXIMUM_BLOOM_FILTER_BYTES], it will be rounded up/down
-   *                 to lower/upper bound if num_bytes is out of range and also will rounded up to a power
-   *                 of 2. It uses murmur3_x64_128 as its default hash function and block-based algorithm
-   *                 as default algorithm.
-   */
-  public BloomFilter(int numBytes) {
-    this(numBytes, HashStrategy.MURMUR3_X64_128, Algorithm.BLOCK);
-  }
-
-  /**
-   * Constructor of Bloom filter. It uses murmur3_x64_128 as its default hash
-   * function and block-based algorithm as its default algorithm.
-   *
-   * @param numBytes The number of bytes for Bloom filter bitset
-   * @param hashStrategy The hash strategy of Bloom filter.
-   * @param algorithm The algorithm of Bloom filter.
-   */
-  private BloomFilter(int numBytes, HashStrategy hashStrategy, Algorithm algorithm) {
-    initBitset(numBytes);
-
-    switch (hashStrategy) {
-      case MURMUR3_X64_128:
-        this.hashStrategy = hashStrategy;
-        hashFunction = Hashing.murmur3_128(DEFAULT_SEED);
-        break;
-      default:
-        throw new RuntimeException("Not supported hash strategy");
-    }
-
-    this.algorithm = algorithm;
-  }
-
-
-  /**
-   * Construct the Bloom filter with given bitset, it is used when reconstructing
-   * Bloom filter from parquet file. It use murmur3_x64_128 as its default hash
-   * function and block-based algorithm as default algorithm.
-   *
-   * @param bitset The given bitset to construct Bloom filter.
-   */
-  public BloomFilter(byte[] bitset) {
-    this(bitset, HashStrategy.MURMUR3_X64_128, Algorithm.BLOCK);
-  }
-
-  /**
-   * Construct the Bloom filter with given bitset, it is used when reconstructing
-   * Bloom filter from parquet file.
-   *
-   * @param bitset The given bitset to construct Bloom filter.
-   * @param hashStrategy The hash strategy Bloom filter apply.
-   * @param algorithm The algorithm of Bloom filter.
-   */
-  private BloomFilter(byte[] bitset, HashStrategy hashStrategy, Algorithm algorithm) {
-    if (bitset == null) {
-      throw new RuntimeException("Given bitset is null");
-    }
-    this.bitset = bitset;
-    this.intBuffer = ByteBuffer.wrap(bitset).order(ByteOrder.LITTLE_ENDIAN).asIntBuffer();
-
-    switch (hashStrategy) {
-      case MURMUR3_X64_128:
-        this.hashStrategy = hashStrategy;
-        hashFunction = Hashing.murmur3_128(DEFAULT_SEED);
-        break;
-      default:
-        throw new RuntimeException("Not supported hash strategy");
-    }
-    this.algorithm = algorithm;
-  }
-
-  /**
-   * Create a new bitset for Bloom filter.
-   *
-   * @param numBytes The number of bytes for Bloom filter bitset. The range of num_bytes should be within
-   *                 [MINIMUM_BLOOM_FILTER_BYTES, MAXIMUM_BLOOM_FILTER_BYTES], it will be rounded up/down
-   *                 to lower/upper bound if num_bytes is out of range and also will rounded up to a power
-   *                 of 2. It uses murmur3_x64_128 as its default hash function and block-based algorithm
-   *                 as default algorithm.
-   */
-  private void initBitset(int numBytes) {
-    if (numBytes < MINIMUM_BLOOM_FILTER_BYTES) {
-      numBytes = MINIMUM_BLOOM_FILTER_BYTES;
-    }
-
-    // Get next power of 2 if it is not power of 2.
-    if ((numBytes & (numBytes - 1)) != 0) {
-      numBytes = Integer.highestOneBit(numBytes) << 1;
-    }
-
-    if (numBytes > MAXIMUM_BLOOM_FILTER_BYTES || numBytes < 0) {
-      numBytes = MAXIMUM_BLOOM_FILTER_BYTES;
-    }
-
-    this.bitset = new byte[numBytes];
-    this.intBuffer = ByteBuffer.wrap(bitset).order(ByteOrder.LITTLE_ENDIAN).asIntBuffer();
-  }
-
   /**
    * Write the Bloom filter to an output stream. It writes the Bloom filter header includes the
    * bitset's length in size of byte, the hash strategy, the algorithm, and the bitset.
    *
    * @param out the output stream to write
    */
-  public void writeTo(OutputStream out) throws IOException {
-    // Write number of bytes of bitset.
-    out.write(BytesUtils.intToBytes(bitset.length));
-
-    // Write hash strategy
-    out.write(BytesUtils.intToBytes(this.hashStrategy.ordinal()));
-
-    // Write algorithm
-    out.write(BytesUtils.intToBytes(this.algorithm.ordinal()));
-
-    // Write bitset
-    out.write(bitset);
-  }
-
-  private int[] setMask(int key) {
-    int mask[] = new int[BITS_SET_PER_BLOCK];
-
-    for (int i = 0; i < BITS_SET_PER_BLOCK; ++i) {
-      mask[i] = key * SALT[i];
-    }
-
-    for (int i = 0; i < BITS_SET_PER_BLOCK; ++i) {
-      mask[i] = mask[i] >>> 27;
-    }
-
-    for (int i = 0; i < BITS_SET_PER_BLOCK; ++i) {
-      mask[i] = 0x1 << mask[i];
-    }
-
-    return mask;
-  }
+  public abstract void writeTo(OutputStream out) throws IOException;
 
   /**
-   * Add an element to Bloom filter, the element content is represented by
+   * Insert an element into the Bloom filter; the element content is represented by
    * the hash value of its plain encoding result.
    *
    * @param hash the hash result of element.
    */
-  public void insert(long hash) {
-    int bucketIndex = (int)(hash >> 32) & (bitset.length / BYTES_PER_FILTER_BLOCK - 1);
-    int key = (int)hash;
-
-    // Calculate mask for bucket.
-    int mask[] = setMask(key);
-
-    for (int i = 0; i < BITS_SET_PER_BLOCK; i++) {
-      int value = intBuffer.get(bucketIndex * (BYTES_PER_FILTER_BLOCK / 4) + i);
-      value |= mask[i];
-      intBuffer.put(bucketIndex * (BYTES_PER_FILTER_BLOCK / 4) + i, value);
-    }
-  }
+  public abstract void insert(long hash);
 
   /**
    * Determine whether an element is in set or not.
@@ -257,53 +61,7 @@ public void insert(long hash) {
    * @param hash the hash value of element plain encoding result.
    * @return false if element is must not in set, true if element probably in set.
    */
-  public boolean find(long hash) {
-    int bucketIndex = (int)(hash >> 32) & (bitset.length / BYTES_PER_FILTER_BLOCK - 1);
-    int key = (int)hash;
-
-    // Calculate mask for the tiny Bloom filter.
-    int mask[] = setMask(key);
-
-    for (int i = 0; i < BITS_SET_PER_BLOCK; i++) {
-      if (0 == (intBuffer.get(bucketIndex * (BYTES_PER_FILTER_BLOCK / 4) + i) & mask[i])) {
-        return false;
-      }
-    }
-
-    return true;
-  }
-
-  /**
-   * Calculate optimal size according to the number of distinct values and false positive probability.
-   *
-   * @param n: The number of distinct values.
-   * @param p: The false positive probability.
-   * @return optimal number of bits of given n and p.
-   */
-  public static int optimalNumOfBits(long n, double p) {
-    Preconditions.checkArgument((p > 0.0 && p < 1.0),
-      "FPP should be less than 1.0 and great than 0.0");
-
-    final double m = -8 * n / Math.log(1 - Math.pow(p, 1.0 / 8));
-    final double MAX = MAXIMUM_BLOOM_FILTER_BYTES << 3;
-    int numBits = (int)m;
-
-    // Handle overflow.
-    if (m > MAX || m < 0) {
-      numBits = (int)MAX;
-    }
-
-    // Get next power of 2 if bits is not power of 2.
-    if ((numBits & (numBits - 1)) != 0) {
-      numBits = Integer.highestOneBit(numBits) << 1;
-    }
-
-    if (numBits < (MINIMUM_BLOOM_FILTER_BYTES << 3)) {
-      numBits = MINIMUM_BLOOM_FILTER_BYTES << 3;
-    }
-
-    return numBits;
-  }
+  public abstract boolean find(long hash);
 
   /**
    * Compute hash for int value by using its plain encoding result.
@@ -311,11 +69,7 @@ public static int optimalNumOfBits(long n, double p) {
    * @param value the value to hash
    * @return hash result
    */
-  public long hash(int value) {
-    ByteBuffer plain = ByteBuffer.allocate(Integer.SIZE/Byte.SIZE);
-    plain.order(ByteOrder.LITTLE_ENDIAN).putInt(value);
-    return hashFunction.hashBytes(plain.array()).asLong();
-  }
+  public abstract long hash(int value);
 
   /**
    * Compute hash for long value by using its plain encoding result.
@@ -323,11 +77,7 @@ public long hash(int value) {
    * @param value the value to hash
    * @return hash result
    */
-  public long hash(long value) {
-    ByteBuffer plain = ByteBuffer.allocate(Long.SIZE/Byte.SIZE);
-    plain.order(ByteOrder.LITTLE_ENDIAN).putLong(value);
-    return hashFunction.hashBytes(plain.array()).asLong();
-  }
+  public abstract long hash(long value) ;
 
   /**
    * Compute hash for double value by using its plain encoding result.
@@ -335,11 +85,7 @@ public long hash(long value) {
    * @param value the value to hash
    * @return hash result
    */
-  public long hash(double value) {
-    ByteBuffer plain = ByteBuffer.allocate(Double.SIZE/Byte.SIZE);
-    plain.order(ByteOrder.LITTLE_ENDIAN).putDouble(value);
-    return hashFunction.hashBytes(plain.array()).asLong();
-  }
+  public abstract long hash(double value);
 
   /**
    * Compute hash for float value by using its plain encoding result.
@@ -347,27 +93,19 @@ public long hash(double value) {
    * @param value the value to hash
    * @return hash result
    */
-  public long hash(float value) {
-    ByteBuffer plain = ByteBuffer.allocate(Float.SIZE/Byte.SIZE);
-    plain.order(ByteOrder.LITTLE_ENDIAN).putFloat(value);
-    return hashFunction.hashBytes(plain.array()).asLong();
-  }
-
+  public abstract long hash(float value);
   /**
    * Compute hash for Binary value by using its plain encoding result.
    *
    * @param value the value to hash
    * @return hash result
    */
-  public long hash(Binary value) {
-      return hashFunction.hashBytes(value.toByteBuffer()).asLong();
-  }
+  public abstract long hash(Binary value);
 
   /**
-   * Get allocated buffer size.
-   * @return size in byte.
+   * Get the number of bytes for bitset in this Bloom filter.
+   *
+   * @return The number of bytes for bitset in this Bloom filter.
    */
-  public long getBufferedSize() {
-    return this.bitset.length;
-  }
+  public abstract long getBitsetSize();
 }
diff --git a/parquet-column/src/test/java/org/apache/parquet/column/values/bloomfilter/TestBlockSplitBloomFilter.java b/parquet-column/src/test/java/org/apache/parquet/column/values/bloomfilter/TestBlockSplitBloomFilter.java
new file mode 100644
index 0000000000..542b9cd25a
--- /dev/null
+++ b/parquet-column/src/test/java/org/apache/parquet/column/values/bloomfilter/TestBlockSplitBloomFilter.java
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.parquet.column.values.bloomfilter;
+
+
+  import java.io.File;
+  import java.io.FileInputStream;
+  import java.io.FileOutputStream;
+  import java.io.IOException;
+  import java.nio.ByteBuffer;
+  import java.nio.ByteOrder;
+  import java.util.ArrayList;
+  import java.util.List;
+  import java.util.Random;
+
+  import jdk.nashorn.internal.ir.Block;
+  import org.apache.parquet.column.values.RandomStr;
+  import org.apache.parquet.io.api.Binary;
+  import org.junit.Rule;
+  import org.junit.Test;
+  import org.junit.rules.TemporaryFolder;
+
+  import static org.junit.Assert.assertEquals;
+  import static org.junit.Assert.assertTrue;
+
+public class TestBlockSplitBloomFilter {
+
+  @Test
+  public void testConstructor () throws IOException {
+    BloomFilter bloomFilter1 = new BlockSplitBloomFilter(0);
+    assertEquals(bloomFilter1.getBitsetSize(), BlockSplitBloomFilter.MINIMUM_BLOOM_FILTER_BYTES);
+
+    BloomFilter bloomFilter2 = new BlockSplitBloomFilter(256 * 1024 * 1024);
+    assertEquals(bloomFilter2.getBitsetSize(), BlockSplitBloomFilter.MAXIMUM_BLOOM_FILTER_BYTES);
+
+    BloomFilter bloomFilter3 = new BlockSplitBloomFilter(1000);
+    assertEquals(bloomFilter3.getBitsetSize(), 1024); // NOTE(review): JUnit's order is (expected, actual); swapped in this file
+  }
+
+  @Rule
+  public final TemporaryFolder temp = new TemporaryFolder();
+  /*
+   * Covers the basic operations: inserting hashes, writing the filter to a file,
+   * reading the three leading int fields and the bitset back, and finding the values again.
+   */
+  @Test
+  public void testBasic () throws IOException {
+    final String testStrings[] = {"hello", "parquet", "bloom", "filter"};
+    BloomFilter bloomFilter = new BlockSplitBloomFilter(1024);
+
+    for(int i = 0; i < testStrings.length; i++) {
+      bloomFilter.insert(bloomFilter.hash(Binary.fromString(testStrings[i])));
+    }
+
+    File testFile = temp.newFile();
+    FileOutputStream fileOutputStream = new FileOutputStream(testFile);
+    bloomFilter.writeTo(fileOutputStream);
+    fileOutputStream.close();
+
+    FileInputStream fileInputStream = new FileInputStream(testFile); // NOTE(review): never closed in this test
+
+    byte[] value = new byte[4];
+
+    fileInputStream.read(value); // NOTE(review): the number of bytes actually read is not checked
+    int length = ByteBuffer.wrap(value).order(ByteOrder.LITTLE_ENDIAN).getInt();
+    assertEquals(length, 1024);
+
+    fileInputStream.read(value);
+    int hash = ByteBuffer.wrap(value).order(ByteOrder.LITTLE_ENDIAN).getInt();
+    assertEquals(hash, BloomFilter.HashStrategy.MURMUR3_X64_128.ordinal());
+
+    fileInputStream.read(value);
+    int algorithm = ByteBuffer.wrap(value).order(ByteOrder.LITTLE_ENDIAN).getInt();
+    assertEquals(algorithm, BloomFilter.Algorithm.BLOCK.ordinal());
+
+    byte[] bitset = new byte[length];
+    fileInputStream.read(bitset);
+    bloomFilter = new BlockSplitBloomFilter(bitset);
+
+    for(int i = 0; i < testStrings.length; i++) {
+      assertTrue(bloomFilter.find(bloomFilter.hash(Binary.fromString(testStrings[i]))));
+    }
+  }
+
+  @Test
+  public void testFPP() throws IOException {
+    final int totalCount = 100000;
+    final double FPP = 0.01;
+    final long SEED = 104729;
+
+    BloomFilter bloomFilter = new BlockSplitBloomFilter(BlockSplitBloomFilter.optimalNumOfBits(totalCount, FPP));
+    List<String> strings = new ArrayList<>();
+    RandomStr randomStr = new RandomStr(new Random(SEED));
+    for(int i = 0; i < totalCount; i++) {
+      String str = randomStr.get(10);
+      strings.add(str);
+      bloomFilter.insert(bloomFilter.hash(Binary.fromString(str)));
+    }
+
+    // exist counts the number of times find() returns true for the probe strings.
+    int exist = 0;
+    for (int i = 0; i < totalCount; i++) {
+      String str = randomStr.get(8);
+      if (bloomFilter.find(bloomFilter.hash(Binary.fromString(str)))) {
+        exist ++;
+      }
+    }
+
+    // With FPP 0.01 and 100000 probes, exist is expected to stay below 1000.
+    assertTrue(exist < totalCount * FPP);
+  }
+}
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageWriteStore.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageWriteStore.java
index 58b1450dfb..caa41fc7c0 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageWriteStore.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageWriteStore.java
@@ -36,6 +36,7 @@
 import org.apache.parquet.column.page.PageWriteStore;
 import org.apache.parquet.column.page.PageWriter;
 import org.apache.parquet.column.statistics.Statistics;
+import org.apache.parquet.column.values.bloomfilter.BlockSplitBloomFilter;
 import org.apache.parquet.column.values.bloomfilter.BloomFilter;
 import org.apache.parquet.column.values.bloomfilter.BloomFilterWriteStore;
 import org.apache.parquet.column.values.bloomfilter.BloomFilterWriter;
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java
index a7d07ba007..3975bf9f48 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java
@@ -60,6 +60,7 @@
 import org.apache.parquet.column.ColumnDescriptor;
 import org.apache.parquet.column.Encoding;
 import org.apache.parquet.column.page.DictionaryPageReadStore;
+import org.apache.parquet.column.values.bloomfilter.BlockSplitBloomFilter;
 import org.apache.parquet.compression.CompressionCodecFactory.BytesInputDecompressor;
 import org.apache.parquet.filter2.compat.FilterCompat;
 import org.apache.parquet.filter2.compat.RowGroupFilter;
@@ -930,7 +931,7 @@ public BloomFilter readBloomFilter(ColumnChunkMetaData meta) throws IOException
     f.seek(bloomFilterOffset);
 
     // Read Bloom filter data header.
-    byte[] bytes = new byte[BloomFilter.HEADER_SIZE];
+    byte[] bytes = new byte[BlockSplitBloomFilter.HEADER_SIZE];
     f.read(bytes);
     ByteBuffer bloomHeader = ByteBuffer.wrap(bytes);
     IntBuffer headerBuffer = bloomHeader.order(ByteOrder.LITTLE_ENDIAN).asIntBuffer();
@@ -942,7 +943,7 @@ public BloomFilter readBloomFilter(ColumnChunkMetaData meta) throws IOException
     byte[] bitset = new byte[numBytes];
     f.readFully(bitset);
 
-    return new BloomFilter(bitset);
+    return new BlockSplitBloomFilter(bitset);
   }
 
   @Override
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetOutputFormat.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetOutputFormat.java
index 6e191b005c..ffcf5c6a32 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetOutputFormat.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetOutputFormat.java
@@ -1,4 +1,4 @@
-/* 
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -144,7 +144,7 @@ public static enum JobSummaryLevel {
   public static final String MAX_ROW_COUNT_FOR_PAGE_SIZE_CHECK = "parquet.page.size.row.check.max";
   public static final String ESTIMATE_PAGE_SIZE_CHECK = "parquet.page.size.check.estimate";
   public static final String BLOOM_FILTER_COLUMN_NAMES = "parquet.bloom.filter.column.names";
-  public static final String BLOOM_FILTER_SIZES = "parquet.bloom.filter.size";
+  public static final String BLOOM_FILTER_EXPECT_DISTINCT_NUMBERS = "parquet.bloom.filter.expected.distinct.numbers";
   public static final String ENABLE_BLOOM_FILTER = "parquet.enable.bloom.filter";
 
   public static JobSummaryLevel getJobSummaryLevel(Configuration conf) {
@@ -261,8 +261,8 @@ public static boolean getEnableBloomFilter(Configuration configuration) {
         ParquetProperties.DEFAULT_BLOOM_FILTER_ENABLED);
   }
 
-  public static String getBloomFilterSizes(Configuration configuration) {
-    return configuration.get(BLOOM_FILTER_SIZES);
+  public static String getBloomFilterExpectedDistinctNumbers(Configuration configuration) {
+    return configuration.get(BLOOM_FILTER_EXPECT_DISTINCT_NUMBERS);
   }
 
   public static int getMinRowCountForPageSizeCheck(Configuration configuration) {
@@ -386,7 +386,7 @@ public RecordWriter<Void, T> getRecordWriter(Configuration conf, Path file, Comp
         .withPageSize(getPageSize(conf))
         .withDictionaryPageSize(getDictionaryPageSize(conf))
         .withBloomFilterEnabled(getEnableBloomFilter(conf))
-        .withBloomFilterInfo(getBloomFilterColumnNames(conf), getBloomFilterSizes(conf))
+        .withBloomFilterInfo(getBloomFilterColumnNames(conf), getBloomFilterExpectedDistinctNumbers(conf))
         .withDictionaryEncoding(getEnableDictionary(conf))
         .withWriterVersion(getWriterVersion(conf))
         .estimateRowCountForPageSizeCheck(getEstimatePageSizeCheck(conf))
@@ -410,8 +410,8 @@ public RecordWriter<Void, T> getRecordWriter(Configuration conf, Path file, Comp
       LOG.info("Min row count for page size check is: {}", props.getMinRowCountForPageSizeCheck());
       LOG.info("Max row count for page size check is: {}", props.getMaxRowCountForPageSizeCheck());
       LOG.info("Parquet Bloom Filter is {}", props.isBloomFilterEnabled()? "on": "off");
-      LOG.info("Parquet Bloom filter column names are: {}", props.getBloomFilterInfo().keySet());
-      LOG.info("Parquet Bloom filter column sizes are: {}", props.getBloomFilterInfo().values());
+      LOG.info("Parquet Bloom filter column names are: {}", props.getBloomFilterExpectValues().keySet());
+      LOG.info("Parquet Bloom filter column expect distinct values are: {}", props.getBloomFilterExpectValues().values());
     }
 
     WriteContext init = writeSupport.init(conf);
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileWriter.java b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileWriter.java
index 636515dc8f..535394b370 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileWriter.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileWriter.java
@@ -1,4 +1,4 @@
-/* 
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -153,7 +153,7 @@ public void testBloomWriteRead() throws Exception {
     w.startColumn(col, 5, CODEC);
     w.writeDataPage(2, 4, BytesInput.from(BYTES1),stats1, BIT_PACKED, BIT_PACKED, PLAIN);
     w.writeDataPage(3, 4, BytesInput.from(BYTES1),stats1, BIT_PACKED, BIT_PACKED, PLAIN);
-    BloomFilter bloomData = new BloomFilter(0);
+    BloomFilter bloomData = new BlockSplitBloomFilter(0);
     bloomData.insert(bloomData.hash(Binary.fromString("hello")));
     bloomData.insert(bloomData.hash(Binary.fromString("world")));
     long blStarts = w.getPos();
diff --git a/pom.xml b/pom.xml
index ee8ae94fb7..6e6902847b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -81,7 +81,7 @@
     <hadoop1.version>1.2.1</hadoop1.version>
     <cascading.version>2.7.1</cascading.version>
     <cascading3.version>3.1.2</cascading3.version>
-    <parquet.format.version>2.4.0</parquet.format.version>
+    <parquet.format.version>2.7.0-SNAPSHOT</parquet.format.version>
     <previous.version>1.7.0</previous.version>
     <thrift.executable>thrift</thrift.executable>
     <scala.version>2.10.6</scala.version>
@@ -96,7 +96,7 @@
     <semver.api.version>0.9.33</semver.api.version>
     <slf4j.version>1.7.22</slf4j.version>
     <avro.version>1.8.2</avro.version>
-    <guava.version>24.0-jre</guava.version>
+    <guava.version>20.0</guava.version>
     <brotli-codec.version>0.1.1</brotli-codec.version>
     <mockito.version>1.10.19</mockito.version>
 

From e3991eeefc9f6f542a0f5ac5e531b81152812ae1 Mon Sep 17 00:00:00 2001
From: Junjie Chen <jimmyjchen@tencent.com>
Date: Sun, 21 Oct 2018 21:29:35 +0800
Subject: [PATCH 3/9] Rebase to latest master

---
 .travis.yml                                   |    2 +-
 parquet-arrow/pom.xml                         |    2 +-
 .../parquet/arrow/schema/SchemaConverter.java |  269 +--
 .../arrow/schema/TestSchemaConverter.java     |   61 +-
 parquet-avro/pom.xml                          |    4 +-
 .../parquet/avro/AvroSchemaConverter.java     |  154 +-
 .../parquet/avro/TestAvroSchemaConverter.java |   14 +-
 .../cascading/convert/TupleConverter.java     |    9 +-
 .../java/org/apache/parquet/cli/Main.java     |    2 +
 .../java/org/apache/parquet/cli/Util.java     |   10 +
 .../cli/commands/ParquetMetadataCommand.java  |    4 +-
 .../cli/commands/ShowColumnIndexCommand.java  |  157 ++
 .../cli/commands/ShowDictionaryCommand.java   |    4 +-
 .../cli/commands/ShowPagesCommand.java        |    4 +-
 .../apache/parquet/column/ColumnReader.java   |    3 +
 .../parquet/column/ParquetProperties.java     |  125 +-
 .../column/impl/ColumnReadStoreImpl.java      |   14 +-
 .../parquet/column/impl/ColumnReaderBase.java |  760 ++++++++
 .../parquet/column/impl/ColumnReaderImpl.java |  676 +------
 .../column/impl/ColumnWriteStoreBase.java     |  255 +++
 .../column/impl/ColumnWriteStoreV1.java       |  129 +-
 .../column/impl/ColumnWriteStoreV2.java       |  176 +-
 .../parquet/column/impl/ColumnWriterBase.java |  400 +++++
 .../parquet/column/impl/ColumnWriterV1.java   |  323 +---
 .../parquet/column/impl/ColumnWriterV2.java   |  348 +---
 .../impl/SynchronizingColumnReader.java       |  111 ++
 .../apache/parquet/column/page/DataPage.java  |   22 +
 .../parquet/column/page/DataPageV1.java       |   31 +
 .../parquet/column/page/DataPageV2.java       |   52 +
 .../parquet/column/page/PageReadStore.java    |   15 +-
 .../parquet/column/page/PageWriter.java       |   17 +-
 .../parquet/column/values/ValuesReader.java   |   12 +
 .../bloomfilter/BlockSplitBloomFilter.java    |   22 +-
 .../values/bloomfilter/BloomFilter.java       |    2 +-
 .../bloomfilter/BloomFilterReadStore.java     |    3 -
 .../values/bloomfilter/BloomFilterReader.java |    1 +
 .../bloomfilter/BloomFilterWriteStore.java    |    1 +
 .../values/bloomfilter/BloomFilterWriter.java |    8 +-
 .../delta/DeltaBinaryPackingValuesReader.java |    8 +
 .../DeltaLengthByteArrayValuesReader.java     |   12 +-
 .../FixedLenByteArrayPlainValuesReader.java   |    8 +-
 .../values/plain/PlainValuesReader.java       |   36 +-
 ...RunLengthBitPackingHybridValuesWriter.java |    9 +-
 .../values/rle/ZeroIntegerValuesReader.java   |    4 +
 .../filter2/predicate/ValidTypeMap.java       |    7 +-
 .../columnindex/BinaryColumnIndexBuilder.java |  140 ++
 .../column/columnindex/BinaryTruncator.java   |  208 +++
 .../BooleanColumnIndexBuilder.java            |  133 ++
 .../column/columnindex/BoundaryOrder.java     |  352 ++++
 .../column/columnindex/ColumnIndex.java       |   60 +
 .../columnindex/ColumnIndexBuilder.java       |  636 +++++++
 .../columnindex/DoubleColumnIndexBuilder.java |  155 ++
 .../columnindex/FloatColumnIndexBuilder.java  |  155 ++
 .../column/columnindex/IndexIterator.java     |   98 ++
 .../columnindex/IntColumnIndexBuilder.java    |  136 ++
 .../columnindex/LongColumnIndexBuilder.java   |  136 ++
 .../column/columnindex/OffsetIndex.java       |   64 +
 .../columnindex/OffsetIndexBuilder.java       |  175 ++
 .../columnindex/ColumnIndexFilter.java        |  194 +++
 .../filter2/columnindex/ColumnIndexStore.java |   55 +
 .../filter2/columnindex/RowRanges.java        |  288 +++
 .../parquet/schema/ConversionPatterns.java    |   28 +-
 .../org/apache/parquet/schema/GroupType.java  |   36 +-
 .../parquet/schema/LogicalTypeAnnotation.java |  140 +-
 .../apache/parquet/schema/MessageType.java    |    8 +-
 .../apache/parquet/schema/OriginalType.java   |   66 +-
 .../parquet/schema/PrimitiveComparator.java   |    6 +-
 .../parquet/schema/PrimitiveStringifier.java  |  130 +-
 .../apache/parquet/schema/PrimitiveType.java  |  213 ++-
 .../java/org/apache/parquet/schema/Types.java |  137 +-
 .../column/impl/TestColumnReaderImpl.java     |    8 +-
 .../parquet/column/mem/TestMemColumn.java     |   12 +-
 .../column/page/mem/MemPageWriter.java        |    6 +
 .../bitpacking/TestBitPackingColumn.java      |   16 +
 .../TestBlockSplitBloomFilter.java            |   46 +-
 ...naryPackingValuesWriterForIntegerTest.java |   17 +
 ...aBinaryPackingValuesWriterForLongTest.java |   17 +
 .../TestDeltaLengthByteArray.java             |   24 +
 .../deltastrings/TestDeltaByteArray.java      |   19 +
 .../values/dictionary/TestDictionary.java     |  105 ++
 .../filter2/predicate/TestValidTypeMap.java   |    7 +-
 .../columnindex/TestBinaryTruncator.java      |  285 +++
 .../column/columnindex/TestBoundaryOrder.java |  487 ++++++
 .../columnindex/TestColumnIndexBuilder.java   | 1546 +++++++++++++++++
 .../column/columnindex/TestIndexIterator.java |   63 +
 .../columnindex/TestOffsetIndexBuilder.java   |  113 ++
 .../columnindex/TestColumnIndexFilter.java    |  464 +++++
 .../filter2/columnindex/TestRowRanges.java    |  155 ++
 .../parquet/parser/TestParquetParser.java     |    5 +
 .../parquet/schema/TestMessageType.java       |    2 +-
 .../schema/TestPrimitiveComparator.java       |   19 +
 .../schema/TestPrimitiveStringifier.java      |  144 +-
 .../TestTypeBuildersWithLogicalTypes.java     |  408 +++++
 parquet-common/pom.xml                        |   10 +-
 parquet-format-structures/pom.xml             |  206 +++
 .../parquet/format/InterningProtocol.java     |  231 +++
 .../apache/parquet/format/LogicalTypes.java   |   55 +
 .../java/org/apache/parquet/format/Util.java  |  236 +++
 .../parquet/format/event/Consumers.java       |  193 ++
 .../format/event/EventBasedThriftReader.java  |  126 ++
 .../parquet/format/event/FieldConsumer.java   |   39 +
 .../parquet/format/event/TypedConsumer.java   |  205 +++
 .../org/apache/parquet/format/TestUtil.java   |   83 +
 parquet-hadoop/pom.xml                        |    4 +-
 .../org/apache/parquet/HadoopReadOptions.java |    9 +-
 .../apache/parquet/ParquetReadOptions.java    |   20 +-
 .../converter/ParquetMetadataConverter.java   |  357 ++--
 .../parquet/hadoop/BloomFilterDataReader.java |    9 -
 .../hadoop/ColumnChunkPageReadStore.java      |  115 +-
 .../hadoop/ColumnChunkPageWriteStore.java     |   84 +-
 .../hadoop/ColumnIndexFilterUtils.java        |  157 ++
 .../parquet/hadoop/ColumnIndexStoreImpl.java  |  155 ++
 .../hadoop/InternalParquetRecordReader.java   |    6 +-
 .../hadoop/InternalParquetRecordWriter.java   |    8 +-
 .../parquet/hadoop/ParquetFileReader.java     |  432 ++++-
 .../parquet/hadoop/ParquetFileWriter.java     |  335 +++-
 .../parquet/hadoop/ParquetInputFormat.java    |   11 +-
 .../parquet/hadoop/ParquetOutputFormat.java   |   52 +-
 .../apache/parquet/hadoop/ParquetReader.java  |   10 +
 .../apache/parquet/hadoop/ParquetWriter.java  |    2 +-
 .../hadoop/metadata/ColumnChunkMetaData.java  |  107 +-
 .../parquet/hadoop/util/BlocksCombiner.java   |  106 ++
 .../hadoop/metadata/IndexReference.java       |   41 +
 .../filter2/recordlevel/PhoneBookWriter.java  |  105 +-
 .../TestParquetMetadataConverter.java         |   74 +
 .../hadoop/TestColumnChunkPageWriteStore.java |   94 +-
 .../hadoop/TestColumnIndexFiltering.java      |  442 +++++
 .../parquet/hadoop/TestParquetFileWriter.java |  224 ++-
 .../hadoop/TestParquetWriterMergeBlocks.java  |  280 +++
 .../parquet/convert/HiveSchemaConverter.java  |   17 +-
 parquet-pig/pom.xml                           |    4 +-
 .../parquet/pig/PigSchemaConverter.java       |  124 +-
 .../parquet/pig/convert/TupleConverter.java   |   31 +-
 parquet-protobuf/pom.xml                      |   11 +
 .../parquet/proto/ProtoMessageConverter.java  |   43 +-
 .../parquet/proto/ProtoSchemaConverter.java   |   45 +-
 .../parquet/proto/ProtoWriteSupport.java      |   29 +-
 parquet-thrift/pom.xml                        |   11 +
 .../thrift/ThriftSchemaConvertVisitor.java    |   18 +-
 parquet-tools/pom.xml                         |    4 +-
 .../tools/command/ColumnIndexCommand.java     |  182 ++
 .../parquet/tools/command/DumpCommand.java    |    1 -
 .../parquet/tools/command/MergeCommand.java   |   75 +-
 .../parquet/tools/command/MetadataUtils.java  |  212 +++
 .../parquet/tools/command/Registry.java       |    1 +
 .../tools/command/ShowMetaCommand.java        |   29 +-
 .../tools/command/ShowSchemaCommand.java      |   14 +-
 .../tools/read/SimpleRecordConverter.java     |   66 +-
 .../parquet/tools/util/MetadataUtils.java     |    9 +-
 pom.xml                                       |   18 +-
 150 files changed, 14764 insertions(+), 2806 deletions(-)
 create mode 100644 parquet-cli/src/main/java/org/apache/parquet/cli/commands/ShowColumnIndexCommand.java
 create mode 100644 parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReaderBase.java
 create mode 100644 parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreBase.java
 create mode 100644 parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterBase.java
 create mode 100644 parquet-column/src/main/java/org/apache/parquet/column/impl/SynchronizingColumnReader.java
 create mode 100644 parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/BinaryColumnIndexBuilder.java
 create mode 100644 parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/BinaryTruncator.java
 create mode 100644 parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/BooleanColumnIndexBuilder.java
 create mode 100644 parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/BoundaryOrder.java
 create mode 100644 parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndex.java
 create mode 100644 parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndexBuilder.java
 create mode 100644 parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/DoubleColumnIndexBuilder.java
 create mode 100644 parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/FloatColumnIndexBuilder.java
 create mode 100644 parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/IndexIterator.java
 create mode 100644 parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/IntColumnIndexBuilder.java
 create mode 100644 parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/LongColumnIndexBuilder.java
 create mode 100644 parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/OffsetIndex.java
 create mode 100644 parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/OffsetIndexBuilder.java
 create mode 100644 parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/ColumnIndexFilter.java
 create mode 100644 parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/ColumnIndexStore.java
 create mode 100644 parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/RowRanges.java
 create mode 100644 parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestBinaryTruncator.java
 create mode 100644 parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestBoundaryOrder.java
 create mode 100644 parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestColumnIndexBuilder.java
 create mode 100644 parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestIndexIterator.java
 create mode 100644 parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestOffsetIndexBuilder.java
 create mode 100644 parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestColumnIndexFilter.java
 create mode 100644 parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestRowRanges.java
 create mode 100644 parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuildersWithLogicalTypes.java
 create mode 100644 parquet-format-structures/pom.xml
 create mode 100644 parquet-format-structures/src/main/java/org/apache/parquet/format/InterningProtocol.java
 create mode 100644 parquet-format-structures/src/main/java/org/apache/parquet/format/LogicalTypes.java
 create mode 100644 parquet-format-structures/src/main/java/org/apache/parquet/format/Util.java
 create mode 100644 parquet-format-structures/src/main/java/org/apache/parquet/format/event/Consumers.java
 create mode 100644 parquet-format-structures/src/main/java/org/apache/parquet/format/event/EventBasedThriftReader.java
 create mode 100644 parquet-format-structures/src/main/java/org/apache/parquet/format/event/FieldConsumer.java
 create mode 100644 parquet-format-structures/src/main/java/org/apache/parquet/format/event/TypedConsumer.java
 create mode 100644 parquet-format-structures/src/test/java/org/apache/parquet/format/TestUtil.java
 create mode 100644 parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnIndexFilterUtils.java
 create mode 100644 parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnIndexStoreImpl.java
 create mode 100644 parquet-hadoop/src/main/java/org/apache/parquet/hadoop/util/BlocksCombiner.java
 create mode 100644 parquet-hadoop/src/main/java/org/apache/parquet/internal/hadoop/metadata/IndexReference.java
 create mode 100644 parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestColumnIndexFiltering.java
 create mode 100644 parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriterMergeBlocks.java
 create mode 100644 parquet-tools/src/main/java/org/apache/parquet/tools/command/ColumnIndexCommand.java
 create mode 100644 parquet-tools/src/main/java/org/apache/parquet/tools/command/MetadataUtils.java

diff --git a/.travis.yml b/.travis.yml
index da6a6ac80e..7ab4846c77 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -35,4 +35,4 @@ env:
   - HADOOP_PROFILE=default TEST_CODECS=gzip,snappy
 
 install: mvn install --batch-mode -DskipTests=true -Dmaven.javadoc.skip=true -Dsource.skip=true | pv -fbi 60 > mvn_install.log || (cat mvn_install.log && false)
-script: mvn test -P $HADOOP_PROFILE
+script: mvn verify -P $HADOOP_PROFILE
diff --git a/parquet-arrow/pom.xml b/parquet-arrow/pom.xml
index 232167ecb3..e0f305acbb 100644
--- a/parquet-arrow/pom.xml
+++ b/parquet-arrow/pom.xml
@@ -33,7 +33,7 @@
   <url>https://parquet.apache.org</url>
 
   <properties>
-    <arrow.version>0.8.0</arrow.version>
+    <arrow.version>0.10.0</arrow.version>
   </properties>
 
   <dependencies>
diff --git a/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java b/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java
index a7df48cee0..51057c589e 100644
--- a/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java
+++ b/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java
@@ -19,22 +19,17 @@
 package org.apache.parquet.arrow.schema;
 
 import static java.util.Arrays.asList;
-import static org.apache.parquet.schema.OriginalType.DATE;
-import static org.apache.parquet.schema.OriginalType.DECIMAL;
-import static org.apache.parquet.schema.OriginalType.INTERVAL;
-import static org.apache.parquet.schema.OriginalType.INT_16;
-import static org.apache.parquet.schema.OriginalType.INT_32;
-import static org.apache.parquet.schema.OriginalType.INT_64;
-import static org.apache.parquet.schema.OriginalType.INT_8;
-import static org.apache.parquet.schema.OriginalType.TIMESTAMP_MILLIS;
-import static org.apache.parquet.schema.OriginalType.TIMESTAMP_MICROS;
-import static org.apache.parquet.schema.OriginalType.TIME_MILLIS;
-import static org.apache.parquet.schema.OriginalType.TIME_MICROS;
-import static org.apache.parquet.schema.OriginalType.UINT_16;
-import static org.apache.parquet.schema.OriginalType.UINT_32;
-import static org.apache.parquet.schema.OriginalType.UINT_64;
-import static org.apache.parquet.schema.OriginalType.UINT_8;
-import static org.apache.parquet.schema.OriginalType.UTF8;
+import static java.util.Optional.empty;
+import static java.util.Optional.of;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MICROS;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MILLIS;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.NANOS;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.dateType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.decimalType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.intType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.stringType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.timeType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.timestampType;
 import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
 import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BOOLEAN;
 import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.DOUBLE;
@@ -48,6 +43,7 @@
 
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Optional;
 
 import org.apache.arrow.vector.types.DateUnit;
 import org.apache.arrow.vector.types.FloatingPointPrecision;
@@ -75,10 +71,9 @@
 import org.apache.parquet.arrow.schema.SchemaMapping.StructTypeMapping;
 import org.apache.parquet.arrow.schema.SchemaMapping.TypeMapping;
 import org.apache.parquet.arrow.schema.SchemaMapping.UnionTypeMapping;
-import org.apache.parquet.schema.DecimalMetadata;
 import org.apache.parquet.schema.GroupType;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
 import org.apache.parquet.schema.MessageType;
-import org.apache.parquet.schema.OriginalType;
 import org.apache.parquet.schema.PrimitiveType;
 import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
 import org.apache.parquet.schema.Type;
@@ -180,13 +175,11 @@ public TypeMapping visit(Int type) {
         boolean signed = type.getIsSigned();
         switch (type.getBitWidth()) {
           case 8:
-            return primitive(INT32, signed ? INT_8 : UINT_8);
           case 16:
-            return primitive(INT32, signed ? INT_16 : UINT_16);
           case 32:
-            return primitive(INT32, signed ? INT_32 : UINT_32);
+            return primitive(INT32, intType(type.getBitWidth(), signed));
           case 64:
-            return primitive(INT64, signed ? INT_64 : UINT_64);
+            return primitive(INT64, intType(64, signed));
           default:
             throw new IllegalArgumentException("Illegal int type: " + field);
         }
@@ -209,7 +202,7 @@ public TypeMapping visit(FloatingPoint type) {
 
       @Override
       public TypeMapping visit(Utf8 type) {
-        return primitive(BINARY, UTF8);
+        return primitive(BINARY, stringType());
       }
 
       @Override
@@ -243,7 +236,7 @@ public TypeMapping visit(Decimal type) {
 
       @Override
       public TypeMapping visit(Date type) {
-        return primitive(INT32, DATE);
+        return primitive(INT32, dateType());
       }
 
       @Override
@@ -251,9 +244,11 @@ public TypeMapping visit(Time type) {
         int bitWidth = type.getBitWidth();
         TimeUnit timeUnit = type.getUnit();
         if (bitWidth == 32 && timeUnit == TimeUnit.MILLISECOND) {
-          return primitive(INT32, TIME_MILLIS);
+          return primitive(INT32, timeType(false, MILLIS));
         } else if (bitWidth == 64 && timeUnit == TimeUnit.MICROSECOND) {
-          return primitive(INT64, TIME_MICROS);
+          return primitive(INT64, timeType(false, MICROS));
+        } else if (bitWidth == 64 && timeUnit == TimeUnit.NANOSECOND) {
+          return primitive(INT64, timeType(false, NANOS));
         }
         throw new UnsupportedOperationException("Unsupported type " + type);
       }
@@ -262,20 +257,32 @@ public TypeMapping visit(Time type) {
       public TypeMapping visit(Timestamp type) {
         TimeUnit timeUnit = type.getUnit();
         if (timeUnit == TimeUnit.MILLISECOND) {
-          return primitive(INT64, TIMESTAMP_MILLIS);
+          return primitive(INT64, timestampType(isUtcNormalized(type), MILLIS));
         } else if (timeUnit == TimeUnit.MICROSECOND) {
-          return primitive(INT64, TIMESTAMP_MICROS);
+          return primitive(INT64, timestampType(isUtcNormalized(type), MICROS));
+        } else if (timeUnit == TimeUnit.NANOSECOND) {
+          return primitive(INT64, timestampType(isUtcNormalized(type), NANOS));
         }
         throw new UnsupportedOperationException("Unsupported type " + type);
       }
 
+      private boolean isUtcNormalized(Timestamp timestamp) { // true when the Arrow timestamp declares any time zone
+        String timeZone = timestamp.getTimezone();
+        return timeZone != null && !timeZone.isEmpty(); // a null or empty zone yields false
+      }
+
       /**
        * See https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#interval
        */
       @Override
       public TypeMapping visit(Interval type) {
         // TODO(PARQUET-675): fix interval original types
-        return primitiveFLBA(12, INTERVAL);
+        return primitiveFLBA(12, LogicalTypeAnnotation.IntervalLogicalTypeAnnotation.getInstance());
+      }
+
+      @Override
+      public TypeMapping visit(ArrowType.FixedSizeBinary fixedSizeBinary) {
+        return primitive(BINARY); // NOTE(review): builds an unannotated optional BINARY; primitiveFLBA is not used, so no fixed length is recorded - confirm intended
       }
 
       private TypeMapping mapping(PrimitiveType parquetType) {
@@ -283,18 +290,18 @@ private TypeMapping mapping(PrimitiveType parquetType) {
       }
 
       private TypeMapping decimal(PrimitiveTypeName type, int precision, int scale) {
-        return mapping(Types.optional(type).as(DECIMAL).precision(precision).scale(scale).named(fieldName));
+        return mapping(Types.optional(type).as(decimalType(scale, precision)).named(fieldName));
       }
 
       private TypeMapping primitive(PrimitiveTypeName type) {
         return mapping(Types.optional(type).named(fieldName));
       }
 
-      private TypeMapping primitive(PrimitiveTypeName type, OriginalType otype) {
+      private TypeMapping primitive(PrimitiveTypeName type, LogicalTypeAnnotation otype) {
         return mapping(Types.optional(type).as(otype).named(fieldName));
       }
 
-      private TypeMapping primitiveFLBA(int length, OriginalType otype) {
+      private TypeMapping primitiveFLBA(int length, LogicalTypeAnnotation otype) {
         return mapping(Types.optional(FIXED_LEN_BYTE_ARRAY).length(length).as(otype).named(fieldName));
       }
     });
@@ -358,21 +365,21 @@ private TypeMapping fromParquet(Type type, String name, Repetition repetition) {
    * @return the mapping
    */
   private TypeMapping fromParquetGroup(GroupType type, String name) {
-    OriginalType ot = type.getOriginalType();
-    if (ot == null) {
+    LogicalTypeAnnotation logicalType = type.getLogicalTypeAnnotation();
+    if (logicalType == null) {
       List<TypeMapping> typeMappings = fromParquet(type.getFields());
       Field arrowField = new Field(name, type.isRepetition(OPTIONAL), new Struct(), fields(typeMappings));
       return new StructTypeMapping(arrowField, type, typeMappings);
     } else {
-      switch (ot) {
-        case LIST:
+      return logicalType.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<TypeMapping>() {
+        @Override
+        public Optional<TypeMapping> visit(LogicalTypeAnnotation.ListLogicalTypeAnnotation listLogicalType) {
           List3Levels list3Levels = new List3Levels(type);
           TypeMapping child = fromParquet(list3Levels.getElement(), null, list3Levels.getElement().getRepetition());
           Field arrowField = new Field(name, type.isRepetition(OPTIONAL), new ArrowType.List(), asList(child.getArrowField()));
-          return new ListTypeMapping(arrowField, list3Levels, child);
-        default:
-          throw new UnsupportedOperationException("Unsupported type " + type);
-      }
+          return of(new ListTypeMapping(arrowField, list3Levels, child));
+        }
+      }).orElseThrow(() -> new UnsupportedOperationException("Unsupported type " + type));
     }
   }
 
@@ -401,92 +408,86 @@ public TypeMapping convertDOUBLE(PrimitiveTypeName primitiveTypeName) throws Run
 
       @Override
       public TypeMapping convertINT32(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
-        OriginalType ot = type.getOriginalType();
-        if (ot == null) {
+        LogicalTypeAnnotation logicalTypeAnnotation = type.getLogicalTypeAnnotation();
+        if (logicalTypeAnnotation == null) {
           return integer(32, true);
         }
-        switch (ot) {
-          case INT_8:
-            return integer(8, true);
-          case INT_16:
-            return integer(16, true);
-          case INT_32:
-            return integer(32, true);
-          case UINT_8:
-            return integer(8, false);
-          case UINT_16:
-            return integer(16, false);
-          case UINT_32:
-            return integer(32, false);
-          case DECIMAL:
-            return decimal(type.getDecimalMetadata());
-          case DATE:
-            return field(new ArrowType.Date(DateUnit.DAY));
-          case TIME_MILLIS:
-            return field(new ArrowType.Time(TimeUnit.MILLISECOND, 32));
-          default:
-          case INT_64:
-          case UINT_64:
-          case UTF8:
-          case ENUM:
-          case BSON:
-          case INTERVAL:
-          case JSON:
-          case LIST:
-          case MAP:
-          case MAP_KEY_VALUE:
-          case TIMESTAMP_MICROS:
-          case TIMESTAMP_MILLIS:
-          case TIME_MICROS:
-            throw new IllegalArgumentException("illegal type " + type);
-        }
+        return logicalTypeAnnotation.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<TypeMapping>() {
+          @Override
+          public Optional<TypeMapping> visit(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) {
+            return of(decimal(decimalLogicalType.getPrecision(), decimalLogicalType.getScale()));
+          }
+
+          @Override
+          public Optional<TypeMapping> visit(LogicalTypeAnnotation.DateLogicalTypeAnnotation dateLogicalType) {
+            return of(field(new ArrowType.Date(DateUnit.DAY)));
+          }
+
+          @Override
+          public Optional<TypeMapping> visit(LogicalTypeAnnotation.TimeLogicalTypeAnnotation timeLogicalType) {
+            return timeLogicalType.getUnit() == MILLIS ? of(field(new ArrowType.Time(TimeUnit.MILLISECOND, 32))) : empty();
+          }
+
+          @Override
+          public Optional<TypeMapping> visit(LogicalTypeAnnotation.IntLogicalTypeAnnotation intLogicalType) {
+            if (intLogicalType.getBitWidth() == 64) {
+              return empty();
+            }
+            return of(integer(intLogicalType.getBitWidth(), intLogicalType.isSigned()));
+          }
+        }).orElseThrow(() -> new IllegalArgumentException("illegal type " + type));
       }
 
       @Override
       public TypeMapping convertINT64(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
-        OriginalType ot = type.getOriginalType();
-        if (ot == null) {
+        LogicalTypeAnnotation logicalTypeAnnotation = type.getLogicalTypeAnnotation();
+        if (logicalTypeAnnotation == null) {
           return integer(64, true);
         }
-        switch (ot) {
-          case INT_8:
-            return integer(8, true);
-          case INT_16:
-            return integer(16, true);
-          case INT_32:
-            return integer(32, true);
-          case INT_64:
-            return integer(64, true);
-          case UINT_8:
-            return integer(8, false);
-          case UINT_16:
-            return integer(16, false);
-          case UINT_32:
-            return integer(32, false);
-          case UINT_64:
-            return integer(64, false);
-          case DECIMAL:
-            return decimal(type.getDecimalMetadata());
-          case DATE:
-            return field(new ArrowType.Date(DateUnit.DAY));
-          case TIMESTAMP_MICROS:
-            return field(new ArrowType.Timestamp(TimeUnit.MICROSECOND, "UTC"));
-          case TIMESTAMP_MILLIS:
-            return field(new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC"));
-          case TIME_MICROS:
-            return field(new ArrowType.Time(TimeUnit.MICROSECOND, 64));
-          default:
-          case UTF8:
-          case ENUM:
-          case BSON:
-          case INTERVAL:
-          case JSON:
-          case LIST:
-          case MAP:
-          case MAP_KEY_VALUE:
-          case TIME_MILLIS:
-            throw new IllegalArgumentException("illegal type " + type);
-        }
+
+        return logicalTypeAnnotation.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<TypeMapping>() {
+          @Override
+          public Optional<TypeMapping> visit(LogicalTypeAnnotation.DateLogicalTypeAnnotation dateLogicalType) {
+            return of(field(new ArrowType.Date(DateUnit.DAY)));
+          }
+
+          @Override
+          public Optional<TypeMapping> visit(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) {
+            return of(decimal(decimalLogicalType.getPrecision(), decimalLogicalType.getScale()));
+          }
+
+          @Override
+          public Optional<TypeMapping> visit(LogicalTypeAnnotation.IntLogicalTypeAnnotation intLogicalType) {
+            return of(integer(intLogicalType.getBitWidth(), intLogicalType.isSigned()));
+          }
+
+          @Override
+          public Optional<TypeMapping> visit(LogicalTypeAnnotation.TimeLogicalTypeAnnotation timeLogicalType) {
+            if (timeLogicalType.getUnit() == MICROS) {
+              return of(field(new ArrowType.Time(TimeUnit.MICROSECOND, 64)));
+            }  else if (timeLogicalType.getUnit() == NANOS) {
+              return of(field(new ArrowType.Time(TimeUnit.NANOSECOND, 64)));
+            }
+            return empty();
+          }
+
+          @Override
+          public Optional<TypeMapping> visit(LogicalTypeAnnotation.TimestampLogicalTypeAnnotation timestampLogicalType) {
+            switch (timestampLogicalType.getUnit()) {
+              case MICROS:
+                return of(field(new ArrowType.Timestamp(TimeUnit.MICROSECOND, getTimeZone(timestampLogicalType))));
+              case MILLIS:
+                return of(field(new ArrowType.Timestamp(TimeUnit.MILLISECOND, getTimeZone(timestampLogicalType))));
+              case NANOS:
+                return of(field(new ArrowType.Timestamp(TimeUnit.NANOSECOND, getTimeZone(timestampLogicalType))));
+            }
+            return empty();
+          }
+
+          private String getTimeZone(LogicalTypeAnnotation.TimestampLogicalTypeAnnotation timestampLogicalType) {
+            return timestampLogicalType.isAdjustedToUTC() ? "UTC" : null; // UTC-adjusted => "UTC" zone; otherwise a null zone
+          }
+        }).orElseThrow(() -> new IllegalArgumentException("illegal type " + type));
       }
 
       @Override
@@ -507,22 +508,25 @@ public TypeMapping convertBOOLEAN(PrimitiveTypeName primitiveTypeName) throws Ru
 
       @Override
       public TypeMapping convertBINARY(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
-        OriginalType ot = type.getOriginalType();
-        if (ot == null) {
+        LogicalTypeAnnotation logicalTypeAnnotation = type.getLogicalTypeAnnotation();
+        if (logicalTypeAnnotation == null) {
           return field(new ArrowType.Binary());
         }
-        switch (ot) {
-          case UTF8:
-            return field(new ArrowType.Utf8());
-          case DECIMAL:
-            return decimal(type.getDecimalMetadata());
-          default:
-            throw new IllegalArgumentException("illegal type " + type);
-        }
+        return logicalTypeAnnotation.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<TypeMapping>() {
+          @Override
+          public Optional<TypeMapping> visit(LogicalTypeAnnotation.StringLogicalTypeAnnotation stringLogicalType) {
+            return of(field(new ArrowType.Utf8()));
+          }
+
+          @Override
+          public Optional<TypeMapping> visit(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) {
+            return of(decimal(decimalLogicalType.getPrecision(), decimalLogicalType.getScale()));
+          }
+        }).orElseThrow(() -> new IllegalArgumentException("illegal type " + type));
       }
 
-      private TypeMapping decimal(DecimalMetadata decimalMetadata) {
-        return field(new ArrowType.Decimal(decimalMetadata.getPrecision(), decimalMetadata.getScale()));
+      private TypeMapping decimal(int precision, int scale) {
+        return field(new ArrowType.Decimal(precision, scale));
       }
 
       private TypeMapping integer(int width, boolean signed) {
@@ -663,6 +667,11 @@ public TypeMapping visit(Interval type) {
         return primitive();
       }
 
+      @Override
+      public TypeMapping visit(ArrowType.FixedSizeBinary fixedSizeBinary) {
+        return primitive();
+      }
+
       private TypeMapping primitive() {
         if (!parquetField.isPrimitive()) {
           throw new IllegalArgumentException("Can not map schemas as one is primitive and the other is not: " + arrowField + " != " + parquetField);
diff --git a/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java b/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java
index 2d1f028e24..c962b5456f 100644
--- a/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java
+++ b/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java
@@ -19,6 +19,11 @@
 package org.apache.parquet.arrow.schema;
 
 import static java.util.Arrays.asList;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MICROS;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MILLIS;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.NANOS;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.timeType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.timestampType;
 import static org.apache.parquet.schema.OriginalType.DATE;
 import static org.apache.parquet.schema.OriginalType.DECIMAL;
 import static org.apache.parquet.schema.OriginalType.INTERVAL;
@@ -62,12 +67,12 @@
 import org.apache.parquet.arrow.schema.SchemaMapping.TypeMappingVisitor;
 import org.apache.parquet.arrow.schema.SchemaMapping.UnionTypeMapping;
 import org.apache.parquet.example.Paper;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
 import org.apache.parquet.schema.MessageType;
 import org.apache.parquet.schema.Types;
+import org.junit.Assert;
 import org.junit.Test;
 
-import junit.framework.Assert;
-
 /**
  * @see SchemaConverter
  */
@@ -90,7 +95,11 @@ private static Field field(String name, ArrowType type, Field... children) {
     field("f", new ArrowType.FixedSizeList(1), field(null, new ArrowType.Date(DateUnit.DAY))),
     field("g", new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)),
     field("h", new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC")),
-    field("i", new ArrowType.Interval(IntervalUnit.DAY_TIME))
+    field("i", new ArrowType.Timestamp(TimeUnit.NANOSECOND, "UTC")),
+    field("j", new ArrowType.Timestamp(TimeUnit.MILLISECOND, null)),
+    field("k", new ArrowType.Timestamp(TimeUnit.MICROSECOND, "UTC")),
+    field("l", new ArrowType.Timestamp(TimeUnit.MICROSECOND, null)),
+    field("m", new ArrowType.Interval(IntervalUnit.DAY_TIME))
   ));
   private final MessageType complexParquetSchema = Types.buildMessage()
     .addField(Types.optional(INT32).as(INT_8).named("a"))
@@ -105,8 +114,12 @@ private static Field field(String name, ArrowType type, Field... children) {
       setElementType(Types.optional(INT32).as(DATE).named("element"))
       .named("f"))
     .addField(Types.optional(FLOAT).named("g"))
-    .addField(Types.optional(INT64).as(TIMESTAMP_MILLIS).named("h"))
-    .addField(Types.optional(FIXED_LEN_BYTE_ARRAY).length(12).as(INTERVAL).named("i"))
+    .addField(Types.optional(INT64).as(timestampType(true, MILLIS)).named("h"))
+    .addField(Types.optional(INT64).as(timestampType(true, NANOS)).named("i"))
+    .addField(Types.optional(INT64).as(timestampType(false, MILLIS)).named("j"))
+    .addField(Types.optional(INT64).as(timestampType(true, MICROS)).named("k"))
+    .addField(Types.optional(INT64).as(timestampType(false, MICROS)).named("l"))
+    .addField(Types.optional(FIXED_LEN_BYTE_ARRAY).length(12).as(INTERVAL).named("m"))
     .named("root");
 
   private final Schema allTypesArrowSchema = new Schema(asList(
@@ -135,8 +148,10 @@ private static Field field(String name, ArrowType type, Field... children) {
     field("m", new ArrowType.Time(TimeUnit.MILLISECOND, 32)),
     field("n", new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC")),
     field("o", new ArrowType.Interval(IntervalUnit.DAY_TIME)),
-    field("o1", new ArrowType.Interval(IntervalUnit.YEAR_MONTH))
-  ));
+    field("o1", new ArrowType.Interval(IntervalUnit.YEAR_MONTH)),
+    field("p", new ArrowType.Time(TimeUnit.NANOSECOND, 64)),
+    field("q", new ArrowType.Timestamp(TimeUnit.NANOSECOND, "UTC"))
+    ));
 
   private final MessageType allTypesParquetSchema = Types.buildMessage()
     .addField(Types.optional(BINARY).named("a"))
@@ -169,10 +184,12 @@ private static Field field(String name, ArrowType type, Field... children) {
     .addField(Types.optional(INT64).as(DECIMAL).precision(15).scale(5).named("k1"))
     .addField(Types.optional(BINARY).as(DECIMAL).precision(25).scale(5).named("k2"))
     .addField(Types.optional(INT32).as(DATE).named("l"))
-    .addField(Types.optional(INT32).as(TIME_MILLIS).named("m"))
+    .addField(Types.optional(INT32).as(timeType(false, MILLIS)).named("m"))
     .addField(Types.optional(INT64).as(TIMESTAMP_MILLIS).named("n"))
     .addField(Types.optional(FIXED_LEN_BYTE_ARRAY).length(12).as(INTERVAL).named("o"))
     .addField(Types.optional(FIXED_LEN_BYTE_ARRAY).length(12).as(INTERVAL).named("o1"))
+    .addField(Types.optional(INT64).as(timeType(false, NANOS)).named("p"))
+    .addField(Types.optional(INT64).as(timestampType(true, NANOS)).named("q"))
     .named("root");
 
   private final Schema supportedTypesArrowSchema = new Schema(asList(
@@ -196,7 +213,9 @@ private static Field field(String name, ArrowType type, Field... children) {
     field("j2", new ArrowType.Decimal(25, 5)),
     field("k", new ArrowType.Date(DateUnit.DAY)),
     field("l", new ArrowType.Time(TimeUnit.MILLISECOND, 32)),
-    field("m", new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC"))
+    field("m", new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC")),
+    field("n", new ArrowType.Time(TimeUnit.NANOSECOND, 64)),
+    field("o", new ArrowType.Timestamp(TimeUnit.NANOSECOND, "UTC"))
   ));
 
   private final MessageType supportedTypesParquetSchema = Types.buildMessage()
@@ -225,6 +244,8 @@ private static Field field(String name, ArrowType type, Field... children) {
     .addField(Types.optional(INT32).as(DATE).named("k"))
     .addField(Types.optional(INT32).as(TIME_MILLIS).named("l"))
     .addField(Types.optional(INT64).as(TIMESTAMP_MILLIS).named("m"))
+    .addField(Types.optional(INT64).as(timeType(true, NANOS)).named("n"))
+    .addField(Types.optional(INT64).as(timestampType(true, NANOS)).named("o"))
     .named("root");
 
   private final Schema paperArrowSchema = new Schema(asList(
@@ -298,7 +319,7 @@ private void compareFields(List<Field> left, List<Field> right) {
   @Test
   public void testAllMap() throws IOException {
     SchemaMapping map = converter.map(allTypesArrowSchema, allTypesParquetSchema);
-    Assert.assertEquals("p, s<p>, l<p>, l<p>, u<p>, p, p, p, p, p, p, p, p, p, p, p, p, p, p, p, p, p, p, p, p, p", toSummaryString(map));
+    Assert.assertEquals("p, s<p>, l<p>, l<p>, u<p>, p, p, p, p, p, p, p, p, p, p, p, p, p, p, p, p, p, p, p, p, p, p, p", toSummaryString(map));
   }
 
   private String toSummaryString(SchemaMapping map) {
@@ -365,7 +386,8 @@ public void testArrowTimeMillisecondToParquet() {
     MessageType expected = converter.fromArrow(new Schema(asList(
       field("a", new ArrowType.Time(TimeUnit.MILLISECOND, 32))
     ))).getParquetSchema();
-    Assert.assertEquals(expected, Types.buildMessage().addField(Types.optional(INT32).as(TIME_MILLIS).named("a")).named("root"));
+    Assert.assertEquals(expected,
+      Types.buildMessage().addField(Types.optional(INT32).as(timeType(false, MILLIS)).named("a")).named("root"));
   }
 
   @Test
@@ -373,14 +395,8 @@ public void testArrowTimeMicrosecondToParquet() {
     MessageType expected = converter.fromArrow(new Schema(asList(
       field("a", new ArrowType.Time(TimeUnit.MICROSECOND, 64))
     ))).getParquetSchema();
-    Assert.assertEquals(expected, Types.buildMessage().addField(Types.optional(INT64).as(TIME_MICROS).named("a")).named("root"));
-  }
-
-  @Test(expected = UnsupportedOperationException.class)
-  public void testArrowTimeNanosecondToParquet() {
-    converter.fromArrow(new Schema(asList(
-      field("a", new ArrowType.Time(TimeUnit.NANOSECOND, 64))
-    ))).getParquetSchema();
+    Assert.assertEquals(expected,
+      Types.buildMessage().addField(Types.optional(INT64).as(timeType(false, MICROS)).named("a")).named("root"));
   }
 
   @Test
@@ -438,13 +454,6 @@ public void testArrowTimestampMicrosecondToParquet() {
     Assert.assertEquals(expected, Types.buildMessage().addField(Types.optional(INT64).as(TIMESTAMP_MICROS).named("a")).named("root"));
   }
 
-  @Test(expected = UnsupportedOperationException.class)
-  public void testArrowTimestampNanosecondToParquet() {
-    converter.fromArrow(new Schema(asList(
-      field("a", new ArrowType.Timestamp(TimeUnit.NANOSECOND, "UTC"))
-    ))).getParquetSchema();
-  }
-
   @Test
   public void testParquetInt64TimestampMillisToArrow() {
     MessageType parquet = Types.buildMessage()
diff --git a/parquet-avro/pom.xml b/parquet-avro/pom.xml
index 3592121d76..bc3603fe62 100644
--- a/parquet-avro/pom.xml
+++ b/parquet-avro/pom.xml
@@ -45,8 +45,8 @@
     </dependency>
     <dependency>
       <groupId>org.apache.parquet</groupId>
-      <artifactId>parquet-format</artifactId>
-      <version>${parquet.format.version}</version>
+      <artifactId>parquet-format-structures</artifactId>
+      <version>${project.version}</version>
     </dependency>
     <dependency>
       <groupId>org.apache.avro</groupId>
diff --git a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroSchemaConverter.java b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroSchemaConverter.java
index 1bb12b9835..558446e6ba 100644
--- a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroSchemaConverter.java
+++ b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroSchemaConverter.java
@@ -24,10 +24,9 @@
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.parquet.schema.ConversionPatterns;
-import org.apache.parquet.schema.DecimalMetadata;
 import org.apache.parquet.schema.GroupType;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
 import org.apache.parquet.schema.MessageType;
-import org.apache.parquet.schema.OriginalType;
 import org.apache.parquet.schema.PrimitiveType;
 import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
 import org.apache.parquet.schema.Type;
@@ -36,11 +35,21 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
+import java.util.Optional;
 
+import static java.util.Optional.empty;
+import static java.util.Optional.of;
 import static org.apache.avro.JsonProperties.NULL_VALUE;
 import static org.apache.parquet.avro.AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE;
 import static org.apache.parquet.avro.AvroWriteSupport.WRITE_OLD_LIST_STRUCTURE_DEFAULT;
-import static org.apache.parquet.schema.OriginalType.*;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MICROS;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MILLIS;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.dateType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.decimalType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.enumType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.stringType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.timeType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.timestampType;
 import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.*;
 import static org.apache.parquet.schema.Type.Repetition.REPEATED;
 
@@ -147,11 +156,11 @@ private Type convertField(String fieldName, Schema schema, Type.Repetition repet
     } else if (type.equals(Schema.Type.BYTES)) {
       builder = Types.primitive(BINARY, repetition);
     } else if (type.equals(Schema.Type.STRING)) {
-      builder = Types.primitive(BINARY, repetition).as(UTF8);
+      builder = Types.primitive(BINARY, repetition).as(stringType());
     } else if (type.equals(Schema.Type.RECORD)) {
       return new GroupType(repetition, fieldName, convertFields(schema.getFields()));
     } else if (type.equals(Schema.Type.ENUM)) {
-      builder = Types.primitive(BINARY, repetition).as(ENUM);
+      builder = Types.primitive(BINARY, repetition).as(enumType());
     } else if (type.equals(Schema.Type.ARRAY)) {
       if (writeOldListStructure) {
         return ConversionPatterns.listType(repetition, fieldName,
@@ -178,12 +187,10 @@ private Type convertField(String fieldName, Schema schema, Type.Repetition repet
     LogicalType logicalType = schema.getLogicalType();
     if (logicalType != null) {
       if (logicalType instanceof LogicalTypes.Decimal) {
-        builder = builder.as(DECIMAL)
-            .precision(((LogicalTypes.Decimal) logicalType).getPrecision())
-            .scale(((LogicalTypes.Decimal) logicalType).getScale());
-
+        LogicalTypes.Decimal decimal = (LogicalTypes.Decimal) logicalType;
+        builder = builder.as(decimalType(decimal.getScale(), decimal.getPrecision()));
       } else {
-        OriginalType annotation = convertLogicalType(logicalType);
+        LogicalTypeAnnotation annotation = convertLogicalType(logicalType);
         if (annotation != null) {
           builder.as(annotation);
         }
@@ -267,7 +274,7 @@ private Schema convertField(final Type parquetType) {
       final PrimitiveType asPrimitive = parquetType.asPrimitiveType();
       final PrimitiveTypeName parquetPrimitiveTypeName =
           asPrimitive.getPrimitiveTypeName();
-      final OriginalType annotation = parquetType.getOriginalType();
+      final LogicalTypeAnnotation annotation = parquetType.getLogicalTypeAnnotation();
       Schema schema = parquetPrimitiveTypeName.convert(
           new PrimitiveType.PrimitiveTypeNameConverter<Schema, RuntimeException>() {
             @Override
@@ -301,7 +308,8 @@ public Schema convertFIXED_LEN_BYTE_ARRAY(PrimitiveTypeName primitiveTypeName) {
             }
             @Override
             public Schema convertBINARY(PrimitiveTypeName primitiveTypeName) {
-              if (annotation == OriginalType.UTF8 || annotation == OriginalType.ENUM) {
+              if (annotation instanceof LogicalTypeAnnotation.StringLogicalTypeAnnotation ||
+                annotation instanceof  LogicalTypeAnnotation.EnumLogicalTypeAnnotation) {
                 return Schema.create(Schema.Type.STRING);
               } else {
                 return Schema.create(Schema.Type.BYTES);
@@ -309,9 +317,8 @@ public Schema convertBINARY(PrimitiveTypeName primitiveTypeName) {
             }
           });
 
-      LogicalType logicalType = convertOriginalType(
-          annotation, asPrimitive.getDecimalMetadata());
-      if (logicalType != null && (annotation != DECIMAL ||
+      LogicalType logicalType = convertLogicalType(annotation);
+      if (logicalType != null && (!(annotation instanceof LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) ||
           parquetPrimitiveTypeName == BINARY ||
           parquetPrimitiveTypeName == FIXED_LEN_BYTE_ARRAY)) {
         schema = logicalType.addToSchema(schema);
@@ -321,10 +328,11 @@ public Schema convertBINARY(PrimitiveTypeName primitiveTypeName) {
 
     } else {
       GroupType parquetGroupType = parquetType.asGroupType();
-      OriginalType originalType = parquetGroupType.getOriginalType();
-      if (originalType != null) {
-        switch(originalType) {
-          case LIST:
+      LogicalTypeAnnotation logicalTypeAnnotation = parquetGroupType.getLogicalTypeAnnotation();
+      if (logicalTypeAnnotation != null) {
+        return logicalTypeAnnotation.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<Schema>() {
+          @Override
+          public Optional<Schema> visit(LogicalTypeAnnotation.ListLogicalTypeAnnotation listLogicalType) {
             if (parquetGroupType.getFieldCount()!= 1) {
               throw new UnsupportedOperationException("Invalid list type " + parquetGroupType);
             }
@@ -334,17 +342,29 @@ public Schema convertBINARY(PrimitiveTypeName primitiveTypeName) {
             }
             if (isElementType(repeatedType, parquetGroupType.getName())) {
               // repeated element types are always required
-              return Schema.createArray(convertField(repeatedType));
+              return of(Schema.createArray(convertField(repeatedType)));
             } else {
               Type elementType = repeatedType.asGroupType().getType(0);
               if (elementType.isRepetition(Type.Repetition.OPTIONAL)) {
-                return Schema.createArray(optional(convertField(elementType)));
+                return of(Schema.createArray(optional(convertField(elementType))));
               } else {
-                return Schema.createArray(convertField(elementType));
+                return of(Schema.createArray(convertField(elementType)));
               }
             }
-          case MAP_KEY_VALUE: // for backward-compatibility
-          case MAP:
+          }
+
+          @Override
+          // for backward-compatibility
+          public Optional<Schema> visit(LogicalTypeAnnotation.MapKeyValueTypeAnnotation mapKeyValueLogicalType) {
+            return visitMapOrMapKeyValue();
+          }
+
+          @Override
+          public Optional<Schema> visit(LogicalTypeAnnotation.MapLogicalTypeAnnotation mapLogicalType) {
+            return visitMapOrMapKeyValue();
+          }
+
+          private Optional<Schema> visitMapOrMapKeyValue() {
             if (parquetGroupType.getFieldCount() != 1 || parquetGroupType.getType(0).isPrimitive()) {
               throw new UnsupportedOperationException("Invalid map type " + parquetGroupType);
             }
@@ -356,24 +376,23 @@ public Schema convertBINARY(PrimitiveTypeName primitiveTypeName) {
             Type keyType = mapKeyValType.getType(0);
             if (!keyType.isPrimitive() ||
                 !keyType.asPrimitiveType().getPrimitiveTypeName().equals(PrimitiveTypeName.BINARY) ||
-                !keyType.getOriginalType().equals(OriginalType.UTF8)) {
+                !keyType.getLogicalTypeAnnotation().equals(stringType())) {
               throw new IllegalArgumentException("Map key type must be binary (UTF8): "
                   + keyType);
             }
             Type valueType = mapKeyValType.getType(1);
             if (valueType.isRepetition(Type.Repetition.OPTIONAL)) {
-              return Schema.createMap(optional(convertField(valueType)));
+              return of(Schema.createMap(optional(convertField(valueType))));
             } else {
-              return Schema.createMap(convertField(valueType));
+              return of(Schema.createMap(convertField(valueType)));
             }
-          case ENUM:
-            return Schema.create(Schema.Type.STRING);
-          case UTF8:
-          default:
-            throw new UnsupportedOperationException("Cannot convert Parquet type " +
-                parquetType);
+          }
 
-        }
+          @Override
+          public Optional<Schema> visit(LogicalTypeAnnotation.EnumLogicalTypeAnnotation enumLogicalType) {
+            return of(Schema.create(Schema.Type.STRING));
+          }
+        }).orElseThrow(() -> new UnsupportedOperationException("Cannot convert Parquet type " + parquetType));
       } else {
         // if no original type then it's a record
         return convertFields(parquetGroupType.getName(), parquetGroupType.getFields());
@@ -381,44 +400,65 @@ public Schema convertBINARY(PrimitiveTypeName primitiveTypeName) {
     }
   }
 
-  private OriginalType convertLogicalType(LogicalType logicalType) {
+  private LogicalTypeAnnotation convertLogicalType(LogicalType logicalType) {
     if (logicalType == null) {
       return null;
     } else if (logicalType instanceof LogicalTypes.Decimal) {
-      return OriginalType.DECIMAL;
+      LogicalTypes.Decimal decimal = (LogicalTypes.Decimal) logicalType;
+      return decimalType(decimal.getScale(), decimal.getPrecision());
     } else if (logicalType instanceof LogicalTypes.Date) {
-      return OriginalType.DATE;
+      return dateType();
     } else if (logicalType instanceof LogicalTypes.TimeMillis) {
-      return OriginalType.TIME_MILLIS;
+      return timeType(true, MILLIS);
     } else if (logicalType instanceof LogicalTypes.TimeMicros) {
-      return OriginalType.TIME_MICROS;
+      return timeType(true, MICROS);
     } else if (logicalType instanceof LogicalTypes.TimestampMillis) {
-      return OriginalType.TIMESTAMP_MILLIS;
+      return timestampType(true, MILLIS);
     } else if (logicalType instanceof LogicalTypes.TimestampMicros) {
-      return OriginalType.TIMESTAMP_MICROS;
+      return timestampType(true, MICROS);
     }
     return null;
   }
 
-  private LogicalType convertOriginalType(OriginalType annotation, DecimalMetadata meta) {
+  private LogicalType convertLogicalType(LogicalTypeAnnotation annotation) {
     if (annotation == null) {
       return null;
     }
-    switch (annotation) {
-      case DECIMAL:
-        return LogicalTypes.decimal(meta.getPrecision(), meta.getScale());
-      case DATE:
-        return LogicalTypes.date();
-      case TIME_MILLIS:
-        return LogicalTypes.timeMillis();
-      case TIME_MICROS:
-        return LogicalTypes.timeMicros();
-      case TIMESTAMP_MILLIS:
-        return LogicalTypes.timestampMillis();
-      case TIMESTAMP_MICROS:
-        return LogicalTypes.timestampMicros();
-    }
-    return null;
+    return annotation.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<LogicalType>() {
+      @Override
+      public Optional<LogicalType> visit(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) {
+        return of(LogicalTypes.decimal(decimalLogicalType.getPrecision(), decimalLogicalType.getScale()));
+      }
+
+      @Override
+      public Optional<LogicalType> visit(LogicalTypeAnnotation.DateLogicalTypeAnnotation dateLogicalType) {
+        return of(LogicalTypes.date());
+      }
+
+      @Override
+      public Optional<LogicalType> visit(LogicalTypeAnnotation.TimeLogicalTypeAnnotation timeLogicalType) {
+        LogicalTypeAnnotation.TimeUnit unit = timeLogicalType.getUnit();
+        switch (unit) {
+          case MILLIS:
+            return of(LogicalTypes.timeMillis());
+          case MICROS:
+            return of(LogicalTypes.timeMicros());
+        }
+        return empty();
+      }
+
+      @Override
+      public Optional<LogicalType> visit(LogicalTypeAnnotation.TimestampLogicalTypeAnnotation timestampLogicalType) {
+        LogicalTypeAnnotation.TimeUnit unit = timestampLogicalType.getUnit();
+        switch (unit) {
+          case MILLIS:
+            return of(LogicalTypes.timestampMillis());
+          case MICROS:
+            return of(LogicalTypes.timestampMicros());
+        }
+        return empty();
+      }
+    }).orElse(null);
   }
 
   /**
diff --git a/parquet-avro/src/test/java/org/apache/parquet/avro/TestAvroSchemaConverter.java b/parquet-avro/src/test/java/org/apache/parquet/avro/TestAvroSchemaConverter.java
index 942e3b1378..bfaeec3d6b 100644
--- a/parquet-avro/src/test/java/org/apache/parquet/avro/TestAvroSchemaConverter.java
+++ b/parquet-avro/src/test/java/org/apache/parquet/avro/TestAvroSchemaConverter.java
@@ -1,4 +1,4 @@
-/* 
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -616,7 +616,7 @@ public void testTimeMillisType() throws Exception {
 
     testRoundTripConversion(expected,
         "message myrecord {\n" +
-            "  required int32 time (TIME_MILLIS);\n" +
+            "  required int32 time (TIME(MILLIS,true));\n" +
             "}\n");
 
     for (PrimitiveTypeName primitive : new PrimitiveTypeName[]
@@ -646,7 +646,7 @@ public void testTimeMicrosType() throws Exception {
 
     testRoundTripConversion(expected,
         "message myrecord {\n" +
-            "  required int64 time (TIME_MICROS);\n" +
+            "  required int64 time (TIME(MICROS,true));\n" +
             "}\n");
 
     for (PrimitiveTypeName primitive : new PrimitiveTypeName[]
@@ -676,7 +676,7 @@ public void testTimestampMillisType() throws Exception {
 
     testRoundTripConversion(expected,
         "message myrecord {\n" +
-            "  required int64 timestamp (TIMESTAMP_MILLIS);\n" +
+            "  required int64 timestamp (TIMESTAMP(MILLIS,true));\n" +
             "}\n");
 
     for (PrimitiveTypeName primitive : new PrimitiveTypeName[]
@@ -706,7 +706,7 @@ public void testTimestampMicrosType() throws Exception {
 
     testRoundTripConversion(expected,
         "message myrecord {\n" +
-            "  required int64 timestamp (TIMESTAMP_MICROS);\n" +
+            "  required int64 timestamp (TIMESTAMP(MICROS,true));\n" +
             "}\n");
 
     for (PrimitiveTypeName primitive : new PrimitiveTypeName[]
diff --git a/parquet-cascading-common23/src/main/java/org/apache/parquet/cascading/convert/TupleConverter.java b/parquet-cascading-common23/src/main/java/org/apache/parquet/cascading/convert/TupleConverter.java
index 3741165b09..4c1240b859 100644
--- a/parquet-cascading-common23/src/main/java/org/apache/parquet/cascading/convert/TupleConverter.java
+++ b/parquet-cascading-common23/src/main/java/org/apache/parquet/cascading/convert/TupleConverter.java
@@ -1,4 +1,4 @@
-/* 
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -27,10 +27,7 @@
 import org.apache.parquet.io.api.PrimitiveConverter;
 import org.apache.parquet.pig.TupleConversionException;
 import org.apache.parquet.schema.GroupType;
-import org.apache.parquet.schema.OriginalType;
-import org.apache.parquet.schema.PrimitiveType;
 import org.apache.parquet.schema.Type;
-import org.apache.parquet.schema.Type.Repetition;
 
 public class TupleConverter extends GroupConverter {
 
diff --git a/parquet-cli/src/main/java/org/apache/parquet/cli/Main.java b/parquet-cli/src/main/java/org/apache/parquet/cli/Main.java
index 990193c731..fa69ce7a40 100644
--- a/parquet-cli/src/main/java/org/apache/parquet/cli/Main.java
+++ b/parquet-cli/src/main/java/org/apache/parquet/cli/Main.java
@@ -32,6 +32,7 @@
 import org.apache.parquet.cli.commands.ConvertCommand;
 import org.apache.parquet.cli.commands.ParquetMetadataCommand;
 import org.apache.parquet.cli.commands.SchemaCommand;
+import org.apache.parquet.cli.commands.ShowColumnIndexCommand;
 import org.apache.parquet.cli.commands.ShowDictionaryCommand;
 import org.apache.parquet.cli.commands.ShowPagesCommand;
 import org.apache.parquet.cli.commands.ToAvroCommand;
@@ -87,6 +88,7 @@ public class Main extends Configured implements Tool {
     jc.addCommand("to-avro", new ToAvroCommand(console));
     jc.addCommand("cat", new CatCommand(console, 0));
     jc.addCommand("head", new CatCommand(console, 10));
+    jc.addCommand("column-index", new ShowColumnIndexCommand(console));
   }
 
   @Override
diff --git a/parquet-cli/src/main/java/org/apache/parquet/cli/Util.java b/parquet-cli/src/main/java/org/apache/parquet/cli/Util.java
index 98bc1e5112..961c7f0c44 100644
--- a/parquet-cli/src/main/java/org/apache/parquet/cli/Util.java
+++ b/parquet-cli/src/main/java/org/apache/parquet/cli/Util.java
@@ -80,7 +80,12 @@ public static String humanReadable(long bytes) {
     }
   }
 
+  @Deprecated
   public static String minMaxAsString(Statistics stats, OriginalType annotation) {
+    return minMaxAsString(stats);
+  }
+
+  public static String minMaxAsString(Statistics stats) {
     if (stats == null) {
       return "no stats";
     }
@@ -90,7 +95,12 @@ public static String minMaxAsString(Statistics stats, OriginalType annotation) {
     return String.format("%s / %s", humanReadable(stats.minAsString(), 30), humanReadable(stats.maxAsString(), 30));
   }
 
+  @Deprecated
   public static String toString(Statistics stats, long count, OriginalType annotation) {
+    return toString(stats, count);
+  }
+
+  public static String toString(Statistics stats, long count) {
     if (stats == null) {
       return "no stats";
     }
diff --git a/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ParquetMetadataCommand.java b/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ParquetMetadataCommand.java
index 54fe6579b9..a452369e26 100644
--- a/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ParquetMetadataCommand.java
+++ b/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ParquetMetadataCommand.java
@@ -169,12 +169,12 @@ private void printColumnChunk(Logger console, int width, ColumnChunkMetaData col
       console.info(String.format("%-" + width + "s  FIXED[%d] %s %-7s %-9d %-8s %-7s %s",
           name, type.getTypeLength(), shortCodec(codec), encodingSummary, count,
           humanReadable(perValue), stats == null || !stats.isNumNullsSet() ? "" : String.valueOf(stats.getNumNulls()),
-          minMaxAsString(stats, type.getOriginalType())));
+          minMaxAsString(stats)));
     } else {
       console.info(String.format("%-" + width + "s  %-9s %s %-7s %-9d %-10s %-7s %s",
           name, typeName, shortCodec(codec), encodingSummary, count, humanReadable(perValue),
           stats == null || !stats.isNumNullsSet() ? "" : String.valueOf(stats.getNumNulls()),
-          minMaxAsString(stats, type.getOriginalType())));
+          minMaxAsString(stats)));
     }
   }
 }
diff --git a/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ShowColumnIndexCommand.java b/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ShowColumnIndexCommand.java
new file mode 100644
index 0000000000..38a7094b89
--- /dev/null
+++ b/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ShowColumnIndexCommand.java
@@ -0,0 +1,157 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.cli.commands;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.parquet.cli.BaseCommand;
+import org.apache.parquet.hadoop.ParquetFileReader;
+import org.apache.parquet.hadoop.metadata.BlockMetaData;
+import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
+import org.apache.parquet.hadoop.util.HadoopInputFile;
+import org.apache.parquet.internal.column.columnindex.ColumnIndex;
+import org.apache.parquet.internal.column.columnindex.OffsetIndex;
+import org.apache.parquet.io.InputFile;
+import org.slf4j.Logger;
+
+import com.beust.jcommander.Parameter;
+import com.beust.jcommander.Parameters;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+
+/**
+ * parquet-cli command to print column and offset indexes.
+ */
+@Parameters(commandDescription = "Prints the column and offset indexes of a Parquet file")
+public class ShowColumnIndexCommand extends BaseCommand {
+  public ShowColumnIndexCommand(Logger console) {
+    super(console);
+  }
+
+  @Parameter(description = "<parquet path>")
+  List<String> files;
+
+  @Parameter(names = { "-c", "--column" }, description = "Shows the column/offset indexes for the given column only")
+  List<String> ColumnPaths;
+
+  @Parameter(names = { "-r",
+      "--row-group" }, description = "Shows the column/offset indexes for the given row-groups only; "
+          + "row-groups are referenced by their indexes from 0")
+  List<String> rowGroupIndexes;
+
+  @Parameter(names = { "-i", "--column-index" }, description = "Shows the column indexes; "
+      + "active by default unless -o is used")
+  boolean showColumnIndex;
+
+  @Parameter(names = { "-o", "--offset-index" }, description = "Shows the offset indexes; "
+      + "active by default unless -i is used")
+  boolean showOffsetIndex;
+
+  @Override
+  public List<String> getExamples() {
+    return Lists.newArrayList(
+        "# Show only column indexes for column 'col' from a Parquet file",
+        "-c col -i sample.parquet");
+  }
+
+  @Override
+  public int run() throws IOException {
+    Preconditions.checkArgument(files != null && files.size() >= 1,
+        "A Parquet file is required.");
+    Preconditions.checkArgument(files.size() == 1,
+        "Cannot process multiple Parquet files.");
+
+    InputFile in = HadoopInputFile.fromPath(qualifiedPath(files.get(0)), getConf());
+    if (!showColumnIndex && !showOffsetIndex) {
+      showColumnIndex = true;
+      showOffsetIndex = true;
+    }
+
+    Set<String> rowGroupIndexSet = new HashSet<>();
+    if (rowGroupIndexes != null) {
+      rowGroupIndexSet.addAll(rowGroupIndexes);
+    }
+
+    try (ParquetFileReader reader = ParquetFileReader.open(in)) {
+      boolean firstBlock = true;
+      int rowGroupIndex = 0;
+      for (BlockMetaData block : reader.getFooter().getBlocks()) {
+        if (!rowGroupIndexSet.isEmpty() && !rowGroupIndexSet.contains(Integer.toString(rowGroupIndex))) {
+          ++rowGroupIndex;
+          continue;
+        }
+        if (!firstBlock) {
+          console.info("");
+        }
+        firstBlock = false;
+        console.info("row-group {}:", rowGroupIndex);
+        for (ColumnChunkMetaData column : getColumns(block)) {
+          String path = column.getPath().toDotString();
+          if (showColumnIndex) {
+            console.info("column index for column {}:", path);
+            ColumnIndex columnIndex = reader.readColumnIndex(column);
+            if (columnIndex == null) {
+              console.info("NONE");
+            } else {
+              console.info(columnIndex.toString());
+            }
+          }
+          if (showOffsetIndex) {
+            console.info("offset index for column {}:", path);
+            OffsetIndex offsetIndex = reader.readOffsetIndex(column);
+            if (offsetIndex == null) {
+              console.info("NONE");
+            } else {
+              console.info(offsetIndex.toString());
+            }
+          }
+        }
+        ++rowGroupIndex;
+      }
+    }
+    return 0;
+  }
+
+  private List<ColumnChunkMetaData> getColumns(BlockMetaData block) {
+    List<ColumnChunkMetaData> columns = block.getColumns();
+    if (ColumnPaths == null || ColumnPaths.isEmpty()) {
+      return columns;
+    }
+    Map<String, ColumnChunkMetaData> pathMap = new HashMap<>();
+    for (ColumnChunkMetaData column : columns) {
+      pathMap.put(column.getPath().toDotString(), column);
+    }
+
+    List<ColumnChunkMetaData> filtered = new ArrayList<>();
+    for (String path : ColumnPaths) {
+      ColumnChunkMetaData column = pathMap.get(path);
+      if (column != null) {
+        filtered.add(column);
+      }
+    }
+    return filtered;
+  }
+
+}
diff --git a/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ShowDictionaryCommand.java b/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ShowDictionaryCommand.java
index db427c9c74..20a694ff7f 100644
--- a/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ShowDictionaryCommand.java
+++ b/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ShowDictionaryCommand.java
@@ -30,8 +30,8 @@
 import org.apache.parquet.column.page.DictionaryPage;
 import org.apache.parquet.column.page.DictionaryPageReadStore;
 import org.apache.parquet.hadoop.ParquetFileReader;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
 import org.apache.parquet.schema.MessageType;
-import org.apache.parquet.schema.OriginalType;
 import org.apache.parquet.schema.PrimitiveType;
 import org.slf4j.Logger;
 import java.io.IOException;
@@ -81,7 +81,7 @@ public int run() throws IOException {
       for (int i = 0; i <= dict.getMaxId(); i += 1) {
         switch(type.getPrimitiveTypeName()) {
           case BINARY:
-            if (type.getOriginalType() == OriginalType.UTF8) {
+            if (type.getLogicalTypeAnnotation() instanceof LogicalTypeAnnotation.StringLogicalTypeAnnotation) {
               console.info("{}: {}", String.format("%6d", i),
                   Util.humanReadable(dict.decodeToBinary(i).toStringUsingUTF8(), 70));
             } else {
diff --git a/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ShowPagesCommand.java b/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ShowPagesCommand.java
index 4d0e2c9ba5..1ac03aad7a 100644
--- a/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ShowPagesCommand.java
+++ b/parquet-cli/src/main/java/org/apache/parquet/cli/commands/ShowPagesCommand.java
@@ -193,7 +193,7 @@ public String visit(DataPageV1 page) {
       int count = page.getValueCount();
       String numNulls = page.getStatistics().isNumNullsSet() ? Long.toString(page.getStatistics().getNumNulls()) : "";
       float perValue = ((float) totalSize) / count;
-      String minMax = minMaxAsString(page.getStatistics(), type.getOriginalType());
+      String minMax = minMaxAsString(page.getStatistics());
       return String.format("%3d-%-3d  %-5s %s %-2s %-7d %-10s %-10s %-8s %-7s %s",
           rowGroupNum, pageNum, "data", shortCodec, enc, count, humanReadable(perValue),
           humanReadable(totalSize), "", numNulls, minMax);
@@ -207,7 +207,7 @@ public String visit(DataPageV2 page) {
       int numRows = page.getRowCount();
       int numNulls = page.getNullCount();
       float perValue = ((float) totalSize) / count;
-      String minMax = minMaxAsString(page.getStatistics(), type.getOriginalType());
+      String minMax = minMaxAsString(page.getStatistics());
       String compression = (page.isCompressed() ? shortCodec : "_");
       return String.format("%3d-%-3d  %-5s %s %-2s %-7d %-10s %-10s %-8d %-7s %s",
           rowGroupNum, pageNum, "data", compression, enc, count, humanReadable(perValue),
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/ColumnReader.java b/parquet-column/src/main/java/org/apache/parquet/column/ColumnReader.java
index 52d269ef06..6d93eeed5f 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/ColumnReader.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/ColumnReader.java
@@ -41,7 +41,10 @@ public interface ColumnReader {
 
   /**
    * @return the totalCount of values to be consumed
+   * @deprecated will be removed in 2.0.0; Total values might not be able to be counted before reading the values (e.g.
+   *             in case of column index based filtering)
    */
+  @Deprecated
   long getTotalValueCount();
 
   /**
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java b/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java
index f01888aed8..572c6c9c87 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java
@@ -18,25 +18,25 @@
  */
 package org.apache.parquet.column;
 
-import static org.apache.parquet.bytes.BytesUtils.getWidthFromMaxInt;
-
-import java.util.HashMap;
-
 import org.apache.parquet.Preconditions;
 import org.apache.parquet.bytes.ByteBufferAllocator;
 import org.apache.parquet.bytes.CapacityByteArrayOutputStream;
 import org.apache.parquet.bytes.HeapByteBufferAllocator;
+
+import static org.apache.parquet.bytes.BytesUtils.getWidthFromMaxInt;
 import org.apache.parquet.column.impl.ColumnWriteStoreV1;
 import org.apache.parquet.column.impl.ColumnWriteStoreV2;
 import org.apache.parquet.column.page.PageWriteStore;
 import org.apache.parquet.column.values.ValuesWriter;
 import org.apache.parquet.column.values.bitpacking.DevNullValuesWriter;
 import org.apache.parquet.column.values.factory.DefaultValuesWriterFactory;
-import org.apache.parquet.column.values.factory.ValuesWriterFactory;
 import org.apache.parquet.column.values.rle.RunLengthBitPackingHybridEncoder;
 import org.apache.parquet.column.values.rle.RunLengthBitPackingHybridValuesWriter;
+import org.apache.parquet.column.values.factory.ValuesWriterFactory;
 import org.apache.parquet.schema.MessageType;
 
+import java.util.HashMap;
+
 /**
  * This class represents all the configurable Parquet properties.
  */
@@ -49,8 +49,10 @@ public class ParquetProperties {
   public static final boolean DEFAULT_ESTIMATE_ROW_COUNT_FOR_PAGE_SIZE_CHECK = true;
   public static final int DEFAULT_MINIMUM_RECORD_COUNT_FOR_CHECK = 100;
   public static final int DEFAULT_MAXIMUM_RECORD_COUNT_FOR_CHECK = 10000;
+  public static final int DEFAULT_COLUMN_INDEX_TRUNCATE_LENGTH = 64;
   public static final boolean DEFAULT_BLOOM_FILTER_ENABLED = false;
 
+
   public static final ValuesWriterFactory DEFAULT_VALUES_WRITER_FACTORY = new DefaultValuesWriterFactory();
 
   private static final int MIN_SLAB_SIZE = 64;
@@ -86,12 +88,14 @@ public static WriterVersion fromString(String name) {
   private final boolean estimateNextSizeCheck;
   private final ByteBufferAllocator allocator;
   private final ValuesWriterFactory valuesWriterFactory;
+  private final int columnIndexTruncateLength;
   private final boolean enableBloomFilter;
-  private final HashMap<String, Long> bloomFilterExpectValues;
+  private final HashMap<String, Long> bloomFilterExpectedDistinctNumbers;
 
   private ParquetProperties(WriterVersion writerVersion, int pageSize, int dictPageSize, boolean enableDict, int minRowCountForPageSizeCheck,
                             int maxRowCountForPageSizeCheck, boolean estimateNextSizeCheck, ByteBufferAllocator allocator,
-                            ValuesWriterFactory writerFactory, boolean enableBloomFilter, HashMap<String, Long> bloomFilterExpectValues) {
+                            ValuesWriterFactory writerFactory, int columnIndexMinMaxTruncateLength, boolean enableBloomFilter,
+                            HashMap<String, Long> bloomFilterExpectedDistinctNumber) {
     this.pageSizeThreshold = pageSize;
     this.initialSlabSize = CapacityByteArrayOutputStream
       .initialSlabSizeHeuristic(MIN_SLAB_SIZE, pageSizeThreshold, 10);
@@ -102,9 +106,12 @@ private ParquetProperties(WriterVersion writerVersion, int pageSize, int dictPag
     this.maxRowCountForPageSizeCheck = maxRowCountForPageSizeCheck;
     this.estimateNextSizeCheck = estimateNextSizeCheck;
     this.allocator = allocator;
-    this.enableBloomFilter = enableBloomFilter;
-    this.bloomFilterExpectValues = bloomFilterExpectValues;
+
     this.valuesWriterFactory = writerFactory;
+    this.columnIndexTruncateLength = columnIndexMinMaxTruncateLength;
+
+    this.enableBloomFilter = enableBloomFilter;
+    this.bloomFilterExpectedDistinctNumbers = bloomFilterExpectedDistinctNumber;
   }
 
   public ValuesWriter newRepetitionLevelWriter(ColumnDescriptor path) {
@@ -165,19 +172,11 @@ public ByteBufferAllocator getAllocator() {
     return allocator;
   }
 
-  public boolean isBloomFilterEnabled() {
-    return enableBloomFilter;
-  }
-
-  public HashMap<String, Long> getBloomFilterExpectValues() {
-    return bloomFilterExpectValues;
-  }
-
   public ColumnWriteStore newColumnWriteStore(MessageType schema,
                                               PageWriteStore pageStore) {
     switch (writerVersion) {
     case PARQUET_1_0:
-      return new ColumnWriteStoreV1(pageStore, this);
+      return new ColumnWriteStoreV1(schema, pageStore, this);
     case PARQUET_2_0:
       return new ColumnWriteStoreV2(schema, pageStore, this);
     default:
@@ -197,10 +196,22 @@ public ValuesWriterFactory getValuesWriterFactory() {
     return valuesWriterFactory;
   }
 
+  public int getColumnIndexTruncateLength() {
+    return columnIndexTruncateLength;
+  }
+
   public boolean estimateNextSizeCheck() {
     return estimateNextSizeCheck;
   }
 
+  public boolean isBloomFilterEnabled() {
+    return enableBloomFilter;
+  }
+
+  public HashMap<String, Long> getBloomFilterExpectedDistinctNumbers() {
+    return bloomFilterExpectedDistinctNumbers;
+  }
+
   public static Builder builder() {
     return new Builder();
   }
@@ -213,14 +224,15 @@ public static class Builder {
     private int pageSize = DEFAULT_PAGE_SIZE;
     private int dictPageSize = DEFAULT_DICTIONARY_PAGE_SIZE;
     private boolean enableDict = DEFAULT_IS_DICTIONARY_ENABLED;
-    private boolean enableBloomFilter = DEFAULT_BLOOM_FILTER_ENABLED;
-    private HashMap<String, Long> bloomFilterExpectValues = new HashMap<>();
     private WriterVersion writerVersion = DEFAULT_WRITER_VERSION;
     private int minRowCountForPageSizeCheck = DEFAULT_MINIMUM_RECORD_COUNT_FOR_CHECK;
     private int maxRowCountForPageSizeCheck = DEFAULT_MAXIMUM_RECORD_COUNT_FOR_CHECK;
     private boolean estimateNextSizeCheck = DEFAULT_ESTIMATE_ROW_COUNT_FOR_PAGE_SIZE_CHECK;
     private ByteBufferAllocator allocator = new HeapByteBufferAllocator();
     private ValuesWriterFactory valuesWriterFactory = DEFAULT_VALUES_WRITER_FACTORY;
+    private int columnIndexTruncateLength = DEFAULT_COLUMN_INDEX_TRUNCATE_LENGTH;
+    private boolean enableBloomFilter = DEFAULT_BLOOM_FILTER_ENABLED;
+    private HashMap<String, Long> bloomFilterExpectedDistinctNumbers = new HashMap<>();
 
     private Builder() {
     }
@@ -234,7 +246,7 @@ private Builder(ParquetProperties toCopy) {
       this.estimateNextSizeCheck = toCopy.estimateNextSizeCheck;
       this.allocator = toCopy.allocator;
       this.enableBloomFilter = toCopy.enableBloomFilter;
-      this.bloomFilterExpectValues = toCopy.bloomFilterExpectValues;
+      this.bloomFilterExpectedDistinctNumbers = toCopy.bloomFilterExpectedDistinctNumbers;
     }
 
     /**
@@ -274,38 +286,6 @@ public Builder withDictionaryPageSize(int dictionaryPageSize) {
       return this;
     }
 
-    /**
-     * Set to enable Bloom filter.
-     *
-     * @param enableBloomFilter a boolean to indicate whether to enable Bloom filter.
-     * @return this builder for method chaining.
-     */
-    public Builder withBloomFilterEnabled(boolean enableBloomFilter) {
-      this.enableBloomFilter = enableBloomFilter;
-      return this;
-    }
-
-    /**
-     * Set Bloom filter info for columns.
-     *
-     * @param bloomFilterColumnNames the columns to be enabled for Bloom filter
-     * @param bloomFilterDistinctNumbers the expected distinct number of values corresponding to columns
-     * @return this builder for method chaining
-     */
-    public Builder withBloomFilterInfo(String bloomFilterColumnNames, String bloomFilterDistinctNumbers) {
-      String[] columnNames = bloomFilterColumnNames.split(",");
-      String[] expectedDistinctNumber = bloomFilterDistinctNumbers.split(",");
-
-      Preconditions.checkArgument(columnNames.length == expectedDistinctNumber.length,
-          "Column names are not matched to sizes");
-
-      for (int i = 0; i < columnNames.length; i++) {
-        this.bloomFilterExpectValues.put(columnNames[i], Long.getLong(expectedDistinctNumber[i]));
-      }
-
-      return this;
-    }
-
     /**
      * Set the {@link WriterVersion format version}.
      *
@@ -349,12 +329,47 @@ public Builder withValuesWriterFactory(ValuesWriterFactory factory) {
       return this;
     }
 
+    public Builder withColumnIndexTruncateLength(int length) {
+      Preconditions.checkArgument(length > 0, "Invalid column index min/max truncate length (negative) : %s", length);
+      this.columnIndexTruncateLength = length;
+      return this;
+    }
+
+    /**
+     * Set to enable Bloom filter.
+     *
+     * @param enableBloomFilter a boolean to indicate whether to enable Bloom filter.
+     * @return this builder for method chaining.
+     */
+    public Builder withBloomFilterEnabled(boolean enableBloomFilter) {
+      this.enableBloomFilter = enableBloomFilter;
+      return this;
+    }
+    /**
+     * Set Bloom filter info for columns; both arguments are comma-separated lists paired by position.
+     *
+     * @param bloomFilterColumnNames the columns to be enabled for Bloom filter
+     * @param bloomFilterDistinctNumbers the expected distinct number of values corresponding to columns
+     * @return this builder for method chaining
+     */
+    public Builder withBloomFilterInfo(String bloomFilterColumnNames, String bloomFilterDistinctNumbers) {
+      String[] columnNames = bloomFilterColumnNames.split(",");
+      String[] expectedDistinctNumber = bloomFilterDistinctNumbers.split(",");
+      Preconditions.checkArgument(columnNames.length == expectedDistinctNumber.length, "Column names are not matched to sizes");
+      for (int i = 0; i < columnNames.length; i++) {
+        // Parse the text itself; Long.getLong would look up a system property of that name and yield null.
+        this.bloomFilterExpectedDistinctNumbers.put(columnNames[i], Long.parseLong(expectedDistinctNumber[i].trim()));
+      }
+      return this;
+    }
+
+
     public ParquetProperties build() {
       ParquetProperties properties =
         new ParquetProperties(writerVersion, pageSize, dictPageSize,
           enableDict, minRowCountForPageSizeCheck, maxRowCountForPageSizeCheck,
-          estimateNextSizeCheck, allocator, valuesWriterFactory,
-          enableBloomFilter, bloomFilterExpectValues);
+          estimateNextSizeCheck, allocator, valuesWriterFactory, columnIndexTruncateLength,
+          enableBloomFilter, bloomFilterExpectedDistinctNumbers);
       // we pass a constructed but uninitialized factory to ParquetProperties above as currently
       // creation of ValuesWriters is invoked from within ParquetProperties. In the future
       // we'd like to decouple that and won't need to pass an object to properties and then pass the
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReadStoreImpl.java b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReadStoreImpl.java
index 37845961ad..b7e159775f 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReadStoreImpl.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReadStoreImpl.java
@@ -18,6 +18,9 @@
  */
 package org.apache.parquet.column.impl;
 
+import java.util.Optional;
+import java.util.PrimitiveIterator;
+
 import org.apache.parquet.VersionParser;
 import org.apache.parquet.VersionParser.ParsedVersion;
 import org.apache.parquet.VersionParser.VersionParseException;
@@ -72,10 +75,17 @@ public ColumnReadStoreImpl(PageReadStore pageReadStore,
 
   @Override
   public ColumnReader getColumnReader(ColumnDescriptor path) {
-    return newMemColumnReader(path, pageReadStore.getPageReader(path));
+    PrimitiveConverter converter = getPrimitiveConverter(path);
+    PageReader pageReader = pageReadStore.getPageReader(path);
+    Optional<PrimitiveIterator.OfLong> rowIndexes = pageReadStore.getRowIndexes();
+    if (rowIndexes.isPresent()) {
+      return new SynchronizingColumnReader(path, pageReader, converter, writerVersion, rowIndexes.get());
+    } else {
+      return new ColumnReaderImpl(path, pageReader, converter, writerVersion);
+    }
   }
 
-  private ColumnReaderImpl newMemColumnReader(ColumnDescriptor path, PageReader pageReader) {
+  public ColumnReaderImpl newMemColumnReader(ColumnDescriptor path, PageReader pageReader) {
     PrimitiveConverter converter = getPrimitiveConverter(path);
     return new ColumnReaderImpl(path, pageReader, converter, writerVersion);
   }
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReaderBase.java b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReaderBase.java
new file mode 100644
index 0000000000..c929431c64
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReaderBase.java
@@ -0,0 +1,760 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.column.impl;
+
+import static java.lang.String.format;
+import static org.apache.parquet.Preconditions.checkNotNull;
+import static org.apache.parquet.column.ValuesType.DEFINITION_LEVEL;
+import static org.apache.parquet.column.ValuesType.REPETITION_LEVEL;
+import static org.apache.parquet.column.ValuesType.VALUES;
+
+import java.io.IOException;
+
+import org.apache.parquet.CorruptDeltaByteArrays;
+import org.apache.parquet.VersionParser.ParsedVersion;
+import org.apache.parquet.bytes.ByteBufferInputStream;
+import org.apache.parquet.bytes.BytesInput;
+import org.apache.parquet.bytes.BytesUtils;
+import org.apache.parquet.column.ColumnDescriptor;
+import org.apache.parquet.column.ColumnReader;
+import org.apache.parquet.column.Dictionary;
+import org.apache.parquet.column.Encoding;
+import org.apache.parquet.column.page.DataPage;
+import org.apache.parquet.column.page.DataPageV1;
+import org.apache.parquet.column.page.DataPageV2;
+import org.apache.parquet.column.page.DictionaryPage;
+import org.apache.parquet.column.page.PageReader;
+import org.apache.parquet.column.values.RequiresPreviousReader;
+import org.apache.parquet.column.values.ValuesReader;
+import org.apache.parquet.column.values.rle.RunLengthBitPackingHybridDecoder;
+import org.apache.parquet.io.ParquetDecodingException;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.io.api.PrimitiveConverter;
+import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
+import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeNameConverter;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Base superclass for {@link ColumnReader} implementations.
+ */
+abstract class ColumnReaderBase implements ColumnReader {
+  private static final Logger LOG = LoggerFactory.getLogger(ColumnReaderBase.class);
+
+  /**
+   * binds the lower level page decoder to the record converter materializing the records
+   */
+  private static abstract class Binding {
+
+    /**
+     * read one value from the underlying page
+     */
+    abstract void read();
+
+    /**
+     * skip one value from the underlying page
+     */
+    abstract void skip();
+
+    /**
+     * Skips n values from the underlying page
+     *
+     * @param n
+     *          the number of values to be skipped
+     */
+    abstract void skip(int n);
+
+    /**
+     * write current value to converter
+     */
+    abstract void writeValue();
+
+    /**
+     * @return the dictionary id of the current value (unsupported unless a subclass overrides this)
+     */
+    public int getDictionaryId() {
+      throw new UnsupportedOperationException();
+    }
+
+    /**
+     * @return current value
+     */
+    public int getInteger() {
+      throw new UnsupportedOperationException();
+    }
+
+    /**
+     * @return current value
+     */
+    public boolean getBoolean() {
+      throw new UnsupportedOperationException();
+    }
+
+    /**
+     * @return current value
+     */
+    public long getLong() {
+      throw new UnsupportedOperationException();
+    }
+
+    /**
+     * @return current value
+     */
+    public Binary getBinary() {
+      throw new UnsupportedOperationException();
+    }
+
+    /**
+     * @return current value
+     */
+    public float getFloat() {
+      throw new UnsupportedOperationException();
+    }
+
+    /**
+     * @return current value
+     */
+    public double getDouble() {
+      throw new UnsupportedOperationException();
+    }
+  }
+
+  private final ParsedVersion writerVersion;
+  private final ColumnDescriptor path;
+  private final long totalValueCount;
+  private final PageReader pageReader;
+  private final Dictionary dictionary;
+
+  private IntIterator repetitionLevelColumn;
+  private IntIterator definitionLevelColumn;
+  protected ValuesReader dataColumn;
+  private Encoding currentEncoding;
+
+  private int repetitionLevel;
+  private int definitionLevel;
+  private int dictionaryId;
+
+  private long endOfPageValueCount;
+  private long readValues = 0;
+  private int pageValueCount = 0;
+
+  private final PrimitiveConverter converter;
+  private Binding binding;
+  private final int maxDefinitionLevel;
+
+  // this is needed because we will attempt to read the value twice when filtering
+  // TODO: rework that
+  private boolean valueRead;
+
+  private void bindToDictionary(final Dictionary dictionary) {
+    binding =
+        new Binding() {
+          void read() {
+            dictionaryId = dataColumn.readValueDictionaryId();
+          }
+          public void skip() {
+            dataColumn.skip();
+          }
+          @Override
+          void skip(int n) {
+            dataColumn.skip(n);
+          }
+          public int getDictionaryId() {
+            return dictionaryId;
+          }
+          void writeValue() {
+            converter.addValueFromDictionary(dictionaryId);
+          }
+          public int getInteger() {
+            return dictionary.decodeToInt(dictionaryId);
+          }
+          public boolean getBoolean() {
+            return dictionary.decodeToBoolean(dictionaryId);
+          }
+          public long getLong() {
+            return dictionary.decodeToLong(dictionaryId);
+          }
+          public Binary getBinary() {
+            return dictionary.decodeToBinary(dictionaryId);
+          }
+          public float getFloat() {
+            return dictionary.decodeToFloat(dictionaryId);
+          }
+          public double getDouble() {
+            return dictionary.decodeToDouble(dictionaryId);
+          }
+        };
+  }
+
+  private void bind(PrimitiveTypeName type) {
+    binding = type.convert(new PrimitiveTypeNameConverter<Binding, RuntimeException>() {
+      @Override
+      public Binding convertFLOAT(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
+        return new Binding() {
+          float current;
+          void read() {
+            current = dataColumn.readFloat();
+          }
+          public void skip() {
+            current = 0;
+            dataColumn.skip();
+          }
+          @Override
+          void skip(int n) {
+            current = 0;
+            dataColumn.skip(n);
+          }
+          public float getFloat() {
+            return current;
+          }
+          void writeValue() {
+            converter.addFloat(current);
+          }
+        };
+      }
+      @Override
+      public Binding convertDOUBLE(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
+        return new Binding() {
+          double current;
+          void read() {
+            current = dataColumn.readDouble();
+          }
+          public void skip() {
+            current = 0;
+            dataColumn.skip();
+          }
+          @Override
+          void skip(int n) {
+            current = 0;
+            dataColumn.skip(n);
+          }
+          public double getDouble() {
+            return current;
+          }
+          void writeValue() {
+            converter.addDouble(current);
+          }
+        };
+      }
+      @Override
+      public Binding convertINT32(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
+        return new Binding() {
+          int current;
+          void read() {
+            current = dataColumn.readInteger();
+          }
+          public void skip() {
+            current = 0;
+            dataColumn.skip();
+          }
+          @Override
+          void skip(int n) {
+            current = 0;
+            dataColumn.skip(n);
+          }
+          @Override
+          public int getInteger() {
+            return current;
+          }
+          void writeValue() {
+            converter.addInt(current);
+          }
+        };
+      }
+      @Override
+      public Binding convertINT64(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
+        return new Binding() {
+          long current;
+          void read() {
+            current = dataColumn.readLong();
+          }
+          public void skip() {
+            current = 0;
+            dataColumn.skip();
+          }
+          @Override
+          void skip(int n) {
+            current = 0;
+            dataColumn.skip(n);
+          }
+          @Override
+          public long getLong() {
+            return current;
+          }
+          void writeValue() {
+            converter.addLong(current);
+          }
+        };
+      }
+      @Override
+      public Binding convertINT96(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
+        return this.convertBINARY(primitiveTypeName);
+      }
+      @Override
+      public Binding convertFIXED_LEN_BYTE_ARRAY(
+          PrimitiveTypeName primitiveTypeName) throws RuntimeException {
+        return this.convertBINARY(primitiveTypeName);
+      }
+      @Override
+      public Binding convertBOOLEAN(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
+        return new Binding() {
+          boolean current;
+          void read() {
+            current = dataColumn.readBoolean();
+          }
+          public void skip() {
+            current = false;
+            dataColumn.skip();
+          }
+          @Override
+          void skip(int n) {
+            current = false;
+            dataColumn.skip(n);
+          }
+          @Override
+          public boolean getBoolean() {
+            return current;
+          }
+          void writeValue() {
+            converter.addBoolean(current);
+          }
+        };
+      }
+      @Override
+      public Binding convertBINARY(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
+        return new Binding() {
+          Binary current;
+          void read() {
+            current = dataColumn.readBytes();
+          }
+          public void skip() {
+            current = null;
+            dataColumn.skip();
+          }
+          @Override
+          void skip(int n) {
+            current = null;
+            dataColumn.skip(n);
+          }
+          @Override
+          public Binary getBinary() {
+            return current;
+          }
+          void writeValue() {
+            converter.addBinary(current);
+          }
+        };
+      }
+    });
+  }
+
+  /**
+   * creates a reader for triplets
+   * @param path the descriptor for the corresponding column
+   * @param pageReader the underlying store to read from
+   * @param converter a converter that materializes the values in this column in the current record
+   * @param writerVersion writer version string from the Parquet file being read
+   */
+  ColumnReaderBase(ColumnDescriptor path, PageReader pageReader, PrimitiveConverter converter, ParsedVersion writerVersion) {
+    this.path = checkNotNull(path, "path");
+    this.pageReader = checkNotNull(pageReader, "pageReader");
+    this.converter = checkNotNull(converter, "converter");
+    this.writerVersion = writerVersion;
+    this.maxDefinitionLevel = path.getMaxDefinitionLevel();
+    DictionaryPage dictionaryPage = pageReader.readDictionaryPage();
+    if (dictionaryPage != null) {
+      try {
+        this.dictionary = dictionaryPage.getEncoding().initDictionary(path, dictionaryPage);
+        if (converter.hasDictionarySupport()) {
+          converter.setDictionary(dictionary);
+        }
+      } catch (IOException e) {
+        throw new ParquetDecodingException("could not decode the dictionary for " + path, e);
+      }
+    } else {
+      this.dictionary = null;
+    }
+    this.totalValueCount = pageReader.getTotalValueCount();
+    if (totalValueCount <= 0) {
+      throw new ParquetDecodingException("totalValueCount '" + totalValueCount + "' <= 0");
+    }
+  }
+
+  boolean isFullyConsumed() {
+    return readValues >= totalValueCount;
+  }
+
+  /**
+   * {@inheritDoc}
+   * @see org.apache.parquet.column.ColumnReader#writeCurrentValueToConverter()
+   */
+  @Override
+  public void writeCurrentValueToConverter() {
+    readValue();
+    this.binding.writeValue();
+  }
+
+  @Override
+  public int getCurrentValueDictionaryID() {
+    readValue();
+    return binding.getDictionaryId();
+  }
+
+  /**
+   * {@inheritDoc}
+   * @see org.apache.parquet.column.ColumnReader#getInteger()
+   */
+  @Override
+  public int getInteger() {
+    readValue();
+    return this.binding.getInteger();
+  }
+
+  /**
+   * {@inheritDoc}
+   * @see org.apache.parquet.column.ColumnReader#getBoolean()
+   */
+  @Override
+  public boolean getBoolean() {
+    readValue();
+    return this.binding.getBoolean();
+  }
+
+  /**
+   * {@inheritDoc}
+   * @see org.apache.parquet.column.ColumnReader#getLong()
+   */
+  @Override
+  public long getLong() {
+    readValue();
+    return this.binding.getLong();
+  }
+
+  /**
+   * {@inheritDoc}
+   * @see org.apache.parquet.column.ColumnReader#getBinary()
+   */
+  @Override
+  public Binary getBinary() {
+    readValue();
+    return this.binding.getBinary();
+  }
+
+  /**
+   * {@inheritDoc}
+   * @see org.apache.parquet.column.ColumnReader#getFloat()
+   */
+  @Override
+  public float getFloat() {
+    readValue();
+    return this.binding.getFloat();
+  }
+
+  /**
+   * {@inheritDoc}
+   * @see org.apache.parquet.column.ColumnReader#getDouble()
+   */
+  @Override
+  public double getDouble() {
+    readValue();
+    return this.binding.getDouble();
+  }
+
+  /**
+   * {@inheritDoc}
+   * @see org.apache.parquet.column.ColumnReader#getCurrentRepetitionLevel()
+   */
+  @Override
+  public int getCurrentRepetitionLevel() {
+    return repetitionLevel;
+  }
+
+  /**
+   * {@inheritDoc}
+   * @see org.apache.parquet.column.ColumnReader#getDescriptor()
+   */
+  @Override
+  public ColumnDescriptor getDescriptor() {
+    return path;
+  }
+
+  /**
+   * Reads the value into the binding.
+   */
+  public void readValue() {
+    try {
+      if (!valueRead) {
+        binding.read();
+        valueRead = true;
+      }
+    } catch (RuntimeException e) {
+      if (CorruptDeltaByteArrays.requiresSequentialReads(writerVersion, currentEncoding) &&
+          e instanceof ArrayIndexOutOfBoundsException) {
+        // this is probably PARQUET-246, which may happen if reading data with
+        // MR because this can't be detected without reading all footers
+        throw new ParquetDecodingException("Read failure possibly due to " +
+            "PARQUET-246: try setting parquet.split.files to false",
+            new ParquetDecodingException(
+                format("Can't read value in column %s at value %d out of %d, " +
+                        "%d out of %d in currentPage. repetition level: " +
+                        "%d, definition level: %d",
+                    path, readValues, totalValueCount,
+                    readValues - (endOfPageValueCount - pageValueCount),
+                    pageValueCount, repetitionLevel, definitionLevel),
+                e));
+      }
+      throw new ParquetDecodingException(
+          format("Can't read value in column %s at value %d out of %d, " +
+                  "%d out of %d in currentPage. repetition level: " +
+                  "%d, definition level: %d",
+              path, readValues, totalValueCount,
+              readValues - (endOfPageValueCount - pageValueCount),
+              pageValueCount, repetitionLevel, definitionLevel),
+          e);
+    }
+  }
+
+  /**
+   * {@inheritDoc}
+   * @see org.apache.parquet.column.ColumnReader#skip()
+   */
+  @Override
+  public void skip() {
+    if (!valueRead) {
+      binding.skip();
+      valueRead = true;
+    }
+  }
+
+  /**
+   * {@inheritDoc}
+   * @see org.apache.parquet.column.ColumnReader#getCurrentDefinitionLevel()
+   */
+  @Override
+  public int getCurrentDefinitionLevel() {
+    return definitionLevel;
+  }
+
+  private void checkRead() {
+    int rl, dl;
+    int skipValues = 0;
+    for (;;) {
+      if (isPageFullyConsumed()) {
+        if (isFullyConsumed()) {
+          LOG.debug("end reached");
+          repetitionLevel = 0; // the next repetition level
+          return;
+        }
+        readPage();
+        skipValues = 0;
+      }
+      rl = repetitionLevelColumn.nextInt();
+      dl = definitionLevelColumn.nextInt();
+      ++readValues;
+      if (!skipRL(rl)) {
+        break;
+      }
+      if (dl == maxDefinitionLevel) {
+        ++skipValues;
+      }
+    }
+    binding.skip(skipValues);
+    repetitionLevel = rl;
+    definitionLevel = dl;
+  }
+
+  /*
+   * Returns whether the current levels / value shall be skipped based on the specified repetition level.
+   */
+  abstract boolean skipRL(int rl);
+
+  private void readPage() {
+    LOG.debug("loading page");
+    DataPage page = pageReader.readPage();
+    page.accept(new DataPage.Visitor<Void>() {
+      @Override
+      public Void visit(DataPageV1 dataPageV1) {
+        readPageV1(dataPageV1);
+        return null;
+      }
+      @Override
+      public Void visit(DataPageV2 dataPageV2) {
+        readPageV2(dataPageV2);
+        return null;
+      }
+    });
+  }
+
+  private void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, int valueCount) {
+    ValuesReader previousReader = this.dataColumn;
+
+    this.currentEncoding = dataEncoding;
+    this.pageValueCount = valueCount;
+    this.endOfPageValueCount = readValues + pageValueCount;
+
+    if (dataEncoding.usesDictionary()) {
+      if (dictionary == null) {
+        throw new ParquetDecodingException(
+            "could not read page in col " + path + " as the dictionary was missing for encoding " + dataEncoding);
+      }
+      this.dataColumn = dataEncoding.getDictionaryBasedValuesReader(path, VALUES, dictionary);
+    } else {
+      this.dataColumn = dataEncoding.getValuesReader(path, VALUES);
+    }
+
+    if (dataEncoding.usesDictionary() && converter.hasDictionarySupport()) {
+      bindToDictionary(dictionary);
+    } else {
+      bind(path.getType());
+    }
+
+    try {
+      dataColumn.initFromPage(pageValueCount, in);
+    } catch (IOException e) {
+      throw new ParquetDecodingException("could not read page in col " + path, e);
+    }
+
+    if (CorruptDeltaByteArrays.requiresSequentialReads(writerVersion, dataEncoding) &&
+        previousReader != null && previousReader instanceof RequiresPreviousReader) {
+      // previous reader can only be set if reading sequentially
+      ((RequiresPreviousReader) dataColumn).setPreviousReader(previousReader);
+    }
+  }
+
+  private void readPageV1(DataPageV1 page) {
+    ValuesReader rlReader = page.getRlEncoding().getValuesReader(path, REPETITION_LEVEL);
+    ValuesReader dlReader = page.getDlEncoding().getValuesReader(path, DEFINITION_LEVEL);
+    this.repetitionLevelColumn = new ValuesReaderIntIterator(rlReader);
+    this.definitionLevelColumn = new ValuesReaderIntIterator(dlReader);
+    int valueCount = page.getValueCount();
+    try {
+      BytesInput bytes = page.getBytes();
+      LOG.debug("page size {} bytes and {} values", bytes.size(), valueCount);
+      LOG.debug("reading repetition levels at 0");
+      ByteBufferInputStream in = bytes.toInputStream();
+      rlReader.initFromPage(valueCount, in);
+      LOG.debug("reading definition levels at {}", in.position());
+      dlReader.initFromPage(valueCount, in);
+      LOG.debug("reading data at {}", in.position());
+      initDataReader(page.getValueEncoding(), in, valueCount);
+    } catch (IOException e) {
+      throw new ParquetDecodingException("could not read page " + page + " in col " + path, e);
+    }
+    newPageInitialized(page);
+  }
+
+  private void readPageV2(DataPageV2 page) {
+    this.repetitionLevelColumn = newRLEIterator(path.getMaxRepetitionLevel(), page.getRepetitionLevels());
+    this.definitionLevelColumn = newRLEIterator(path.getMaxDefinitionLevel(), page.getDefinitionLevels());
+    int valueCount = page.getValueCount();
+    LOG.debug("page data size {} bytes and {} values", page.getData().size(), valueCount);
+    try {
+      initDataReader(page.getDataEncoding(), page.getData().toInputStream(), valueCount);
+    } catch (IOException e) {
+      throw new ParquetDecodingException("could not read page " + page + " in col " + path, e);
+    }
+    newPageInitialized(page);
+  }
+
+  final int getPageValueCount() {
+    return pageValueCount;
+  }
+
+  abstract void newPageInitialized(DataPage page);
+
+  private IntIterator newRLEIterator(int maxLevel, BytesInput bytes) {
+    try {
+      if (maxLevel == 0) {
+        return new NullIntIterator();
+      }
+      return new RLEIntIterator(
+          new RunLengthBitPackingHybridDecoder(
+              BytesUtils.getWidthFromMaxInt(maxLevel),
+              bytes.toInputStream()));
+    } catch (IOException e) {
+      throw new ParquetDecodingException("could not read levels in page for col " + path, e);
+    }
+  }
+
+  boolean isPageFullyConsumed() {
+    return readValues >= endOfPageValueCount;
+  }
+
+  /**
+   * {@inheritDoc}
+   * @see org.apache.parquet.column.ColumnReader#consume()
+   */
+  @Override
+  public void consume() {
+    checkRead();
+    valueRead = false;
+  }
+
+  /**
+   * {@inheritDoc}
+   * @see org.apache.parquet.column.ColumnReader#getTotalValueCount()
+   */
+  @Deprecated
+  @Override
+  public long getTotalValueCount() {
+    return totalValueCount;
+  }
+
+  static abstract class IntIterator {
+    abstract int nextInt();
+  }
+
+  static class ValuesReaderIntIterator extends IntIterator {
+    ValuesReader delegate;
+
+    public ValuesReaderIntIterator(ValuesReader delegate) {
+      super();
+      this.delegate = delegate;
+    }
+
+    @Override
+    int nextInt() {
+      return delegate.readInteger();
+    }
+  }
+
+  static class RLEIntIterator extends IntIterator {
+    RunLengthBitPackingHybridDecoder delegate;
+
+    public RLEIntIterator(RunLengthBitPackingHybridDecoder delegate) {
+      this.delegate = delegate;
+    }
+
+    @Override
+    int nextInt() {
+      try {
+        return delegate.readInt();
+      } catch (IOException e) {
+        throw new ParquetDecodingException(e);
+      }
+    }
+  }
+
+  private static final class NullIntIterator extends IntIterator {
+    @Override
+    int nextInt() {
+      return 0;
+    }
+  }
+}
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReaderImpl.java b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReaderImpl.java
index 8c85b37f8e..0413d621c1 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReaderImpl.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnReaderImpl.java
@@ -1,4 +1,4 @@
-/* 
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -18,675 +18,41 @@
  */
 package org.apache.parquet.column.impl;
 
-import static java.lang.String.format;
-import static org.apache.parquet.Preconditions.checkNotNull;
-import static org.apache.parquet.column.ValuesType.DEFINITION_LEVEL;
-import static org.apache.parquet.column.ValuesType.REPETITION_LEVEL;
-import static org.apache.parquet.column.ValuesType.VALUES;
-
-import java.io.IOException;
-
-import org.apache.parquet.CorruptDeltaByteArrays;
 import org.apache.parquet.VersionParser.ParsedVersion;
-import org.apache.parquet.bytes.ByteBufferInputStream;
-import org.apache.parquet.bytes.BytesInput;
-import org.apache.parquet.bytes.BytesUtils;
 import org.apache.parquet.column.ColumnDescriptor;
-import org.apache.parquet.column.ColumnReader;
-import org.apache.parquet.column.Dictionary;
-import org.apache.parquet.column.Encoding;
 import org.apache.parquet.column.page.DataPage;
-import org.apache.parquet.column.page.DataPageV1;
-import org.apache.parquet.column.page.DataPageV2;
-import org.apache.parquet.column.page.DictionaryPage;
 import org.apache.parquet.column.page.PageReader;
-import org.apache.parquet.column.values.RequiresPreviousReader;
-import org.apache.parquet.column.values.ValuesReader;
-import org.apache.parquet.column.values.rle.RunLengthBitPackingHybridDecoder;
-import org.apache.parquet.io.ParquetDecodingException;
-import org.apache.parquet.io.api.Binary;
 import org.apache.parquet.io.api.PrimitiveConverter;
-import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
-import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeNameConverter;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 /**
- * ColumnReader implementation
+ * ColumnReader implementation for the scenario when column indexes are not used (all values are read)
  */
-public class ColumnReaderImpl implements ColumnReader {
-  private static final Logger LOG = LoggerFactory.getLogger(ColumnReaderImpl.class);
-
-  /**
-   * binds the lower level page decoder to the record converter materializing the records
-   */
-  private static abstract class Binding {
-
-    /**
-     * read one value from the underlying page
-     */
-    abstract void read();
-
-    /**
-     * skip one value from the underlying page
-     */
-    abstract void skip();
-
-    /**
-     * write current value to converter
-     */
-    abstract void writeValue();
-
-    /**
-     * @return current value
-     */
-    public int getDictionaryId() {
-      throw new UnsupportedOperationException();
-    }
-
-    /**
-     * @return current value
-     */
-    public int getInteger() {
-      throw new UnsupportedOperationException();
-    }
-
-    /**
-     * @return current value
-     */
-    public boolean getBoolean() {
-      throw new UnsupportedOperationException();
-    }
-
-    /**
-     * @return current value
-     */
-    public long getLong() {
-      throw new UnsupportedOperationException();
-    }
-
-    /**
-     * @return current value
-     */
-    public Binary getBinary() {
-      throw new UnsupportedOperationException();
-    }
-
-    /**
-     * @return current value
-     */
-    public float getFloat() {
-      throw new UnsupportedOperationException();
-    }
-
-    /**
-     * @return current value
-     */
-    public double getDouble() {
-      throw new UnsupportedOperationException();
-    }
-  }
-
-  private final ParsedVersion writerVersion;
-  private final ColumnDescriptor path;
-  private final long totalValueCount;
-  private final PageReader pageReader;
-  private final Dictionary dictionary;
-
-  private IntIterator repetitionLevelColumn;
-  private IntIterator definitionLevelColumn;
-  protected ValuesReader dataColumn;
-  private Encoding currentEncoding;
-
-  private int repetitionLevel;
-  private int definitionLevel;
-  private int dictionaryId;
-
-  private long endOfPageValueCount;
-  private long readValues = 0;
-  private int pageValueCount = 0;
-
-  private final PrimitiveConverter converter;
-  private Binding binding;
-
-  // this is needed because we will attempt to read the value twice when filtering
-  // TODO: rework that
-  private boolean valueRead;
-
-  private void bindToDictionary(final Dictionary dictionary) {
-    binding =
-        new Binding() {
-          void read() {
-            dictionaryId = dataColumn.readValueDictionaryId();
-          }
-          public void skip() {
-            dataColumn.skip();
-          }
-          public int getDictionaryId() {
-            return dictionaryId;
-          }
-          void writeValue() {
-            converter.addValueFromDictionary(dictionaryId);
-          }
-          public int getInteger() {
-            return dictionary.decodeToInt(dictionaryId);
-          }
-          public boolean getBoolean() {
-            return dictionary.decodeToBoolean(dictionaryId);
-          }
-          public long getLong() {
-            return dictionary.decodeToLong(dictionaryId);
-          }
-          public Binary getBinary() {
-            return dictionary.decodeToBinary(dictionaryId);
-          }
-          public float getFloat() {
-            return dictionary.decodeToFloat(dictionaryId);
-          }
-          public double getDouble() {
-            return dictionary.decodeToDouble(dictionaryId);
-          }
-        };
-  }
-
-  private void bind(PrimitiveTypeName type) {
-    binding = type.convert(new PrimitiveTypeNameConverter<Binding, RuntimeException>() {
-      @Override
-      public Binding convertFLOAT(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
-        return new Binding() {
-          float current;
-          void read() {
-            current = dataColumn.readFloat();
-          }
-          public void skip() {
-            current = 0;
-            dataColumn.skip();
-          }
-          public float getFloat() {
-            return current;
-          }
-          void writeValue() {
-            converter.addFloat(current);
-          }
-        };
-      }
-      @Override
-      public Binding convertDOUBLE(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
-        return new Binding() {
-          double current;
-          void read() {
-            current = dataColumn.readDouble();
-          }
-          public void skip() {
-            current = 0;
-            dataColumn.skip();
-          }
-          public double getDouble() {
-            return current;
-          }
-          void writeValue() {
-            converter.addDouble(current);
-          }
-        };
-      }
-      @Override
-      public Binding convertINT32(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
-        return new Binding() {
-          int current;
-          void read() {
-            current = dataColumn.readInteger();
-          }
-          public void skip() {
-            current = 0;
-            dataColumn.skip();
-          }
-          @Override
-          public int getInteger() {
-            return current;
-          }
-          void writeValue() {
-            converter.addInt(current);
-          }
-        };
-      }
-      @Override
-      public Binding convertINT64(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
-        return new Binding() {
-          long current;
-          void read() {
-            current = dataColumn.readLong();
-          }
-          public void skip() {
-            current = 0;
-            dataColumn.skip();
-          }
-          @Override
-          public long getLong() {
-            return current;
-          }
-          void writeValue() {
-            converter.addLong(current);
-          }
-        };
-      }
-      @Override
-      public Binding convertINT96(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
-        return this.convertBINARY(primitiveTypeName);
-      }
-      @Override
-      public Binding convertFIXED_LEN_BYTE_ARRAY(
-          PrimitiveTypeName primitiveTypeName) throws RuntimeException {
-        return this.convertBINARY(primitiveTypeName);
-      }
-      @Override
-      public Binding convertBOOLEAN(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
-        return new Binding() {
-          boolean current;
-          void read() {
-            current = dataColumn.readBoolean();
-          }
-          public void skip() {
-            current = false;
-            dataColumn.skip();
-          }
-          @Override
-          public boolean getBoolean() {
-            return current;
-          }
-          void writeValue() {
-            converter.addBoolean(current);
-          }
-        };
-      }
-      @Override
-      public Binding convertBINARY(PrimitiveTypeName primitiveTypeName) throws RuntimeException {
-        return new Binding() {
-          Binary current;
-          void read() {
-            current = dataColumn.readBytes();
-          }
-          public void skip() {
-            current = null;
-            dataColumn.skip();
-          }
-          @Override
-          public Binary getBinary() {
-            return current;
-          }
-          void writeValue() {
-            converter.addBinary(current);
-          }
-        };
-      }
-    });
-  }
+public class ColumnReaderImpl extends ColumnReaderBase {
 
   /**
    * creates a reader for triplets
-   * @param path the descriptor for the corresponding column
-   * @param pageReader the underlying store to read from
-   * @param converter a converter that materializes the values in this column in the current record
-   * @param writerVersion writer version string from the Parquet file being read
-   */
-  public ColumnReaderImpl(ColumnDescriptor path, PageReader pageReader, PrimitiveConverter converter, ParsedVersion writerVersion) {
-    this.path = checkNotNull(path, "path");
-    this.pageReader = checkNotNull(pageReader, "pageReader");
-    this.converter = checkNotNull(converter, "converter");
-    this.writerVersion = writerVersion;
-    DictionaryPage dictionaryPage = pageReader.readDictionaryPage();
-    if (dictionaryPage != null) {
-      try {
-        this.dictionary = dictionaryPage.getEncoding().initDictionary(path, dictionaryPage);
-        if (converter.hasDictionarySupport()) {
-          converter.setDictionary(dictionary);
-        }
-      } catch (IOException e) {
-        throw new ParquetDecodingException("could not decode the dictionary for " + path, e);
-      }
-    } else {
-      this.dictionary = null;
-    }
-    this.totalValueCount = pageReader.getTotalValueCount();
-    if (totalValueCount <= 0) {
-      throw new ParquetDecodingException("totalValueCount '" + totalValueCount + "' <= 0");
-    }
+   * 
+   * @param path
+   *          the descriptor for the corresponding column
+   * @param pageReader
+   *          the underlying store to read from
+   * @param converter
+   *          a converter that materializes the values in this column in the current record
+   * @param writerVersion
+   *          writer version string from the Parquet file being read
+   */
+  public ColumnReaderImpl(ColumnDescriptor path, PageReader pageReader, PrimitiveConverter converter,
+      ParsedVersion writerVersion) {
+    super(path, pageReader, converter, writerVersion);
     consume();
   }
 
-  private boolean isFullyConsumed() {
-    return readValues >= totalValueCount;
-  }
-
-  /**
-   * {@inheritDoc}
-   * @see org.apache.parquet.column.ColumnReader#writeCurrentValueToConverter()
-   */
-  @Override
-  public void writeCurrentValueToConverter() {
-    readValue();
-    this.binding.writeValue();
-  }
-
-  @Override
-  public int getCurrentValueDictionaryID() {
-    readValue();
-    return binding.getDictionaryId();
-  }
-
-  /**
-   * {@inheritDoc}
-   * @see org.apache.parquet.column.ColumnReader#getInteger()
-   */
-  @Override
-  public int getInteger() {
-    readValue();
-    return this.binding.getInteger();
-  }
-
-  /**
-   * {@inheritDoc}
-   * @see org.apache.parquet.column.ColumnReader#getBoolean()
-   */
-  @Override
-  public boolean getBoolean() {
-    readValue();
-    return this.binding.getBoolean();
-  }
-
-  /**
-   * {@inheritDoc}
-   * @see org.apache.parquet.column.ColumnReader#getLong()
-   */
-  @Override
-  public long getLong() {
-    readValue();
-    return this.binding.getLong();
-  }
-
-  /**
-   * {@inheritDoc}
-   * @see org.apache.parquet.column.ColumnReader#getBinary()
-   */
-  @Override
-  public Binary getBinary() {
-    readValue();
-    return this.binding.getBinary();
-  }
-
-  /**
-   * {@inheritDoc}
-   * @see org.apache.parquet.column.ColumnReader#getFloat()
-   */
-  @Override
-  public float getFloat() {
-    readValue();
-    return this.binding.getFloat();
-  }
-
-  /**
-   * {@inheritDoc}
-   * @see org.apache.parquet.column.ColumnReader#getDouble()
-   */
-  @Override
-  public double getDouble() {
-    readValue();
-    return this.binding.getDouble();
-  }
-
-  /**
-   * {@inheritDoc}
-   * @see org.apache.parquet.column.ColumnReader#getCurrentRepetitionLevel()
-   */
-  @Override
-  public int getCurrentRepetitionLevel() {
-    return repetitionLevel;
-  }
-
-  /**
-   * {@inheritDoc}
-   * @see org.apache.parquet.column.ColumnReader#getDescriptor()
-   */
-  @Override
-  public ColumnDescriptor getDescriptor() {
-    return path;
-  }
-
-  /**
-   * Reads the value into the binding.
-   */
-  public void readValue() {
-    try {
-      if (!valueRead) {
-        binding.read();
-        valueRead = true;
-      }
-    } catch (RuntimeException e) {
-      if (CorruptDeltaByteArrays.requiresSequentialReads(writerVersion, currentEncoding) &&
-          e instanceof ArrayIndexOutOfBoundsException) {
-        // this is probably PARQUET-246, which may happen if reading data with
-        // MR because this can't be detected without reading all footers
-        throw new ParquetDecodingException("Read failure possibly due to " +
-            "PARQUET-246: try setting parquet.split.files to false",
-            new ParquetDecodingException(
-                format("Can't read value in column %s at value %d out of %d, " +
-                        "%d out of %d in currentPage. repetition level: " +
-                        "%d, definition level: %d",
-                    path, readValues, totalValueCount,
-                    readValues - (endOfPageValueCount - pageValueCount),
-                    pageValueCount, repetitionLevel, definitionLevel),
-                e));
-      }
-      throw new ParquetDecodingException(
-          format("Can't read value in column %s at value %d out of %d, " +
-                  "%d out of %d in currentPage. repetition level: " +
-                  "%d, definition level: %d",
-              path, readValues, totalValueCount,
-              readValues - (endOfPageValueCount - pageValueCount),
-              pageValueCount, repetitionLevel, definitionLevel),
-          e);
-    }
-  }
-
-  /**
-   * {@inheritDoc}
-   * @see org.apache.parquet.column.ColumnReader#skip()
-   */
-  @Override
-  public void skip() {
-    if (!valueRead) {
-      binding.skip();
-      valueRead = true;
-    }
-  }
-
-  /**
-   * {@inheritDoc}
-   * @see org.apache.parquet.column.ColumnReader#getCurrentDefinitionLevel()
-   */
-  @Override
-  public int getCurrentDefinitionLevel() {
-    return definitionLevel;
-  }
-
-  // TODO: change the logic around read() to not tie together reading from the 3 columns
-  private void readRepetitionAndDefinitionLevels() {
-    repetitionLevel = repetitionLevelColumn.nextInt();
-    definitionLevel = definitionLevelColumn.nextInt();
-    ++readValues;
-  }
-
-  private void checkRead() {
-    if (isPageFullyConsumed()) {
-      if (isFullyConsumed()) {
-        LOG.debug("end reached");
-        repetitionLevel = 0; // the next repetition level
-        return;
-      }
-      readPage();
-    }
-    readRepetitionAndDefinitionLevels();
-  }
-
-  private void readPage() {
-    LOG.debug("loading page");
-    DataPage page = pageReader.readPage();
-    page.accept(new DataPage.Visitor<Void>() {
-      @Override
-      public Void visit(DataPageV1 dataPageV1) {
-        readPageV1(dataPageV1);
-        return null;
-      }
-      @Override
-      public Void visit(DataPageV2 dataPageV2) {
-        readPageV2(dataPageV2);
-        return null;
-      }
-    });
-  }
-
-  private void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, int valueCount) {
-    ValuesReader previousReader = this.dataColumn;
-
-    this.currentEncoding = dataEncoding;
-    this.pageValueCount = valueCount;
-    this.endOfPageValueCount = readValues + pageValueCount;
-
-    if (dataEncoding.usesDictionary()) {
-      if (dictionary == null) {
-        throw new ParquetDecodingException(
-            "could not read page in col " + path + " as the dictionary was missing for encoding " + dataEncoding);
-      }
-      this.dataColumn = dataEncoding.getDictionaryBasedValuesReader(path, VALUES, dictionary);
-    } else {
-      this.dataColumn = dataEncoding.getValuesReader(path, VALUES);
-    }
-
-    if (dataEncoding.usesDictionary() && converter.hasDictionarySupport()) {
-      bindToDictionary(dictionary);
-    } else {
-      bind(path.getType());
-    }
-
-    try {
-      dataColumn.initFromPage(pageValueCount, in);
-    } catch (IOException e) {
-      throw new ParquetDecodingException("could not read page in col " + path, e);
-    }
-
-    if (CorruptDeltaByteArrays.requiresSequentialReads(writerVersion, dataEncoding) &&
-        previousReader != null && previousReader instanceof RequiresPreviousReader) {
-      // previous reader can only be set if reading sequentially
-      ((RequiresPreviousReader) dataColumn).setPreviousReader(previousReader);
-    }
-  }
-
-  private void readPageV1(DataPageV1 page) {
-    ValuesReader rlReader = page.getRlEncoding().getValuesReader(path, REPETITION_LEVEL);
-    ValuesReader dlReader = page.getDlEncoding().getValuesReader(path, DEFINITION_LEVEL);
-    this.repetitionLevelColumn = new ValuesReaderIntIterator(rlReader);
-    this.definitionLevelColumn = new ValuesReaderIntIterator(dlReader);
-    try {
-      BytesInput bytes = page.getBytes();
-      LOG.debug("page size {} bytes and {} records", bytes.size(), pageValueCount);
-      LOG.debug("reading repetition levels at 0");
-      ByteBufferInputStream in = bytes.toInputStream();
-      rlReader.initFromPage(pageValueCount, in);
-      LOG.debug("reading definition levels at {}", in.position());
-      dlReader.initFromPage(pageValueCount, in);
-      LOG.debug("reading data at {}", in.position());
-      initDataReader(page.getValueEncoding(), in, page.getValueCount());
-    } catch (IOException e) {
-      throw new ParquetDecodingException("could not read page " + page + " in col " + path, e);
-    }
-  }
-
-  private void readPageV2(DataPageV2 page) {
-    this.repetitionLevelColumn = newRLEIterator(path.getMaxRepetitionLevel(), page.getRepetitionLevels());
-    this.definitionLevelColumn = newRLEIterator(path.getMaxDefinitionLevel(), page.getDefinitionLevels());
-    LOG.debug("page data size {} bytes and {} records", page.getData().size(), pageValueCount);
-    try {
-      initDataReader(page.getDataEncoding(), page.getData().toInputStream(), page.getValueCount());
-    } catch (IOException e) {
-      throw new ParquetDecodingException("could not read page " + page + " in col " + path, e);
-    }
-  }
-
-  private IntIterator newRLEIterator(int maxLevel, BytesInput bytes) {
-    try {
-      if (maxLevel == 0) {
-        return new NullIntIterator();
-      }
-      return new RLEIntIterator(
-          new RunLengthBitPackingHybridDecoder(
-              BytesUtils.getWidthFromMaxInt(maxLevel),
-              bytes.toInputStream()));
-    } catch (IOException e) {
-      throw new ParquetDecodingException("could not read levels in page for col " + path, e);
-    }
-  }
-
-  private boolean isPageFullyConsumed() {
-    return readValues >= endOfPageValueCount;
-  }
-
-  /**
-   * {@inheritDoc}
-   * @see org.apache.parquet.column.ColumnReader#consume()
-   */
   @Override
-  public void consume() {
-    checkRead();
-    valueRead = false;
+  boolean skipRL(int rl) {
+    return false;
   }
 
-  /**
-   * {@inheritDoc}
-   * @see org.apache.parquet.column.ColumnReader#getTotalValueCount()
-   */
   @Override
-  public long getTotalValueCount() {
-    return totalValueCount;
-  }
-
-  static abstract class IntIterator {
-    abstract int nextInt();
-  }
-
-  static class ValuesReaderIntIterator extends IntIterator {
-    ValuesReader delegate;
-
-    public ValuesReaderIntIterator(ValuesReader delegate) {
-      super();
-      this.delegate = delegate;
-    }
-
-    @Override
-    int nextInt() {
-      return delegate.readInteger();
-    }
-  }
-
-  static class RLEIntIterator extends IntIterator {
-    RunLengthBitPackingHybridDecoder delegate;
-
-    public RLEIntIterator(RunLengthBitPackingHybridDecoder delegate) {
-      this.delegate = delegate;
-    }
-
-    @Override
-    int nextInt() {
-      try {
-        return delegate.readInt();
-      } catch (IOException e) {
-        throw new ParquetDecodingException(e);
-      }
-    }
-  }
-
-  private static final class NullIntIterator extends IntIterator {
-    @Override
-    int nextInt() {
-      return 0;
-    }
+  void newPageInitialized(DataPage page) {
   }
 }
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreBase.java b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreBase.java
new file mode 100644
index 0000000000..dc4946e4ff
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreBase.java
@@ -0,0 +1,255 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.column.impl;
+
+import static java.lang.Math.max;
+import static java.lang.Math.min;
+import static java.util.Collections.unmodifiableMap;
+
+import java.util.Arrays;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+import java.util.TreeMap;
+
+import org.apache.parquet.column.ColumnDescriptor;
+import org.apache.parquet.column.ColumnWriteStore;
+import org.apache.parquet.column.ColumnWriter;
+import org.apache.parquet.column.ParquetProperties;
+import org.apache.parquet.column.page.PageWriteStore;
+import org.apache.parquet.column.page.PageWriter;
+import org.apache.parquet.column.values.bloomfilter.BloomFilterWriteStore;
+import org.apache.parquet.column.values.bloomfilter.BloomFilterWriter;
+import org.apache.parquet.schema.MessageType;
+
+/**
+ * Base implementation for {@link ColumnWriteStore} to be extended to specialize for V1 and V2 pages.
+ */
+abstract class ColumnWriteStoreBase implements ColumnWriteStore {
+
+  // Used to support the deprecated workflow of ColumnWriteStoreV1 (lazy init of ColumnWriters)
+  private interface ColumnWriterProvider {
+    ColumnWriter getColumnWriter(ColumnDescriptor path);
+  }
+
+  private final ColumnWriterProvider columnWriterProvider;
+
+  // will flush even if size is below the threshold by this much to facilitate page alignment
+  private static final float THRESHOLD_TOLERANCE_RATIO = 0.1f; // 10 %
+
+  private final Map<ColumnDescriptor, ColumnWriterBase> columns;
+  private final ParquetProperties props;
+  private final long thresholdTolerance;
+  private long rowCount;
+  private long rowCountForNextSizeCheck;
+
+  // To be used by the deprecated constructor of ColumnWriteStoreV1
+  @Deprecated
+  ColumnWriteStoreBase(
+      final PageWriteStore pageWriteStore,
+      final ParquetProperties props) {
+    this.props = props;
+    this.thresholdTolerance = (long) (props.getPageSizeThreshold() * THRESHOLD_TOLERANCE_RATIO);
+
+    this.columns = new TreeMap<>();
+
+    this.rowCountForNextSizeCheck = props.getMinRowCountForPageSizeCheck();
+
+    columnWriterProvider = new ColumnWriterProvider() {
+      @Override
+      public ColumnWriter getColumnWriter(ColumnDescriptor path) {
+        ColumnWriterBase column = columns.get(path);
+        if (column == null) {
+          column = createColumnWriter(path, pageWriteStore.getPageWriter(path), null, props);
+          columns.put(path, column);
+        }
+        return column;
+      }
+    };
+  }
+
+  ColumnWriteStoreBase(
+      MessageType schema,
+      PageWriteStore pageWriteStore,
+      ParquetProperties props) {
+    this.props = props;
+    this.thresholdTolerance = (long) (props.getPageSizeThreshold() * THRESHOLD_TOLERANCE_RATIO);
+    Map<ColumnDescriptor, ColumnWriterBase> mcolumns = new TreeMap<>();
+    for (ColumnDescriptor path : schema.getColumns()) {
+      PageWriter pageWriter = pageWriteStore.getPageWriter(path);
+      mcolumns.put(path, createColumnWriter(path, pageWriter, null, props));
+    }
+    this.columns = unmodifiableMap(mcolumns);
+
+    this.rowCountForNextSizeCheck = props.getMinRowCountForPageSizeCheck();
+
+    columnWriterProvider = new ColumnWriterProvider() {
+      @Override
+      public ColumnWriter getColumnWriter(ColumnDescriptor path) {
+        return columns.get(path);
+      }
+    };
+  }
+
+  ColumnWriteStoreBase( // eagerly creates one column writer per schema column, optionally wired to a bloom filter writer
+    MessageType schema,
+    PageWriteStore pageWriteStore,
+    BloomFilterWriteStore bloomFilterWriteStore, // NOTE(review): dereferenced without a null check when bloom filters are enabled - confirm callers never pass null
+    ParquetProperties props) {
+    this.props = props;
+    this.thresholdTolerance = (long) (props.getPageSizeThreshold() * THRESHOLD_TOLERANCE_RATIO);
+    Map<ColumnDescriptor, ColumnWriterBase> mcolumns = new TreeMap<>();
+    for (ColumnDescriptor path : schema.getColumns()) {
+      PageWriter pageWriter = pageWriteStore.getPageWriter(path);
+      if (props.isBloomFilterEnabled() && props.getBloomFilterExpectedDistinctNumbers() != null) { // same decision for every column
+        BloomFilterWriter bloomFilterWriter = bloomFilterWriteStore.getBloomFilterWriter(path);
+        mcolumns.put(path, createColumnWriter(path, pageWriter, bloomFilterWriter, props));
+      } else {
+        mcolumns.put(path, createColumnWriter(path, pageWriter, null, props)); // null bloom filter writer: none is supplied to this column
+      }
+    }
+    this.columns = unmodifiableMap(mcolumns);
+    // the first size check is scheduled after the configured minimum number of rows
+    this.rowCountForNextSizeCheck = props.getMinRowCountForPageSizeCheck();
+    // all writers already exist, so the provider is a plain lookup (returns null for an unknown column)
+    columnWriterProvider = new ColumnWriterProvider() {
+      @Override
+      public ColumnWriter getColumnWriter(ColumnDescriptor path) {
+        return columns.get(path);
+      }
+    };
+  }
+
+  abstract ColumnWriterBase createColumnWriter(ColumnDescriptor path, PageWriter pageWriter,
+                                               BloomFilterWriter bloomFilterWriter, ParquetProperties props);
+
+  public ColumnWriter getColumnWriter(ColumnDescriptor path) {
+    return columnWriterProvider.getColumnWriter(path);
+  }
+
+  public Set<ColumnDescriptor> getColumnDescriptors() {
+    return columns.keySet();
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder();
+    for (Entry<ColumnDescriptor, ColumnWriterBase> entry : columns.entrySet()) {
+      sb.append(Arrays.toString(entry.getKey().getPath())).append(": ");
+      sb.append(entry.getValue().getTotalBufferedSize()).append(" bytes");
+      sb.append("\n");
+    }
+    return sb.toString();
+  }
+
+  @Override
+  public long getAllocatedSize() {
+    long total = 0;
+    for (ColumnWriterBase memColumn : columns.values()) {
+      total += memColumn.allocatedSize();
+    }
+    return total;
+  }
+
+  @Override
+  public long getBufferedSize() {
+    long total = 0;
+    for (ColumnWriterBase memColumn : columns.values()) {
+      total += memColumn.getTotalBufferedSize();
+    }
+    return total;
+  }
+
+  @Override
+  public void flush() { // writes a final page for each column that still has pending rows, then finalizes every column chunk
+    for (ColumnWriterBase memColumn : columns.values()) {
+      long rows = rowCount - memColumn.getRowsWrittenSoFar(); // store row count minus what the column reports as written
+      if (rows > 0) {
+        memColumn.writePage();
+      }
+      memColumn.finalizeColumnChunk(); // called for every column, whether or not a page was just written
+    }
+  }
+
+  public String memUsageString() {
+    StringBuilder b = new StringBuilder("Store {\n");
+    for (ColumnWriterBase memColumn : columns.values()) {
+      b.append(memColumn.memUsageString(" "));
+    }
+    b.append("}\n");
+    return b.toString();
+  }
+
+  public long maxColMemSize() {
+    long max = 0;
+    for (ColumnWriterBase memColumn : columns.values()) {
+      max = Math.max(max, memColumn.getBufferedSizeInMemory());
+    }
+    return max;
+  }
+
+  @Override
+  public void close() {
+    flush(); // calling flush() here to keep it consistent with the behavior before merging with master
+    for (ColumnWriterBase memColumn : columns.values()) {
+      memColumn.close();
+    }
+  }
+
+  @Override
+  public void endRecord() { // counts one finished record and runs sizeCheck() once the scheduled row count is reached
+    ++rowCount;
+    if (rowCount >= rowCountForNextSizeCheck) {
+      sizeCheck(); // sizeCheck() also reschedules rowCountForNextSizeCheck
+    }
+  }
+
+  private void sizeCheck() { // flushes pages that are (nearly) full and schedules the row count of the next check
+    long minRecordToWait = Long.MAX_VALUE;
+    for (ColumnWriterBase writer : columns.values()) {
+      long usedMem = writer.getCurrentPageBufferedSize();
+      long rows = rowCount - writer.getRowsWrittenSoFar();
+      long remainingMem = props.getPageSizeThreshold() - usedMem;
+      if (remainingMem <= thresholdTolerance) { // within tolerance of the page size threshold: write the page now
+        writer.writePage();
+        remainingMem = props.getPageSizeThreshold();
+      }
+      long rowsToFillPage = // estimate: observed rows per buffered byte times the bytes still available
+          usedMem == 0 ?
+              props.getMaxRowCountForPageSizeCheck()
+              : (long) ((double) rows / usedMem * remainingMem); // cast the whole product; casting only rows made this an integer division
+      if (rowsToFillPage < minRecordToWait) {
+        minRecordToWait = rowsToFillPage;
+      }
+    }
+    if (minRecordToWait == Long.MAX_VALUE) { // no column lowered the estimate (e.g. there are no columns)
+      minRecordToWait = props.getMinRowCountForPageSizeCheck();
+    }
+    // schedule the next check, either from the estimate or at the fixed minimum interval
+    if (props.estimateNextSizeCheck()) {
+      // will check again halfway to the estimate, clamped between the configured min and max row counts
+      rowCountForNextSizeCheck = rowCount +
+          min(
+              max(minRecordToWait / 2, props.getMinRowCountForPageSizeCheck()),
+              props.getMaxRowCountForPageSizeCheck());
+    } else {
+      rowCountForNextSizeCheck = rowCount + props.getMinRowCountForPageSizeCheck();
+    }
+  }
+}
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreV1.java b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreV1.java
index 7e2876077a..dd13b0b8a4 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreV1.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreV1.java
@@ -18,135 +18,34 @@
  */
 package org.apache.parquet.column.impl;
 
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Set;
-import java.util.TreeMap;
-
 import org.apache.parquet.column.ColumnDescriptor;
-import org.apache.parquet.column.ColumnWriteStore;
-import org.apache.parquet.column.ColumnWriter;
 import org.apache.parquet.column.ParquetProperties;
 import org.apache.parquet.column.page.PageWriteStore;
 import org.apache.parquet.column.page.PageWriter;
 import org.apache.parquet.column.values.bloomfilter.BloomFilterWriteStore;
 import org.apache.parquet.column.values.bloomfilter.BloomFilterWriter;
+import org.apache.parquet.schema.MessageType;
 
-public class ColumnWriteStoreV1 implements ColumnWriteStore {
-
-  private final Map<ColumnDescriptor, ColumnWriterV1> columns = new TreeMap<ColumnDescriptor, ColumnWriterV1>();
-  private final PageWriteStore pageWriteStore;
-  private final ParquetProperties props;
-  private BloomFilterWriteStore bloomFilterWriteStore;
+public class ColumnWriteStoreV1 extends ColumnWriteStoreBase {
+  public ColumnWriteStoreV1(MessageType schema, PageWriteStore pageWriteStore, ParquetProperties props) {
+    super(schema, pageWriteStore, props);
+  }
 
-  public ColumnWriteStoreV1(PageWriteStore pageWriteStore,
-                            ParquetProperties props) {
-    this.pageWriteStore = pageWriteStore;
-    this.props = props;
+  @Deprecated
+  public ColumnWriteStoreV1(final PageWriteStore pageWriteStore,
+      final ParquetProperties props) {
+    super(pageWriteStore, props);
   }
 
-  public ColumnWriteStoreV1(PageWriteStore pageWriteStore,
+  public ColumnWriteStoreV1(MessageType schema, PageWriteStore pageWriteStore,
                             BloomFilterWriteStore bloomFilterWriteStore,
                             ParquetProperties props) {
-    this (pageWriteStore, props);
-    this.bloomFilterWriteStore = bloomFilterWriteStore;
-  }
-
-  public ColumnWriter getColumnWriter(ColumnDescriptor path) {
-    ColumnWriterV1 column = columns.get(path);
-    if (column == null) {
-      column = newMemColumn(path);
-      columns.put(path, column);
-    }
-    return column;
-  }
-
-  public Set<ColumnDescriptor> getColumnDescriptors() {
-    return columns.keySet();
-  }
-
-  private ColumnWriterV1 newMemColumn(ColumnDescriptor path) {
-    PageWriter pageWriter = pageWriteStore.getPageWriter(path);
-
-    if (props.isBloomFilterEnabled() && props.getBloomFilterExpectValues() != null) {
-      BloomFilterWriter bloomFilterWriter = bloomFilterWriteStore.getBloomFilterWriter(path);
-      return new ColumnWriterV1(path, pageWriter, bloomFilterWriter, props);
-    } else {
-      return new ColumnWriterV1(path, pageWriter, props);
-    }
-  }
-
-  @Override
-  public String toString() {
-      StringBuilder sb = new StringBuilder();
-      for (Entry<ColumnDescriptor, ColumnWriterV1> entry : columns.entrySet()) {
-        sb.append(Arrays.toString(entry.getKey().getPath())).append(": ");
-        sb.append(entry.getValue().getBufferedSizeInMemory()).append(" bytes");
-        sb.append("\n");
-      }
-      return sb.toString();
+    super (schema, pageWriteStore, bloomFilterWriteStore, props);
   }
 
   @Override
-  public long getAllocatedSize() {
-    Collection<ColumnWriterV1> values = columns.values();
-    long total = 0;
-    for (ColumnWriterV1 memColumn : values) {
-      total += memColumn.allocatedSize();
-    }
-    return total;
+  ColumnWriterBase createColumnWriter(ColumnDescriptor path, PageWriter pageWriter,
+                                      BloomFilterWriter bloomFilterWriter, ParquetProperties props) {
+    return new ColumnWriterV1(path, pageWriter, bloomFilterWriter, props);
   }
-
-  @Override
-  public long getBufferedSize() {
-    Collection<ColumnWriterV1> values = columns.values();
-    long total = 0;
-    for (ColumnWriterV1 memColumn : values) {
-      total += memColumn.getBufferedSizeInMemory();
-    }
-    return total;
-  }
-
-  @Override
-  public String memUsageString() {
-    StringBuilder b = new StringBuilder("Store {\n");
-    Collection<ColumnWriterV1> values = columns.values();
-    for (ColumnWriterV1 memColumn : values) {
-      b.append(memColumn.memUsageString(" "));
-    }
-    b.append("}\n");
-    return b.toString();
-  }
-
-  public long maxColMemSize() {
-    Collection<ColumnWriterV1> values = columns.values();
-    long max = 0;
-    for (ColumnWriterV1 memColumn : values) {
-      max = Math.max(max, memColumn.getBufferedSizeInMemory());
-    }
-    return max;
-  }
-
-  @Override
-  public void flush() {
-    Collection<ColumnWriterV1> values = columns.values();
-    for (ColumnWriterV1 memColumn : values) {
-      memColumn.flush();
-    }
-  }
-
-  @Override
-  public void endRecord() {
-    // V1 does not take record boundaries into account
-  }
-
-  public void close() {
-    Collection<ColumnWriterV1> values = columns.values();
-    for (ColumnWriterV1 memColumn : values) {
-      memColumn.close();
-    }
-  }
-
 }
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreV2.java b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreV2.java
index 6c20b8bb87..a9f2d5848d 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreV2.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreV2.java
@@ -18,20 +18,7 @@
  */
 package org.apache.parquet.column.impl;
 
-import static java.lang.Math.max;
-import static java.lang.Math.min;
-import static java.util.Collections.unmodifiableMap;
-
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Set;
-import java.util.TreeMap;
-
 import org.apache.parquet.column.ColumnDescriptor;
-import org.apache.parquet.column.ColumnWriteStore;
-import org.apache.parquet.column.ColumnWriter;
 import org.apache.parquet.column.ParquetProperties;
 import org.apache.parquet.column.page.PageWriteStore;
 import org.apache.parquet.column.page.PageWriter;
@@ -39,163 +26,20 @@
 import org.apache.parquet.column.values.bloomfilter.BloomFilterWriter;
 import org.apache.parquet.schema.MessageType;
 
-public class ColumnWriteStoreV2 implements ColumnWriteStore {
-
-  // will flush even if size bellow the threshold by this much to facilitate page alignment
-  private static final float THRESHOLD_TOLERANCE_RATIO = 0.1f; // 10 %
-
-  private final Map<ColumnDescriptor, ColumnWriterV2> columns;
-  private final Collection<ColumnWriterV2> writers;
-  private final ParquetProperties props;
-  private final long thresholdTolerance;
-  private long rowCount;
-  private long rowCountForNextSizeCheck;
-
-  public ColumnWriteStoreV2(
-      MessageType schema,
-      PageWriteStore pageWriteStore,
-      ParquetProperties props) {
-    this.props = props;
-    this.thresholdTolerance = (long)(props.getPageSizeThreshold() * THRESHOLD_TOLERANCE_RATIO);
-    Map<ColumnDescriptor, ColumnWriterV2> mcolumns = new TreeMap<ColumnDescriptor, ColumnWriterV2>();
-    for (ColumnDescriptor path : schema.getColumns()) {
-      PageWriter pageWriter = pageWriteStore.getPageWriter(path);
-      mcolumns.put(path, new ColumnWriterV2(path, pageWriter, props));
-    }
-    this.columns = unmodifiableMap(mcolumns);
-    this.writers = this.columns.values();
-
-    this.rowCountForNextSizeCheck = props.getMinRowCountForPageSizeCheck();
+public class ColumnWriteStoreV2 extends ColumnWriteStoreBase {
+  public ColumnWriteStoreV2(MessageType schema, PageWriteStore pageWriteStore, ParquetProperties props) {
+    super(schema, pageWriteStore, props);
   }
 
-  public ColumnWriteStoreV2(
-    MessageType schema,
-    PageWriteStore pageWriteStore,
-    BloomFilterWriteStore bloomFilterWriteStore,
-    ParquetProperties props) {
-    this.props = props;
-    this.thresholdTolerance = (long)(props.getPageSizeThreshold() * THRESHOLD_TOLERANCE_RATIO);
-    Map<ColumnDescriptor, ColumnWriterV2> mcolumns = new TreeMap<ColumnDescriptor, ColumnWriterV2>();
-
-    for (ColumnDescriptor path : schema.getColumns()) {
-      PageWriter pageWriter = pageWriteStore.getPageWriter(path);
-      if (props.isBloomFilterEnabled() && props.getBloomFilterExpectValues() != null) {
-        BloomFilterWriter bloomFilterWriter = bloomFilterWriteStore.getBloomFilterWriter(path);
-        mcolumns.put(path, new ColumnWriterV2(path, pageWriter, bloomFilterWriter, props));
-      } else {
-        mcolumns.put(path, new ColumnWriterV2(path, pageWriter, props));
-      }
-    }
-    this.columns = unmodifiableMap(mcolumns);
-    this.writers = this.columns.values();
-
-    this.rowCountForNextSizeCheck = props.getMinRowCountForPageSizeCheck();
-  }
-
-  public ColumnWriter getColumnWriter(ColumnDescriptor path) {
-    return columns.get(path);
-  }
-
-  public Set<ColumnDescriptor> getColumnDescriptors() {
-    return columns.keySet();
-  }
-
-  @Override
-  public String toString() {
-      StringBuilder sb = new StringBuilder();
-      for (Entry<ColumnDescriptor, ColumnWriterV2> entry : columns.entrySet()) {
-        sb.append(Arrays.toString(entry.getKey().getPath())).append(": ");
-        sb.append(entry.getValue().getTotalBufferedSize()).append(" bytes");
-        sb.append("\n");
-      }
-      return sb.toString();
+  public ColumnWriteStoreV2(MessageType schema, PageWriteStore pageWriteStore,
+                            BloomFilterWriteStore bloomFilterWriteStore,
+                            ParquetProperties props) {
+    super(schema, pageWriteStore, bloomFilterWriteStore, props);
   }
 
   @Override
-  public long getAllocatedSize() {
-    long total = 0;
-    for (ColumnWriterV2 memColumn : columns.values()) {
-      total += memColumn.allocatedSize();
-    }
-    return total;
+  ColumnWriterBase createColumnWriter(ColumnDescriptor path, PageWriter pageWriter,
+                                      BloomFilterWriter bloomFilterWriter, ParquetProperties props) {
+    return new ColumnWriterV2(path, pageWriter, bloomFilterWriter, props);
   }
-
-  @Override
-  public long getBufferedSize() {
-    long total = 0;
-    for (ColumnWriterV2 memColumn : columns.values()) {
-      total += memColumn.getTotalBufferedSize();
-    }
-    return total;
-  }
-
-  @Override
-  public void flush() {
-    for (ColumnWriterV2 memColumn : columns.values()) {
-      long rows = rowCount - memColumn.getRowsWrittenSoFar();
-      if (rows > 0) {
-        memColumn.writePage(rowCount);
-      }
-      memColumn.finalizeColumnChunk();
-    }
-  }
-
-  public String memUsageString() {
-    StringBuilder b = new StringBuilder("Store {\n");
-    for (ColumnWriterV2 memColumn : columns.values()) {
-      b.append(memColumn.memUsageString(" "));
-    }
-    b.append("}\n");
-    return b.toString();
-  }
-
-  @Override
-  public void close() {
-    flush(); // calling flush() here to keep it consistent with the behavior before merging with master
-    for (ColumnWriterV2 memColumn : columns.values()) {
-      memColumn.close();
-    }
-  }
-
-  @Override
-  public void endRecord() {
-    ++ rowCount;
-    if (rowCount >= rowCountForNextSizeCheck) {
-      sizeCheck();
-    }
-  }
-
-  private void sizeCheck() {
-    long minRecordToWait = Long.MAX_VALUE;
-    for (ColumnWriterV2 writer : writers) {
-      long usedMem = writer.getCurrentPageBufferedSize();
-      long rows = rowCount - writer.getRowsWrittenSoFar();
-      long remainingMem = props.getPageSizeThreshold() - usedMem;
-      if (remainingMem <= thresholdTolerance) {
-        writer.writePage(rowCount);
-        remainingMem = props.getPageSizeThreshold();
-      }
-      long rowsToFillPage =
-          usedMem == 0 ?
-              props.getMaxRowCountForPageSizeCheck()
-              : (long)((float)rows) / usedMem * remainingMem;
-      if (rowsToFillPage < minRecordToWait) {
-        minRecordToWait = rowsToFillPage;
-      }
-    }
-    if (minRecordToWait == Long.MAX_VALUE) {
-      minRecordToWait = props.getMinRowCountForPageSizeCheck();
-    }
-
-    if(props.estimateNextSizeCheck()) {
-      // will check again halfway if between min and max
-      rowCountForNextSizeCheck = rowCount +
-          min(
-              max(minRecordToWait / 2, props.getMinRowCountForPageSizeCheck()),
-              props.getMaxRowCountForPageSizeCheck());
-    } else {
-      rowCountForNextSizeCheck = rowCount + props.getMinRowCountForPageSizeCheck();
-    }
-  }
-
 }
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterBase.java b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterBase.java
new file mode 100644
index 0000000000..7f9ba4d868
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterBase.java
@@ -0,0 +1,400 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.column.impl;
+
+import java.io.IOException;
+import java.util.HashMap;
+
+import org.apache.parquet.column.ColumnDescriptor;
+import org.apache.parquet.column.ColumnWriter;
+import org.apache.parquet.column.ParquetProperties;
+import org.apache.parquet.column.page.DictionaryPage;
+import org.apache.parquet.column.page.PageWriter;
+import org.apache.parquet.column.statistics.Statistics;
+import org.apache.parquet.column.values.ValuesWriter;
+import org.apache.parquet.column.values.bloomfilter.BlockSplitBloomFilter;
+import org.apache.parquet.column.values.bloomfilter.BloomFilter;
+import org.apache.parquet.column.values.bloomfilter.BloomFilterWriter;
+import org.apache.parquet.io.ParquetEncodingException;
+import org.apache.parquet.io.api.Binary;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Base implementation of {@link ColumnWriter} that buffers one page of levels and values; extended to specialize for V1 and V2 pages.
+ */
+abstract class ColumnWriterBase implements ColumnWriter {
+  private static final Logger LOG = LoggerFactory.getLogger(ColumnWriterBase.class);
+
+  // Debug logging is disabled by default. Guarding the calls with "if (DEBUG)" inside the methods allows
+  // the java compiler (not the JIT) to remove the unused statements during build time.
+  private static final boolean DEBUG = false;
+
+  final ColumnDescriptor path;
+  final PageWriter pageWriter;
+  private ValuesWriter repetitionLevelColumn;
+  private ValuesWriter definitionLevelColumn;
+  private ValuesWriter dataColumn;
+  private int valueCount; // values (nulls included) buffered for the current page; reset by writePage()
+
+  private Statistics<?> statistics; // statistics of the current page; recreated by writePage()
+  private long rowsWrittenSoFar = 0; // sum of pageRowCount over all pages passed to writePage()
+  private int pageRowCount; // entries with repetition level 0 buffered for the current page
+
+  private BloomFilterWriter bloomFilterWriter; // stays null for multi-element paths or when no writer is supplied
+  private BloomFilter bloomFilter; // stays null unless the column has a configured expected distinct count
+
+  ColumnWriterBase(
+      ColumnDescriptor path,
+      PageWriter pageWriter,
+      ParquetProperties props) {
+    this.path = path;
+    this.pageWriter = pageWriter;
+    resetStatistics();
+
+    this.repetitionLevelColumn = createRLWriter(props, path);
+    this.definitionLevelColumn = createDLWriter(props, path);
+    this.dataColumn = props.newValuesWriter(path);
+  }
+
+  ColumnWriterBase(
+    ColumnDescriptor path,
+    PageWriter pageWriter,
+    BloomFilterWriter bloomFilterWriter,
+    ParquetProperties props
+  ) {
+    this(path, pageWriter, props);
+
+    // Nested columns are not supported yet: only single-element paths can get a Bloom filter.
+    if (path.getPath().length != 1 || bloomFilterWriter == null) {
+      return;
+    }
+
+    this.bloomFilterWriter = bloomFilterWriter;
+    HashMap<String, Long> bloomFilterExpectValues = props.getBloomFilterExpectedDistinctNumbers();
+    String column = path.getPath()[0];
+    if (bloomFilterExpectValues.keySet().contains(column)) { // only columns listed in the properties get a filter
+      int optimalNumOfBits = BlockSplitBloomFilter.optimalNumOfBits(bloomFilterExpectValues.get(column).intValue(),
+        BlockSplitBloomFilter.DEFAULT_FPP); // NOTE(review): intValue() truncates counts above Integer.MAX_VALUE
+      this.bloomFilter = new BlockSplitBloomFilter(optimalNumOfBits/8); // presumably bits -> bytes; TODO confirm
+    }
+  }
+
+  abstract ValuesWriter createRLWriter(ParquetProperties props, ColumnDescriptor path); // called from the constructor
+
+  abstract ValuesWriter createDLWriter(ParquetProperties props, ColumnDescriptor path); // called from the constructor
+
+  private void log(Object value, int r, int d) { // callers guard this with "if (DEBUG)"
+    LOG.debug("{} {} r:{} d:{}", path, value, r, d);
+  }
+
+  private void resetStatistics() {
+    this.statistics = Statistics.createStats(path.getPrimitiveType());
+  }
+
+  private void definitionLevel(int definitionLevel) {
+    definitionLevelColumn.writeInteger(definitionLevel);
+  }
+
+  private void repetitionLevel(int repetitionLevel) {
+    repetitionLevelColumn.writeInteger(repetitionLevel);
+    assert pageRowCount == 0 ? repetitionLevel == 0 : true : "Every page shall start on record boundaries";
+    if (repetitionLevel == 0) { // a repetition level of 0 is counted as the start of a new row
+      ++pageRowCount;
+    }
+  }
+
+  /**
+   * Writes a null: only the levels are buffered, and the null is counted in the page statistics
+   *
+   * @param repetitionLevel the repetition level to buffer for this entry
+   * @param definitionLevel the definition level to buffer for this entry
+   */
+  @Override
+  public void writeNull(int repetitionLevel, int definitionLevel) {
+    if (DEBUG)
+      log(null, repetitionLevel, definitionLevel);
+    repetitionLevel(repetitionLevel);
+    definitionLevel(definitionLevel);
+    statistics.incrementNumNulls();
+    ++valueCount;
+  }
+
+  @Override
+  public void close() {
+    // Close the level writers and the data writer.
+    repetitionLevelColumn.close();
+    definitionLevelColumn.close();
+    dataColumn.close();
+  }
+
+  @Override
+  public long getBufferedSizeInMemory() {
+    return repetitionLevelColumn.getBufferedSize()
+        + definitionLevelColumn.getBufferedSize()
+        + dataColumn.getBufferedSize()
+        + pageWriter.getMemSize(); // NOTE: unlike getTotalBufferedSize(), the Bloom filter bitset is not counted
+  }
+
+  // The updateBloomFilter overloads hash the value and insert it; they do nothing when no filter was created.
+  private void updateBloomFilter(int value) {
+    if (bloomFilter != null) {
+      bloomFilter.insert(bloomFilter.hash(value));
+    }
+  }
+
+  private void updateBloomFilter(long value) {
+    if (bloomFilter != null) {
+      bloomFilter.insert(bloomFilter.hash(value));
+    }
+  }
+
+  private void updateBloomFilter(double value) {
+    if (bloomFilter != null) {
+      bloomFilter.insert(bloomFilter.hash(value));
+    }
+  }
+
+  private void updateBloomFilter(float value) {
+    if (bloomFilter != null) {
+      bloomFilter.insert(bloomFilter.hash(value));
+    }
+  }
+
+  private void updateBloomFilter(Binary value) {
+    if (bloomFilter != null) {
+      bloomFilter.insert(bloomFilter.hash(value));
+    }
+  }
+
+  /**
+   * Writes a double value: buffers levels and value, then updates the statistics and the Bloom filter
+   *
+   * @param value the value to buffer in the data column
+   * @param repetitionLevel the repetition level to buffer for this value
+   * @param definitionLevel the definition level to buffer for this value
+   */
+  @Override
+  public void write(double value, int repetitionLevel, int definitionLevel) {
+    if (DEBUG)
+      log(value, repetitionLevel, definitionLevel);
+    repetitionLevel(repetitionLevel);
+    definitionLevel(definitionLevel);
+    dataColumn.writeDouble(value);
+    statistics.updateStats(value);
+    updateBloomFilter(value);
+    ++valueCount;
+  }
+
+  /**
+   * Writes a float value: buffers levels and value, then updates the statistics and the Bloom filter
+   *
+   * @param value the value to buffer in the data column
+   * @param repetitionLevel the repetition level to buffer for this value
+   * @param definitionLevel the definition level to buffer for this value
+   */
+  @Override
+  public void write(float value, int repetitionLevel, int definitionLevel) {
+    if (DEBUG)
+      log(value, repetitionLevel, definitionLevel);
+    repetitionLevel(repetitionLevel);
+    definitionLevel(definitionLevel);
+    dataColumn.writeFloat(value);
+    statistics.updateStats(value);
+    updateBloomFilter(value);
+    ++valueCount;
+  }
+
+  /**
+   * Writes a binary value: buffers levels and value, then updates the statistics and the Bloom filter
+   *
+   * @param value the value to buffer in the data column
+   * @param repetitionLevel the repetition level to buffer for this value
+   * @param definitionLevel the definition level to buffer for this value
+   */
+  @Override
+  public void write(Binary value, int repetitionLevel, int definitionLevel) {
+    if (DEBUG)
+      log(value, repetitionLevel, definitionLevel);
+    repetitionLevel(repetitionLevel);
+    definitionLevel(definitionLevel);
+    dataColumn.writeBytes(value);
+    statistics.updateStats(value);
+    updateBloomFilter(value);
+    ++valueCount;
+  }
+
+  /**
+   * Writes a boolean value: buffers levels and value and updates the statistics (no Bloom filter update)
+   *
+   * @param value the value to buffer in the data column
+   * @param repetitionLevel the repetition level to buffer for this value
+   * @param definitionLevel the definition level to buffer for this value
+   */
+  @Override
+  public void write(boolean value, int repetitionLevel, int definitionLevel) {
+    if (DEBUG)
+      log(value, repetitionLevel, definitionLevel);
+    repetitionLevel(repetitionLevel);
+    definitionLevel(definitionLevel);
+    dataColumn.writeBoolean(value);
+    statistics.updateStats(value);
+    ++valueCount;
+  }
+
+  /**
+   * Writes an int value: buffers levels and value, then updates the statistics and the Bloom filter
+   *
+   * @param value the value to buffer in the data column
+   * @param repetitionLevel the repetition level to buffer for this value
+   * @param definitionLevel the definition level to buffer for this value
+   */
+  @Override
+  public void write(int value, int repetitionLevel, int definitionLevel) {
+    if (DEBUG)
+      log(value, repetitionLevel, definitionLevel);
+    repetitionLevel(repetitionLevel);
+    definitionLevel(definitionLevel);
+    dataColumn.writeInteger(value);
+    statistics.updateStats(value);
+    updateBloomFilter(value);
+    ++valueCount;
+  }
+
+  /**
+   * Writes a long value: buffers levels and value, then updates the statistics and the Bloom filter
+   *
+   * @param value the value to buffer in the data column
+   * @param repetitionLevel the repetition level to buffer for this value
+   * @param definitionLevel the definition level to buffer for this value
+   */
+  @Override
+  public void write(long value, int repetitionLevel, int definitionLevel) {
+    if (DEBUG)
+      log(value, repetitionLevel, definitionLevel);
+    repetitionLevel(repetitionLevel);
+    definitionLevel(definitionLevel);
+    dataColumn.writeLong(value);
+    statistics.updateStats(value);
+    updateBloomFilter(value);
+    ++valueCount;
+  }
+
+  /**
+   * Finalizes the column chunk: writes the dictionary page if the data writer produced one, then
+   * the Bloom filter if one was built. Is called right after writePage
+   */
+  void finalizeColumnChunk() {
+    final DictionaryPage dictionaryPage = dataColumn.toDictPageAndClose();
+    if (dictionaryPage != null) {
+      if (DEBUG)
+        LOG.debug("write dictionary");
+      try {
+        pageWriter.writeDictionaryPage(dictionaryPage);
+      } catch (IOException e) {
+        throw new ParquetEncodingException("could not write dictionary page for " + path, e);
+      }
+      dataColumn.resetDictionary();
+    }
+
+    if (bloomFilterWriter != null && bloomFilter != null) {
+      bloomFilterWriter.writeBloomFilter(bloomFilter);
+    }
+  }
+
+  /**
+   * Used to decide when to write a page
+   *
+   * @return the number of bytes buffered by the level writers and the data writer for the current page
+   */
+  long getCurrentPageBufferedSize() {
+    return repetitionLevelColumn.getBufferedSize()
+        + definitionLevelColumn.getBufferedSize()
+        + dataColumn.getBufferedSize();
+  }
+
+  /**
+   * Used to decide when to write a page or row group
+   *
+   * @return the bytes buffered for the current data, plus the page writer's memory size and the Bloom filter bitset size (if any)
+   */
+  long getTotalBufferedSize() {
+    long bloomBufferSize = bloomFilter == null ? 0 : bloomFilter.getBitsetSize();
+    return repetitionLevelColumn.getBufferedSize()
+        + definitionLevelColumn.getBufferedSize()
+        + dataColumn.getBufferedSize()
+        + pageWriter.getMemSize()
+        + bloomBufferSize;
+  }
+
+  /**
+   * @return memory allocated by the level writers, the data writer and the page writer, plus the Bloom filter bitset size (if any)
+   */
+  long allocatedSize() {
+    long bloomAllocatedSize = bloomFilter == null ? 0 : bloomFilter.getBitsetSize();
+    return repetitionLevelColumn.getAllocatedSize()
+        + definitionLevelColumn.getAllocatedSize()
+        + dataColumn.getAllocatedSize()
+        + pageWriter.allocatedSize()
+        + bloomAllocatedSize;
+  }
+
+  /**
+   * @param indent
+   *          a prefix to format lines
+   * @return a formatted string showing how memory is used
+   */
+  String memUsageString(String indent) {
+    StringBuilder b = new StringBuilder(indent).append(path).append(" {\n");
+    b.append(indent).append(" r:").append(repetitionLevelColumn.getAllocatedSize()).append(" bytes\n");
+    b.append(indent).append(" d:").append(definitionLevelColumn.getAllocatedSize()).append(" bytes\n");
+    b.append(dataColumn.memUsageString(indent + "  data:")).append("\n");
+    b.append(pageWriter.memUsageString(indent + "  pages:")).append("\n");
+    b.append(indent).append(String.format("  total: %,d/%,d", getTotalBufferedSize(), allocatedSize())).append("\n");
+    b.append(indent).append("}\n");
+    return b.toString();
+  }
+
+  long getRowsWrittenSoFar() {
+    return this.rowsWrittenSoFar;
+  }
+
+  /**
+   * Writes the current data to a new page in the page store, then resets the per-page writers, counters and statistics
+   */
+  void writePage() {
+    this.rowsWrittenSoFar += pageRowCount;
+    if (DEBUG)
+      LOG.debug("write page");
+    try {
+      writePage(pageRowCount, valueCount, statistics, repetitionLevelColumn, definitionLevelColumn, dataColumn);
+    } catch (IOException e) {
+      throw new ParquetEncodingException("could not write page for " + path, e);
+    }
+    repetitionLevelColumn.reset();
+    definitionLevelColumn.reset();
+    dataColumn.reset();
+    valueCount = 0;
+    resetStatistics();
+    pageRowCount = 0;
+  }
+
+  abstract void writePage(int rowCount, int valueCount, Statistics<?> statistics, ValuesWriter repetitionLevels,
+      ValuesWriter definitionLevels, ValuesWriter values) throws IOException; // invoked by writePage() with the current page's state
+}
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterV1.java b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterV1.java
index c5fc9dc549..1d732b837d 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterV1.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterV1.java
@@ -21,332 +21,47 @@
 import static org.apache.parquet.bytes.BytesInput.concat;
 
 import java.io.IOException;
-import java.util.HashMap;
 
 import org.apache.parquet.column.ColumnDescriptor;
-import org.apache.parquet.column.ColumnWriter;
 import org.apache.parquet.column.ParquetProperties;
-import org.apache.parquet.column.page.DictionaryPage;
 import org.apache.parquet.column.page.PageWriter;
 import org.apache.parquet.column.statistics.Statistics;
 import org.apache.parquet.column.values.ValuesWriter;
-import org.apache.parquet.column.values.bloomfilter.BlockSplitBloomFilter;
-import org.apache.parquet.column.values.bloomfilter.BloomFilter;
 import org.apache.parquet.column.values.bloomfilter.BloomFilterWriter;
-import org.apache.parquet.io.ParquetEncodingException;
-import org.apache.parquet.io.api.Binary;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 /**
  * Writes (repetition level, definition level, value) triplets and deals with writing pages to the underlying layer.
  */
-final class ColumnWriterV1 implements ColumnWriter {
-  private static final Logger LOG = LoggerFactory.getLogger(ColumnWriterV1.class);
-
-  // By default: Debugging disabled this way (using the "if (DEBUG)" IN the methods) to allow
-  // the java compiler (not the JIT) to remove the unused statements during build time.
-  private static final boolean DEBUG = false;
-
-  private final ColumnDescriptor path;
-  private final PageWriter pageWriter;
-  private final ParquetProperties props;
-
-  private ValuesWriter repetitionLevelColumn;
-  private ValuesWriter definitionLevelColumn;
-  private ValuesWriter dataColumn;
-  private int valueCount;
-  private int valueCountForNextSizeCheck;
-
-  private Statistics statistics;
-  private BloomFilterWriter bloomFilterWriter;
-  private BloomFilter bloomFilter;
-
-  public ColumnWriterV1(ColumnDescriptor path, PageWriter pageWriter,
-                        BloomFilterWriter bloomFilterWriter, ParquetProperties props) {
-    this(path, pageWriter, props);
-
-    // Current not support nested column.
-    if (path.getPath().length != 1 || bloomFilterWriter == null) {
-      return;
-    }
-
-    this.bloomFilterWriter = bloomFilterWriter;
-    HashMap<String, Long> bloomFilterExpectValues = props.getBloomFilterExpectValues();
-    String column = path.getPath()[0];
-    if (bloomFilterExpectValues.keySet().contains(column)) {
-      int optimalNumOfBits = BlockSplitBloomFilter.optimalNumOfBits(bloomFilterExpectValues.get(column).intValue(),
-        BlockSplitBloomFilter.DEFAULT_FPP);
-      this.bloomFilter = new BlockSplitBloomFilter(optimalNumOfBits/8);
-    }
-
+final class ColumnWriterV1 extends ColumnWriterBase {
+  ColumnWriterV1(ColumnDescriptor path, PageWriter pageWriter, ParquetProperties props) {
+    super(path, pageWriter, props);
   }
 
   public ColumnWriterV1(ColumnDescriptor path, PageWriter pageWriter,
-                        ParquetProperties props) {
-    this.path = path;
-    this.pageWriter = pageWriter;
-    this.props = props;
-
-    // initial check of memory usage. So that we have enough data to make an initial prediction
-    this.valueCountForNextSizeCheck = props.getMinRowCountForPageSizeCheck();
-
-    resetStatistics();
-
-    this.repetitionLevelColumn = props.newRepetitionLevelWriter(path);
-    this.definitionLevelColumn = props.newDefinitionLevelWriter(path);
-    this.dataColumn = props.newValuesWriter(path);
-  }
-
-  private void log(Object value, int r, int d) {
-    if (DEBUG) LOG.debug( "{} {} r:{} d:{}", path, value, r, d);
-  }
-
-  private void resetStatistics() {
-    this.statistics = Statistics.createStats(this.path.getPrimitiveType());
-  }
-
-  /**
-   * Counts how many values have been written and checks the memory usage to flush the page when we reach the page threshold.
-   *
-   * We measure the memory used when we reach the mid point toward our estimated count.
-   * We then update the estimate and flush the page if we reached the threshold.
-   *
-   * That way we check the memory size log2(n) times.
-   *
-   */
-  private void accountForValueWritten() {
-    ++ valueCount;
-    if (valueCount > valueCountForNextSizeCheck) {
-      // not checking the memory used for every value
-      long memSize = repetitionLevelColumn.getBufferedSize()
-          + definitionLevelColumn.getBufferedSize()
-          + dataColumn.getBufferedSize();
-      if (memSize > props.getPageSizeThreshold()) {
-        // we will write the current page and check again the size at the predicted middle of next page
-        if (props.estimateNextSizeCheck()) {
-          valueCountForNextSizeCheck = valueCount / 2;
-        } else {
-          valueCountForNextSizeCheck = props.getMinRowCountForPageSizeCheck();
-        }
-        writePage();
-      } else if (props.estimateNextSizeCheck()) {
-        // not reached the threshold, will check again midway
-        valueCountForNextSizeCheck = (int)(valueCount + ((float)valueCount * props.getPageSizeThreshold() / memSize)) / 2 + 1;
-      } else {
-        valueCountForNextSizeCheck += props.getMinRowCountForPageSizeCheck();
-      }
-    }
-  }
-
-  private void updateStatisticsNumNulls() {
-    statistics.incrementNumNulls();
-  }
-
-  private void updateStatistics(int value) {
-    statistics.updateStats(value);
-  }
-
-  private void updateStatistics(long value) {
-    statistics.updateStats(value);
-  }
-
-  private void updateStatistics(float value) {
-    statistics.updateStats(value);
-  }
-
-  private void updateStatistics(double value) {
-   statistics.updateStats(value);
-  }
-
-  private void updateStatistics(Binary value) {
-   statistics.updateStats(value);
-  }
-
-  private void updateStatistics(boolean value) {
-   statistics.updateStats(value);
-  }
-
-  private void updateBloomFilter(int value) {
-    if (bloomFilter != null) {
-      bloomFilter.insert(bloomFilter.hash(value));
-    }
-  }
-
-  private void updateBloomFilter(long value) {
-    if (bloomFilter != null) {
-      bloomFilter.insert(bloomFilter.hash(value));
-    }
-  }
-
-  private void updateBloomFilter(double value) {
-    if (bloomFilter != null) {
-      bloomFilter.insert(bloomFilter.hash(value));
-    }
-  }
-
-  private void updateBloomFilter(float value) {
-    if (bloomFilter != null) {
-      bloomFilter.insert(bloomFilter.hash(value));
-    }
-  }
-
-  private void updateBloomFilter(Binary value) {
-    if (bloomFilter != null) {
-      bloomFilter.insert(bloomFilter.hash(value));
-    }
-  }
-
-  private void writePage() {
-    if (DEBUG) LOG.debug("write page");
-    try {
-      pageWriter.writePage(
-          concat(repetitionLevelColumn.getBytes(), definitionLevelColumn.getBytes(), dataColumn.getBytes()),
-          valueCount,
-          statistics,
-          repetitionLevelColumn.getEncoding(),
-          definitionLevelColumn.getEncoding(),
-          dataColumn.getEncoding());
-    } catch (IOException e) {
-      throw new ParquetEncodingException("could not write page for " + path, e);
-    }
-    repetitionLevelColumn.reset();
-    definitionLevelColumn.reset();
-    dataColumn.reset();
-    valueCount = 0;
-    resetStatistics();
-  }
-
-  @Override
-  public void writeNull(int repetitionLevel, int definitionLevel) {
-    if (DEBUG) log(null, repetitionLevel, definitionLevel);
-    repetitionLevelColumn.writeInteger(repetitionLevel);
-    definitionLevelColumn.writeInteger(definitionLevel);
-    updateStatisticsNumNulls();
-    accountForValueWritten();
-  }
-
-  @Override
-  public void write(double value, int repetitionLevel, int definitionLevel) {
-    if (DEBUG) log(value, repetitionLevel, definitionLevel);
-    repetitionLevelColumn.writeInteger(repetitionLevel);
-    definitionLevelColumn.writeInteger(definitionLevel);
-    dataColumn.writeDouble(value);
-    updateStatistics(value);
-    updateBloomFilter(value);
-    accountForValueWritten();
-  }
-
-  @Override
-  public void write(float value, int repetitionLevel, int definitionLevel) {
-    if (DEBUG) log(value, repetitionLevel, definitionLevel);
-    repetitionLevelColumn.writeInteger(repetitionLevel);
-    definitionLevelColumn.writeInteger(definitionLevel);
-    dataColumn.writeFloat(value);
-    updateStatistics(value);
-    updateBloomFilter(value);
-    accountForValueWritten();
-  }
-
-  @Override
-  public void write(Binary value, int repetitionLevel, int definitionLevel) {
-    if (DEBUG) log(value, repetitionLevel, definitionLevel);
-    repetitionLevelColumn.writeInteger(repetitionLevel);
-    definitionLevelColumn.writeInteger(definitionLevel);
-    dataColumn.writeBytes(value);
-    updateStatistics(value);
-    updateBloomFilter(value);
-    accountForValueWritten();
-  }
-
-  @Override
-  public void write(boolean value, int repetitionLevel, int definitionLevel) {
-    if (DEBUG) log(value, repetitionLevel, definitionLevel);
-    repetitionLevelColumn.writeInteger(repetitionLevel);
-    definitionLevelColumn.writeInteger(definitionLevel);
-    dataColumn.writeBoolean(value);
-    updateStatistics(value);
-    accountForValueWritten();
-  }
-
-  @Override
-  public void write(int value, int repetitionLevel, int definitionLevel) {
-    if (DEBUG) log(value, repetitionLevel, definitionLevel);
-    repetitionLevelColumn.writeInteger(repetitionLevel);
-    definitionLevelColumn.writeInteger(definitionLevel);
-    dataColumn.writeInteger(value);
-    updateStatistics(value);
-    updateBloomFilter(value);
-    accountForValueWritten();
+                        BloomFilterWriter bloomFilterWriter, ParquetProperties props) {
+    super(path, pageWriter, bloomFilterWriter, props);
   }
 
   @Override
-  public void write(long value, int repetitionLevel, int definitionLevel) {
-    if (DEBUG) log(value, repetitionLevel, definitionLevel);
-    repetitionLevelColumn.writeInteger(repetitionLevel);
-    definitionLevelColumn.writeInteger(definitionLevel);
-    dataColumn.writeLong(value);
-    updateStatistics(value);
-    updateBloomFilter(value);
-    accountForValueWritten();
-  }
-
-  public void flush() {
-    if (valueCount > 0) {
-      writePage();
-    }
-    final DictionaryPage dictionaryPage = dataColumn.toDictPageAndClose();
-    if (dictionaryPage != null) {
-      if (DEBUG) LOG.debug("write dictionary");
-      try {
-        pageWriter.writeDictionaryPage(dictionaryPage);
-      } catch (IOException e) {
-        throw new ParquetEncodingException("could not write dictionary page for " + path, e);
-      }
-      dataColumn.resetDictionary();
-    }
-
-    if (bloomFilterWriter != null && bloomFilter != null) {
-      bloomFilterWriter.writeBloomFilter(bloomFilter);
-    }
+  ValuesWriter createRLWriter(ParquetProperties props, ColumnDescriptor path) {
+    return props.newRepetitionLevelWriter(path);
   }
 
   @Override
-  public void close() {
-    flush();
-    // Close the Values writers.
-    repetitionLevelColumn.close();
-    definitionLevelColumn.close();
-    dataColumn.close();
+  ValuesWriter createDLWriter(ParquetProperties props, ColumnDescriptor path) {
+    return props.newDefinitionLevelWriter(path);
   }
 
   @Override
-  public long getBufferedSizeInMemory() {
-    long bloomBufferSize = bloomFilter == null ? 0 : bloomFilter.getBitsetSize();
-    return repetitionLevelColumn.getBufferedSize()
-        + definitionLevelColumn.getBufferedSize()
-        + dataColumn.getBufferedSize()
-        + pageWriter.getMemSize()
-        + bloomBufferSize;
-  }
-
-  public long allocatedSize() {
-    long bloomAllocatedSize = bloomFilter == null ? 0 : bloomFilter.getBitsetSize();
-    return repetitionLevelColumn.getAllocatedSize()
-        + definitionLevelColumn.getAllocatedSize()
-        + dataColumn.getAllocatedSize()
-        + pageWriter.allocatedSize()
-        + bloomAllocatedSize;
-  }
-
-  public String memUsageString(String indent) {
-    StringBuilder b = new StringBuilder(indent).append(path).append(" {\n");
-    b.append(repetitionLevelColumn.memUsageString(indent + "  r:")).append("\n");
-    b.append(definitionLevelColumn.memUsageString(indent + "  d:")).append("\n");
-    b.append(dataColumn.memUsageString(indent + "  data:")).append("\n");
-    b.append(pageWriter.memUsageString(indent + "  pages:")).append("\n");
-    b.append(indent).append(String.format("  total: %,d/%,d", getBufferedSizeInMemory(), allocatedSize())).append("\n");
-    b.append(indent).append("}\n");
-    return b.toString();
+  void writePage(int rowCount, int valueCount, Statistics<?> statistics, ValuesWriter repetitionLevels,
+      ValuesWriter definitionLevels, ValuesWriter values) throws IOException {
+    pageWriter.writePage(
+        concat(repetitionLevels.getBytes(), definitionLevels.getBytes(), values.getBytes()),
+        valueCount,
+        rowCount,
+        statistics,
+        repetitionLevels.getEncoding(),
+        definitionLevels.getEncoding(),
+        values.getEncoding());
   }
 }
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterV2.java b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterV2.java
index 7b1671407a..ad7077bd19 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterV2.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterV2.java
@@ -19,349 +19,77 @@
 package org.apache.parquet.column.impl;
 
 import java.io.IOException;
-import java.util.HashMap;
 
 import org.apache.parquet.Ints;
 import org.apache.parquet.bytes.BytesInput;
 import org.apache.parquet.column.ColumnDescriptor;
-import org.apache.parquet.column.ColumnWriter;
 import org.apache.parquet.column.Encoding;
 import org.apache.parquet.column.ParquetProperties;
-import org.apache.parquet.column.page.DictionaryPage;
 import org.apache.parquet.column.page.PageWriter;
 import org.apache.parquet.column.statistics.Statistics;
 import org.apache.parquet.column.values.ValuesWriter;
-import org.apache.parquet.column.values.bloomfilter.BlockSplitBloomFilter;
-import org.apache.parquet.column.values.bloomfilter.BloomFilter;
+import org.apache.parquet.column.values.bitpacking.DevNullValuesWriter;
 import org.apache.parquet.column.values.bloomfilter.BloomFilterWriter;
 import org.apache.parquet.column.values.rle.RunLengthBitPackingHybridEncoder;
+import org.apache.parquet.column.values.rle.RunLengthBitPackingHybridValuesWriter;
 import org.apache.parquet.io.ParquetEncodingException;
-import org.apache.parquet.io.api.Binary;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 /**
  * Writes (repetition level, definition level, value) triplets and deals with writing pages to the underlying layer.
  */
-final class ColumnWriterV2 implements ColumnWriter {
-  private static final Logger LOG = LoggerFactory.getLogger(ColumnWriterV2.class);
+final class ColumnWriterV2 extends ColumnWriterBase {
 
-  // By default: Debugging disabled this way (using the "if (DEBUG)" IN the methods) to allow
-  // the java compiler (not the JIT) to remove the unused statements during build time.
-  private static final boolean DEBUG = false;
-
-  private final ColumnDescriptor path;
-  private final PageWriter pageWriter;
-  private RunLengthBitPackingHybridEncoder repetitionLevelColumn;
-  private RunLengthBitPackingHybridEncoder definitionLevelColumn;
-  private ValuesWriter dataColumn;
-  private int valueCount;
-
-  private BloomFilterWriter bloomFilterWriter;
-  private BloomFilter bloomFilter;
-
-  private Statistics<?> statistics;
-  private long rowsWrittenSoFar = 0;
-
-  public ColumnWriterV2(
-      ColumnDescriptor path,
-      PageWriter pageWriter,
-      ParquetProperties props) {
-    this.path = path;
-    this.pageWriter = pageWriter;
-    resetStatistics();
-
-    this.repetitionLevelColumn = props.newRepetitionLevelEncoder(path);
-    this.definitionLevelColumn = props.newDefinitionLevelEncoder(path);
-    this.dataColumn = props.newValuesWriter(path);
-  }
-
-  public ColumnWriterV2(
-    ColumnDescriptor path,
-    PageWriter pageWriter,
-    BloomFilterWriter bloomFilterWriter,
-    ParquetProperties props) {
-    this(path, pageWriter, props);
-
-    // Current not support nested column.
-    if (path.getPath().length != 1 || bloomFilterWriter == null) {
-      return;
-    }
-
-    this.bloomFilterWriter = bloomFilterWriter;
-    HashMap<String, Long> bloomFilterExpectValues = props.getBloomFilterExpectValues();
-    String column = path.getPath()[0];
-    if (bloomFilterExpectValues.keySet().contains(column)) {
-      int optimalNumOfBits = BlockSplitBloomFilter.optimalNumOfBits(bloomFilterExpectValues.get(column).intValue(),
-        BlockSplitBloomFilter.DEFAULT_FPP);
-      this.bloomFilter = new BlockSplitBloomFilter(optimalNumOfBits/8);
+  // Extending the original implementation to not write the size of the data as the original writer would
+  private static class RLEWriterForV2 extends RunLengthBitPackingHybridValuesWriter {
+    public RLEWriterForV2(RunLengthBitPackingHybridEncoder encoder) {
+      super(encoder);
     }
-  }
 
-  private void log(Object value, int r, int d) {
-    LOG.debug("{} {} r:{} d:{}", path, value, r, d);
-  }
-
-  private void resetStatistics() {
-    this.statistics = Statistics.createStats(path.getPrimitiveType());
-  }
-
-  private void definitionLevel(int definitionLevel) {
-    try {
-      definitionLevelColumn.writeInt(definitionLevel);
-    } catch (IOException e) {
-      throw new ParquetEncodingException("illegal definition level " + definitionLevel + " for column " + path, e);
-    }
-  }
-
-  private void repetitionLevel(int repetitionLevel) {
-    try {
-      repetitionLevelColumn.writeInt(repetitionLevel);
-    } catch (IOException e) {
-      throw new ParquetEncodingException("illegal repetition level " + repetitionLevel + " for column " + path, e);
-    }
-  }
-
-  /**
-   * writes the current null value
-   * @param repetitionLevel
-   * @param definitionLevel
-   */
-  public void writeNull(int repetitionLevel, int definitionLevel) {
-    if (DEBUG) log(null, repetitionLevel, definitionLevel);
-    repetitionLevel(repetitionLevel);
-    definitionLevel(definitionLevel);
-    statistics.incrementNumNulls();
-    ++ valueCount;
-  }
-
-  @Override
-  public void close() {
-    // Close the Values writers.
-    repetitionLevelColumn.close();
-    definitionLevelColumn.close();
-    dataColumn.close();
-  }
-
-  @Override
-  public long getBufferedSizeInMemory() {
-    return repetitionLevelColumn.getBufferedSize()
-      + definitionLevelColumn.getBufferedSize()
-      + dataColumn.getBufferedSize()
-      + pageWriter.getMemSize();
-  }
-
-  /**
-   * writes the current value
-   * @param value
-   * @param repetitionLevel
-   * @param definitionLevel
-   */
-  public void write(double value, int repetitionLevel, int definitionLevel) {
-    if (DEBUG) log(value, repetitionLevel, definitionLevel);
-    repetitionLevel(repetitionLevel);
-    definitionLevel(definitionLevel);
-    dataColumn.writeDouble(value);
-    statistics.updateStats(value);
-    if (bloomFilter != null) {
-      bloomFilter.insert(bloomFilter.hash(value));
-    }
-    ++ valueCount;
-  }
-
-  /**
-   * writes the current value
-   * @param value
-   * @param repetitionLevel
-   * @param definitionLevel
-   */
-  public void write(float value, int repetitionLevel, int definitionLevel) {
-    if (DEBUG) log(value, repetitionLevel, definitionLevel);
-    repetitionLevel(repetitionLevel);
-    definitionLevel(definitionLevel);
-    dataColumn.writeFloat(value);
-    statistics.updateStats(value);
-    if (bloomFilter != null) {
-      bloomFilter.insert(bloomFilter.hash(value));
-    }
-    ++ valueCount;
-  }
-
-  /**
-   * writes the current value
-   * @param value
-   * @param repetitionLevel
-   * @param definitionLevel
-   */
-  public void write(Binary value, int repetitionLevel, int definitionLevel) {
-    if (DEBUG) log(value, repetitionLevel, definitionLevel);
-    repetitionLevel(repetitionLevel);
-    definitionLevel(definitionLevel);
-    dataColumn.writeBytes(value);
-    statistics.updateStats(value);
-    if (bloomFilter != null) {
-      bloomFilter.insert(bloomFilter.hash(value));
-    }
-    ++ valueCount;
-  }
-
-  /**
-   * writes the current value
-   * @param value
-   * @param repetitionLevel
-   * @param definitionLevel
-   */
-  public void write(boolean value, int repetitionLevel, int definitionLevel) {
-    if (DEBUG) log(value, repetitionLevel, definitionLevel);
-    repetitionLevel(repetitionLevel);
-    definitionLevel(definitionLevel);
-    dataColumn.writeBoolean(value);
-    statistics.updateStats(value);
-    ++ valueCount;
-  }
-
-  /**
-   * writes the current value
-   * @param value
-   * @param repetitionLevel
-   * @param definitionLevel
-   */
-  public void write(int value, int repetitionLevel, int definitionLevel) {
-    if (DEBUG) log(value, repetitionLevel, definitionLevel);
-    repetitionLevel(repetitionLevel);
-    definitionLevel(definitionLevel);
-    dataColumn.writeInteger(value);
-    statistics.updateStats(value);
-    if (bloomFilter != null) {
-      bloomFilter.insert(bloomFilter.hash(value));
-    }
-    ++ valueCount;
-  }
-
-  /**
-   * writes the current value
-   * @param value
-   * @param repetitionLevel
-   * @param definitionLevel
-   */
-  public void write(long value, int repetitionLevel, int definitionLevel) {
-    if (DEBUG) log(value, repetitionLevel, definitionLevel);
-    repetitionLevel(repetitionLevel);
-    definitionLevel(definitionLevel);
-    dataColumn.writeLong(value);
-    statistics.updateStats(value);
-    if (bloomFilter != null) {
-      bloomFilter.insert(bloomFilter.hash(value));
-    }
-    ++ valueCount;
-  }
-
-  /**
-   * Finalizes the Column chunk. Possibly adding extra pages if needed (dictionary, ...)
-   * Is called right after writePage
-   */
-  public void finalizeColumnChunk() {
-    final DictionaryPage dictionaryPage = dataColumn.toDictPageAndClose();
-    if (dictionaryPage != null) {
-      if (DEBUG) LOG.debug("write dictionary");
+    @Override
+    public BytesInput getBytes() {
       try {
-        pageWriter.writeDictionaryPage(dictionaryPage);
+        return encoder.toBytes();
       } catch (IOException e) {
-        throw new ParquetEncodingException("could not write dictionary page for " + path, e);
+        throw new ParquetEncodingException(e);
       }
-      dataColumn.resetDictionary();
-    }
-
-    if (bloomFilterWriter != null && bloomFilter != null) {
-      bloomFilterWriter.writeBloomFilter(bloomFilter);
     }
   }
 
-  /**
-   * used to decide when to write a page
-   * @return the number of bytes of memory used to buffer the current data
-   */
-  public long getCurrentPageBufferedSize() {
-    long bloomBufferSize = bloomFilter == null ? 0 : bloomFilter.getBitsetSize();
-    return repetitionLevelColumn.getBufferedSize()
-        + definitionLevelColumn.getBufferedSize()
-        + dataColumn.getBufferedSize()
-        + bloomBufferSize;
-  }
+  private static final ValuesWriter NULL_WRITER = new DevNullValuesWriter();
 
-  /**
-   * used to decide when to write a page or row group
-   * @return the number of bytes of memory used to buffer the current data and the previously written pages
-   */
-  public long getTotalBufferedSize() {
-    long bloomBufferSize = bloomFilter == null ? 0 : bloomFilter.getBitsetSize();
-    return repetitionLevelColumn.getBufferedSize()
-        + definitionLevelColumn.getBufferedSize()
-        + dataColumn.getBufferedSize()
-        + pageWriter.getMemSize()
-        + bloomBufferSize;
+  ColumnWriterV2(ColumnDescriptor path, PageWriter pageWriter, ParquetProperties props) {
+    super(path, pageWriter, props);
   }
 
-  /**
-   * @return actual memory used
-   */
-  public long allocatedSize() {
-    long bloomFilterSize = bloomFilter == null ? 0 : bloomFilter.getBitsetSize();
-    return repetitionLevelColumn.getAllocatedSize()
-    + definitionLevelColumn.getAllocatedSize()
-    + dataColumn.getAllocatedSize()
-    + pageWriter.allocatedSize()
-    + bloomFilterSize;
+  ColumnWriterV2(ColumnDescriptor path, PageWriter pageWriter, BloomFilterWriter bloomFilterWriter,
+                 ParquetProperties props) {
+    super(path, pageWriter, bloomFilterWriter, props);
   }
 
-  /**
-   * @param indent a prefix to format lines
-   * @return a formatted string showing how memory is used
-   */
-  public String memUsageString(String indent) {
-    StringBuilder b = new StringBuilder(indent).append(path).append(" {\n");
-    b.append(indent).append(" r:").append(repetitionLevelColumn.getAllocatedSize()).append(" bytes\n");
-    b.append(indent).append(" d:").append(definitionLevelColumn.getAllocatedSize()).append(" bytes\n");
-    b.append(dataColumn.memUsageString(indent + "  data:")).append("\n");
-    b.append(pageWriter.memUsageString(indent + "  pages:")).append("\n");
-    b.append(indent).append(String.format("  total: %,d/%,d", getTotalBufferedSize(), allocatedSize())).append("\n");
-    b.append(indent).append("}\n");
-    return b.toString();
+  @Override
+  ValuesWriter createRLWriter(ParquetProperties props, ColumnDescriptor path) {
+    return path.getMaxRepetitionLevel() == 0 ? NULL_WRITER : new RLEWriterForV2(props.newRepetitionLevelEncoder(path));
   }
 
-  public long getRowsWrittenSoFar() {
-    return this.rowsWrittenSoFar;
+  @Override
+  ValuesWriter createDLWriter(ParquetProperties props, ColumnDescriptor path) {
+    return path.getMaxDefinitionLevel() == 0 ? NULL_WRITER : new RLEWriterForV2(props.newDefinitionLevelEncoder(path));
   }
 
-  /**
-   * writes the current data to a new page in the page store
-   * @param rowCount how many rows have been written so far
-   */
-  public void writePage(long rowCount) {
-    int pageRowCount = Ints.checkedCast(rowCount - rowsWrittenSoFar);
-    this.rowsWrittenSoFar = rowCount;
-    if (DEBUG) LOG.debug("write page");
-    try {
-      // TODO: rework this API. Those must be called *in that order*
-      BytesInput bytes = dataColumn.getBytes();
-      Encoding encoding = dataColumn.getEncoding();
-      pageWriter.writePageV2(
-          pageRowCount,
-          Ints.checkedCast(statistics.getNumNulls()),
-          valueCount,
-          path.getMaxRepetitionLevel() == 0 ? BytesInput.empty() : repetitionLevelColumn.toBytes(),
-          path.getMaxDefinitionLevel() == 0 ? BytesInput.empty() : definitionLevelColumn.toBytes(),
-          encoding,
-          bytes,
-          statistics
-          );
-    } catch (IOException e) {
-      throw new ParquetEncodingException("could not write page for " + path, e);
-    }
-    repetitionLevelColumn.reset();
-    definitionLevelColumn.reset();
-    dataColumn.reset();
-    valueCount = 0;
-    resetStatistics();
+  @Override
+  void writePage(int rowCount, int valueCount, Statistics<?> statistics, ValuesWriter repetitionLevels,
+      ValuesWriter definitionLevels, ValuesWriter values) throws IOException {
+    // TODO: rework this API. The bytes shall be retrieved before the encoding (encoding might be different otherwise)
+    BytesInput bytes = values.getBytes();
+    Encoding encoding = values.getEncoding();
+    pageWriter.writePageV2(
+        rowCount,
+        Ints.checkedCast(statistics.getNumNulls()),
+        valueCount,
+        repetitionLevels.getBytes(),
+        definitionLevels.getBytes(),
+        encoding,
+        bytes,
+        statistics);
   }
 }
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/impl/SynchronizingColumnReader.java b/parquet-column/src/main/java/org/apache/parquet/column/impl/SynchronizingColumnReader.java
new file mode 100644
index 0000000000..50f05c8af3
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/column/impl/SynchronizingColumnReader.java
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.column.impl;
+
+import java.util.PrimitiveIterator;
+
+import org.apache.parquet.VersionParser.ParsedVersion;
+import org.apache.parquet.column.ColumnDescriptor;
+import org.apache.parquet.column.ColumnReader;
+import org.apache.parquet.column.page.DataPage;
+import org.apache.parquet.column.page.PageReader;
+import org.apache.parquet.io.RecordReader;
+import org.apache.parquet.io.api.PrimitiveConverter;
+
+/**
+ * A {@link ColumnReader} implementation for utilizing indexes. When filtering using column indexes we might skip
+ * reading some pages for different columns. Because the rows are not aligned between the pages of the different columns
+ * it might be required to skip some values in this {@link ColumnReader} so we provide only the required values for the
+ * higher API ({@link RecordReader}) and they do not need to handle or know about the skipped pages. The values (and the
+ * related rl and dl) are skipped based on the iterator of the required row indexes and the first row index of each
+ * page.<br>
+ * For example:
+ *
+ * <pre>
+ * rows   col1   col2   col3
+ *      ┌──────┬──────┬──────┐
+ *   0  │  p0  │      │      │
+ *      ╞══════╡  p0  │  p0  │
+ *  20  │ p1(X)│------│------│
+ *      ╞══════╪══════╡      │
+ *  40  │ p2(X)│      │------│
+ *      ╞══════╡ p1(X)╞══════╡
+ *  60  │ p3(X)│      │------│
+ *      ╞══════╪══════╡      │
+ *  80  │  p4  │      │  p1  │
+ *      ╞══════╡  p2  │      │
+ * 100  │  p5  │      │      │
+ *      └──────┴──────┴──────┘
+ * </pre>
+ *
+ * The pages 1, 2, 3 in col1 are skipped so we have to skip the rows [20, 79]. Because page 1 in col2 contains values
+ * only for the rows [40, 79] we skip this entire page as well. To synchronize the row reading we have to skip the
+ * values (and the related rl and dl) for the rows [20, 39] at the end of page 0 for col2. Similarly, we have to
+ * skip values while reading page 0 and page 1 for col3.
+ */
+class SynchronizingColumnReader extends ColumnReaderBase {
+
+  private final PrimitiveIterator.OfLong rowIndexes;
+  private long currentRow;
+  private long targetRow;
+  private long lastRowInPage;
+  private int valuesReadFromPage;
+
+  SynchronizingColumnReader(ColumnDescriptor path, PageReader pageReader, PrimitiveConverter converter,
+      ParsedVersion writerVersion, PrimitiveIterator.OfLong rowIndexes) {
+    super(path, pageReader, converter, writerVersion);
+    this.rowIndexes = rowIndexes;
+    targetRow = Long.MIN_VALUE;
+    consume();
+  }
+
+  @Override
+  boolean isPageFullyConsumed() {
+    return getPageValueCount() <= valuesReadFromPage || lastRowInPage < targetRow;
+  }
+
+  @Override
+  boolean isFullyConsumed() {
+    return !rowIndexes.hasNext();
+  }
+
+  @Override
+  boolean skipRL(int rl) {
+    ++valuesReadFromPage;
+    if (rl == 0) {
+      ++currentRow;
+      if (currentRow > targetRow) {
+        targetRow = rowIndexes.hasNext() ? rowIndexes.nextLong() : Long.MAX_VALUE;
+      }
+    }
+    return currentRow < targetRow;
+  }
+
+  @Override
+  protected void newPageInitialized(DataPage page) {
+    long firstRowIndex = page.getFirstRowIndex()
+        .orElseThrow(() -> new IllegalArgumentException("Missing firstRowIndex for synchronizing values"));
+    int rowCount = page.getIndexRowCount()
+        .orElseThrow(() -> new IllegalArgumentException("Missing rowCount for synchronizing values"));
+    currentRow = firstRowIndex - 1;
+    lastRowInPage = firstRowIndex + rowCount - 1;
+    valuesReadFromPage = 0;
+  }
+
+}
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/page/DataPage.java b/parquet-column/src/main/java/org/apache/parquet/column/page/DataPage.java
index 4d8f381f51..fd1875eddf 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/page/DataPage.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/page/DataPage.java
@@ -18,16 +18,24 @@
  */
 package org.apache.parquet.column.page;
 
+import java.util.Optional;
+
 /**
  * one data page in a chunk
  */
 abstract public class DataPage extends Page {
 
   private final int valueCount;
+  private final long firstRowIndex;
 
   DataPage(int compressedSize, int uncompressedSize, int valueCount) {
+    this(compressedSize, uncompressedSize, valueCount, -1);
+  }
+
+  DataPage(int compressedSize, int uncompressedSize, int valueCount, long firstRowIndex) {
     super(compressedSize, uncompressedSize);
     this.valueCount = valueCount;
+    this.firstRowIndex = firstRowIndex;
   }
 
   /**
@@ -37,6 +45,20 @@ public int getValueCount() {
     return valueCount;
   }
 
+  /**
+   * @return the index of the first row in this page if the related data is available (the optional column-index
+   *         contains this value)
+   */
+  public Optional<Long> getFirstRowIndex() {
+    return firstRowIndex < 0 ? Optional.empty() : Optional.of(firstRowIndex);
+  }
+
+  /**
+   * @return the number of rows in this page if the related data is available (in case of pageV1 the optional
+   *         column-index contains this value)
+   */
+  public abstract Optional<Integer> getIndexRowCount();
+
   public abstract <T> T accept(Visitor<T> visitor);
 
   public static interface Visitor<T> {
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/page/DataPageV1.java b/parquet-column/src/main/java/org/apache/parquet/column/page/DataPageV1.java
index 56928c3818..b1f68aefba 100755
--- a/parquet-column/src/main/java/org/apache/parquet/column/page/DataPageV1.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/page/DataPageV1.java
@@ -18,6 +18,8 @@
  */
 package org.apache.parquet.column.page;
 
+import java.util.Optional;
+
 import org.apache.parquet.Ints;
 import org.apache.parquet.bytes.BytesInput;
 import org.apache.parquet.column.Encoding;
@@ -30,6 +32,7 @@ public class DataPageV1 extends DataPage {
   private final Encoding rlEncoding;
   private final Encoding dlEncoding;
   private final Encoding valuesEncoding;
+  private final int indexRowCount;
 
   /**
    * @param bytes the bytes for this page
@@ -47,6 +50,29 @@ public DataPageV1(BytesInput bytes, int valueCount, int uncompressedSize, Statis
     this.rlEncoding = rlEncoding;
     this.dlEncoding = dlEncoding;
     this.valuesEncoding = valuesEncoding;
+    this.indexRowCount = -1;
+  }
+
+  /**
+   * @param bytes the bytes for this page
+   * @param valueCount count of values in this page
+   * @param uncompressedSize the uncompressed size of the page
+   * @param firstRowIndex the index of the first row in this page
+   * @param rowCount the number of rows in this page
+   * @param statistics of the page's values (max, min, num_null)
+   * @param rlEncoding the repetition level encoding for this page
+   * @param dlEncoding the definition level encoding for this page
+   * @param valuesEncoding the values encoding for this page
+   */
+  public DataPageV1(BytesInput bytes, int valueCount, int uncompressedSize, long firstRowIndex, int rowCount,
+      Statistics<?> statistics, Encoding rlEncoding, Encoding dlEncoding, Encoding valuesEncoding) {
+    super(Ints.checkedCast(bytes.size()), uncompressedSize, valueCount, firstRowIndex);
+    this.bytes = bytes;
+    this.statistics = statistics;
+    this.rlEncoding = rlEncoding;
+    this.dlEncoding = dlEncoding;
+    this.valuesEncoding = valuesEncoding;
+    this.indexRowCount = rowCount;
   }
 
   /**
@@ -94,4 +120,9 @@ public String toString() {
   public <T> T accept(Visitor<T> visitor) {
     return visitor.visit(this);
   }
+
+  @Override
+  public Optional<Integer> getIndexRowCount() {
+    return indexRowCount < 0 ? Optional.empty() : Optional.of(indexRowCount);
+  }
 }
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/page/DataPageV2.java b/parquet-column/src/main/java/org/apache/parquet/column/page/DataPageV2.java
index 62dac83713..a1700aea00 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/page/DataPageV2.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/page/DataPageV2.java
@@ -18,6 +18,8 @@
  */
 package org.apache.parquet.column.page;
 
+import java.util.Optional;
+
 import org.apache.parquet.Ints;
 import org.apache.parquet.bytes.BytesInput;
 import org.apache.parquet.column.Encoding;
@@ -50,6 +52,32 @@ public static DataPageV2 uncompressed(
         false);
   }
 
+  /**
+   * @param rowCount count of rows
+   * @param nullCount count of nulls
+   * @param valueCount count of values
+   * @param firstRowIndex the index of the first row in this page
+   * @param repetitionLevels RLE encoded repetition levels
+   * @param definitionLevels RLE encoded definition levels
+   * @param dataEncoding encoding for the data
+   * @param data data encoded with dataEncoding
+   * @param statistics optional statistics for this page
+   * @return an uncompressed page
+   */
+  public static DataPageV2 uncompressed(
+      int rowCount, int nullCount, int valueCount, long firstRowIndex,
+      BytesInput repetitionLevels, BytesInput definitionLevels,
+      Encoding dataEncoding, BytesInput data,
+      Statistics<?> statistics) {
+    return new DataPageV2(
+        rowCount, nullCount, valueCount, firstRowIndex,
+        repetitionLevels, definitionLevels,
+        dataEncoding, data,
+        Ints.checkedCast(repetitionLevels.size() + definitionLevels.size() + data.size()),
+        statistics,
+        false);
+  }
+
   /**
    * @param rowCount count of rows
    * @param nullCount count of nulls
@@ -104,6 +132,25 @@ public DataPageV2(
     this.isCompressed = isCompressed;
   }
 
+  private DataPageV2(
+      int rowCount, int nullCount, int valueCount, long firstRowIndex,
+      BytesInput repetitionLevels, BytesInput definitionLevels,
+      Encoding dataEncoding, BytesInput data,
+      int uncompressedSize,
+      Statistics<?> statistics,
+      boolean isCompressed) {
+    super(Ints.checkedCast(repetitionLevels.size() + definitionLevels.size() + data.size()), uncompressedSize,
+        valueCount, firstRowIndex);
+    this.rowCount = rowCount;
+    this.nullCount = nullCount;
+    this.repetitionLevels = repetitionLevels;
+    this.definitionLevels = definitionLevels;
+    this.dataEncoding = dataEncoding;
+    this.data = data;
+    this.statistics = statistics;
+    this.isCompressed = isCompressed;
+  }
+
   public int getRowCount() {
     return rowCount;
   }
@@ -136,6 +183,11 @@ public boolean isCompressed() {
     return isCompressed;
   }
 
+  @Override
+  public Optional<Integer> getIndexRowCount() {
+    return Optional.of(rowCount);
+  }
+
   @Override
   public <T> T accept(Visitor<T> visitor) {
     return visitor.visit(this);
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/page/PageReadStore.java b/parquet-column/src/main/java/org/apache/parquet/column/page/PageReadStore.java
index 24d5825543..753bda8907 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/page/PageReadStore.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/page/PageReadStore.java
@@ -18,6 +18,8 @@
  */
 package org.apache.parquet.column.page;
 
+import java.util.Optional;
+import java.util.PrimitiveIterator;
 import org.apache.parquet.column.ColumnDescriptor;
 
 /**
@@ -29,7 +31,8 @@ public interface PageReadStore {
 
   /**
    *
-   * @param descriptor the descriptor of the column
+   * @param descriptor
+   *          the descriptor of the column
    * @return the page reader for that column
    */
   PageReader getPageReader(ColumnDescriptor descriptor);
@@ -40,4 +43,14 @@ public interface PageReadStore {
    */
   long getRowCount();
 
+  /**
+   * Returns the indexes of the rows to be read/built if the related data is available. All the rows whose index is not
+   * returned shall be skipped. This default implementation always returns an empty optional.
+   *
+   * @return the optional of the incremental iterator of the row indexes or an empty optional if the related data is not
+   *         available
+   */
+  default Optional<PrimitiveIterator.OfLong> getRowIndexes() {
+    return Optional.empty();
+  }
 }
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/page/PageWriter.java b/parquet-column/src/main/java/org/apache/parquet/column/page/PageWriter.java
index a2d079f9cf..a72be48b54 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/page/PageWriter.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/page/PageWriter.java
@@ -20,7 +20,6 @@
 
 import java.io.IOException;
 
-import org.apache.parquet.bytes.ByteBufferAllocator;
 import org.apache.parquet.bytes.BytesInput;
 import org.apache.parquet.column.Encoding;
 import org.apache.parquet.column.statistics.Statistics;
@@ -39,9 +38,25 @@ public interface PageWriter {
    * @param dlEncoding definition level encoding
    * @param valuesEncoding values encoding
    * @throws IOException if there is an exception while writing page data
+   * @deprecated will be removed in 2.0.0. This method does not support writing column indexes; Use
+   *             {@link #writePage(BytesInput, int, int, Statistics, Encoding, Encoding, Encoding)} instead
    */
+  @Deprecated
   void writePage(BytesInput bytesInput, int valueCount, Statistics<?> statistics, Encoding rlEncoding, Encoding dlEncoding, Encoding valuesEncoding) throws IOException;
 
+  /**
+   * Writes a single page together with the number of rows it contains.
+   * @param bytesInput the bytes for the page
+   * @param valueCount the number of values in that page
+   * @param rowCount the number of rows in that page
+   * @param statistics the statistics for that page
+   * @param rlEncoding repetition level encoding
+   * @param dlEncoding definition level encoding
+   * @param valuesEncoding values encoding
+   * @throws IOException if there is an exception while writing page data
+   */
+  void writePage(BytesInput bytesInput, int valueCount, int rowCount, Statistics<?> statistics, Encoding rlEncoding, Encoding dlEncoding, Encoding valuesEncoding) throws IOException;
+
   /**
    * writes a single page in the new format
    * @param rowCount the number of rows in this page
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/ValuesReader.java b/parquet-column/src/main/java/org/apache/parquet/column/values/ValuesReader.java
index 1154bc44ee..06771e9751 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/values/ValuesReader.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/ValuesReader.java
@@ -179,5 +179,17 @@ public long readLong() {
    * Skips the next value in the page
    */
   abstract public void skip();
+
+  /**
+   * Skips {@code n} consecutive values; this base implementation invokes {@link #skip()} once per value.
+   *
+   * @param n
+   *          how many values to skip; nothing is skipped when it is zero or negative
+   */
+  public void skip(int n) {
+    for (int remaining = n; remaining > 0; --remaining) {
+      skip();
+    }
+  }
 }
 
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BlockSplitBloomFilter.java b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BlockSplitBloomFilter.java
index d2cf4d692c..f5ceadc428 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BlockSplitBloomFilter.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BlockSplitBloomFilter.java
@@ -16,14 +16,13 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package org.apache.parquet.column.values.bloomfilter;
 
+package org.apache.parquet.column.values.bloomfilter;
 import com.google.common.hash.HashFunction;
 import com.google.common.hash.Hashing;
 import org.apache.parquet.Preconditions;
 import org.apache.parquet.bytes.BytesUtils;
 import org.apache.parquet.io.api.Binary;
-
 import java.io.IOException;
 import java.io.OutputStream;
 import java.nio.ByteBuffer;
@@ -79,7 +78,6 @@ public class BlockSplitBloomFilter extends BloomFilter {
   // of bit to set, one bit in 32-bit word.
   private static final int SALT[] = {0x47b6137b, 0x44974d91, 0x8824ad5b, 0xa2b7289d,
     0x705495c7, 0x2df1424b, 0x9efc4947, 0x5c6bfb31};
-
   /**
    * Constructor of Bloom filter.
    *
@@ -103,7 +101,6 @@ public BlockSplitBloomFilter(int numBytes) {
    */
   private BlockSplitBloomFilter(int numBytes, HashStrategy hashStrategy, Algorithm algorithm) {
     initBitset(numBytes);
-
     switch (hashStrategy) {
       case MURMUR3_X64_128:
         this.hashStrategy = hashStrategy;
@@ -112,11 +109,9 @@ private BlockSplitBloomFilter(int numBytes, HashStrategy hashStrategy, Algorithm
       default:
         throw new RuntimeException("Not supported hash strategy");
     }
-
     this.algorithm = algorithm;
   }
 
-
   /**
    * Construct the Bloom filter with given bitset, it is used when reconstructing
    * Bloom filter from parquet file. It use murmur3_x64_128 as its default hash
@@ -140,9 +135,9 @@ private BlockSplitBloomFilter(byte[] bitset, HashStrategy hashStrategy, Algorith
     if (bitset == null) {
       throw new RuntimeException("Given bitset is null");
     }
+
     this.bitset = bitset;
     this.intBuffer = ByteBuffer.wrap(bitset).order(ByteOrder.LITTLE_ENDIAN).asIntBuffer();
-
     switch (hashStrategy) {
       case MURMUR3_X64_128:
         this.hashStrategy = hashStrategy;
@@ -167,16 +162,13 @@ private void initBitset(int numBytes) {
     if (numBytes < MINIMUM_BLOOM_FILTER_BYTES) {
       numBytes = MINIMUM_BLOOM_FILTER_BYTES;
     }
-
     // Get next power of 2 if it is not power of 2.
     if ((numBytes & (numBytes - 1)) != 0) {
       numBytes = Integer.highestOneBit(numBytes) << 1;
     }
-
     if (numBytes > MAXIMUM_BLOOM_FILTER_BYTES || numBytes < 0) {
       numBytes = MAXIMUM_BLOOM_FILTER_BYTES;
     }
-
     this.bitset = new byte[numBytes];
     this.intBuffer = ByteBuffer.wrap(bitset).order(ByteOrder.LITTLE_ENDIAN).asIntBuffer();
   }
@@ -185,13 +177,10 @@ private void initBitset(int numBytes) {
   public void writeTo(OutputStream out) throws IOException {
     // Write number of bytes of bitset.
     out.write(BytesUtils.intToBytes(bitset.length));
-
     // Write hash strategy
     out.write(BytesUtils.intToBytes(this.hashStrategy.ordinal()));
-
     // Write algorithm
     out.write(BytesUtils.intToBytes(this.algorithm.ordinal()));
-
     // Write bitset
     out.write(bitset);
   }
@@ -202,11 +191,9 @@ private int[] setMask(int key) {
     for (int i = 0; i < BITS_SET_PER_BLOCK; ++i) {
       mask[i] = key * SALT[i];
     }
-
     for (int i = 0; i < BITS_SET_PER_BLOCK; ++i) {
       mask[i] = mask[i] >>> 27;
     }
-
     for (int i = 0; i < BITS_SET_PER_BLOCK; ++i) {
       mask[i] = 0x1 << mask[i];
     }
@@ -221,7 +208,6 @@ public void insert(long hash) {
 
     // Calculate mask for bucket.
     int mask[] = setMask(key);
-
     for (int i = 0; i < BITS_SET_PER_BLOCK; i++) {
       int value = intBuffer.get(bucketIndex * (BYTES_PER_FILTER_BLOCK / 4) + i);
       value |= mask[i];
@@ -236,7 +222,6 @@ public boolean find(long hash) {
 
     // Calculate mask for the tiny Bloom filter.
     int mask[] = setMask(key);
-
     for (int i = 0; i < BITS_SET_PER_BLOCK; i++) {
       if (0 == (intBuffer.get(bucketIndex * (BYTES_PER_FILTER_BLOCK / 4) + i) & mask[i])) {
         return false;
@@ -256,7 +241,6 @@ public boolean find(long hash) {
   public static int optimalNumOfBits(long n, double p) {
     Preconditions.checkArgument((p > 0.0 && p < 1.0),
       "FPP should be less than 1.0 and great than 0.0");
-
     final double m = -8 * n / Math.log(1 - Math.pow(p, 1.0 / 8));
     final double MAX = MAXIMUM_BLOOM_FILTER_BYTES << 3;
     int numBits = (int)m;
@@ -265,12 +249,10 @@ public static int optimalNumOfBits(long n, double p) {
     if (m > MAX || m < 0) {
       numBits = (int)MAX;
     }
-
     // Get next power of 2 if bits is not power of 2.
     if ((numBits & (numBits - 1)) != 0) {
       numBits = Integer.highestOneBit(numBits) << 1;
     }
-
     if (numBits < (MINIMUM_BLOOM_FILTER_BYTES << 3)) {
       numBits = MINIMUM_BLOOM_FILTER_BYTES << 3;
     }
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilter.java b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilter.java
index 430fab8d61..4199497fd9 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilter.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilter.java
@@ -19,7 +19,6 @@
 package org.apache.parquet.column.values.bloomfilter;
 
 import org.apache.parquet.io.api.Binary;
-
 import java.io.IOException;
 import java.io.OutputStream;
 
@@ -94,6 +93,7 @@ public enum Algorithm {
    * @return hash result
    */
   public abstract long hash(float value);
+
   /**
    * Compute hash for Binary value by using its plain encoding result.
    *
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilterReadStore.java b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilterReadStore.java
index bdc51755b0..3373bc1a0e 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilterReadStore.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilterReadStore.java
@@ -23,7 +23,6 @@
 /**
  * contains all the bloom filter reader for all columns of a row group
  */
-
 public interface BloomFilterReadStore {
   /**
    * Get a Bloom filter reader of a column
@@ -33,5 +32,3 @@ public interface BloomFilterReadStore {
    */
   BloomFilterReader getBloomFilterReader(ColumnDescriptor path);
 }
-
-
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilterReader.java b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilterReader.java
index 39b25e2a49..7a430581dd 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilterReader.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilterReader.java
@@ -16,6 +16,7 @@
  * specific language governing permissions and limitations
  * under the License.
  */
+
 package org.apache.parquet.column.values.bloomfilter;
 
 import org.apache.parquet.column.ColumnDescriptor;
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilterWriteStore.java b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilterWriteStore.java
index f472104daa..f7e28fdf2d 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilterWriteStore.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilterWriteStore.java
@@ -16,6 +16,7 @@
  * specific language governing permissions and limitations
  * under the License.
  */
+
 package org.apache.parquet.column.values.bloomfilter;
 
 import org.apache.parquet.column.ColumnDescriptor;
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilterWriter.java b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilterWriter.java
index 388e779968..0fab73b2a4 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilterWriter.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilterWriter.java
@@ -1,3 +1,5 @@
+
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
@@ -16,14 +18,16 @@
  * specific language governing permissions and limitations
  * under the License.
  */
+
 package org.apache.parquet.column.values.bloomfilter;
 
 public interface BloomFilterWriter {
   /**
-   * Write a bloom filter
+   * Write a Bloom filter
    *
-   * @param bloomFilter the bloom filter to write
+   * @param bloomFilter the Bloom filter to write
    *
    */
   void writeBloomFilter(BloomFilter bloomFilter);
 }
+
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/delta/DeltaBinaryPackingValuesReader.java b/parquet-column/src/main/java/org/apache/parquet/column/values/delta/DeltaBinaryPackingValuesReader.java
index c8a80fd308..80cfaf2b04 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/values/delta/DeltaBinaryPackingValuesReader.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/delta/DeltaBinaryPackingValuesReader.java
@@ -90,6 +90,14 @@ public void skip() {
     valuesRead++;
   }
 
+  @Override
+  public void skip(int n) {
+    // checkRead() is invoked before incrementing valuesRead so increase valuesRead size in 2 steps
+    valuesRead += n - 1;
+    checkRead();
+    ++valuesRead;
+  }
+
   @Override
   public int readInteger() {
     // TODO: probably implement it separately
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/deltalengthbytearray/DeltaLengthByteArrayValuesReader.java b/parquet-column/src/main/java/org/apache/parquet/column/values/deltalengthbytearray/DeltaLengthByteArrayValuesReader.java
index 1a2ccb9b53..4dbbcb5645 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/values/deltalengthbytearray/DeltaLengthByteArrayValuesReader.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/deltalengthbytearray/DeltaLengthByteArrayValuesReader.java
@@ -20,8 +20,6 @@
 
 
 import java.io.IOException;
-import java.nio.ByteBuffer;
-
 import org.apache.parquet.bytes.ByteBufferInputStream;
 import org.apache.parquet.column.values.ValuesReader;
 import org.apache.parquet.column.values.delta.DeltaBinaryPackingValuesReader;
@@ -64,7 +62,15 @@ public Binary readBytes() {
 
   @Override
   public void skip() {
-    int length = lengthReader.readInteger();
+    skip(1);
+  }
+
+  @Override
+  public void skip(int n) {
+    int length = 0;
+    for (int i = 0; i < n; ++i) {
+      length += lengthReader.readInteger();
+    }
     try {
       in.skipFully(length);
     } catch (IOException e) {
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/plain/FixedLenByteArrayPlainValuesReader.java b/parquet-column/src/main/java/org/apache/parquet/column/values/plain/FixedLenByteArrayPlainValuesReader.java
index 15ed43438f..631c9084d1 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/values/plain/FixedLenByteArrayPlainValuesReader.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/plain/FixedLenByteArrayPlainValuesReader.java
@@ -19,7 +19,6 @@
 package org.apache.parquet.column.values.plain;
 
 import java.io.IOException;
-import java.nio.ByteBuffer;
 import org.apache.parquet.bytes.ByteBufferInputStream;
 import org.apache.parquet.column.values.ValuesReader;
 import org.apache.parquet.io.ParquetDecodingException;
@@ -51,8 +50,13 @@ public Binary readBytes() {
 
   @Override
   public void skip() {
+    skip(1);
+  }
+
+  @Override
+  public void skip(int n) {
     try {
-      in.skipFully(length);
+      in.skipFully(n * length);
     } catch (IOException | RuntimeException e) {
       throw new ParquetDecodingException("could not skip bytes at offset " + in.position(), e);
     }
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/plain/PlainValuesReader.java b/parquet-column/src/main/java/org/apache/parquet/column/values/plain/PlainValuesReader.java
index f576528a98..127817eb0c 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/values/plain/PlainValuesReader.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/plain/PlainValuesReader.java
@@ -41,14 +41,26 @@ public void initFromPage(int valueCount, ByteBufferInputStream stream) throws IO
     this.in = new LittleEndianDataInputStream(stream.remainingStream());
   }
 
+  @Override
+  public void skip() {
+    skip(1);
+  }
+
+  void skipBytesFully(int n) throws IOException {
+    for (int remaining = n, step; remaining > 0; remaining -= step) {
+      step = in.skipBytes(remaining); // may legally skip fewer bytes than asked, even none (e.g. at end of stream)
+      if (step <= 0) { throw new IOException("could not skip " + remaining + " more bytes (" + n + " requested)"); }
+    }
+  }
+
   public static class DoublePlainValuesReader extends PlainValuesReader {
 
     @Override
-    public void skip() {
+    public void skip(int n) {
       try {
-        in.skipBytes(8);
+        skipBytesFully(n * 8);
       } catch (IOException e) {
-        throw new ParquetDecodingException("could not skip double", e);
+        throw new ParquetDecodingException("could not skip " + n + " double values", e);
       }
     }
 
@@ -65,11 +77,11 @@ public double readDouble() {
   public static class FloatPlainValuesReader extends PlainValuesReader {
 
     @Override
-    public void skip() {
+    public void skip(int n) {
       try {
-        in.skipBytes(4);
+        skipBytesFully(n * 4);
       } catch (IOException e) {
-        throw new ParquetDecodingException("could not skip float", e);
+        throw new ParquetDecodingException("could not skip " + n + " floats", e);
       }
     }
 
@@ -86,11 +98,11 @@ public float readFloat() {
   public static class IntegerPlainValuesReader extends PlainValuesReader {
 
     @Override
-    public void skip() {
+    public void skip(int n) {
       try {
-        in.skipBytes(4);
+        skipBytesFully(n * 4); // a single skipBytes call may skip fewer bytes than requested
       } catch (IOException e) {
-        throw new ParquetDecodingException("could not skip int", e);
+        throw new ParquetDecodingException("could not skip " + n + " ints", e);
       }
     }
 
@@ -107,11 +119,11 @@ public int readInteger() {
   public static class LongPlainValuesReader extends PlainValuesReader {
 
     @Override
-    public void skip() {
+    public void skip(int n) {
       try {
-        in.skipBytes(8);
+        skipBytesFully(n * 8); // a single skipBytes call may skip fewer bytes than requested
       } catch (IOException e) {
-        throw new ParquetDecodingException("could not skip long", e);
+        throw new ParquetDecodingException("could not skip " + n + " longs", e);
       }
     }
 
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/rle/RunLengthBitPackingHybridValuesWriter.java b/parquet-column/src/main/java/org/apache/parquet/column/values/rle/RunLengthBitPackingHybridValuesWriter.java
index 3b7a5def47..a51a8c4d82 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/values/rle/RunLengthBitPackingHybridValuesWriter.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/rle/RunLengthBitPackingHybridValuesWriter.java
@@ -19,6 +19,7 @@
 package org.apache.parquet.column.values.rle;
 
 import java.io.IOException;
+import java.util.Objects;
 
 import org.apache.parquet.bytes.ByteBufferAllocator;
 import org.apache.parquet.Ints;
@@ -28,10 +29,14 @@
 import org.apache.parquet.io.ParquetEncodingException;
 
 public class RunLengthBitPackingHybridValuesWriter extends ValuesWriter {
-  private final RunLengthBitPackingHybridEncoder encoder;
+  protected final RunLengthBitPackingHybridEncoder encoder;
 
   public RunLengthBitPackingHybridValuesWriter(int bitWidth, int initialCapacity, int pageSize, ByteBufferAllocator allocator) {
-    this.encoder = new RunLengthBitPackingHybridEncoder(bitWidth, initialCapacity, pageSize, allocator);
+    this(new RunLengthBitPackingHybridEncoder(bitWidth, initialCapacity, pageSize, allocator));
+  }
+
+  protected RunLengthBitPackingHybridValuesWriter(RunLengthBitPackingHybridEncoder encoder) {
+    this.encoder = Objects.requireNonNull(encoder);
   }
 
   @Override
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/rle/ZeroIntegerValuesReader.java b/parquet-column/src/main/java/org/apache/parquet/column/values/rle/ZeroIntegerValuesReader.java
index beeb0ad2ed..09ca8a1a47 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/values/rle/ZeroIntegerValuesReader.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/rle/ZeroIntegerValuesReader.java
@@ -42,4 +42,8 @@ public void initFromPage(int valueCount, ByteBufferInputStream stream) throws IO
   public void skip() {
   }
 
+  @Override
+  public void skip(int n) {
+  }
+
 }
diff --git a/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/ValidTypeMap.java b/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/ValidTypeMap.java
index b8f48bb0b8..62c174e547 100644
--- a/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/ValidTypeMap.java
+++ b/parquet-column/src/main/java/org/apache/parquet/filter2/predicate/ValidTypeMap.java
@@ -1,4 +1,4 @@
-/* 
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -25,7 +25,6 @@
 
 import org.apache.parquet.filter2.predicate.Operators.Column;
 import org.apache.parquet.hadoop.metadata.ColumnPath;
-import org.apache.parquet.schema.OriginalType;
 import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
 
 /**
diff --git a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/BinaryColumnIndexBuilder.java b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/BinaryColumnIndexBuilder.java
new file mode 100644
index 0000000000..490cc3e9b3
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/BinaryColumnIndexBuilder.java
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.internal.column.columnindex;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.parquet.filter2.predicate.Statistics;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.PrimitiveComparator;
+import org.apache.parquet.schema.PrimitiveType;
+
+class BinaryColumnIndexBuilder extends ColumnIndexBuilder {
+  private static class BinaryColumnIndex extends ColumnIndexBase<Binary> {
+    private Binary[] minValues;
+    private Binary[] maxValues;
+
+    private BinaryColumnIndex(PrimitiveType type) {
+      super(type);
+    }
+
+    @Override
+    ByteBuffer getMinValueAsBytes(int pageIndex) {
+      return convert(minValues[pageIndex]);
+    }
+
+    @Override
+    ByteBuffer getMaxValueAsBytes(int pageIndex) {
+      return convert(maxValues[pageIndex]);
+    }
+
+    @Override
+    String getMinValueAsString(int pageIndex) {
+      return stringifier.stringify(minValues[pageIndex]);
+    }
+
+    @Override
+    String getMaxValueAsString(int pageIndex) {
+      return stringifier.stringify(maxValues[pageIndex]);
+    }
+
+    @Override
+    @SuppressWarnings("unchecked")
+    <T extends Comparable<T>> Statistics<T> createStats(int arrayIndex) {
+      return (Statistics<T>) new Statistics<Binary>(minValues[arrayIndex], maxValues[arrayIndex], comparator);
+    }
+
+    @Override
+    ValueComparator createValueComparator(Object value) {
+      final Binary v = (Binary) value;
+      return new ValueComparator() {
+        @Override
+        int compareValueToMin(int arrayIndex) {
+          return comparator.compare(v, minValues[arrayIndex]);
+        }
+
+        @Override
+        int compareValueToMax(int arrayIndex) {
+          return comparator.compare(v, maxValues[arrayIndex]);
+        }
+      };
+    }
+  }
+
+  private final List<Binary> minValues = new ArrayList<>();
+  private final List<Binary> maxValues = new ArrayList<>();
+  private final BinaryTruncator truncator;
+  private final int truncateLength;
+
+  private static Binary convert(ByteBuffer buffer) {
+    return Binary.fromReusedByteBuffer(buffer);
+  }
+
+  private static ByteBuffer convert(Binary value) {
+    return value.toByteBuffer();
+  }
+
+  BinaryColumnIndexBuilder(PrimitiveType type, int truncateLength) {
+    truncator = BinaryTruncator.getTruncator(type);
+    this.truncateLength = truncateLength;
+  }
+
+  @Override
+  void addMinMaxFromBytes(ByteBuffer min, ByteBuffer max) {
+    minValues.add(convert(min));
+    maxValues.add(convert(max));
+  }
+
+  @Override
+  void addMinMax(Object min, Object max) {
+    minValues.add(min == null ? null : truncator.truncateMin((Binary) min, truncateLength));
+    maxValues.add(max == null ? null : truncator.truncateMax((Binary) max, truncateLength));
+  }
+
+  @Override
+  ColumnIndexBase<Binary> createColumnIndex(PrimitiveType type) {
+    BinaryColumnIndex columnIndex = new BinaryColumnIndex(type);
+    columnIndex.minValues = minValues.toArray(new Binary[minValues.size()]);
+    columnIndex.maxValues = maxValues.toArray(new Binary[maxValues.size()]);
+    return columnIndex;
+  }
+
+  @Override
+  void clearMinMax() {
+    minValues.clear();
+    maxValues.clear();
+  }
+
+  @Override
+  int compareMinValues(PrimitiveComparator<Binary> comparator, int index1, int index2) {
+    return comparator.compare(minValues.get(index1), minValues.get(index2));
+  }
+
+  @Override
+  int compareMaxValues(PrimitiveComparator<Binary> comparator, int index1, int index2) {
+    return comparator.compare(maxValues.get(index1), maxValues.get(index2));
+  }
+
+  @Override
+  int sizeOf(Object value) {
+    return ((Binary) value).length();
+  }
+}
diff --git a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/BinaryTruncator.java b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/BinaryTruncator.java
new file mode 100644
index 0000000000..bcc43fb866
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/BinaryTruncator.java
@@ -0,0 +1,228 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.internal.column.columnindex;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CoderResult;
+import java.nio.charset.CodingErrorAction;
+import java.nio.charset.StandardCharsets;
+
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.OriginalType;
+import org.apache.parquet.schema.PrimitiveType;
+
+/**
+ * Class for truncating min/max values for binary types.
+ * <p>
+ * A truncated min value is a prefix of the original value while a truncated max value is such a prefix incremented.
+ * The truncator instances are shared; the decoding state they rely on is kept per thread.
+ */
+abstract class BinaryTruncator {
+  // Outcome of decoding a byte sequence with a given charset
+  enum Validity {
+    VALID, MALFORMED, UNMAPPABLE;
+  }
+
+  // Checks whether a byte sequence can be decoded with the given charset
+  private static class CharsetValidator {
+    // CharsetDecoder and CharBuffer are stateful and not thread-safe while the truncators below are shared static
+    // instances, so every thread works with its own decoder and scratch buffer
+    private final ThreadLocal<CharBuffer> dummyBuffer = ThreadLocal.withInitial(() -> CharBuffer.allocate(1024));
+    private final ThreadLocal<CharsetDecoder> decoder;
+
+    CharsetValidator(Charset charset) {
+      decoder = ThreadLocal.withInitial(() -> charset.newDecoder()
+          .onMalformedInput(CodingErrorAction.REPORT)
+          .onUnmappableCharacter(CodingErrorAction.REPORT));
+    }
+
+    // Decodes the remaining bytes of the buffer to classify them; the position of the buffer is restored at the end
+    Validity checkValidity(ByteBuffer buffer) {
+      CharsetDecoder charsetDecoder = decoder.get();
+      CharBuffer chars = dummyBuffer.get();
+      int pos = buffer.position();
+      charsetDecoder.reset(); // Start every check from a clean decoder state
+      CoderResult result = CoderResult.OVERFLOW;
+      while (result.isOverflow()) {
+        chars.clear(); // The decoded characters are not needed; reuse the same space
+        result = charsetDecoder.decode(buffer, chars, true);
+      }
+      buffer.position(pos);
+      if (result.isUnderflow()) {
+        return Validity.VALID;
+      } else if (result.isMalformed()) {
+        return Validity.MALFORMED;
+      } else {
+        return Validity.UNMAPPABLE;
+      }
+    }
+  }
+
+  // Truncator which returns the values as they are
+  private static final BinaryTruncator NO_OP_TRUNCATOR = new BinaryTruncator() {
+    @Override
+    Binary truncateMin(Binary minValue, int length) {
+      return minValue;
+    }
+
+    @Override
+    Binary truncateMax(Binary maxValue, int length) {
+      return maxValue;
+    }
+  };
+
+  // Truncator which keeps valid UTF-8 values valid; any other value is truncated as plain bytes
+  private static final BinaryTruncator DEFAULT_UTF8_TRUNCATOR = new BinaryTruncator() {
+    private final CharsetValidator validator = new CharsetValidator(StandardCharsets.UTF_8);
+
+    @Override
+    Binary truncateMin(Binary minValue, int length) {
+      if (minValue.length() <= length) {
+        return minValue;
+      }
+      ByteBuffer buffer = minValue.toByteBuffer();
+      byte[] array;
+      if (validator.checkValidity(buffer) == Validity.VALID) {
+        array = truncateUtf8(buffer, length);
+      } else {
+        array = truncate(buffer, length);
+      }
+      return array == null ? minValue : Binary.fromConstantByteArray(array); // null: no truncated form available
+    }
+
+    @Override
+    Binary truncateMax(Binary maxValue, int length) {
+      if (maxValue.length() <= length) {
+        return maxValue;
+      }
+      byte[] array;
+      ByteBuffer buffer = maxValue.toByteBuffer();
+      if (validator.checkValidity(buffer) == Validity.VALID) {
+        array = incrementUtf8(truncateUtf8(buffer, length));
+      } else {
+        array = increment(truncate(buffer, length));
+      }
+      return array == null ? maxValue : Binary.fromConstantByteArray(array); // null: unable to truncate/increment
+    }
+
+    // Simply truncate to length
+    private byte[] truncate(ByteBuffer buffer, int length) {
+      assert length < buffer.remaining();
+      byte[] array = new byte[length];
+      buffer.get(array);
+      return array;
+    }
+
+    // Trying to increment the bytes from the last one to the beginning
+    private byte[] increment(byte[] array) {
+      for (int i = array.length - 1; i >= 0; --i) {
+        byte elem = array[i];
+        ++elem;
+        array[i] = elem;
+        if (elem != 0) { // Did not overflow: 0xFF -> 0x00
+          return array;
+        }
+      }
+      return null;
+    }
+
+    // Truncates the buffer to length or less so the remaining bytes form a valid UTF-8 string
+    private byte[] truncateUtf8(ByteBuffer buffer, int length) {
+      assert length < buffer.remaining();
+      ByteBuffer newBuffer = buffer.slice();
+      newBuffer.limit(newBuffer.position() + length);
+      while (validator.checkValidity(newBuffer) != Validity.VALID) {
+        newBuffer.limit(newBuffer.limit() - 1);
+        if (newBuffer.remaining() == 0) {
+          return null;
+        }
+      }
+      byte[] array = new byte[newBuffer.remaining()];
+      newBuffer.get(array);
+      return array;
+    }
+
+    // Trying to increment the bytes from the last one to the beginning until the bytes form a valid UTF-8 string
+    private byte[] incrementUtf8(byte[] array) {
+      if (array == null) {
+        return null;
+      }
+      ByteBuffer buffer = ByteBuffer.wrap(array);
+      for (int i = array.length - 1; i >= 0; --i) {
+        byte prev = array[i];
+        byte inc = prev;
+        while (++inc != 0) { // Until overflow: 0xFF -> 0x00
+          array[i] = inc;
+          switch (validator.checkValidity(buffer)) {
+            case VALID:
+              return array;
+            case UNMAPPABLE:
+              continue; // Increment the i byte once more
+            case MALFORMED:
+              break; // Stop incrementing the i byte; go to the i-1
+          }
+          break; // MALFORMED
+        }
+        array[i] = prev;
+      }
+      return null; // All characters are the largest possible; unable to increment
+    }
+  };
+
+  /**
+   * @param type the primitive type of the column; may be {@code null}, in which case no truncation is done
+   * @return the truncator to be used for the min/max values of the specified type
+   * @throws IllegalArgumentException if the primitive type is not INT96, BINARY or FIXED_LEN_BYTE_ARRAY
+   */
+  static BinaryTruncator getTruncator(PrimitiveType type) {
+    if (type == null) {
+      return NO_OP_TRUNCATOR;
+    }
+    switch (type.getPrimitiveTypeName()) {
+      case INT96:
+        return NO_OP_TRUNCATOR;
+      case BINARY:
+      case FIXED_LEN_BYTE_ARRAY:
+        OriginalType originalType = type.getOriginalType();
+        if (originalType == null) {
+          return DEFAULT_UTF8_TRUNCATOR;
+        }
+        switch (originalType) {
+          case UTF8:
+          case ENUM:
+          case JSON:
+          case BSON:
+            return DEFAULT_UTF8_TRUNCATOR;
+          default:
+            return NO_OP_TRUNCATOR;
+        }
+      default:
+        throw new IllegalArgumentException("No truncator is available for the type: " + type);
+    }
+  }
+
+  // Returns minValue itself or a replacement of it shortened with the specified length as the target
+  abstract Binary truncateMin(Binary minValue, int length);
+
+  // Returns maxValue itself or a replacement of it shortened with the specified length as the target
+  abstract Binary truncateMax(Binary maxValue, int length);
+}
diff --git a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/BooleanColumnIndexBuilder.java b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/BooleanColumnIndexBuilder.java
new file mode 100644
index 0000000000..233bd1b026
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/BooleanColumnIndexBuilder.java
@@ -0,0 +1,141 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.internal.column.columnindex;
+
+import java.nio.ByteBuffer;
+import org.apache.parquet.filter2.predicate.Statistics;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.PrimitiveComparator;
+import org.apache.parquet.schema.PrimitiveType;
+
+import it.unimi.dsi.fastutil.booleans.BooleanArrayList;
+import it.unimi.dsi.fastutil.booleans.BooleanList;
+
+/**
+ * Builder of the column index for boolean columns; the min/max values are stored as primitive booleans.
+ */
+class BooleanColumnIndexBuilder extends ColumnIndexBuilder {
+  // Column index of boolean values backed by primitive arrays
+  private static class BooleanColumnIndex extends ColumnIndexBase<Boolean> {
+    private boolean[] minValues;
+    private boolean[] maxValues;
+
+    private BooleanColumnIndex(PrimitiveType type) {
+      super(type);
+    }
+
+    @Override
+    ByteBuffer getMinValueAsBytes(int pageIndex) {
+      return convert(minValues[pageIndex]);
+    }
+
+    @Override
+    ByteBuffer getMaxValueAsBytes(int pageIndex) {
+      return convert(maxValues[pageIndex]);
+    }
+
+    @Override
+    String getMinValueAsString(int pageIndex) {
+      return stringifier.stringify(minValues[pageIndex]);
+    }
+
+    @Override
+    String getMaxValueAsString(int pageIndex) {
+      return stringifier.stringify(maxValues[pageIndex]);
+    }
+
+    @Override
+    @SuppressWarnings("unchecked")
+    <T extends Comparable<T>> Statistics<T> createStats(int arrayIndex) {
+      return (Statistics<T>) new Statistics<Boolean>(minValues[arrayIndex], maxValues[arrayIndex], comparator);
+    }
+
+    @Override
+    ValueComparator createValueComparator(Object value) {
+      final boolean v = (boolean) value;
+      return new ValueComparator() {
+        @Override
+        int compareValueToMin(int arrayIndex) {
+          return comparator.compare(v, minValues[arrayIndex]);
+        }
+
+        @Override
+        int compareValueToMax(int arrayIndex) {
+          return comparator.compare(v, maxValues[arrayIndex]);
+        }
+      };
+    }
+  }
+
+  private final BooleanList minValues = new BooleanArrayList();
+  private final BooleanList maxValues = new BooleanArrayList();
+
+  // Reads the boolean from the first remaining byte of the buffer (position based, so offset buffers work as well)
+  private static boolean convert(ByteBuffer buffer) {
+    return buffer.get(buffer.position()) != 0;
+  }
+
+  // Encodes the boolean value as a single byte: 1 for true, 0 for false
+  private static ByteBuffer convert(boolean value) {
+    return ByteBuffer.allocate(1).put(0, value ? (byte) 1 : 0);
+  }
+
+  @Override
+  void addMinMaxFromBytes(ByteBuffer min, ByteBuffer max) {
+    minValues.add(convert(min));
+    maxValues.add(convert(max));
+  }
+
+  @Override
+  void addMinMax(Object min, Object max) {
+    minValues.add((boolean) min);
+    maxValues.add((boolean) max);
+  }
+
+  @Override
+  ColumnIndexBase<Boolean> createColumnIndex(PrimitiveType type) {
+    BooleanColumnIndex columnIndex = new BooleanColumnIndex(type);
+    columnIndex.minValues = minValues.toBooleanArray();
+    columnIndex.maxValues = maxValues.toBooleanArray();
+    return columnIndex;
+  }
+
+  @Override
+  void clearMinMax() {
+    minValues.clear();
+    maxValues.clear();
+  }
+
+  @Override
+  int compareMinValues(PrimitiveComparator<Binary> comparator, int index1, int index2) {
+    // The type specific getter keeps the values primitive (no boxing/unboxing round trip)
+    return comparator.compare(minValues.getBoolean(index1), minValues.getBoolean(index2));
+  }
+
+  @Override
+  int compareMaxValues(PrimitiveComparator<Binary> comparator, int index1, int index2) {
+    // The type specific getter keeps the values primitive (no boxing/unboxing round trip)
+    return comparator.compare(maxValues.getBoolean(index1), maxValues.getBoolean(index2));
+  }
+
+  @Override
+  int sizeOf(Object value) {
+    return 1;
+  }
+}
diff --git a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/BoundaryOrder.java b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/BoundaryOrder.java
new file mode 100644
index 0000000000..e47b5b3f1a
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/BoundaryOrder.java
@@ -0,0 +1,384 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.internal.column.columnindex;
+
+import java.util.PrimitiveIterator;
+import java.util.PrimitiveIterator.OfInt;
+
+import org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder.ColumnIndexBase;
+
+/**
+ * Enum for {@link org.apache.parquet.format.BoundaryOrder}. It also contains the implementations of searching for
+ * matching page indexes for column index based filtering.
+ */
+public enum BoundaryOrder {
+  // No ordering can be relied on: every min/max pair is checked one by one
+  UNORDERED {
+    @Override
+    PrimitiveIterator.OfInt eq(ColumnIndexBase<?>.ValueComparator comparator) {
+      return IndexIterator.filterTranslate(comparator.arrayLength(),
+          arrayIndex -> comparator.compareValueToMin(arrayIndex) >= 0 && comparator.compareValueToMax(arrayIndex) <= 0,
+          comparator::translate);
+    }
+
+    @Override
+    PrimitiveIterator.OfInt gt(ColumnIndexBase<?>.ValueComparator comparator) {
+      return IndexIterator.filterTranslate(comparator.arrayLength(),
+          arrayIndex -> comparator.compareValueToMax(arrayIndex) < 0,
+          comparator::translate);
+    }
+
+    @Override
+    PrimitiveIterator.OfInt gtEq(ColumnIndexBase<?>.ValueComparator comparator) {
+      return IndexIterator.filterTranslate(comparator.arrayLength(),
+          arrayIndex -> comparator.compareValueToMax(arrayIndex) <= 0,
+          comparator::translate);
+    }
+
+    @Override
+    PrimitiveIterator.OfInt lt(ColumnIndexBase<?>.ValueComparator comparator) {
+      return IndexIterator.filterTranslate(comparator.arrayLength(),
+          arrayIndex -> comparator.compareValueToMin(arrayIndex) > 0,
+          comparator::translate);
+    }
+
+    @Override
+    PrimitiveIterator.OfInt ltEq(ColumnIndexBase<?>.ValueComparator comparator) {
+      return IndexIterator.filterTranslate(comparator.arrayLength(),
+          arrayIndex -> comparator.compareValueToMin(arrayIndex) >= 0,
+          comparator::translate);
+    }
+
+    @Override
+    PrimitiveIterator.OfInt notEq(ColumnIndexBase<?>.ValueComparator comparator) {
+      return IndexIterator.filterTranslate(comparator.arrayLength(),
+          arrayIndex -> comparator.compareValueToMin(arrayIndex) != 0 || comparator.compareValueToMax(arrayIndex) != 0,
+          comparator::translate);
+    }
+  },
+  // The min/max values are treated as ascending: the matching range is located by binary search
+  ASCENDING {
+    @Override
+    OfInt eq(ColumnIndexBase<?>.ValueComparator comparator) {
+      Bounds bounds = findBounds(comparator);
+      if (bounds == null) {
+        return IndexIterator.EMPTY;
+      }
+      return IndexIterator.rangeTranslate(bounds.lower, bounds.upper, comparator::translate);
+    }
+
+    @Override
+    OfInt gt(ColumnIndexBase<?>.ValueComparator comparator) {
+      int length = comparator.arrayLength();
+      if (length == 0) { // No min/max values: nothing can match (and the search below needs at least one)
+        return IndexIterator.EMPTY;
+      }
+      int left = 0;
+      int right = length;
+      do {
+        int i = floorMid(left, right);
+        if (comparator.compareValueToMax(i) >= 0) {
+          left = i + 1;
+        } else {
+          right = i;
+        }
+      } while (left < right);
+      return IndexIterator.rangeTranslate(right, length - 1, comparator::translate);
+    }
+
+    @Override
+    OfInt gtEq(ColumnIndexBase<?>.ValueComparator comparator) {
+      int length = comparator.arrayLength();
+      if (length == 0) { // No min/max values: nothing can match (and the search below needs at least one)
+        return IndexIterator.EMPTY;
+      }
+      int left = 0;
+      int right = length;
+      do {
+        int i = floorMid(left, right);
+        if (comparator.compareValueToMax(i) > 0) {
+          left = i + 1;
+        } else {
+          right = i;
+        }
+      } while (left < right);
+      return IndexIterator.rangeTranslate(right, length - 1, comparator::translate);
+    }
+
+    @Override
+    OfInt lt(ColumnIndexBase<?>.ValueComparator comparator) {
+      int length = comparator.arrayLength();
+      if (length == 0) { // No min/max values: nothing can match (and the search below needs at least one)
+        return IndexIterator.EMPTY;
+      }
+      int left = -1;
+      int right = length - 1;
+      do {
+        int i = ceilingMid(left, right);
+        if (comparator.compareValueToMin(i) <= 0) {
+          right = i - 1;
+        } else {
+          left = i;
+        }
+      } while (left < right);
+      return IndexIterator.rangeTranslate(0, left, comparator::translate);
+    }
+
+    @Override
+    OfInt ltEq(ColumnIndexBase<?>.ValueComparator comparator) {
+      int length = comparator.arrayLength();
+      if (length == 0) { // No min/max values: nothing can match (and the search below needs at least one)
+        return IndexIterator.EMPTY;
+      }
+      int left = -1;
+      int right = length - 1;
+      do {
+        int i = ceilingMid(left, right);
+        if (comparator.compareValueToMin(i) < 0) {
+          right = i - 1;
+        } else {
+          left = i;
+        }
+      } while (left < right);
+      return IndexIterator.rangeTranslate(0, left, comparator::translate);
+    }
+
+    @Override
+    OfInt notEq(ColumnIndexBase<?>.ValueComparator comparator) {
+      Bounds bounds = findBounds(comparator);
+      int length = comparator.arrayLength();
+      if (bounds == null) {
+        return IndexIterator.all(comparator);
+      }
+      return IndexIterator.filterTranslate(
+          length,
+          i -> i < bounds.lower || i > bounds.upper || comparator.compareValueToMin(i) != 0
+              || comparator.compareValueToMax(i) != 0,
+          comparator::translate);
+    }
+
+    // Searches for the first and the last index where min <= value <= max; null if there is no such index
+    private Bounds findBounds(ColumnIndexBase<?>.ValueComparator comparator) {
+      int length = comparator.arrayLength();
+      int lowerLeft = 0;
+      int upperLeft = 0;
+      int lowerRight = length - 1;
+      int upperRight = length - 1;
+      do {
+        if (lowerLeft > lowerRight) {
+          return null;
+        }
+        int i = floorMid(lowerLeft, lowerRight);
+        if (comparator.compareValueToMin(i) < 0) {
+          lowerRight = upperRight = i - 1;
+        } else if (comparator.compareValueToMax(i) > 0) {
+          lowerLeft = upperLeft = i + 1;
+        } else {
+          lowerRight = upperLeft = i;
+        }
+      } while (lowerLeft != lowerRight);
+      do {
+        if (upperLeft > upperRight) {
+          return null;
+        }
+        int i = ceilingMid(upperLeft, upperRight);
+        if (comparator.compareValueToMin(i) < 0) {
+          upperRight = i - 1;
+        } else if (comparator.compareValueToMax(i) > 0) {
+          upperLeft = i + 1;
+        } else {
+          upperLeft = i;
+        }
+      } while (upperLeft != upperRight);
+      return new Bounds(lowerLeft, upperRight);
+    }
+  },
+  // The min/max values are treated as descending: the matching range is located by binary search
+  DESCENDING {
+    @Override
+    OfInt eq(ColumnIndexBase<?>.ValueComparator comparator) {
+      Bounds bounds = findBounds(comparator);
+      if (bounds == null) {
+        return IndexIterator.EMPTY;
+      }
+      return IndexIterator.rangeTranslate(bounds.lower, bounds.upper, comparator::translate);
+    }
+
+    @Override
+    OfInt gt(ColumnIndexBase<?>.ValueComparator comparator) {
+      int length = comparator.arrayLength();
+      if (length == 0) { // No min/max values: nothing can match (and the search below needs at least one)
+        return IndexIterator.EMPTY;
+      }
+      int left = -1;
+      int right = length - 1;
+      do {
+        int i = ceilingMid(left, right);
+        if (comparator.compareValueToMax(i) >= 0) {
+          right = i - 1;
+        } else {
+          left = i;
+        }
+      } while (left < right);
+      return IndexIterator.rangeTranslate(0, left, comparator::translate);
+    }
+
+    @Override
+    OfInt gtEq(ColumnIndexBase<?>.ValueComparator comparator) {
+      int length = comparator.arrayLength();
+      if (length == 0) { // No min/max values: nothing can match (and the search below needs at least one)
+        return IndexIterator.EMPTY;
+      }
+      int left = -1;
+      int right = length - 1;
+      do {
+        int i = ceilingMid(left, right);
+        if (comparator.compareValueToMax(i) > 0) {
+          right = i - 1;
+        } else {
+          left = i;
+        }
+      } while (left < right);
+      return IndexIterator.rangeTranslate(0, left, comparator::translate);
+    }
+
+    @Override
+    OfInt lt(ColumnIndexBase<?>.ValueComparator comparator) {
+      int length = comparator.arrayLength();
+      if (length == 0) { // No min/max values: nothing can match (and the search below needs at least one)
+        return IndexIterator.EMPTY;
+      }
+      int left = 0;
+      int right = length;
+      do {
+        int i = floorMid(left, right);
+        if (comparator.compareValueToMin(i) <= 0) {
+          left = i + 1;
+        } else {
+          right = i;
+        }
+      } while (left < right);
+      return IndexIterator.rangeTranslate(right, length - 1, comparator::translate);
+    }
+
+    @Override
+    OfInt ltEq(ColumnIndexBase<?>.ValueComparator comparator) {
+      int length = comparator.arrayLength();
+      if (length == 0) { // No min/max values: nothing can match (and the search below needs at least one)
+        return IndexIterator.EMPTY;
+      }
+      int left = 0;
+      int right = length;
+      do {
+        int i = floorMid(left, right);
+        if (comparator.compareValueToMin(i) < 0) {
+          left = i + 1;
+        } else {
+          right = i;
+        }
+      } while (left < right);
+      return IndexIterator.rangeTranslate(right, length - 1, comparator::translate);
+    }
+
+    @Override
+    OfInt notEq(ColumnIndexBase<?>.ValueComparator comparator) {
+      Bounds bounds = findBounds(comparator);
+      int length = comparator.arrayLength();
+      if (bounds == null) {
+        return IndexIterator.all(comparator);
+      }
+      return IndexIterator.filterTranslate(
+          length,
+          i -> i < bounds.lower || i > bounds.upper || comparator.compareValueToMin(i) != 0
+              || comparator.compareValueToMax(i) != 0,
+          comparator::translate);
+    }
+
+    // Searches for the first and the last index where min <= value <= max; null if there is no such index
+    private Bounds findBounds(ColumnIndexBase<?>.ValueComparator comparator) {
+      int length = comparator.arrayLength();
+      int lowerLeft = 0;
+      int upperLeft = 0;
+      int lowerRight = length - 1;
+      int upperRight = length - 1;
+      do {
+        if (lowerLeft > lowerRight) {
+          return null;
+        }
+        int i = floorMid(lowerLeft, lowerRight);
+        if (comparator.compareValueToMax(i) > 0) {
+          lowerRight = upperRight = i - 1;
+        } else if (comparator.compareValueToMin(i) < 0) {
+          lowerLeft = upperLeft = i + 1;
+        } else {
+          lowerRight = upperLeft = i;
+        }
+      } while (lowerLeft != lowerRight);
+      do {
+        if (upperLeft > upperRight) {
+          return null;
+        }
+        int i = ceilingMid(upperLeft, upperRight);
+        if (comparator.compareValueToMax(i) > 0) {
+          upperRight = i - 1;
+        } else if (comparator.compareValueToMin(i) < 0) {
+          upperLeft = i + 1;
+        } else {
+          upperLeft = i;
+        }
+      } while (upperLeft != upperRight);
+      return new Bounds(lowerLeft, upperRight);
+    }
+  };
+
+  // Range of array indexes; both ends are inclusive
+  private static class Bounds {
+    final int lower, upper;
+
+    Bounds(int lower, int upper) {
+      assert lower <= upper;
+      this.lower = lower;
+      this.upper = upper;
+    }
+  }
+
+  private static int floorMid(int left, int right) {
+    // Avoid the possible overflow might happen in case of (left + right) / 2
+    return left + ((right - left) / 2);
+  }
+
+  private static int ceilingMid(int left, int right) {
+    // Avoid the possible overflow might happen in case of (left + right + 1) / 2
+    return left + ((right - left + 1) / 2);
+  }
+
+  // Each of the following methods returns the (translated) indexes selected for the related comparison operator
+  // (=, >, >=, <, <=, !=) by comparing the value wrapped in the comparator to the min/max values
+  abstract PrimitiveIterator.OfInt eq(ColumnIndexBase<?>.ValueComparator comparator);
+
+  abstract PrimitiveIterator.OfInt gt(ColumnIndexBase<?>.ValueComparator comparator);
+
+  abstract PrimitiveIterator.OfInt gtEq(ColumnIndexBase<?>.ValueComparator comparator);
+
+  abstract PrimitiveIterator.OfInt lt(ColumnIndexBase<?>.ValueComparator comparator);
+
+  abstract PrimitiveIterator.OfInt ltEq(ColumnIndexBase<?>.ValueComparator comparator);
+
+  abstract PrimitiveIterator.OfInt notEq(ColumnIndexBase<?>.ValueComparator comparator);
+}
diff --git a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndex.java b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndex.java
new file mode 100644
index 0000000000..b91a5c0d96
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndex.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.internal.column.columnindex;
+
+import java.nio.ByteBuffer;
+import java.util.List;
+import java.util.PrimitiveIterator;
+
+import org.apache.parquet.filter2.predicate.FilterPredicate.Visitor;
+import org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter;
+
+/**
+ * Column index containing min/max and null count values for the pages in a column chunk. It also implements methods of
+ * {@link Visitor} to return the indexes of the matching pages. They are used by {@link ColumnIndexFilter}.
+ *
+ * @see org.apache.parquet.format.ColumnIndex
+ */
+public interface ColumnIndex extends Visitor<PrimitiveIterator.OfInt> {
+  /**
+   * @return the boundary order of the min/max values; needed when converting to the related thrift object
+   */
+  BoundaryOrder getBoundaryOrder();
+
+  /**
+   * @return the null counts as an unmodifiable list; needed when converting to the related thrift object
+   */
+  List<Long> getNullCounts();
+
+  /**
+   * @return the null pages as an unmodifiable list; needed when converting to the related thrift object
+   */
+  List<Boolean> getNullPages();
+
+  /**
+   * @return the min values as a list of {@link ByteBuffer}s; needed when converting to the related thrift object
+   */
+  List<ByteBuffer> getMinValues();
+
+  /**
+   * @return the max values as a list of {@link ByteBuffer}s; needed when converting to the related thrift object
+   */
+  List<ByteBuffer> getMaxValues();
+
+}
diff --git a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndexBuilder.java b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndexBuilder.java
new file mode 100644
index 0000000000..b28fddee42
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndexBuilder.java
@@ -0,0 +1,636 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.internal.column.columnindex;
+
+import static java.util.Objects.requireNonNull;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.EnumMap;
+import java.util.Formatter;
+import java.util.List;
+import java.util.Map;
+import java.util.PrimitiveIterator;
+import java.util.function.IntPredicate;
+
+import org.apache.parquet.column.statistics.Statistics;
+import org.apache.parquet.filter2.predicate.UserDefinedPredicate;
+import org.apache.parquet.filter2.predicate.Operators.And;
+import org.apache.parquet.filter2.predicate.Operators.Eq;
+import org.apache.parquet.filter2.predicate.Operators.Gt;
+import org.apache.parquet.filter2.predicate.Operators.GtEq;
+import org.apache.parquet.filter2.predicate.Operators.LogicalNotUserDefined;
+import org.apache.parquet.filter2.predicate.Operators.Lt;
+import org.apache.parquet.filter2.predicate.Operators.LtEq;
+import org.apache.parquet.filter2.predicate.Operators.Not;
+import org.apache.parquet.filter2.predicate.Operators.NotEq;
+import org.apache.parquet.filter2.predicate.Operators.Or;
+import org.apache.parquet.filter2.predicate.Operators.UserDefined;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.PrimitiveComparator;
+import org.apache.parquet.schema.PrimitiveStringifier;
+import org.apache.parquet.schema.PrimitiveType;
+import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
+
+import it.unimi.dsi.fastutil.booleans.BooleanArrayList;
+import it.unimi.dsi.fastutil.booleans.BooleanList;
+import it.unimi.dsi.fastutil.booleans.BooleanLists;
+import it.unimi.dsi.fastutil.ints.IntArrayList;
+import it.unimi.dsi.fastutil.ints.IntList;
+import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
+import it.unimi.dsi.fastutil.ints.IntSet;
+import it.unimi.dsi.fastutil.longs.LongArrayList;
+import it.unimi.dsi.fastutil.longs.LongList;
+import it.unimi.dsi.fastutil.longs.LongLists;
+
+/**
+ * Builder implementation to create {@link ColumnIndex} objects.
+ */
+public abstract class ColumnIndexBuilder {
+
+  static abstract class ColumnIndexBase<C> implements ColumnIndex {
+    /*
+     * A class containing the value to be compared to the min/max values. This way we only need to do the unboxing once
+     * per predicate execution instead of for every comparison.
+     */
+    abstract class ValueComparator {
+      abstract int compareValueToMin(int arrayIndex); // compares the held value to the min value at arrayIndex
+
+      abstract int compareValueToMax(int arrayIndex); // compares the held value to the max value at arrayIndex
+
+      int arrayLength() { // number of stored min/max entries (null-pages have none)
+        return pageIndexes.length;
+      }
+
+      int translate(int arrayIndex) { // maps an index of the min/max arrays to the index of the related page
+        return pageIndexes[arrayIndex];
+      }
+    }
+
+    private static final ByteBuffer EMPTY_BYTE_BUFFER = ByteBuffer.allocate(0);
+    private static final int MAX_VALUE_LENGTH_FOR_TOSTRING = 40;
+    private static final String TOSTRING_TRUNCATION_MARKER = "(...)";
+    private static final int TOSTRING_TRUNCATION_START_POS = (MAX_VALUE_LENGTH_FOR_TOSTRING
+        - TOSTRING_TRUNCATION_MARKER.length()) / 2;
+    private static final int TOSTRING_TRUNCATION_END_POS = MAX_VALUE_LENGTH_FOR_TOSTRING
+        - TOSTRING_TRUNCATION_MARKER.length() - TOSTRING_TRUNCATION_START_POS;
+    private static final String TOSTRING_MISSING_VALUE_MARKER = "<none>";
+
+    final PrimitiveStringifier stringifier;
+    final PrimitiveComparator<C> comparator;
+    private boolean[] nullPages;
+    private BoundaryOrder boundaryOrder;
+    // Storing the page index for each array index (min/max values are not stored for null-pages)
+    private int[] pageIndexes;
+    // might be null
+    private long[] nullCounts;
+
+    static String truncate(String str) {
+      if (str.length() <= MAX_VALUE_LENGTH_FOR_TOSTRING) {
+        return str;
+      }
+      return str.substring(0, TOSTRING_TRUNCATION_START_POS) + TOSTRING_TRUNCATION_MARKER
+          + str.substring(str.length() - TOSTRING_TRUNCATION_END_POS);
+    }
+
+    ColumnIndexBase(PrimitiveType type) {
+      comparator = type.comparator();
+      stringifier = type.stringifier();
+    }
+
+    @Override
+    public BoundaryOrder getBoundaryOrder() {
+      return boundaryOrder;
+    }
+
+    @Override
+    public List<Long> getNullCounts() {
+      if (nullCounts == null) {
+        return null;
+      }
+      return LongLists.unmodifiable(LongArrayList.wrap(nullCounts));
+    }
+
+    @Override
+    public List<Boolean> getNullPages() {
+      return BooleanLists.unmodifiable(BooleanArrayList.wrap(nullPages));
+    }
+
+    @Override
+    public List<ByteBuffer> getMinValues() {
+      List<ByteBuffer> list = new ArrayList<>(getPageCount());
+      int arrayIndex = 0;
+      for (int i = 0, n = getPageCount(); i < n; ++i) {
+        if (isNullPage(i)) {
+          list.add(EMPTY_BYTE_BUFFER);
+        } else {
+          list.add(getMinValueAsBytes(arrayIndex++));
+        }
+      }
+      return list;
+    }
+
+    @Override
+    public List<ByteBuffer> getMaxValues() {
+      List<ByteBuffer> list = new ArrayList<>(getPageCount());
+      int arrayIndex = 0;
+      for (int i = 0, n = getPageCount(); i < n; ++i) {
+        if (isNullPage(i)) {
+          list.add(EMPTY_BYTE_BUFFER);
+        } else {
+          list.add(getMaxValueAsBytes(arrayIndex++));
+        }
+      }
+      return list;
+    }
+
+    @Override
+    public String toString() { // renders the boundary order and one row (null count, min, max) per page
+      try (Formatter formatter = new Formatter()) {
+        formatter.format("Boundary order: %s\n", boundaryOrder);
+        String minMaxPart = "  %-" + MAX_VALUE_LENGTH_FOR_TOSTRING + "s  %-" + MAX_VALUE_LENGTH_FOR_TOSTRING + "s\n";
+        formatter.format("%-10s  %20s" + minMaxPart, "", "null count", "min", "max"); // header row
+        String format = "page-%-5d  %20s" + minMaxPart;
+        int arrayIndex = 0; // min/max values are stored for the non-null pages only
+        for (int i = 0, n = nullPages.length; i < n; ++i) {
+          String nullCount = nullCounts == null ? TOSTRING_MISSING_VALUE_MARKER : Long.toString(nullCounts[i]);
+          String min, max;
+          if (nullPages[i]) {
+            min = max = TOSTRING_MISSING_VALUE_MARKER;
+          } else {
+            min = truncate(getMinValueAsString(arrayIndex));
+            max = truncate(getMaxValueAsString(arrayIndex++)); // advance only after both values are read
+          }
+          formatter.format(format, i, nullCount, min, max);
+        }
+        return formatter.toString();
+      }
+    }
+
+    int getPageCount() {
+      return nullPages.length;
+    }
+
+    boolean isNullPage(int pageIndex) {
+      return nullPages[pageIndex];
+    }
+
+    /*
+     * Returns the min value for arrayIndex as a ByteBuffer. (Min values are not stored for null-pages so arrayIndex
+     * might not equal to pageIndex.)
+     */
+    abstract ByteBuffer getMinValueAsBytes(int arrayIndex);
+
+    /*
+     * Returns the max value for arrayIndex as a ByteBuffer. (Max values are not stored for null-pages so arrayIndex
+     * might not equal to pageIndex.)
+     */
+    abstract ByteBuffer getMaxValueAsBytes(int arrayIndex);
+
+    /*
+     * Returns the min value for arrayIndex as a String. (Min values are not stored for null-pages so arrayIndex might
+     * not equal to pageIndex.)
+     */
+    abstract String getMinValueAsString(int arrayIndex);
+
+    /*
+     * Returns the max value for arrayIndex as a String. (Max values are not stored for null-pages so arrayIndex might
+     * not equal to pageIndex.)
+     */
+    abstract String getMaxValueAsString(int arrayIndex);
+
+    /* Creates a Statistics object for filtering. Used for user defined predicates. */
+    abstract <T extends Comparable<T>> org.apache.parquet.filter2.predicate.Statistics<T> createStats(int arrayIndex);
+
+    /* Creates a ValueComparator object containing the specified value to be compared for min/max values */
+    abstract ValueComparator createValueComparator(Object value);
+
+    @Override
+    public PrimitiveIterator.OfInt visit(And and) {
+      throw new UnsupportedOperationException("AND shall not be used on column index directly");
+    }
+
+    @Override
+    public PrimitiveIterator.OfInt visit(Not not) {
+      throw new UnsupportedOperationException("NOT shall not be used on column index directly");
+    }
+
+    @Override
+    public PrimitiveIterator.OfInt visit(Or or) {
+      throw new UnsupportedOperationException("OR shall not be used on column index directly");
+    }
+
+    @Override
+    public <T extends Comparable<T>> PrimitiveIterator.OfInt visit(Eq<T> eq) {
+      T value = eq.getValue();
+      if (value == null) {
+        if (nullCounts == null) {
+          // Searching for nulls so if we don't have null related statistics we have to return all pages
+          return IndexIterator.all(getPageCount());
+        } else {
+          return IndexIterator.filter(getPageCount(), pageIndex -> nullCounts[pageIndex] > 0);
+        }
+      }
+      return getBoundaryOrder().eq(createValueComparator(value));
+    }
+
+    @Override
+    public <T extends Comparable<T>> PrimitiveIterator.OfInt visit(Gt<T> gt) {
+      return getBoundaryOrder().gt(createValueComparator(gt.getValue()));
+    }
+
+    @Override
+    public <T extends Comparable<T>> PrimitiveIterator.OfInt visit(GtEq<T> gtEq) {
+      return getBoundaryOrder().gtEq(createValueComparator(gtEq.getValue()));
+    }
+
+    @Override
+    public <T extends Comparable<T>> PrimitiveIterator.OfInt visit(Lt<T> lt) {
+      return getBoundaryOrder().lt(createValueComparator(lt.getValue()));
+    }
+
+    @Override
+    public <T extends Comparable<T>> PrimitiveIterator.OfInt visit(LtEq<T> ltEq) {
+      return getBoundaryOrder().ltEq(createValueComparator(ltEq.getValue()));
+    }
+
+    @Override
+    public <T extends Comparable<T>> PrimitiveIterator.OfInt visit(NotEq<T> notEq) {
+      T value = notEq.getValue();
+      if (value == null) {
+        return IndexIterator.filter(getPageCount(), pageIndex -> !nullPages[pageIndex]);
+      }
+
+      if (nullCounts == null) {
+        // Nulls match so if we don't have null related statistics we have to return all pages
+        return IndexIterator.all(getPageCount());
+      }
+
+      // Merging value filtering with pages containing nulls
+      IntSet matchingIndexes = new IntOpenHashSet();
+      getBoundaryOrder().notEq(createValueComparator(value))
+          .forEachRemaining((int index) -> matchingIndexes.add(index));
+      return IndexIterator.filter(getPageCount(),
+          pageIndex -> nullCounts[pageIndex] > 0 || matchingIndexes.contains(pageIndex));
+    }
+
+    @Override
+    public <T extends Comparable<T>, U extends UserDefinedPredicate<T>> PrimitiveIterator.OfInt visit(
+        UserDefined<T, U> udp) {
+      final UserDefinedPredicate<T> predicate = udp.getUserDefinedPredicate();
+      final boolean acceptNulls = predicate.keep(null); // whether the predicate keeps null values
+
+      if (acceptNulls && nullCounts == null) {
+        // Nulls match so if we don't have null related statistics we have to return all pages
+        return IndexIterator.all(getPageCount());
+      }
+
+      return IndexIterator.filter(getPageCount(), new IntPredicate() {
+        private int arrayIndex = -1; // index in the min/max arrays; advanced for every non-null page tested
+
+        @Override
+        public boolean test(int pageIndex) {
+          if (isNullPage(pageIndex)) {
+            return acceptNulls; // a null-page matches only if nulls are kept
+          } else {
+            ++arrayIndex; // NOTE(review): assumes test() is called once per page in ascending order - confirm
+            if (acceptNulls && nullCounts[pageIndex] > 0) {
+              return true; // the page contains nulls and nulls are kept
+            }
+            org.apache.parquet.filter2.predicate.Statistics<T> stats = createStats(arrayIndex);
+            return !predicate.canDrop(stats); // keep the page unless the min/max values allow dropping it
+          }
+        }
+      });
+    }
+
+    @Override
+    public <T extends Comparable<T>, U extends UserDefinedPredicate<T>> PrimitiveIterator.OfInt visit(
+        LogicalNotUserDefined<T, U> udp) {
+      final UserDefinedPredicate<T> inversePredicate = udp.getUserDefined().getUserDefinedPredicate();
+      final boolean acceptNulls = !inversePredicate.keep(null); // the negation keeps nulls if the predicate does not
+
+      if (acceptNulls && nullCounts == null) {
+        // Nulls match so if we don't have null related statistics we have to return all pages
+        return IndexIterator.all(getPageCount());
+      }
+
+      return IndexIterator.filter(getPageCount(), new IntPredicate() {
+        private int arrayIndex = -1; // index in the min/max arrays; advanced for every non-null page tested
+
+        @Override
+        public boolean test(int pageIndex) {
+          if (isNullPage(pageIndex)) {
+            return acceptNulls; // a null-page matches only if nulls are kept
+          } else {
+            ++arrayIndex; // NOTE(review): assumes test() is called once per page in ascending order - confirm
+            if (acceptNulls && nullCounts[pageIndex] > 0) {
+              return true; // the page contains nulls and nulls are kept
+            }
+            org.apache.parquet.filter2.predicate.Statistics<T> stats = createStats(arrayIndex);
+            return !inversePredicate.inverseCanDrop(stats); // keep the page unless the negation allows dropping it
+          }
+        }
+      });
+    }
+  }
+
+  private static final ColumnIndexBuilder NO_OP_BUILDER = new ColumnIndexBuilder() {
+    @Override
+    public ColumnIndex build() {
+      return null;
+    }
+
+    @Override
+    public void add(Statistics<?> stats) {
+    }
+
+    @Override
+    void addMinMax(Object min, Object max) {
+    }
+
+    @Override
+    ColumnIndexBase<?> createColumnIndex(PrimitiveType type) {
+      return null;
+    }
+
+    @Override
+    void clearMinMax() {
+    }
+
+    @Override
+    void addMinMaxFromBytes(ByteBuffer min, ByteBuffer max) {
+    }
+
+    @Override
+    int compareMinValues(PrimitiveComparator<Binary> comparator, int index1, int index2) {
+      return 0;
+    }
+
+    @Override
+    int compareMaxValues(PrimitiveComparator<Binary> comparator, int index1, int index2) {
+      return 0;
+    }
+
+    @Override
+    int sizeOf(Object value) {
+      return 0;
+    }
+  };
+
+  private static final Map<PrimitiveTypeName, ColumnIndexBuilder> BUILDERS = new EnumMap<>(PrimitiveTypeName.class);
+
+  private PrimitiveType type;
+  private final BooleanList nullPages = new BooleanArrayList();
+  private final LongList nullCounts = new LongArrayList();
+  private long minMaxSize;
+  private final IntList pageIndexes = new IntArrayList();
+  private int nextPageIndex;
+
+  /**
+   * @return a no-op builder that does not collect statistics objects and therefore returns {@code null} at
+   *         {@link #build()}.
+   */
+  public static ColumnIndexBuilder getNoOpBuilder() {
+    return NO_OP_BUILDER;
+  }
+
+  /**
+   * @param type
+   *          the type this builder is to be created for
+   * @param truncateLength
+   *          the length to be used for truncating binary values if possible
+   * @return a {@link ColumnIndexBuilder} instance to be used for creating {@link ColumnIndex} objects
+   */
+  public static ColumnIndexBuilder getBuilder(PrimitiveType type, int truncateLength) {
+    ColumnIndexBuilder builder = createNewBuilder(type, truncateLength);
+    builder.type = type;
+    return builder;
+  }
+
+  private static ColumnIndexBuilder createNewBuilder(PrimitiveType type, int truncateLength) {
+    switch (type.getPrimitiveTypeName()) {
+      case BINARY:
+      case FIXED_LEN_BYTE_ARRAY:
+      case INT96:
+        return new BinaryColumnIndexBuilder(type, truncateLength);
+      case BOOLEAN:
+        return new BooleanColumnIndexBuilder();
+      case DOUBLE:
+        return new DoubleColumnIndexBuilder();
+      case FLOAT:
+        return new FloatColumnIndexBuilder();
+      case INT32:
+        return new IntColumnIndexBuilder();
+      case INT64:
+        return new LongColumnIndexBuilder();
+      default:
+        throw new IllegalArgumentException("Unsupported type for column index: " + type);
+    }
+  }
+
+  /**
+   * @param type
+   *          the primitive type
+   * @param boundaryOrder
+   *          the boundary order of the min/max values
+   * @param nullPages
+   *          the null pages (one boolean value for each page that signifies whether the page consists of nulls
+   *          entirely)
+   * @param nullCounts
+   *          the number of null values for each page; might be {@code null}
+   * @param minValues
+   *          the min values for each page
+   * @param maxValues
+   *          the max values for each page
+   * @return the {@link ColumnIndex} created from the specified arguments or {@code null} if no pages are specified
+   */
+  public static ColumnIndex build(
+      PrimitiveType type,
+      BoundaryOrder boundaryOrder,
+      List<Boolean> nullPages,
+      List<Long> nullCounts,
+      List<ByteBuffer> minValues,
+      List<ByteBuffer> maxValues) {
+
+    // Always use a fresh builder: builders are mutable (fill() clears and repopulates them) so sharing one instance
+    // per primitive type name is not thread-safe; a shared binary builder would also keep the PrimitiveType it was
+    // first created with even if a later column of the same type name differs.
+    ColumnIndexBuilder builder = createNewBuilder(type, Integer.MAX_VALUE);
+    builder.fill(nullPages, nullCounts, minValues, maxValues);
+    ColumnIndexBase<?> columnIndex = builder.build(type);
+    if (columnIndex == null) {
+      return null;
+    }
+    columnIndex.boundaryOrder = requireNonNull(boundaryOrder);
+    return columnIndex;
+  }
+
+  ColumnIndexBuilder() {
+    // Shall be able to be created inside this package only
+  }
+
+  /**
+   * Adds the data from the specified statistics to this builder
+   *
+   * @param stats
+   *          the statistics to be added
+   */
+  public void add(Statistics<?> stats) {
+    if (stats.hasNonNullValue()) {
+      nullPages.add(false);
+      Object min = stats.genericGetMin();
+      Object max = stats.genericGetMax();
+      addMinMax(min, max);
+      pageIndexes.add(nextPageIndex);
+      minMaxSize += sizeOf(min);
+      minMaxSize += sizeOf(max);
+    } else {
+      nullPages.add(true);
+    }
+    nullCounts.add(stats.getNumNulls());
+    ++nextPageIndex;
+  }
+
+  abstract void addMinMaxFromBytes(ByteBuffer min, ByteBuffer max);
+
+  abstract void addMinMax(Object min, Object max);
+
+  // Resets this builder and loads the specified per-page values; min/max values of null-pages are not stored.
+  private void fill(List<Boolean> nullPages, List<Long> nullCounts, List<ByteBuffer> minValues,
+      List<ByteBuffer> maxValues) {
+    clear();
+    int pageCount = nullPages.size();
+    if ((nullCounts != null && nullCounts.size() != pageCount) || minValues.size() != pageCount
+        || maxValues.size() != pageCount) {
+      throw new IllegalArgumentException(
+          String.format("Not all sizes are equal (nullPages:%d, nullCounts:%s, minValues:%d, maxValues:%d)",
+              nullPages.size(), nullCounts == null ? "null" : nullCounts.size(), minValues.size(), maxValues.size()));
+    }
+    this.nullPages.addAll(nullPages);
+    // Nullcounts is optional in the format
+    if (nullCounts != null) {
+      this.nullCounts.addAll(nullCounts);
+    }
+
+    for (int i = 0; i < pageCount; ++i) {
+      if (!nullPages.get(i)) {
+        ByteBuffer min = minValues.get(i);
+        ByteBuffer max = maxValues.get(i);
+        addMinMaxFromBytes(min, max);
+        pageIndexes.add(i);
+        minMaxSize += (long) min.remaining() + max.remaining();
+      }
+    }
+  }
+
+  /**
+   * @return the newly created column index or {@code null} if the {@link ColumnIndex} would be empty
+   */
+  public ColumnIndex build() {
+    ColumnIndexBase<?> columnIndex = build(type);
+    if (columnIndex == null) {
+      return null;
+    }
+    columnIndex.boundaryOrder = calculateBoundaryOrder(type.comparator());
+    return columnIndex;
+  }
+
+  private ColumnIndexBase<?> build(PrimitiveType type) {
+    if (nullPages.isEmpty()) {
+      return null;
+    }
+    ColumnIndexBase<?> columnIndex = createColumnIndex(type);
+    if (columnIndex == null) {
+      // Might happen if the specialized builder discovers invalid min/max values
+      return null;
+    }
+    columnIndex.nullPages = nullPages.toBooleanArray();
+    // Null counts is optional so keep it null if the builder has no values
+    if (!nullCounts.isEmpty()) {
+      columnIndex.nullCounts = nullCounts.toLongArray();
+    }
+    columnIndex.pageIndexes = pageIndexes.toIntArray();
+
+    return columnIndex;
+  }
+
+  private BoundaryOrder calculateBoundaryOrder(PrimitiveComparator<Binary> comparator) {
+    if (isAscending(comparator)) {
+      return BoundaryOrder.ASCENDING;
+    } else if (isDescending(comparator)) {
+      return BoundaryOrder.DESCENDING;
+    } else {
+      return BoundaryOrder.UNORDERED;
+    }
+  }
+
+  // min[i] <= min[i+1] && max[i] <= max[i+1]
+  private boolean isAscending(PrimitiveComparator<Binary> comparator) {
+    for (int i = 1, n = pageIndexes.size(); i < n; ++i) {
+      if (compareMinValues(comparator, i - 1, i) > 0 || compareMaxValues(comparator, i - 1, i) > 0) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  // min[i] >= min[i+1] && max[i] >= max[i+1]
+  private boolean isDescending(PrimitiveComparator<Binary> comparator) {
+    for (int i = 1, n = pageIndexes.size(); i < n; ++i) {
+      if (compareMinValues(comparator, i - 1, i) < 0 || compareMaxValues(comparator, i - 1, i) < 0) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  abstract int compareMinValues(PrimitiveComparator<Binary> comparator, int index1, int index2);
+
+  abstract int compareMaxValues(PrimitiveComparator<Binary> comparator, int index1, int index2);
+
+  private void clear() {
+    nullPages.clear();
+    nullCounts.clear();
+    clearMinMax();
+    minMaxSize = 0;
+    nextPageIndex = 0;
+    pageIndexes.clear();
+  }
+
+  abstract void clearMinMax();
+
+  abstract ColumnIndexBase<?> createColumnIndex(PrimitiveType type);
+
+  abstract int sizeOf(Object value);
+
+  /**
+   * @return the number of pages added so far to this builder
+   */
+  public int getPageCount() {
+    return nullPages.size();
+  }
+
+  /**
+   * @return the sum of size in bytes of the min/max values added so far to this builder
+   */
+  public long getMinMaxSize() {
+    return minMaxSize;
+  }
+}
diff --git a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/DoubleColumnIndexBuilder.java b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/DoubleColumnIndexBuilder.java
new file mode 100644
index 0000000000..074d02573f
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/DoubleColumnIndexBuilder.java
@@ -0,0 +1,155 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.internal.column.columnindex;
+
+import static java.nio.ByteOrder.LITTLE_ENDIAN;
+
+import java.nio.ByteBuffer;
+
+import org.apache.parquet.filter2.predicate.Statistics;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.PrimitiveComparator;
+import org.apache.parquet.schema.PrimitiveType;
+
+import it.unimi.dsi.fastutil.doubles.DoubleArrayList;
+import it.unimi.dsi.fastutil.doubles.DoubleList;
+
+class DoubleColumnIndexBuilder extends ColumnIndexBuilder {
+  private static class DoubleColumnIndex extends ColumnIndexBase<Double> {
+    private double[] minValues; // indexed by arrayIndex: null-pages have no entry here
+    private double[] maxValues; // indexed by arrayIndex: null-pages have no entry here
+
+    private DoubleColumnIndex(PrimitiveType type) {
+      super(type);
+    }
+
+    @Override
+    ByteBuffer getMinValueAsBytes(int arrayIndex) {
+      return convert(minValues[arrayIndex]);
+    }
+
+    @Override
+    ByteBuffer getMaxValueAsBytes(int arrayIndex) {
+      return convert(maxValues[arrayIndex]);
+    }
+
+    @Override
+    String getMinValueAsString(int arrayIndex) {
+      return stringifier.stringify(minValues[arrayIndex]);
+    }
+
+    @Override
+    String getMaxValueAsString(int arrayIndex) {
+      return stringifier.stringify(maxValues[arrayIndex]);
+    }
+
+    @Override
+    @SuppressWarnings("unchecked")
+    <T extends Comparable<T>> Statistics<T> createStats(int arrayIndex) {
+      return (Statistics<T>) new Statistics<Double>(minValues[arrayIndex], maxValues[arrayIndex], comparator);
+    }
+
+    @Override
+    ValueComparator createValueComparator(Object value) {
+      final double v = (double) value; // unboxed once so that the comparisons work on the primitive value
+      return new ValueComparator() {
+        @Override
+        int compareValueToMin(int arrayIndex) {
+          return comparator.compare(v, minValues[arrayIndex]);
+        }
+
+        @Override
+        int compareValueToMax(int arrayIndex) {
+          return comparator.compare(v, maxValues[arrayIndex]);
+        }
+      };
+    }
+  }
+
+  private final DoubleList minValues = new DoubleArrayList();
+  private final DoubleList maxValues = new DoubleArrayList();
+  private boolean invalid;
+
+  private static double convert(ByteBuffer buffer) { // decodes the little-endian value at the buffer's position
+    return buffer.order(LITTLE_ENDIAN).getDouble(buffer.position());
+  }
+
+  private static ByteBuffer convert(double value) {
+    return ByteBuffer.allocate(Double.BYTES).order(LITTLE_ENDIAN).putDouble(0, value);
+  }
+
+  @Override
+  void addMinMaxFromBytes(ByteBuffer min, ByteBuffer max) {
+    minValues.add(convert(min));
+    maxValues.add(convert(max));
+  }
+
+  @Override
+  void addMinMax(Object min, Object max) {
+    double dMin = (double) min;
+    double dMax = (double) max;
+    if (Double.isNaN(dMin) || Double.isNaN(dMax)) {
+      // Invalidate this column index in case of NaN as the sorting order of values is undefined for this case
+      invalid = true;
+    }
+
+    // Sorting order is undefined for -0.0 so let min = -0.0 and max = +0.0 to ensure that no 0.0 values are skipped
+    if (Double.compare(dMin, +0.0) == 0) {
+      dMin = -0.0;
+    }
+    if (Double.compare(dMax, -0.0) == 0) {
+      dMax = +0.0;
+    }
+
+    minValues.add(dMin);
+    maxValues.add(dMax);
+  }
+
+  @Override
+  ColumnIndexBase<Double> createColumnIndex(PrimitiveType type) {
+    if (invalid) {
+      return null;
+    }
+    DoubleColumnIndex columnIndex = new DoubleColumnIndex(type);
+    columnIndex.minValues = minValues.toDoubleArray();
+    columnIndex.maxValues = maxValues.toDoubleArray();
+    return columnIndex;
+  }
+
+  @Override
+  void clearMinMax() {
+    minValues.clear();
+    maxValues.clear();
+  }
+
+  @Override
+  int compareMinValues(PrimitiveComparator<Binary> comparator, int index1, int index2) {
+    return comparator.compare(minValues.get(index1), minValues.get(index2));
+  }
+
+  @Override
+  int compareMaxValues(PrimitiveComparator<Binary> comparator, int index1, int index2) {
+    return comparator.compare(maxValues.get(index1), maxValues.get(index2));
+  }
+
+  @Override
+  int sizeOf(Object value) {
+    return Double.BYTES;
+  }
+}
diff --git a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/FloatColumnIndexBuilder.java b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/FloatColumnIndexBuilder.java
new file mode 100644
index 0000000000..cbcdf949d8
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/FloatColumnIndexBuilder.java
@@ -0,0 +1,155 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.internal.column.columnindex;
+
+import static java.nio.ByteOrder.LITTLE_ENDIAN;
+
+import java.nio.ByteBuffer;
+
+import org.apache.parquet.filter2.predicate.Statistics;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.PrimitiveComparator;
+import org.apache.parquet.schema.PrimitiveType;
+
+import it.unimi.dsi.fastutil.floats.FloatArrayList;
+import it.unimi.dsi.fastutil.floats.FloatList;
+
+class FloatColumnIndexBuilder extends ColumnIndexBuilder {
+  private static class FloatColumnIndex extends ColumnIndexBase<Float> {
+    private float[] minValues; // indexed by arrayIndex: null-pages have no entry here
+    private float[] maxValues; // indexed by arrayIndex: null-pages have no entry here
+
+    private FloatColumnIndex(PrimitiveType type) {
+      super(type);
+    }
+
+    @Override
+    ByteBuffer getMinValueAsBytes(int arrayIndex) {
+      return convert(minValues[arrayIndex]);
+    }
+
+    @Override
+    ByteBuffer getMaxValueAsBytes(int arrayIndex) {
+      return convert(maxValues[arrayIndex]);
+    }
+
+    @Override
+    String getMinValueAsString(int arrayIndex) {
+      return stringifier.stringify(minValues[arrayIndex]);
+    }
+
+    @Override
+    String getMaxValueAsString(int arrayIndex) {
+      return stringifier.stringify(maxValues[arrayIndex]);
+    }
+
+    @Override
+    @SuppressWarnings("unchecked")
+    <T extends Comparable<T>> Statistics<T> createStats(int arrayIndex) {
+      return (Statistics<T>) new Statistics<Float>(minValues[arrayIndex], maxValues[arrayIndex], comparator);
+    }
+
+    @Override
+    ValueComparator createValueComparator(Object value) {
+      final float v = (float) value; // unboxed once so that the comparisons work on the primitive value
+      return new ValueComparator() {
+        @Override
+        int compareValueToMin(int arrayIndex) {
+          return comparator.compare(v, minValues[arrayIndex]);
+        }
+
+        @Override
+        int compareValueToMax(int arrayIndex) {
+          return comparator.compare(v, maxValues[arrayIndex]);
+        }
+      };
+    }
+  }
+
+  private final FloatList minValues = new FloatArrayList();
+  private final FloatList maxValues = new FloatArrayList();
+  private boolean invalid;
+
+  private static float convert(ByteBuffer buffer) { // decodes the little-endian value at the buffer's position
+    return buffer.order(LITTLE_ENDIAN).getFloat(buffer.position());
+  }
+
+  private static ByteBuffer convert(float value) {
+    return ByteBuffer.allocate(Float.BYTES).order(LITTLE_ENDIAN).putFloat(0, value);
+  }
+
+  @Override
+  void addMinMaxFromBytes(ByteBuffer min, ByteBuffer max) {
+    minValues.add(convert(min));
+    maxValues.add(convert(max));
+  }
+
+  @Override
+  void addMinMax(Object min, Object max) {
+    float fMin = (float) min;
+    float fMax = (float) max;
+    if (Float.isNaN(fMin) || Float.isNaN(fMax)) {
+      // Invalidate this column index in case of NaN as the sorting order of values is undefined for this case
+      invalid = true;
+    }
+
+    // Sorting order is undefined for -0.0 so let min = -0.0 and max = +0.0 to ensure that no 0.0 values are skipped
+    if (Float.compare(fMin, +0.0f) == 0) {
+      fMin = -0.0f;
+    }
+    if (Float.compare(fMax, -0.0f) == 0) {
+      fMax = +0.0f;
+    }
+
+    minValues.add(fMin);
+    maxValues.add(fMax);
+  }
+
+  @Override
+  ColumnIndexBase<Float> createColumnIndex(PrimitiveType type) {
+    if (invalid) {
+      return null;
+    }
+    FloatColumnIndex columnIndex = new FloatColumnIndex(type);
+    columnIndex.minValues = minValues.toFloatArray();
+    columnIndex.maxValues = maxValues.toFloatArray();
+    return columnIndex;
+  }
+
+  @Override
+  void clearMinMax() {
+    minValues.clear();
+    maxValues.clear();
+  }
+
+  @Override
+  int compareMinValues(PrimitiveComparator<Binary> comparator, int index1, int index2) {
+    return comparator.compare(minValues.get(index1), minValues.get(index2));
+  }
+
+  @Override
+  int compareMaxValues(PrimitiveComparator<Binary> comparator, int index1, int index2) {
+    return comparator.compare(maxValues.get(index1), maxValues.get(index2));
+  }
+
+  @Override
+  int sizeOf(Object value) {
+    return Float.BYTES;
+  }
+}
diff --git a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/IndexIterator.java b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/IndexIterator.java
new file mode 100644
index 0000000000..9eab65e5bb
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/IndexIterator.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.internal.column.columnindex;
+
+import java.util.NoSuchElementException;
+import java.util.PrimitiveIterator;
+import java.util.function.IntPredicate;
+import java.util.function.IntUnaryOperator;
+
+import org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder.ColumnIndexBase;
+
+/**
+ * Iterator implementation for page indexes. Walks the candidate indexes of a half-open range in ascending order,
+ * skips the ones rejected by the filter and returns each accepted index after mapping it through the translator.
+ */
+class IndexIterator implements PrimitiveIterator.OfInt {
+  /** Shared iterator that has no elements at all. */
+  public static final PrimitiveIterator.OfInt EMPTY = new OfInt() {
+    @Override
+    public boolean hasNext() {
+      return false;
+    }
+
+    @Override
+    public int nextInt() {
+      throw new NoSuchElementException();
+    }
+  };
+
+  // Untranslated index handed out by the next nextInt() call; negative once the iteration is over
+  private int index;
+  private final int endIndex;
+  private final IntPredicate filter;
+  private final IntUnaryOperator translator;
+
+  /** Iterates over every index in {@code [0, pageCount)} without translation. */
+  static PrimitiveIterator.OfInt all(int pageCount) {
+    return new IndexIterator(0, pageCount, i -> true, IntUnaryOperator.identity());
+  }
+
+  /** Iterates over every index in {@code [0, comparator.arrayLength())}, mapped by {@code comparator.translate}. */
+  static PrimitiveIterator.OfInt all(ColumnIndexBase<?>.ValueComparator comparator) {
+    return new IndexIterator(0, comparator.arrayLength(), i -> true, comparator::translate);
+  }
+
+  /** Iterates over the indexes in {@code [0, pageCount)} accepted by {@code filter}, without translation. */
+  static PrimitiveIterator.OfInt filter(int pageCount, IntPredicate filter) {
+    return new IndexIterator(0, pageCount, filter, IntUnaryOperator.identity());
+  }
+
+  /** Iterates over the indexes in {@code [0, arrayLength)} accepted by {@code filter}, translated on return. */
+  static PrimitiveIterator.OfInt filterTranslate(int arrayLength, IntPredicate filter, IntUnaryOperator translator) {
+    return new IndexIterator(0, arrayLength, filter, translator);
+  }
+
+  /** Iterates over every index from {@code from} to {@code to} (both inclusive), translated on return. */
+  static PrimitiveIterator.OfInt rangeTranslate(int from, int to, IntUnaryOperator translator) {
+    int exclusiveEnd = to + 1;
+    return new IndexIterator(from, exclusiveEnd, i -> true, translator);
+  }
+
+  private IndexIterator(int startIndex, int endIndex, IntPredicate filter, IntUnaryOperator translator) {
+    this.endIndex = endIndex;
+    this.filter = filter;
+    this.translator = translator;
+    // Position on the first accepted index right away so that hasNext() is a plain sign check
+    this.index = nextPageIndex(startIndex);
+  }
+
+  // Returns the first index in [startIndex, endIndex) accepted by the filter or -1 if there is none
+  private int nextPageIndex(int startIndex) {
+    int candidate = startIndex;
+    while (candidate < endIndex) {
+      if (filter.test(candidate)) {
+        return candidate;
+      }
+      ++candidate;
+    }
+    return -1;
+  }
+
+  @Override
+  public boolean hasNext() {
+    return index >= 0;
+  }
+
+  @Override
+  public int nextInt() {
+    if (!hasNext()) {
+      throw new NoSuchElementException();
+    }
+    // The position is advanced first; the translator is applied to the index that was current on entry
+    int current = index;
+    index = nextPageIndex(current + 1);
+    return translator.applyAsInt(current);
+  }
+}
diff --git a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/IntColumnIndexBuilder.java b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/IntColumnIndexBuilder.java
new file mode 100644
index 0000000000..2d19d270f6
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/IntColumnIndexBuilder.java
@@ -0,0 +1,151 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.internal.column.columnindex;
+
+import static java.nio.ByteOrder.LITTLE_ENDIAN;
+
+import java.nio.ByteBuffer;
+
+import org.apache.parquet.filter2.predicate.Statistics;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.PrimitiveComparator;
+import org.apache.parquet.schema.PrimitiveType;
+
+import it.unimi.dsi.fastutil.ints.IntArrayList;
+import it.unimi.dsi.fastutil.ints.IntList;
+
+/**
+ * Column index builder for columns whose min/max values are handled as primitive {@code int} values.
+ */
+class IntColumnIndexBuilder extends ColumnIndexBuilder {
+  /**
+   * Column index keeping the min/max values of the pages in {@code int} arrays.
+   */
+  private static class IntColumnIndex extends ColumnIndexBase<Integer> {
+    private int[] minValues;
+    private int[] maxValues;
+
+    private IntColumnIndex(PrimitiveType type) {
+      super(type);
+    }
+
+    @Override
+    ByteBuffer getMinValueAsBytes(int pageIndex) {
+      return convert(minValues[pageIndex]);
+    }
+
+    @Override
+    ByteBuffer getMaxValueAsBytes(int pageIndex) {
+      return convert(maxValues[pageIndex]);
+    }
+
+    @Override
+    String getMinValueAsString(int pageIndex) {
+      return stringifier.stringify(minValues[pageIndex]);
+    }
+
+    @Override
+    String getMaxValueAsString(int pageIndex) {
+      return stringifier.stringify(maxValues[pageIndex]);
+    }
+
+    @Override
+    @SuppressWarnings("unchecked")
+    <T extends Comparable<T>> Statistics<T> createStats(int arrayIndex) {
+      return (Statistics<T>) new Statistics<Integer>(minValues[arrayIndex], maxValues[arrayIndex], comparator);
+    }
+
+    @Override
+    ValueComparator createValueComparator(Object value) {
+      final int v = (int) value;
+      return new ValueComparator() {
+        @Override
+        int compareValueToMin(int arrayIndex) {
+          return comparator.compare(v, minValues[arrayIndex]);
+        }
+
+        @Override
+        int compareValueToMax(int arrayIndex) {
+          return comparator.compare(v, maxValues[arrayIndex]);
+        }
+      };
+    }
+  }
+
+  private final IntList minValues = new IntArrayList();
+  private final IntList maxValues = new IntArrayList();
+
+  /**
+   * Reads the first four bytes (absolute index 0) of the buffer as a little-endian {@code int}.
+   */
+  private static int convert(ByteBuffer buffer) {
+    // Set the byte order on a duplicate so that the byte order of the caller's buffer is left untouched
+    return buffer.duplicate().order(LITTLE_ENDIAN).getInt(0);
+  }
+
+  /**
+   * Encodes the value as a newly allocated four-byte little-endian buffer.
+   */
+  private static ByteBuffer convert(int value) {
+    return ByteBuffer.allocate(Integer.BYTES).order(LITTLE_ENDIAN).putInt(0, value);
+  }
+
+  @Override
+  void addMinMaxFromBytes(ByteBuffer min, ByteBuffer max) {
+    minValues.add(convert(min));
+    maxValues.add(convert(max));
+  }
+
+  @Override
+  void addMinMax(Object min, Object max) {
+    minValues.add((int) min);
+    maxValues.add((int) max);
+  }
+
+  @Override
+  ColumnIndexBase<Integer> createColumnIndex(PrimitiveType type) {
+    IntColumnIndex columnIndex = new IntColumnIndex(type);
+    columnIndex.minValues = minValues.toIntArray();
+    columnIndex.maxValues = maxValues.toIntArray();
+    return columnIndex;
+  }
+
+  @Override
+  void clearMinMax() {
+    minValues.clear();
+    maxValues.clear();
+  }
+
+  @Override
+  int compareMinValues(PrimitiveComparator<Binary> comparator, int index1, int index2) {
+    // getInt() reads the primitive directly; IntList.get() would box both values only to unbox them again
+    return comparator.compare(minValues.getInt(index1), minValues.getInt(index2));
+  }
+
+  @Override
+  int compareMaxValues(PrimitiveComparator<Binary> comparator, int index1, int index2) {
+    // See compareMinValues: primitive access avoids the boxing round-trip
+    return comparator.compare(maxValues.getInt(index1), maxValues.getInt(index2));
+  }
+
+  @Override
+  int sizeOf(Object value) {
+    return Integer.BYTES;
+  }
+}
diff --git a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/LongColumnIndexBuilder.java b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/LongColumnIndexBuilder.java
new file mode 100644
index 0000000000..b0189b7098
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/LongColumnIndexBuilder.java
@@ -0,0 +1,151 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.internal.column.columnindex;
+
+import static java.nio.ByteOrder.LITTLE_ENDIAN;
+
+import java.nio.ByteBuffer;
+
+import org.apache.parquet.filter2.predicate.Statistics;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.PrimitiveComparator;
+import org.apache.parquet.schema.PrimitiveType;
+
+import it.unimi.dsi.fastutil.longs.LongArrayList;
+import it.unimi.dsi.fastutil.longs.LongList;
+
+/**
+ * Column index builder for columns whose min/max values are handled as primitive {@code long} values.
+ */
+class LongColumnIndexBuilder extends ColumnIndexBuilder {
+  /**
+   * Column index keeping the min/max values of the pages in {@code long} arrays.
+   */
+  private static class LongColumnIndex extends ColumnIndexBase<Long> {
+    private long[] minValues;
+    private long[] maxValues;
+
+    private LongColumnIndex(PrimitiveType type) {
+      super(type);
+    }
+
+    @Override
+    ByteBuffer getMinValueAsBytes(int pageIndex) {
+      return convert(minValues[pageIndex]);
+    }
+
+    @Override
+    ByteBuffer getMaxValueAsBytes(int pageIndex) {
+      return convert(maxValues[pageIndex]);
+    }
+
+    @Override
+    String getMinValueAsString(int pageIndex) {
+      return stringifier.stringify(minValues[pageIndex]);
+    }
+
+    @Override
+    String getMaxValueAsString(int pageIndex) {
+      return stringifier.stringify(maxValues[pageIndex]);
+    }
+
+    @Override
+    @SuppressWarnings("unchecked")
+    <T extends Comparable<T>> Statistics<T> createStats(int arrayIndex) {
+      return (Statistics<T>) new Statistics<Long>(minValues[arrayIndex], maxValues[arrayIndex], comparator);
+    }
+
+    @Override
+    ValueComparator createValueComparator(Object value) {
+      final long v = (long) value;
+      return new ValueComparator() {
+        @Override
+        int compareValueToMin(int arrayIndex) {
+          return comparator.compare(v, minValues[arrayIndex]);
+        }
+
+        @Override
+        int compareValueToMax(int arrayIndex) {
+          return comparator.compare(v, maxValues[arrayIndex]);
+        }
+      };
+    }
+  }
+
+  private final LongList minValues = new LongArrayList();
+  private final LongList maxValues = new LongArrayList();
+
+  /**
+   * Reads the first eight bytes (absolute index 0) of the buffer as a little-endian {@code long}.
+   */
+  private static long convert(ByteBuffer buffer) {
+    // Set the byte order on a duplicate so that the byte order of the caller's buffer is left untouched
+    return buffer.duplicate().order(LITTLE_ENDIAN).getLong(0);
+  }
+
+  /**
+   * Encodes the value as a newly allocated eight-byte little-endian buffer.
+   */
+  private static ByteBuffer convert(long value) {
+    return ByteBuffer.allocate(Long.BYTES).order(LITTLE_ENDIAN).putLong(0, value);
+  }
+
+  @Override
+  void addMinMaxFromBytes(ByteBuffer min, ByteBuffer max) {
+    minValues.add(convert(min));
+    maxValues.add(convert(max));
+  }
+
+  @Override
+  void addMinMax(Object min, Object max) {
+    minValues.add((long) min);
+    maxValues.add((long) max);
+  }
+
+  @Override
+  ColumnIndexBase<Long> createColumnIndex(PrimitiveType type) {
+    LongColumnIndex columnIndex = new LongColumnIndex(type);
+    columnIndex.minValues = minValues.toLongArray();
+    columnIndex.maxValues = maxValues.toLongArray();
+    return columnIndex;
+  }
+
+  @Override
+  void clearMinMax() {
+    minValues.clear();
+    maxValues.clear();
+  }
+
+  @Override
+  int compareMinValues(PrimitiveComparator<Binary> comparator, int index1, int index2) {
+    // getLong() reads the primitive directly; LongList.get() would box both values only to unbox them again
+    return comparator.compare(minValues.getLong(index1), minValues.getLong(index2));
+  }
+
+  @Override
+  int compareMaxValues(PrimitiveComparator<Binary> comparator, int index1, int index2) {
+    // See compareMinValues: primitive access avoids the boxing round-trip
+    return comparator.compare(maxValues.getLong(index1), maxValues.getLong(index2));
+  }
+
+  @Override
+  int sizeOf(Object value) {
+    return Long.BYTES;
+  }
+}
diff --git a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/OffsetIndex.java b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/OffsetIndex.java
new file mode 100644
index 0000000000..ba984ebc70
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/OffsetIndex.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.internal.column.columnindex;
+
+/**
+ * Offset index: provides, for every page, the offset of the page in the file, the compressed size of the page and
+ * the index of the first row in the page.
+ *
+ * @see org.apache.parquet.format.OffsetIndex
+ */
+public interface OffsetIndex {
+  /**
+   * @return the number of pages
+   */
+  int getPageCount();
+
+  /**
+   * @param pageIndex
+   *          the index of the page
+   * @return the offset of the page in the file
+   */
+  long getOffset(int pageIndex);
+
+  /**
+   * @param pageIndex
+   *          the index of the page
+   * @return the compressed size of the page (including page header)
+   */
+  int getCompressedPageSize(int pageIndex);
+
+  /**
+   * @param pageIndex
+   *          the index of the page
+   * @return the index of the first row in the page
+   */
+  long getFirstRowIndex(int pageIndex);
+
+  /**
+   * Calculates the index of the last row of a page: the row right before the first row of the following page or, if
+   * there is no following page, the last row of the row-group.
+   *
+   * @param pageIndex
+   *          the index of the page
+   * @param rowGroupRowCount
+   *          the total number of rows in the row-group
+   * @return the calculated index of the last row of the given page
+   */
+  default long getLastRowIndex(int pageIndex, long rowGroupRowCount) {
+    int followingPage = pageIndex + 1;
+    if (followingPage >= getPageCount()) {
+      // No page follows this one so it lasts until the end of the row-group
+      return rowGroupRowCount - 1;
+    }
+    return getFirstRowIndex(followingPage) - 1;
+  }
+}
diff --git a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/OffsetIndexBuilder.java b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/OffsetIndexBuilder.java
new file mode 100644
index 0000000000..e4907b5488
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/OffsetIndexBuilder.java
@@ -0,0 +1,191 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.internal.column.columnindex;
+
+import java.util.Formatter;
+
+import it.unimi.dsi.fastutil.ints.IntArrayList;
+import it.unimi.dsi.fastutil.ints.IntList;
+import it.unimi.dsi.fastutil.longs.LongArrayList;
+import it.unimi.dsi.fastutil.longs.LongList;
+
+/**
+ * Builder collecting the offset, compressed size and first row index of pages to create {@link OffsetIndex} objects.
+ */
+public class OffsetIndexBuilder {
+
+  /** {@link OffsetIndex} implementation backed by three parallel arrays indexed by the page index. */
+  private static class OffsetIndexImpl implements OffsetIndex {
+    private final long[] offsets;
+    private final int[] compressedPageSizes;
+    private final long[] firstRowIndexes;
+
+    private OffsetIndexImpl(long[] offsets, int[] compressedPageSizes, long[] firstRowIndexes) {
+      this.offsets = offsets;
+      this.compressedPageSizes = compressedPageSizes;
+      this.firstRowIndexes = firstRowIndexes;
+    }
+
+    /**
+     * @return a table with a header line followed by one line per page
+     */
+    @Override
+    public String toString() {
+      final String headerFormat = "%-10s  %20s  %16s  %20s\n";
+      final String pageFormat = "page-%-5d  %20d  %16d  %20d\n";
+      try (Formatter table = new Formatter()) {
+        table.format(headerFormat, "", "offset", "compressed size", "first row index");
+        int page = 0;
+        for (long offset : offsets) {
+          table.format(pageFormat, page, offset, compressedPageSizes[page], firstRowIndexes[page]);
+          ++page;
+        }
+        return table.toString();
+      }
+    }
+
+    @Override
+    public int getPageCount() {
+      return offsets.length;
+    }
+
+    @Override
+    public long getOffset(int pageIndex) {
+      return offsets[pageIndex];
+    }
+
+    @Override
+    public int getCompressedPageSize(int pageIndex) {
+      return compressedPageSizes[pageIndex];
+    }
+
+    @Override
+    public long getFirstRowIndex(int pageIndex) {
+      return firstRowIndexes[pageIndex];
+    }
+  }
+
+  // Both add methods are overridden to do nothing so nothing is ever collected and build returns null
+  private static final OffsetIndexBuilder NO_OP_BUILDER = new OffsetIndexBuilder() {
+    @Override
+    public void add(int compressedPageSize, long rowCount) {
+    }
+
+    @Override
+    public void add(long offset, int compressedPageSize, long rowCount) {
+    }
+  };
+
+  private final LongList offsets = new LongArrayList();
+  private final IntList compressedPageSizes = new IntArrayList();
+  private final LongList firstRowIndexes = new LongArrayList();
+  // Values of the most recently added page; used to derive the offset and first row index of the next page
+  private long previousOffset;
+  private int previousPageSize;
+  private long previousRowIndex;
+  private long previousRowCount;
+
+  /**
+   * @return a no-op builder that does not collect values and therefore returns {@code null} at {@link #build(long)}
+   */
+  public static OffsetIndexBuilder getNoOpBuilder() {
+    return NO_OP_BUILDER;
+  }
+
+  /**
+   * @return an {@link OffsetIndexBuilder} instance to build an {@link OffsetIndex} object
+   */
+  public static OffsetIndexBuilder getBuilder() {
+    return new OffsetIndexBuilder();
+  }
+
+  private OffsetIndexBuilder() {
+  }
+
+  /**
+   * Adds the next page to this builder. The offset and the first row index of the page are derived from the
+   * previously added page. Used by the writers to build up {@link OffsetIndex} objects to be written to the Parquet
+   * file.
+   *
+   * @param compressedPageSize
+   *          the size of the page (including header)
+   * @param rowCount
+   *          the number of rows in the page
+   */
+  public void add(int compressedPageSize, long rowCount) {
+    long offset = previousOffset + previousPageSize;
+    long firstRowIndex = previousRowIndex + previousRowCount;
+    add(offset, compressedPageSize, firstRowIndex);
+    previousRowCount = rowCount;
+  }
+
+  /**
+   * Adds a page with explicitly specified values to this builder. Used by the metadata converter to build up
+   * {@link OffsetIndex} objects read from the Parquet file.
+   *
+   * @param offset
+   *          the offset of the page in the file
+   * @param compressedPageSize
+   *          the size of the page (including header)
+   * @param firstRowIndex
+   *          the index of the first row in the page (within the row group)
+   */
+  public void add(long offset, int compressedPageSize, long firstRowIndex) {
+    offsets.add(offset);
+    compressedPageSizes.add(compressedPageSize);
+    firstRowIndexes.add(firstRowIndex);
+
+    previousOffset = offset;
+    previousPageSize = compressedPageSize;
+    previousRowIndex = firstRowIndex;
+  }
+
+  /**
+   * Builds the offset index without translating the collected offsets. Used by the metadata converter to build up
+   * {@link OffsetIndex} objects read from the Parquet file.
+   *
+   * @return the newly created offset index or {@code null} if the {@link OffsetIndex} object would be empty
+   */
+  public OffsetIndex build() {
+    return build(0);
+  }
+
+  /**
+   * Builds the offset index with every collected offset shifted by the specified value. Used by the writers to build
+   * up {@link OffsetIndex} objects to be written to the Parquet file.
+   *
+   * @param firstPageOffset
+   *          the actual offset in the file to be used to translate all the collected offsets
+   * @return the newly created offset index or {@code null} if the {@link OffsetIndex} object would be empty
+   */
+  public OffsetIndex build(long firstPageOffset) {
+    if (compressedPageSizes.isEmpty()) {
+      return null;
+    }
+
+    long[] translatedOffsets = offsets.toLongArray();
+    if (firstPageOffset != 0) {
+      for (int page = 0; page < translatedOffsets.length; ++page) {
+        translatedOffsets[page] += firstPageOffset;
+      }
+    }
+    return new OffsetIndexImpl(translatedOffsets, compressedPageSizes.toIntArray(), firstRowIndexes.toLongArray());
+  }
+
+}
diff --git a/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/ColumnIndexFilter.java b/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/ColumnIndexFilter.java
new file mode 100644
index 0000000000..fb3077e877
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/ColumnIndexFilter.java
@@ -0,0 +1,220 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.internal.filter2.columnindex;
+
+import java.util.PrimitiveIterator;
+import java.util.Set;
+import java.util.function.Function;
+
+import org.apache.parquet.filter2.compat.FilterCompat;
+import org.apache.parquet.filter2.compat.FilterCompat.FilterPredicateCompat;
+import org.apache.parquet.filter2.compat.FilterCompat.NoOpFilter;
+import org.apache.parquet.filter2.compat.FilterCompat.UnboundRecordFilterCompat;
+import org.apache.parquet.filter2.predicate.FilterPredicate.Visitor;
+import org.apache.parquet.filter2.predicate.Operators.And;
+import org.apache.parquet.filter2.predicate.Operators.Column;
+import org.apache.parquet.filter2.predicate.Operators.Eq;
+import org.apache.parquet.filter2.predicate.Operators.Gt;
+import org.apache.parquet.filter2.predicate.Operators.GtEq;
+import org.apache.parquet.filter2.predicate.Operators.LogicalNotUserDefined;
+import org.apache.parquet.filter2.predicate.Operators.Lt;
+import org.apache.parquet.filter2.predicate.Operators.LtEq;
+import org.apache.parquet.filter2.predicate.Operators.Not;
+import org.apache.parquet.filter2.predicate.Operators.NotEq;
+import org.apache.parquet.filter2.predicate.Operators.Or;
+import org.apache.parquet.filter2.predicate.Operators.UserDefined;
+import org.apache.parquet.filter2.predicate.UserDefinedPredicate;
+import org.apache.parquet.hadoop.metadata.ColumnPath;
+import org.apache.parquet.internal.column.columnindex.ColumnIndex;
+import org.apache.parquet.internal.column.columnindex.OffsetIndex;
+import org.apache.parquet.internal.filter2.columnindex.ColumnIndexStore.MissingOffsetIndexException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Visitor calculating, based on the column indexes, the ranges of the rows that might match a filter predicate.
+ * No filtering is applied for columns where no column index is available: all the rows are kept for them.
+ * The offset index is required for every filtered column that is part of the projection, therefore a
+ * {@link MissingOffsetIndexException} might be thrown from any {@code visit} method if such an offset index is missing.
+ */
+public class ColumnIndexFilter implements Visitor<RowRanges> {
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(ColumnIndexFilter.class);
+  private final ColumnIndexStore columnIndexStore;
+  private final Set<ColumnPath> columns;
+  private final long rowCount;
+  // Lazily created by allRows()
+  private RowRanges allRows;
+
+  /**
+   * Calculates the row ranges containing the indexes of the rows that might match the specified filter.
+   *
+   * @param filter
+   *          to be used for filtering the rows
+   * @param columnIndexStore
+   *          the store for providing column/offset indexes
+   * @param paths
+   *          the paths of the columns used in the actual projection; a column not being part of the projection will be
+   *          handled as containing {@code null} values only even if the column has values written in the file
+   * @param rowCount
+   *          the total number of rows in the row-group
+   * @return the ranges of the possible matching row indexes; the returned ranges will contain all the rows if any of
+   *         the required offset index is missing
+   */
+  public static RowRanges calculateRowRanges(FilterCompat.Filter filter, ColumnIndexStore columnIndexStore,
+      Set<ColumnPath> paths, long rowCount) {
+    FilterCompat.Visitor<RowRanges> calculator = new FilterCompat.Visitor<RowRanges>() {
+      @Override
+      public RowRanges visit(FilterPredicateCompat filterPredicateCompat) {
+        try {
+          return filterPredicateCompat.getFilterPredicate()
+              .accept(new ColumnIndexFilter(columnIndexStore, paths, rowCount));
+        } catch (MissingOffsetIndexException e) {
+          // A required offset index is missing: give up filtering and keep all the rows
+          LOGGER.warn("Unable to do filtering", e);
+          return everyRow();
+        }
+      }
+
+      @Override
+      public RowRanges visit(UnboundRecordFilterCompat unboundRecordFilterCompat) {
+        return everyRow();
+      }
+
+      @Override
+      public RowRanges visit(NoOpFilter noOpFilter) {
+        return everyRow();
+      }
+
+      // The ranges returned whenever no filtering is done
+      private RowRanges everyRow() {
+        return RowRanges.createSingle(rowCount);
+      }
+    };
+    return filter.accept(calculator);
+  }
+
+  private ColumnIndexFilter(ColumnIndexStore columnIndexStore, Set<ColumnPath> paths, long rowCount) {
+    this.columnIndexStore = columnIndexStore;
+    this.columns = paths;
+    this.rowCount = rowCount;
+  }
+
+  // Returns the ranges created for the whole row count; created at the first call then reused
+  private RowRanges allRows() {
+    RowRanges ranges = allRows;
+    if (ranges == null) {
+      ranges = RowRanges.createSingle(rowCount);
+      allRows = ranges;
+    }
+    return ranges;
+  }
+
+  @Override
+  public <T extends Comparable<T>> RowRanges visit(Eq<T> eq) {
+    Column<?> column = eq.getColumn();
+    // A column outside of the projection is handled as null-only so only eq(null) keeps its rows
+    RowRanges ifColumnMissing = eq.getValue() == null ? allRows() : RowRanges.EMPTY;
+    return applyPredicate(column, columnIndex -> columnIndex.visit(eq), ifColumnMissing);
+  }
+
+  @Override
+  public <T extends Comparable<T>> RowRanges visit(NotEq<T> notEq) {
+    Column<?> column = notEq.getColumn();
+    // The opposite of eq: for a column outside of the projection only notEq(null) drops the rows
+    RowRanges ifColumnMissing = notEq.getValue() == null ? RowRanges.EMPTY : allRows();
+    return applyPredicate(column, columnIndex -> columnIndex.visit(notEq), ifColumnMissing);
+  }
+
+  @Override
+  public <T extends Comparable<T>> RowRanges visit(Lt<T> lt) {
+    // Nothing is kept for a column outside of the projection; the same applies to ltEq, gt and gtEq
+    return applyPredicate(lt.getColumn(), columnIndex -> columnIndex.visit(lt), RowRanges.EMPTY);
+  }
+
+  @Override
+  public <T extends Comparable<T>> RowRanges visit(LtEq<T> ltEq) {
+    return applyPredicate(ltEq.getColumn(), columnIndex -> columnIndex.visit(ltEq), RowRanges.EMPTY);
+  }
+
+  @Override
+  public <T extends Comparable<T>> RowRanges visit(Gt<T> gt) {
+    return applyPredicate(gt.getColumn(), columnIndex -> columnIndex.visit(gt), RowRanges.EMPTY);
+  }
+
+  @Override
+  public <T extends Comparable<T>> RowRanges visit(GtEq<T> gtEq) {
+    return applyPredicate(gtEq.getColumn(), columnIndex -> columnIndex.visit(gtEq), RowRanges.EMPTY);
+  }
+
+  @Override
+  public <T extends Comparable<T>, U extends UserDefinedPredicate<T>> RowRanges visit(UserDefined<T, U> udp) {
+    Column<?> column = udp.getColumn();
+    // The predicate itself decides whether the null values standing for a column outside of the projection are kept
+    boolean keepsNull = udp.getUserDefinedPredicate().keep(null);
+    return applyPredicate(column, columnIndex -> columnIndex.visit(udp), keepsNull ? allRows() : RowRanges.EMPTY);
+  }
+
+  @Override
+  public <T extends Comparable<T>, U extends UserDefinedPredicate<T>> RowRanges visit(
+      LogicalNotUserDefined<T, U> udp) {
+    Column<?> column = udp.getUserDefined().getColumn();
+    // Inverted compared to the plain user defined predicate: rows are kept if the predicate drops null
+    boolean keepsNull = udp.getUserDefined().getUserDefinedPredicate().keep(null);
+    return applyPredicate(column, columnIndex -> columnIndex.visit(udp), keepsNull ? RowRanges.EMPTY : allRows());
+  }
+
+  // Calculates the row ranges of the pages selected by func from the column index of the column. Returns
+  // rangesForMissingColumns if the column is not in the projection and all the rows if it has no column index.
+  private RowRanges applyPredicate(Column<?> column, Function<ColumnIndex, PrimitiveIterator.OfInt> func,
+      RowRanges rangesForMissingColumns) {
+    ColumnPath path = column.getColumnPath();
+    if (columns.contains(path)) {
+      // The offset index is fetched first: a missing one raises MissingOffsetIndexException even without column index
+      OffsetIndex offsetIndex = columnIndexStore.getOffsetIndex(path);
+      ColumnIndex columnIndex = columnIndexStore.getColumnIndex(path);
+      if (columnIndex != null) {
+        return RowRanges.create(rowCount, func.apply(columnIndex), offsetIndex);
+      }
+      LOGGER.warn("No column index for column {} is available; Unable to filter on this column", path);
+      return allRows();
+    }
+    return rangesForMissingColumns;
+  }
+
+  @Override
+  public RowRanges visit(And and) {
+    RowRanges left = and.getLeft().accept(this);
+    RowRanges right = and.getRight().accept(this);
+    return RowRanges.intersection(left, right);
+  }
+
+  @Override
+  public RowRanges visit(Or or) {
+    RowRanges left = or.getLeft().accept(this);
+    RowRanges right = or.getRight().accept(this);
+    return RowRanges.union(left, right);
+  }
+
+  @Override
+  public RowRanges visit(Not not) {
+    throw new IllegalArgumentException(
+        "Predicates containing a NOT must be run through LogicalInverseRewriter. " + not);
+  }
+}
diff --git a/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/ColumnIndexStore.java b/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/ColumnIndexStore.java
new file mode 100644
index 0000000000..c82861ac25
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/ColumnIndexStore.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.internal.filter2.columnindex;
+
+import org.apache.parquet.ParquetRuntimeException;
+import org.apache.parquet.hadoop.metadata.ColumnPath;
+import org.apache.parquet.internal.column.columnindex.ColumnIndex;
+import org.apache.parquet.internal.column.columnindex.OffsetIndex;
+
+/**
+ * Store giving access to the {@link ColumnIndex} and {@link OffsetIndex} objects of the columns of a row-group.
+ */
+public interface ColumnIndexStore {
+
+  /**
+   * Exception signalling that the offset index of a column is missing.
+   */
+  class MissingOffsetIndexException extends ParquetRuntimeException {
+    /**
+     * @param path
+     *          the path of the column that has no offset index; used in the exception message
+     */
+    public MissingOffsetIndexException(ColumnPath path) {
+      super(describe(path));
+    }
+
+    // Builds the exception message from the toDotString() form of the column path
+    private static String describe(ColumnPath path) {
+      String column = path.toDotString();
+      return "No offset index for column " + column + " is available; Unable to do filtering";
+    }
+  }
+
+  /**
+   * @param column
+   *          the path of the column
+   * @return the column index for the column-chunk in the row-group or {@code null} if no column index is available
+   */
+  ColumnIndex getColumnIndex(ColumnPath column);
+
+  /**
+   * @param column
+   *          the path of the column
+   * @return the offset index for the column-chunk in the row-group
+   * @throws MissingOffsetIndexException
+   *           if the related offset index is missing
+   */
+  OffsetIndex getOffsetIndex(ColumnPath column) throws MissingOffsetIndexException;
+}
diff --git a/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/RowRanges.java b/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/RowRanges.java
new file mode 100644
index 0000000000..7753507900
--- /dev/null
+++ b/parquet-column/src/main/java/org/apache/parquet/internal/filter2/columnindex/RowRanges.java
@@ -0,0 +1,288 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.internal.filter2.columnindex;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.NoSuchElementException;
+import java.util.PrimitiveIterator;
+import java.util.Set;
+
+import org.apache.parquet.filter2.compat.FilterCompat.Filter;
+import org.apache.parquet.internal.column.columnindex.OffsetIndex;
+
+/**
+ * Class representing row ranges in a row-group. These row ranges are calculated as a result of the column index based
+ * filtering. To be used to iterate over the matching row indexes to be read from a row-group, retrieve the count of the
+ * matching rows or check overlapping of a row index range.
+ *
+ * @see ColumnIndexFilter#calculateRowRanges(Filter, ColumnIndexStore, Set, long)
+ */
+public class RowRanges {
+  private static class Range {
+
+    // Returns the union of the two ranges or null if there are elements between them.
+    private static Range union(Range left, Range right) {
+      if (left.from <= right.from) {
+        if (left.to + 1 >= right.from) {
+          return new Range(left.from, Math.max(left.to, right.to));
+        }
+      } else if (right.to + 1 >= left.from) {
+        return new Range(right.from, Math.max(left.to, right.to));
+      }
+      return null;
+    }
+
+    // Returns the intersection of the two ranges or null if they do not overlap.
+    private static Range intersection(Range left, Range right) {
+      if (left.from <= right.from) {
+        if (left.to >= right.from) {
+          return new Range(right.from, Math.min(left.to, right.to));
+        }
+      } else if (right.to >= left.from) {
+        return new Range(left.from, Math.min(left.to, right.to));
+      }
+      return null;
+    }
+
+    final long from;
+    final long to;
+
+    // Creates a range of [from, to] (from and to are inclusive; empty ranges are not valid)
+    Range(long from, long to) {
+      assert from <= to;
+      this.from = from;
+      this.to = to;
+    }
+
+    long count() {
+      return to - from + 1;
+    }
+
+    boolean isBefore(Range other) {
+      return to < other.from;
+    }
+
+    boolean isAfter(Range other) {
+      return from > other.to;
+    }
+
+    @Override
+    public String toString() {
+      return "[" + from + ", " + to + ']';
+    }
+  }
+
+  static final RowRanges EMPTY = new RowRanges();
+
+  /*
+   * Creates a new RowRanges object with the single range [0, rowCount - 1].
+   */
+  static RowRanges createSingle(long rowCount) {
+    RowRanges ranges = new RowRanges();
+    ranges.add(new Range(0, rowCount - 1));
+    return ranges;
+  }
+
+  /*
+   * Creates a new RowRanges object with the following ranges.
+   * [firstRowIndex[0], lastRowIndex[0]],
+   * [firstRowIndex[1], lastRowIndex[1]],
+   * ...,
+   * [firstRowIndex[n], lastRowIndex[n]]
+   * (See OffsetIndex.getFirstRowIndex and OffsetIndex.getLastRowIndex for details.)
+   *
+   * The union of the ranges is calculated so the result always contains disjunct ranges. See union for
+   * details.
+   */
+  static RowRanges create(long rowCount, PrimitiveIterator.OfInt pageIndexes, OffsetIndex offsetIndex) {
+    RowRanges ranges = new RowRanges();
+    while (pageIndexes.hasNext()) {
+      int pageIndex = pageIndexes.nextInt();
+      ranges.add(new Range(offsetIndex.getFirstRowIndex(pageIndex), offsetIndex.getLastRowIndex(pageIndex, rowCount)));
+    }
+    return ranges;
+  }
+
+  /*
+   * Calculates the union of the two specified RowRanges objects. The union of two ranges is calculated if there are no
+   * elements between them. Otherwise, the two disjunct ranges are stored separately.
+   * For example:
+   * [113, 241] ∪ [221, 340] = [113, 340]
+   * [113, 230] ∪ [231, 340] = [113, 340]
+   * while
+   * [113, 230] ∪ [232, 340] = [113, 230], [232, 340]
+   *
+   * The result RowRanges object will contain all the row indexes that were contained in one of the specified objects.
+   */
+  static RowRanges union(RowRanges left, RowRanges right) {
+    RowRanges result = new RowRanges();
+    Iterator<Range> it1 = left.ranges.iterator();
+    Iterator<Range> it2 = right.ranges.iterator();
+    if (it2.hasNext()) {
+      Range range2 = it2.next();
+      while (it1.hasNext()) {
+        Range range1 = it1.next();
+        if (range1.isAfter(range2)) {
+          result.add(range2);
+          range2 = range1;
+          Iterator<Range> tmp = it1;
+          it1 = it2;
+          it2 = tmp;
+        } else {
+          result.add(range1);
+        }
+      }
+      result.add(range2);
+    } else {
+      it2 = it1;
+    }
+    while (it2.hasNext()) {
+      result.add(it2.next());
+    }
+
+    return result;
+  }
+
+  /*
+   * Calculates the intersection of the two specified RowRanges objects. Two ranges intersect if they have common
+   * elements otherwise the result is empty.
+   * For example:
+   * [113, 241] ∩ [221, 340] = [221, 241]
+   * while
+   * [113, 230] ∩ [231, 340] = <EMPTY>
+   *
+   * The result RowRanges object will contain all the row indexes that were contained in both of the specified objects.
+   */
+  static RowRanges intersection(RowRanges left, RowRanges right) {
+    RowRanges result = new RowRanges();
+
+    int rightIndex = 0;
+    for (Range l : left.ranges) {
+      for (int i = rightIndex, n = right.ranges.size(); i < n; ++i) {
+        Range r = right.ranges.get(i);
+        if (l.isBefore(r)) {
+          break;
+        } else if (l.isAfter(r)) {
+          rightIndex = i + 1;
+          continue;
+        }
+        result.add(Range.intersection(l, r));
+      }
+    }
+
+    return result;
+  }
+
+  private final List<Range> ranges = new ArrayList<>();
+
+  private RowRanges() {
+  }
+
+  /*
+   * Adds a range to the end of the list of ranges. It maintains the disjunct ascending order(*) of the ranges by
+   * trying to union the specified range to the last ranges in the list. The specified range shall be larger(*) than
+   * the last one or might be overlapped with some of the last ones.
+   * (*) [a, b] < [c, d] if b < c
+   */
+  private void add(Range range) {
+    Range rangeToAdd = range;
+    for (int i = ranges.size() - 1; i >= 0; --i) {
+      Range last = ranges.get(i);
+      assert !last.isAfter(range);
+      Range u = Range.union(last, rangeToAdd);
+      if (u == null) {
+        break;
+      }
+      rangeToAdd = u;
+      ranges.remove(i);
+    }
+    ranges.add(rangeToAdd);
+  }
+
+  /**
+   * @return the number of rows in the ranges
+   */
+  public long rowCount() {
+    long cnt = 0;
+    for (Range range : ranges) {
+      cnt += range.count();
+    }
+    return cnt;
+  }
+
+  /**
+   * @return the ascending iterator of the row indexes contained in the ranges
+   */
+  public PrimitiveIterator.OfLong iterator() {
+    return new PrimitiveIterator.OfLong() {
+      private int currentRangeIndex = -1;
+      private Range currentRange;
+      private long next = findNext();
+
+      private long findNext() {
+        if (currentRange == null || next + 1 > currentRange.to) {
+          if (currentRangeIndex + 1 < ranges.size()) {
+            currentRange = ranges.get(++currentRangeIndex);
+            next = currentRange.from;
+          } else {
+            return -1;
+          }
+        } else {
+          ++next;
+        }
+        return next;
+      }
+
+      @Override
+      public boolean hasNext() {
+        return next >= 0;
+      }
+
+      @Override
+      public long nextLong() {
+        long ret = next;
+        if (ret < 0) {
+          throw new NoSuchElementException();
+        }
+        next = findNext();
+        return ret;
+      }
+    };
+  }
+
+  /**
+   * @param from
+   *          the first row of the range to be checked for connection
+   * @param to
+   *          the last row of the range to be checked for connection
+   * @return {@code true} if the specified range overlaps (has common elements) with one of the ranges
+   */
+  public boolean isOverlapping(long from, long to) {
+    return Collections.binarySearch(ranges, new Range(from, to),
+        (r1, r2) -> r1.isBefore(r2) ? -1 : r1.isAfter(r2) ? 1 : 0) >= 0;
+  }
+
+  @Override
+  public String toString() {
+    return ranges.toString();
+  }
+}
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/ConversionPatterns.java b/parquet-column/src/main/java/org/apache/parquet/schema/ConversionPatterns.java
index 6db1e587c9..a530db13c8 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/ConversionPatterns.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/ConversionPatterns.java
@@ -1,4 +1,4 @@
-/* 
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -22,7 +22,7 @@
 import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
 import org.apache.parquet.schema.Type.Repetition;
 
-import static org.apache.parquet.schema.OriginalType.*;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.stringType;
 
 /**
  * Utility functions to convert from Java-like map and list types
@@ -37,15 +37,15 @@ public abstract class ConversionPatterns {
    *
    * @param repetition   repetition for the list or map
    * @param alias        name of the field
-   * @param originalType original type for the list or map
+   * @param logicalTypeAnnotation logical type for the list or map
    * @param nested       the nested repeated field
    * @return a group type
    */
-  private static GroupType listWrapper(Repetition repetition, String alias, OriginalType originalType, Type nested) {
+  private static GroupType listWrapper(Repetition repetition, String alias, LogicalTypeAnnotation logicalTypeAnnotation, Type nested) {
     if (!nested.isRepetition(Repetition.REPEATED)) {
       throw new IllegalArgumentException("Nested type should be repeated: " + nested);
     }
-    return new GroupType(repetition, alias, originalType, nested);
+    return new GroupType(repetition, alias, logicalTypeAnnotation, nested);
   }
 
   public static GroupType mapType(Repetition repetition, String alias, Type keyType, Type valueType) {
@@ -53,7 +53,7 @@ public static GroupType mapType(Repetition repetition, String alias, Type keyTyp
   }
 
   public static GroupType stringKeyMapType(Repetition repetition, String alias, String mapAlias, Type valueType) {
-    return mapType(repetition, alias, mapAlias, new PrimitiveType(Repetition.REQUIRED, PrimitiveTypeName.BINARY, "key", OriginalType.UTF8), valueType);
+    return mapType(repetition, alias, mapAlias, new PrimitiveType(Repetition.REQUIRED, PrimitiveTypeName.BINARY, "key", stringType()), valueType);
   }
 
   public static GroupType stringKeyMapType(Repetition repetition, String alias, Type valueType) {
@@ -66,11 +66,11 @@ public static GroupType mapType(Repetition repetition, String alias, String mapA
       return listWrapper(
               repetition,
               alias,
-              MAP,
+              LogicalTypeAnnotation.mapType(),
               new GroupType(
                       Repetition.REPEATED,
                       mapAlias,
-                      MAP_KEY_VALUE,
+                      LogicalTypeAnnotation.MapKeyValueTypeAnnotation.getInstance(),
                       keyType)
       );
     } else {
@@ -80,11 +80,11 @@ public static GroupType mapType(Repetition repetition, String alias, String mapA
       return listWrapper(
               repetition,
               alias,
-              MAP,
+              LogicalTypeAnnotation.mapType(),
               new GroupType(
                       Repetition.REPEATED,
                       mapAlias,
-                      MAP_KEY_VALUE,
+                      LogicalTypeAnnotation.MapKeyValueTypeAnnotation.getInstance(),
                       keyType,
                       valueType)
       );
@@ -103,7 +103,7 @@ public static GroupType listType(Repetition repetition, String alias, Type neste
     return listWrapper(
             repetition,
             alias,
-            LIST,
+            LogicalTypeAnnotation.listType(),
             nestedType
     );
   }
@@ -125,7 +125,7 @@ public static GroupType listOfElements(Repetition listRepetition, String name, T
     return listWrapper(
         listRepetition,
         name,
-        LIST,
+        LogicalTypeAnnotation.listType(),
         new GroupType(Repetition.REPEATED, "list", elementType)
     );
   }
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/GroupType.java b/parquet-column/src/main/java/org/apache/parquet/schema/GroupType.java
index 5cb40e5e39..64e7062959 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/GroupType.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/GroupType.java
@@ -67,6 +67,16 @@ public GroupType(Repetition repetition, String name, OriginalType originalType,
     this(repetition, name, originalType, Arrays.asList(fields));
   }
 
+  /**
+   * @param repetition OPTIONAL, REPEATED, REQUIRED
+   * @param name the name of the field
+   * @param logicalTypeAnnotation (optional) the logical type to help with cross schema conversion (LIST, MAP, ...)
+   * @param fields the contained fields
+   */
+  GroupType(Repetition repetition, String name, LogicalTypeAnnotation logicalTypeAnnotation, Type... fields) {
+    this(repetition, name, logicalTypeAnnotation, Arrays.asList(fields));
+  }
+
   /**
    * @param repetition OPTIONAL, REPEATED, REQUIRED
    * @param name the name of the field
@@ -78,6 +88,16 @@ public GroupType(Repetition repetition, String name, OriginalType originalType,
     this(repetition, name, originalType, fields, null);
   }
 
+  /**
+   * @param repetition OPTIONAL, REPEATED, REQUIRED
+   * @param name the name of the field
+   * @param logicalTypeAnnotation (optional) the logical type to help with cross schema conversion (LIST, MAP, ...)
+   * @param fields the contained fields
+   */
+  GroupType(Repetition repetition, String name, LogicalTypeAnnotation logicalTypeAnnotation, List<Type> fields) {
+    this(repetition, name, logicalTypeAnnotation, fields, null);
+  }
+
   /**
    * @param repetition OPTIONAL, REPEATED, REQUIRED
    * @param name the name of the field
@@ -109,7 +129,7 @@ public GroupType(Repetition repetition, String name, OriginalType originalType,
    */
   @Override
   public GroupType withId(int id) {
-    return new GroupType(getRepetition(), getName(), getOriginalType(), fields, new ID(id));
+    return new GroupType(getRepetition(), getName(), getLogicalTypeAnnotation(), fields, new ID(id));
   }
 
   /**
@@ -117,7 +137,7 @@ public GroupType withId(int id) {
    * @return a group with the same attributes and new fields.
    */
   public GroupType withNewFields(List<Type> newFields) {
-    return new GroupType(getRepetition(), getName(), getOriginalType(), newFields, getId());
+    return new GroupType(getRepetition(), getName(), getLogicalTypeAnnotation(), newFields, getId());
   }
 
   /**
@@ -219,7 +239,7 @@ public void writeToStringBuilder(StringBuilder sb, String indent) {
         .append(getRepetition().name().toLowerCase(Locale.ENGLISH))
         .append(" group ")
         .append(getName())
-        .append(getOriginalType() == null ? "" : " (" + getOriginalType() +")")
+        .append(getLogicalTypeAnnotation() == null ? "" : " (" + getLogicalTypeAnnotation().toString() +")")
         .append(getId() == null ? "" : " = " + getId())
         .append(" {\n");
     membersDisplayString(sb, indent + "  ");
@@ -250,7 +270,7 @@ protected boolean typeEquals(Type other) {
    */
   @Override
   public int hashCode() {
-    return Objects.hash(getOriginalType(), getFields());
+    return Objects.hash(getLogicalTypeAnnotation(), getFields());
   }
 
   /**
@@ -261,7 +281,7 @@ protected boolean equals(Type otherType) {
     return
         !otherType.isPrimitive()
         && super.equals(otherType)
-        && getOriginalType() == otherType.getOriginalType()
+        && Objects.equals(getLogicalTypeAnnotation(),otherType.getLogicalTypeAnnotation())
         && getFields().equals(otherType.asGroupType().getFields());
   }
 
@@ -355,7 +375,7 @@ protected Type union(Type toMerge, boolean strict) {
     if (toMerge.isPrimitive()) {
       throw new IncompatibleSchemaModificationException("can not merge primitive type " + toMerge + " into group type " + this);
     }
-    return new GroupType(toMerge.getRepetition(), getName(), toMerge.getOriginalType(), mergeFields(toMerge.asGroupType()), getId());
+    return new GroupType(toMerge.getRepetition(), getName(), toMerge.getLogicalTypeAnnotation(), mergeFields(toMerge.asGroupType()), getId());
   }
 
   /**
@@ -383,8 +403,8 @@ List<Type> mergeFields(GroupType toMerge, boolean strict) {
         if (fieldToMerge.getRepetition().isMoreRestrictiveThan(type.getRepetition())) {
           throw new IncompatibleSchemaModificationException("repetition constraint is more restrictive: can not merge type " + fieldToMerge + " into " + type);
         }
-        if (type.getOriginalType() != null && fieldToMerge.getOriginalType() != type.getOriginalType()) {
-          throw new IncompatibleSchemaModificationException("cannot merge original type " + fieldToMerge.getOriginalType() + " into " + type.getOriginalType());
+        if (type.getLogicalTypeAnnotation() != null && !type.getLogicalTypeAnnotation().equals(fieldToMerge.getLogicalTypeAnnotation())) {
+          throw new IncompatibleSchemaModificationException("cannot merge logical type " + fieldToMerge.getLogicalTypeAnnotation() + " into " + type.getLogicalTypeAnnotation());
         }
         merged = type.union(fieldToMerge, strict);
       } else {
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
index 340a24af16..c1b7d99fd8 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
@@ -20,12 +20,28 @@
 
 import org.apache.parquet.Preconditions;
 
+import java.util.Collections;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Objects;
 import java.util.Optional;
+import java.util.Set;
 import java.util.function.Supplier;
 
+import static java.util.Arrays.asList;
 import static java.util.Optional.empty;
+import static org.apache.parquet.schema.ColumnOrder.ColumnOrderName.TYPE_DEFINED_ORDER;
+import static org.apache.parquet.schema.ColumnOrder.ColumnOrderName.UNDEFINED;
+import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_MICROS_STRINGIFIER;
+import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_MICROS_UTC_STRINGIFIER;
+import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_MILLIS_STRINGIFIER;
+import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_MILLIS_UTC_STRINGIFIER;
+import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_NANOS_STRINGIFIER;
+import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_NANOS_UTC_STRINGIFIER;
+import static org.apache.parquet.schema.PrimitiveStringifier.TIME_NANOS_STRINGIFIER;
+import static org.apache.parquet.schema.PrimitiveStringifier.TIME_NANOS_UTC_STRINGIFIER;
+import static org.apache.parquet.schema.PrimitiveStringifier.TIME_STRINGIFIER;
+import static org.apache.parquet.schema.PrimitiveStringifier.TIME_UTC_STRINGIFIER;
 
 public abstract class LogicalTypeAnnotation {
   enum LogicalTypeToken {
@@ -144,6 +160,10 @@ String typeParametersAsString() {
     return "";
   }
 
+  boolean isValidColumnOrder(ColumnOrder columnOrder) {
+    return columnOrder.getColumnOrderName() == UNDEFINED || columnOrder.getColumnOrderName() == TYPE_DEFINED_ORDER;
+  }
+
   @Override
   public String toString() {
     StringBuilder sb = new StringBuilder();
@@ -152,6 +172,10 @@ public String toString() {
     return sb.toString();
   }
 
+  PrimitiveStringifier valueStringifier(PrimitiveType primitiveType) {
+    throw new UnsupportedOperationException("Stringifier is not supported for the logical type: " + this);
+  }
+
   /**
    * Helper method to convert the old representation of logical types (OriginalType) to new logical type.
    */
@@ -290,6 +314,11 @@ public int hashCode() {
       // This type doesn't have any parameters, thus using class hashcode
       return getClass().hashCode();
     }
+
+    @Override
+    PrimitiveStringifier valueStringifier(PrimitiveType primitiveType) {
+      return PrimitiveStringifier.UTF8_STRINGIFIER;
+    }
   }
 
   public static class MapLogicalTypeAnnotation extends LogicalTypeAnnotation {
@@ -389,15 +418,22 @@ public int hashCode() {
       // This type doesn't have any parameters, thus using class hashcode
       return getClass().hashCode();
     }
+
+    @Override
+    PrimitiveStringifier valueStringifier(PrimitiveType primitiveType) {
+      return PrimitiveStringifier.UTF8_STRINGIFIER;
+    }
   }
 
   public static class DecimalLogicalTypeAnnotation extends LogicalTypeAnnotation {
+    private final PrimitiveStringifier stringifier;
     private final int scale;
     private final int precision;
 
     private DecimalLogicalTypeAnnotation(int scale, int precision) {
       this.scale = scale;
       this.precision = precision;
+      stringifier = PrimitiveStringifier.createDecimalStringifier(scale);
     }
 
     public int getPrecision() {
@@ -447,6 +483,11 @@ public boolean equals(Object obj) {
     public int hashCode() {
       return Objects.hash(scale, precision);
     }
+
+    @Override
+    PrimitiveStringifier valueStringifier(PrimitiveType primitiveType) {
+      return stringifier;
+    }
   }
 
   public static class DateLogicalTypeAnnotation extends LogicalTypeAnnotation {
@@ -480,11 +521,17 @@ public int hashCode() {
       // This type doesn't have any parameters, thus using class hashcode
       return getClass().hashCode();
     }
+
+    @Override
+    PrimitiveStringifier valueStringifier(PrimitiveType primitiveType) {
+      return PrimitiveStringifier.DATE_STRINGIFIER;
+    }
   }
 
   public enum TimeUnit {
     MILLIS,
-    MICROS
+    MICROS,
+    NANOS
   }
 
   public static class TimeLogicalTypeAnnotation extends LogicalTypeAnnotation {
@@ -504,7 +551,7 @@ public OriginalType toOriginalType() {
         case MICROS:
           return OriginalType.TIME_MICROS;
         default:
-          throw new RuntimeException("Unknown original type for " + unit);
+          return null;
       }
     }
 
@@ -550,6 +597,19 @@ public boolean equals(Object obj) {
     public int hashCode() {
       return Objects.hash(isAdjustedToUTC, unit);
     }
+
+    @Override
+    PrimitiveStringifier valueStringifier(PrimitiveType primitiveType) {
+      switch (unit) {
+        case MICROS:
+        case MILLIS:
+          return isAdjustedToUTC ? TIME_UTC_STRINGIFIER : TIME_STRINGIFIER;
+        case NANOS:
+          return isAdjustedToUTC ? TIME_NANOS_UTC_STRINGIFIER : TIME_NANOS_STRINGIFIER;
+        default:
+          return super.valueStringifier(primitiveType);
+      }
+    }
   }
 
   public static class TimestampLogicalTypeAnnotation extends LogicalTypeAnnotation {
@@ -569,7 +629,7 @@ public OriginalType toOriginalType() {
         case MICROS:
           return OriginalType.TIMESTAMP_MICROS;
         default:
-          throw new RuntimeException("Unknown original type for " + unit);
+          return null;
       }
     }
 
@@ -615,14 +675,33 @@ public boolean equals(Object obj) {
     public int hashCode() {
       return Objects.hash(isAdjustedToUTC, unit);
     }
+
+    @Override
+    PrimitiveStringifier valueStringifier(PrimitiveType primitiveType) {
+      switch (unit) {
+        case MICROS:
+          return isAdjustedToUTC ? TIMESTAMP_MICROS_UTC_STRINGIFIER : TIMESTAMP_MICROS_STRINGIFIER;
+        case MILLIS:
+          return isAdjustedToUTC ? TIMESTAMP_MILLIS_UTC_STRINGIFIER : TIMESTAMP_MILLIS_STRINGIFIER;
+        case NANOS:
+          return isAdjustedToUTC ? TIMESTAMP_NANOS_UTC_STRINGIFIER : TIMESTAMP_NANOS_STRINGIFIER;
+        default:
+          return super.valueStringifier(primitiveType);
+      }
+    }
   }
 
   public static class IntLogicalTypeAnnotation extends LogicalTypeAnnotation {
+    private static final Set<Integer> VALID_BIT_WIDTH = Collections.unmodifiableSet(
+      new HashSet<>(asList(8, 16, 32, 64)));
+
     private final int bitWidth;
     private final boolean isSigned;
 
-
     private IntLogicalTypeAnnotation(int bitWidth, boolean isSigned) {
+      if (!VALID_BIT_WIDTH.contains(bitWidth)) {
+        throw new IllegalArgumentException("Invalid integer bit width: " + bitWidth);
+      }
       this.bitWidth = bitWidth;
       this.isSigned = isSigned;
     }
@@ -639,7 +718,7 @@ public OriginalType toOriginalType() {
         case 64:
           return isSigned ? OriginalType.INT_64 : OriginalType.UINT_64;
         default:
-          throw new RuntimeException("Unknown original type " + toOriginalType());
+          return null;
       }
     }
 
@@ -685,6 +764,11 @@ public boolean equals(Object obj) {
     public int hashCode() {
       return Objects.hash(bitWidth, isSigned);
     }
+
+    @Override
+    PrimitiveStringifier valueStringifier(PrimitiveType primitiveType) {
+      return isSigned ? PrimitiveStringifier.DEFAULT_STRINGIFIER : PrimitiveStringifier.UNSIGNED_STRINGIFIER;
+    }
   }
 
   public static class JsonLogicalTypeAnnotation extends LogicalTypeAnnotation {
@@ -718,6 +802,11 @@ public int hashCode() {
       // This type doesn't have any parameters, thus using class hashcode
       return getClass().hashCode();
     }
+
+    @Override
+    PrimitiveStringifier valueStringifier(PrimitiveType primitiveType) {
+      return PrimitiveStringifier.UTF8_STRINGIFIER;
+    }
   }
 
   public static class BsonLogicalTypeAnnotation extends LogicalTypeAnnotation {
@@ -751,6 +840,11 @@ public int hashCode() {
       // This type doesn't have any parameters, thus using class hashcode
       return getClass().hashCode();
     }
+
+    @Override
+    PrimitiveStringifier valueStringifier(PrimitiveType primitiveType) {
+      return PrimitiveStringifier.DEFAULT_STRINGIFIER;
+    }
   }
 
   // This logical type annotation is implemented to support backward compatibility with ConvertedType.
@@ -791,6 +885,16 @@ public int hashCode() {
       // This type doesn't have any parameters, thus using class hashcode
       return getClass().hashCode();
     }
+
+    @Override
+    PrimitiveStringifier valueStringifier(PrimitiveType primitiveType) {
+      return PrimitiveStringifier.INTERVAL_STRINGIFIER;
+    }
+
+    @Override
+    boolean isValidColumnOrder(ColumnOrder columnOrder) {
+      return columnOrder.getColumnOrderName() == UNDEFINED;
+    }
   }
 
   // This logical type annotation is implemented to support backward compatibility with ConvertedType.
@@ -845,55 +949,55 @@ public int hashCode() {
    * or {@link Optional#orElseThrow(Supplier)} to throw exception if omitting a type is not allowed.
    */
   public interface LogicalTypeAnnotationVisitor<T> {
-    default Optional<T> visit(StringLogicalTypeAnnotation logicalTypeAnnotation) {
+    default Optional<T> visit(StringLogicalTypeAnnotation stringLogicalType) {
       return empty();
     }
 
-    default Optional<T> visit(MapLogicalTypeAnnotation logicalTypeAnnotation) {
+    default Optional<T> visit(MapLogicalTypeAnnotation mapLogicalType) {
       return empty();
     }
 
-    default Optional<T> visit(ListLogicalTypeAnnotation logicalTypeAnnotation) {
+    default Optional<T> visit(ListLogicalTypeAnnotation listLogicalType) {
       return empty();
     }
 
-    default Optional<T> visit(EnumLogicalTypeAnnotation logicalTypeAnnotation) {
+    default Optional<T> visit(EnumLogicalTypeAnnotation enumLogicalType) {
       return empty();
     }
 
-    default Optional<T> visit(DecimalLogicalTypeAnnotation logicalTypeAnnotation) {
+    default Optional<T> visit(DecimalLogicalTypeAnnotation decimalLogicalType) {
       return empty();
     }
 
-    default Optional<T> visit(DateLogicalTypeAnnotation logicalTypeAnnotation) {
+    default Optional<T> visit(DateLogicalTypeAnnotation dateLogicalType) {
       return empty();
     }
 
-    default Optional<T> visit(TimeLogicalTypeAnnotation logicalTypeAnnotation) {
+    default Optional<T> visit(TimeLogicalTypeAnnotation timeLogicalType) {
       return empty();
     }
 
-    default Optional<T> visit(TimestampLogicalTypeAnnotation logicalTypeAnnotation) {
+    default Optional<T> visit(TimestampLogicalTypeAnnotation timestampLogicalType) {
       return empty();
     }
 
-    default Optional<T> visit(IntLogicalTypeAnnotation logicalTypeAnnotation) {
+    default Optional<T> visit(IntLogicalTypeAnnotation intLogicalType) {
       return empty();
     }
 
-    default Optional<T> visit(JsonLogicalTypeAnnotation logicalTypeAnnotation) {
+    default Optional<T> visit(JsonLogicalTypeAnnotation jsonLogicalType) {
       return empty();
     }
 
-    default Optional<T> visit(BsonLogicalTypeAnnotation logicalTypeAnnotation) {
+    default Optional<T> visit(BsonLogicalTypeAnnotation bsonLogicalType) {
       return empty();
     }
 
-    default Optional<T> visit(IntervalLogicalTypeAnnotation logicalTypeAnnotation) {
+    default Optional<T> visit(IntervalLogicalTypeAnnotation intervalLogicalType) {
       return empty();
     }
 
-    default Optional<T> visit(MapKeyValueTypeAnnotation logicalTypeAnnotation) {
+    default Optional<T> visit(MapKeyValueTypeAnnotation mapKeyValueLogicalType) {
       return empty();
     }
   }
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/MessageType.java b/parquet-column/src/main/java/org/apache/parquet/schema/MessageType.java
index d305eb88ee..83f98d7ecc 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/MessageType.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/MessageType.java
@@ -1,4 +1,4 @@
-/* 
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -62,7 +62,7 @@ public void accept(TypeVisitor visitor) {
   public void writeToStringBuilder(StringBuilder sb, String indent) {
     sb.append("message ")
         .append(getName())
-        .append(getOriginalType() == null ? "" : " (" + getOriginalType() +")")
+        .append(getLogicalTypeAnnotation() == null ? "" : " (" + getLogicalTypeAnnotation().toString() +")")
         .append(" {\n");
     membersDisplayString(sb, "  ");
     sb.append("}\n");
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/OriginalType.java b/parquet-column/src/main/java/org/apache/parquet/schema/OriginalType.java
index b00ae7e6ce..78421b33fb 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/OriginalType.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/OriginalType.java
@@ -1,4 +1,4 @@
-/* 
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -21,46 +21,24 @@
 public enum OriginalType {
   MAP,
   LIST,
-  UTF8(PrimitiveStringifier.UTF8_STRINGIFIER),
+  UTF8, // plain constant: the per-constant stringifier argument is dropped by this change
   MAP_KEY_VALUE,
-  ENUM(PrimitiveStringifier.UTF8_STRINGIFIER),
-  DECIMAL {
-    @Override
-    PrimitiveStringifier stringifier(PrimitiveType type) {
-      return PrimitiveStringifier.createDecimalStringifier(type.getDecimalMetadata().getScale());
-    }
-  },
-  DATE(PrimitiveStringifier.DATE_STRINGIFIER),
-  TIME_MILLIS(PrimitiveStringifier.TIME_STRINGIFIER),
-  TIME_MICROS(PrimitiveStringifier.TIME_STRINGIFIER),
-  TIMESTAMP_MILLIS(PrimitiveStringifier.TIMESTAMP_MILLIS_STRINGIFIER),
-  TIMESTAMP_MICROS(PrimitiveStringifier.TIMESTAMP_MICROS_STRINGIFIER),
-  UINT_8(PrimitiveStringifier.UNSIGNED_STRINGIFIER),
-  UINT_16(PrimitiveStringifier.UNSIGNED_STRINGIFIER),
-  UINT_32(PrimitiveStringifier.UNSIGNED_STRINGIFIER),
-  UINT_64(PrimitiveStringifier.UNSIGNED_STRINGIFIER),
-  INT_8(PrimitiveStringifier.DEFAULT_STRINGIFIER),
-  INT_16(PrimitiveStringifier.DEFAULT_STRINGIFIER),
-  INT_32(PrimitiveStringifier.DEFAULT_STRINGIFIER),
-  INT_64(PrimitiveStringifier.DEFAULT_STRINGIFIER),
-  JSON(PrimitiveStringifier.UTF8_STRINGIFIER),
-  BSON(PrimitiveStringifier.DEFAULT_STRINGIFIER),
-  INTERVAL(PrimitiveStringifier.INTERVAL_STRINGIFIER);
-
-  private final PrimitiveStringifier stringifier;
-
-  PrimitiveStringifier stringifier(PrimitiveType type) {
-    if (stringifier == null) {
-      throw new UnsupportedOperationException("Stringifier is not supported for the original type: " + this);
-    }
-    return stringifier;
-  }
-
-  OriginalType() {
-    this(null);
-  }
-
-  OriginalType(PrimitiveStringifier stringifier) {
-    this.stringifier = stringifier;
-  }
+  ENUM,
+  DECIMAL, // no constant-specific body any more
+  DATE,
+  TIME_MILLIS,
+  TIME_MICROS,
+  TIMESTAMP_MILLIS,
+  TIMESTAMP_MICROS,
+  UINT_8,
+  UINT_16,
+  UINT_32,
+  UINT_64,
+  INT_8,
+  INT_16,
+  INT_32,
+  INT_64,
+  JSON,
+  BSON,
+  INTERVAL // last constant: the enum now has no fields, constructors or methods of its own
 }
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveComparator.java b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveComparator.java
index 5e9adbcf7b..d343b0ea4c 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveComparator.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveComparator.java
@@ -238,8 +238,8 @@ int compare(ByteBuffer b1, ByteBuffer b2) {
       int p1 = b1.position();
       int p2 = b2.position();
 
-      boolean isNegative1 = l1 > 0 ? b1.get(p1) < 0 : false;
-      boolean isNegative2 = l2 > 0 ? b2.get(p2) < 0 : false;
+      boolean isNegative1 = l1 > 0 && b1.get(p1) < 0;
+      boolean isNegative2 = l2 > 0 && b2.get(p2) < 0;
       if (isNegative1 != isNegative2) {
         return isNegative1 ? -1 : 1;
       }
@@ -259,7 +259,7 @@ int compare(ByteBuffer b1, ByteBuffer b2) {
 
       // The beginning of the longer buffer equals to the padding or the lengths are equal
       if (result == 0) {
-        result = compare(l1, b1, p1, b2, p2);
+        result = compare(Math.min(l1, l2), b1, p1, b2, p2);
       }
       return result;
     }
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java
index c1a9b582fe..4705ad94eb 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java
@@ -22,14 +22,16 @@
 import static java.util.concurrent.TimeUnit.MICROSECONDS;
 import static java.util.concurrent.TimeUnit.MILLISECONDS;
 import static java.util.concurrent.TimeUnit.MINUTES;
+import static java.util.concurrent.TimeUnit.NANOSECONDS;
 import static java.util.concurrent.TimeUnit.SECONDS;
 
 import java.math.BigDecimal;
 import java.math.BigInteger;
 import java.nio.ByteBuffer;
 import java.nio.ByteOrder;
-import java.text.SimpleDateFormat;
-import java.util.TimeZone;
+import java.time.Instant;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
 import java.util.concurrent.TimeUnit;
 
 import javax.naming.OperationNotSupportedException;
@@ -242,71 +244,120 @@ String stringifyNotNull(Binary value) {
   };
 
   private static class DateStringifier extends PrimitiveStringifier {
-    private final SimpleDateFormat formatter;
-    private static final TimeZone UTC = TimeZone.getTimeZone("utc");
+    private final DateTimeFormatter formatter; // assigned once in the constructor below
 
     private DateStringifier(String name, String format) {
       super(name);
-      formatter = new SimpleDateFormat(format);
-      formatter.setTimeZone(UTC);
+      formatter = DateTimeFormatter.ofPattern(format).withZone(ZoneOffset.UTC); // UTC zone lets the pattern's date/time fields be resolved from an Instant
     }
 
     @Override
     public String stringify(int value) {
-      return toFormattedString(toMillis(value));
+      return toFormattedString(getInstant(value)); // the getInstant(int) hook supplies the point in time to format
     }
 
     @Override
     public String stringify(long value) {
-      return toFormattedString(toMillis(value));
+      return toFormattedString(getInstant(value)); // the getInstant(long) hook supplies the point in time to format
     }
 
-    private String toFormattedString(long millis) {
-      return formatter.format(millis);
+    private String toFormattedString(Instant instant) { // shared by both stringify overloads
+      return formatter.format(instant);
     }
 
-    long toMillis(int value) {
+    Instant getInstant(int value) { // base version; the anonymous subclasses override the overload they support
       // throw the related unsupported exception
       super.stringify(value);
-      return 0;
+      return null; // presumably never reached: the call above is expected to throw - confirm
     }
 
-    long toMillis(long value) {
+    Instant getInstant(long value) { // base version; the anonymous subclasses override the overload they support
       // throw the related unsupported exception
       super.stringify(value);
-      return 0;
+      return null; // presumably never reached: the call above is expected to throw - confirm
     }
   }
 
   static final PrimitiveStringifier DATE_STRINGIFIER = new DateStringifier("DATE_STRINGIFIER", "yyyy-MM-dd") {
     @Override
-    long toMillis(int value) {
-      return TimeUnit.DAYS.toMillis(value);
+    Instant getInstant(int value) { // value is converted as a count of days
+      return Instant.ofEpochMilli(TimeUnit.DAYS.toMillis(value)); // days -> epoch milliseconds -> Instant
     };
   };
 
   static final PrimitiveStringifier TIMESTAMP_MILLIS_STRINGIFIER = new DateStringifier(
       "TIMESTAMP_MILLIS_STRINGIFIER", "yyyy-MM-dd'T'HH:mm:ss.SSS") {
     @Override
-    long toMillis(long value) {
-      return value;
+    Instant getInstant(long value) { // only the long overload is supported by this stringifier
+      return Instant.ofEpochMilli(value); // value is used directly as epoch milliseconds
     }
   };
 
   static final PrimitiveStringifier TIMESTAMP_MICROS_STRINGIFIER = new DateStringifier(
-      "TIMESTAMP_MICROS_STRINGIFIER", "yyyy-MM-dd'T'HH:mm:ss.SSS") {
+      "TIMESTAMP_MICROS_STRINGIFIER", "yyyy-MM-dd'T'HH:mm:ss.SSSSSS") { // six fraction digits, rendered by the formatter itself
     @Override
-    public String stringify(long value) {
-      return super.stringify(value) + String.format("%03d", Math.abs(value % 1000));
+    Instant getInstant(long value) { // value is a count of microseconds
+      return Instant.ofEpochSecond(MICROSECONDS.toSeconds(value), MICROSECONDS.toNanos(value % SECONDS.toMicros(1))); // whole seconds plus the sub-second remainder as a nanosecond adjustment
+    }
+  };
+
+  static final PrimitiveStringifier TIMESTAMP_NANOS_STRINGIFIER = new DateStringifier(
+    "TIMESTAMP_NANOS_STRINGIFIER", "yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSS") { // nine fraction digits
+    @Override
+    Instant getInstant(long value) { // value is a count of nanoseconds
+      return Instant.ofEpochSecond(NANOSECONDS.toSeconds(value), NANOSECONDS.toNanos(value % SECONDS.toNanos(1))); // whole seconds plus the sub-second remainder as a nanosecond adjustment
     }
+  };
 
+  static final PrimitiveStringifier TIMESTAMP_MILLIS_UTC_STRINGIFIER = new DateStringifier(
+    "TIMESTAMP_MILLIS_UTC_STRINGIFIER", "yyyy-MM-dd'T'HH:mm:ss.SSSZ") { // trailing Z pattern letter appends the offset of the formatter's zone
     @Override
-    long toMillis(long value) {
-      return value / 1000;
+    Instant getInstant(long value) { // same conversion as TIMESTAMP_MILLIS_STRINGIFIER; only the pattern differs
+      return Instant.ofEpochMilli(value); // value is used directly as epoch milliseconds
     }
   };
 
-  static final PrimitiveStringifier TIME_STRINGIFIER = new PrimitiveStringifier("TIME_STRINGIFIER") {
+  static final PrimitiveStringifier TIMESTAMP_MICROS_UTC_STRINGIFIER = new DateStringifier(
+    "TIMESTAMP_MICROS_UTC_STRINGIFIER", "yyyy-MM-dd'T'HH:mm:ss.SSSSSSZ") { // microsecond pattern plus zone offset
+    @Override
+    Instant getInstant(long value) { // same conversion as TIMESTAMP_MICROS_STRINGIFIER; only the pattern differs
+      return Instant.ofEpochSecond(MICROSECONDS.toSeconds(value), MICROSECONDS.toNanos(value % SECONDS.toMicros(1))); // whole seconds plus the sub-second remainder as a nanosecond adjustment
+    }
+  };
+
+  static final PrimitiveStringifier TIMESTAMP_NANOS_UTC_STRINGIFIER = new DateStringifier(
+    "TIMESTAMP_NANOS_UTC_STRINGIFIER", "yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSSZ") { // nanosecond pattern plus zone offset
+    @Override
+    Instant getInstant(long value) { // same conversion as TIMESTAMP_NANOS_STRINGIFIER; only the pattern differs
+      return Instant.ofEpochSecond(NANOSECONDS.toSeconds(value), NANOSECONDS.toNanos(value % SECONDS.toNanos(1))); // whole seconds plus the sub-second remainder as a nanosecond adjustment
+    }
+  };
+
+  private abstract static class TimeStringifier extends PrimitiveStringifier { // shared formatting for the TIME stringifier constants
+    private final boolean withZone; // when true, "+0000" is appended to every formatted value
+
+    TimeStringifier(String name, boolean withZone) {
+      super(name);
+      this.withZone = withZone;
+    }
+
+    protected String toTimeString(long duration, TimeUnit unit) { // renders duration (expressed in unit) as hours:minutes:seconds.fraction
+      String additionalFormat = (unit == MILLISECONDS ? "3d" : unit == MICROSECONDS ? "6d" : "9d"); // fraction width: 3 for millis, 6 for micros, 9 for any other unit
+      String timeZone = withZone ? "+0000" : ""; // fixed literal suffix, not derived from any zone object
+      String format = "%02d:%02d:%02d.%0" + additionalFormat + timeZone;
+      return String.format(format,
+        unit.toHours(duration), // hours are neither wrapped at 24 nor made absolute
+        convert(duration, unit, MINUTES, HOURS), // minutes within the hour
+        convert(duration, unit, SECONDS, MINUTES), // seconds within the minute
+        convert(duration, unit, unit, SECONDS)); // sub-second part, in the value's own unit
+    }
+
+    protected long convert(long duration, TimeUnit from, TimeUnit to, TimeUnit higher) { // duration expressed in 'to', reduced modulo one 'higher'
+      return Math.abs(to.convert(duration, from) % to.convert(1, higher)); // abs keeps the component non-negative for negative durations
+    }
+  }
+
+  static final PrimitiveStringifier TIME_STRINGIFIER = new TimeStringifier("TIME_STRINGIFIER", false) {
     @Override
     public String stringify(int millis) {
       return toTimeString(millis, MILLISECONDS);
@@ -316,18 +367,31 @@ public String stringify(int millis) {
     public String stringify(long micros) {
       return toTimeString(micros, MICROSECONDS);
     }
+  };
 
-    private String toTimeString(long duration, TimeUnit unit) {
-      String format = "%02d:%02d:%02d.%0" + (unit == MILLISECONDS ? "3d" : "6d");
-      return String.format(format,
-          unit.toHours(duration),
-          convert(duration, unit, MINUTES, HOURS),
-          convert(duration, unit, SECONDS, MINUTES),
-          convert(duration, unit, unit, SECONDS));
+  static final PrimitiveStringifier TIME_NANOS_STRINGIFIER = new TimeStringifier("TIME_NANOS_STRINGIFIER", false) { // false: no "+0000" suffix
+    @Override
+    public String stringify(long nanos) { // only the long overload is overridden here
+      return toTimeString(nanos, NANOSECONDS); // NANOSECONDS selects the nine-digit fraction
     }
+  };
 
-    private long convert(long duration, TimeUnit from, TimeUnit to, TimeUnit higher) {
-      return Math.abs(to.convert(duration, from) % to.convert(1, higher));
+  static final PrimitiveStringifier TIME_UTC_STRINGIFIER = new TimeStringifier("TIME_UTC_STRINGIFIER", true) { // true: formatted values carry the "+0000" suffix
+    @Override
+    public String stringify(int millis) { // int overload: value is treated as milliseconds
+      return toTimeString(millis, MILLISECONDS);
+    }
+
+    @Override
+    public String stringify(long micros) { // long overload: value is treated as microseconds
+      return toTimeString(micros, MICROSECONDS);
+    }
+  };
+
+  static final PrimitiveStringifier TIME_NANOS_UTC_STRINGIFIER = new TimeStringifier("TIME_NANOS_UTC_STRINGIFIER", true) {
+    @Override
+    public String stringify(long nanos) {
+      return toTimeString(nanos, NANOSECONDS);
     }
   };
 
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java
index 08adfbe996..6a7382eaba 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java
@@ -21,6 +21,8 @@
 import java.util.Arrays;
 import java.util.List;
 import java.util.Locale;
+import java.util.Objects;
+import java.util.Optional;
 
 import org.apache.parquet.Preconditions;
 import org.apache.parquet.ShouldNeverHappenException;
@@ -31,6 +33,11 @@
 import org.apache.parquet.io.api.RecordConsumer;
 import org.apache.parquet.schema.ColumnOrder.ColumnOrderName;
 
+import static java.util.Optional.empty;
+import static java.util.Optional.of;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MICROS;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MILLIS;
+
 
 /**
  * Representation of a Primitive type
@@ -85,23 +92,32 @@ public <T, E extends Exception> T convert(PrimitiveTypeNameConverter<T, E> conve
       }
 
       @Override
-      PrimitiveComparator<?> comparator(OriginalType logicalType) {
+      PrimitiveComparator<?> comparator(LogicalTypeAnnotation logicalType) { // picks the INT64 comparator from the logical type annotation
         if (logicalType == null) {
           return PrimitiveComparator.SIGNED_INT64_COMPARATOR;
         }
-        switch (logicalType) {
-        case UINT_64:
-          return PrimitiveComparator.UNSIGNED_INT64_COMPARATOR;
-        case INT_64:
-        case DECIMAL:
-        case TIME_MICROS:
-        case TIMESTAMP_MILLIS:
-        case TIMESTAMP_MICROS:
-          return PrimitiveComparator.SIGNED_INT64_COMPARATOR;
-        default:
-          throw new ShouldNeverHappenException(
-              "No comparator logic implemented for INT64 logical type: " + logicalType);
-        }
+        return logicalType.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<PrimitiveComparator>() { // annotations without an override below presumably fall to the visitor's empty() defaults and end in orElseThrow
+          @Override
+          public Optional<PrimitiveComparator> visit(LogicalTypeAnnotation.IntLogicalTypeAnnotation intLogicalType) {
+            return intLogicalType.isSigned() ? // signedness alone decides; bit width is not checked here (the INT32 comparator rejects 64-bit)
+              of(PrimitiveComparator.SIGNED_INT64_COMPARATOR) : of(PrimitiveComparator.UNSIGNED_INT64_COMPARATOR);
+          }
+
+          @Override
+          public Optional<PrimitiveComparator> visit(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) {
+            return of(PrimitiveComparator.SIGNED_INT64_COMPARATOR);
+          }
+
+          @Override
+          public Optional<PrimitiveComparator> visit(LogicalTypeAnnotation.TimeLogicalTypeAnnotation timeLogicalType) {
+            return of(PrimitiveComparator.SIGNED_INT64_COMPARATOR); // NOTE(review): accepts every time unit, while the replaced switch listed only TIME_MICROS for INT64 - confirm
+          }
+
+          @Override
+          public Optional<PrimitiveComparator> visit(LogicalTypeAnnotation.TimestampLogicalTypeAnnotation timestampLogicalType) {
+            return of(PrimitiveComparator.SIGNED_INT64_COMPARATOR); // accepts every timestamp unit
+          }
+        }).orElseThrow(() -> new ShouldNeverHappenException("No comparator logic implemented for INT64 logical type: " + logicalType));
       }
     },
     INT32("getInteger", Integer.TYPE) {
@@ -128,26 +144,39 @@ public <T, E extends Exception> T convert(PrimitiveTypeNameConverter<T, E> conve
       }
 
       @Override
-      PrimitiveComparator<?> comparator(OriginalType logicalType) {
+      PrimitiveComparator<?> comparator(LogicalTypeAnnotation logicalType) { // picks the INT32 comparator from the logical type annotation
         if (logicalType == null) {
           return PrimitiveComparator.SIGNED_INT32_COMPARATOR;
         }
-        switch (logicalType) {
-        case UINT_8:
-        case UINT_16:
-        case UINT_32:
-          return PrimitiveComparator.UNSIGNED_INT32_COMPARATOR;
-        case INT_8:
-        case INT_16:
-        case INT_32:
-        case DECIMAL:
-        case DATE:
-        case TIME_MILLIS:
-          return PrimitiveComparator.SIGNED_INT32_COMPARATOR;
-        default:
-          throw new ShouldNeverHappenException(
-              "No comparator logic implemented for INT32 logical type: " + logicalType);
-        }
+        return logicalType.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<PrimitiveComparator>() { // annotations without an override below presumably fall to the visitor's empty() defaults and end in orElseThrow
+          @Override
+          public Optional<PrimitiveComparator> visit(LogicalTypeAnnotation.IntLogicalTypeAnnotation intLogicalType) {
+            if (intLogicalType.getBitWidth() == 64) { // 64-bit integer annotations have no INT32 comparator
+              return empty(); // empty result is turned into ShouldNeverHappenException by orElseThrow below
+            }
+            return intLogicalType.isSigned() ? // remaining bit widths are split by signedness only
+              of(PrimitiveComparator.SIGNED_INT32_COMPARATOR) : of(PrimitiveComparator.UNSIGNED_INT32_COMPARATOR);
+          }
+
+          @Override
+          public Optional<PrimitiveComparator> visit(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) {
+            return of(PrimitiveComparator.SIGNED_INT32_COMPARATOR);
+          }
+
+          @Override
+          public Optional<PrimitiveComparator> visit(LogicalTypeAnnotation.DateLogicalTypeAnnotation dateLogicalType) {
+            return of(PrimitiveComparator.SIGNED_INT32_COMPARATOR);
+          }
+
+          @Override
+          public Optional<PrimitiveComparator> visit(LogicalTypeAnnotation.TimeLogicalTypeAnnotation timeLogicalType) {
+            if (timeLogicalType.getUnit() == MILLIS) { // only millisecond TIME gets an INT32 comparator, matching the replaced TIME_MILLIS case
+              return of(PrimitiveComparator.SIGNED_INT32_COMPARATOR);
+            }
+            return empty(); // other units end in orElseThrow below
+          }
+        }).orElseThrow(
+          () -> new ShouldNeverHappenException("No comparator logic implemented for INT32 logical type: " + logicalType));
       }
     },
     BOOLEAN("getBoolean", Boolean.TYPE) {
@@ -174,7 +203,7 @@ public <T, E extends Exception> T convert(PrimitiveTypeNameConverter<T, E> conve
       }
 
       @Override
-      PrimitiveComparator<?> comparator(OriginalType logicalType) {
+      PrimitiveComparator<?> comparator(LogicalTypeAnnotation logicalType) {
         return PrimitiveComparator.BOOLEAN_COMPARATOR;
       }
     },
@@ -202,22 +231,36 @@ public <T, E extends Exception> T convert(PrimitiveTypeNameConverter<T, E> conve
       }
 
       @Override
-      PrimitiveComparator<?> comparator(OriginalType logicalType) {
+      PrimitiveComparator<?> comparator(LogicalTypeAnnotation logicalType) { // picks the BINARY comparator from the logical type annotation
         if (logicalType == null) {
           return PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR;
         }
-        switch (logicalType) {
-        case DECIMAL:
-          return PrimitiveComparator.BINARY_AS_SIGNED_INTEGER_COMPARATOR;
-        case UTF8:
-        case ENUM:
-        case JSON:
-        case BSON:
-          return PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR;
-        default:
-          throw new ShouldNeverHappenException(
-              "No comparator logic implemented for BINARY logical type: " + logicalType);
-        }
+        return logicalType.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<PrimitiveComparator>() { // annotations without an override below presumably fall to the visitor's empty() defaults and end in orElseThrow
+          @Override
+          public Optional<PrimitiveComparator> visit(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) {
+            return of(PrimitiveComparator.BINARY_AS_SIGNED_INTEGER_COMPARATOR); // DECIMAL is the only annotation here that does not use the lexicographical comparator
+          }
+
+          @Override
+          public Optional<PrimitiveComparator> visit(LogicalTypeAnnotation.StringLogicalTypeAnnotation stringLogicalType) {
+            return of(PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR); // same comparator the replaced switch returned for UTF8
+          }
+
+          @Override
+          public Optional<PrimitiveComparator> visit(LogicalTypeAnnotation.EnumLogicalTypeAnnotation enumLogicalType) {
+            return of(PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR);
+          }
+
+          @Override
+          public Optional<PrimitiveComparator> visit(LogicalTypeAnnotation.JsonLogicalTypeAnnotation jsonLogicalType) {
+            return of(PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR);
+          }
+
+          @Override
+          public Optional<PrimitiveComparator> visit(LogicalTypeAnnotation.BsonLogicalTypeAnnotation bsonLogicalType) {
+            return of(PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR);
+          }
+        }).orElseThrow(() -> new ShouldNeverHappenException("No comparator logic implemented for BINARY logical type: " + logicalType));
       }
     },
     FLOAT("getFloat", Float.TYPE) {
@@ -244,7 +287,7 @@ public <T, E extends Exception> T convert(PrimitiveTypeNameConverter<T, E> conve
       }
 
       @Override
-      PrimitiveComparator<?> comparator(OriginalType logicalType) {
+      PrimitiveComparator<?> comparator(LogicalTypeAnnotation logicalType) {
         return PrimitiveComparator.FLOAT_COMPARATOR;
       }
     },
@@ -272,7 +315,7 @@ public <T, E extends Exception> T convert(PrimitiveTypeNameConverter<T, E> conve
       }
 
       @Override
-      PrimitiveComparator<?> comparator(OriginalType logicalType) {
+      PrimitiveComparator<?> comparator(LogicalTypeAnnotation logicalType) {
         return PrimitiveComparator.DOUBLE_COMPARATOR;
       }
     },
@@ -298,7 +341,7 @@ public <T, E extends Exception> T convert(PrimitiveTypeNameConverter<T, E> conve
       }
 
       @Override
-      PrimitiveComparator<?> comparator(OriginalType logicalType) {
+      PrimitiveComparator<?> comparator(LogicalTypeAnnotation logicalType) {
         return PrimitiveComparator.BINARY_AS_SIGNED_INTEGER_COMPARATOR;
       }
     },
@@ -326,19 +369,23 @@ public <T, E extends Exception> T convert(PrimitiveTypeNameConverter<T, E> conve
       }
 
       @Override
-      PrimitiveComparator<?> comparator(OriginalType logicalType) {
+      PrimitiveComparator<?> comparator(LogicalTypeAnnotation logicalType) { // picks the FIXED_LEN_BYTE_ARRAY comparator from the logical type annotation
         if (logicalType == null) {
           return PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR;
         }
-        switch (logicalType) {
-        case DECIMAL:
-          return PrimitiveComparator.BINARY_AS_SIGNED_INTEGER_COMPARATOR;
-        case INTERVAL:
-          return PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR;
-        default:
-          throw new ShouldNeverHappenException(
-              "No comparator logic implemented for FIXED_LEN_BYTE_ARRAY logical type: " + logicalType);
-        }
+
+        return logicalType.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<PrimitiveComparator>() { // annotations without an override below presumably fall to the visitor's empty() defaults and end in orElseThrow
+          @Override
+          public Optional<PrimitiveComparator> visit(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) {
+            return of(PrimitiveComparator.BINARY_AS_SIGNED_INTEGER_COMPARATOR); // same comparator the replaced switch returned for DECIMAL
+          }
+
+          @Override
+          public Optional<PrimitiveComparator> visit(LogicalTypeAnnotation.IntervalLogicalTypeAnnotation intervalLogicalType) {
+            return of(PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR); // same comparator the replaced switch returned for INTERVAL
+          }
+        }).orElseThrow(() -> new ShouldNeverHappenException(
+          "No comparator logic implemented for FIXED_LEN_BYTE_ARRAY logical type: " + logicalType));
       }
     };
 
@@ -370,7 +417,7 @@ abstract public void addValueToPrimitiveConverter(
 
     abstract public <T, E extends Exception> T convert(PrimitiveTypeNameConverter<T, E> converter) throws E;
 
-    abstract PrimitiveComparator<?> comparator(OriginalType logicalType);
+    abstract PrimitiveComparator<?> comparator(LogicalTypeAnnotation logicalType);
   }
 
   private final PrimitiveTypeName primitive;
@@ -474,7 +521,7 @@ public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive,
     super(name, repetition, logicalTypeAnnotation, id);
     this.primitive = primitive;
     this.length = length;
-    if (getOriginalType() == OriginalType.DECIMAL) {
+    if (logicalTypeAnnotation instanceof LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) {
       LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimal = (LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) logicalTypeAnnotation;
       this.decimalMeta = new DecimalMetadata(decimal.getPrecision(), decimal.getScale());
     } else {
@@ -482,7 +529,7 @@ public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive,
     }
 
     if (columnOrder == null) {
-      columnOrder = primitive == PrimitiveTypeName.INT96 || getOriginalType() == OriginalType.INTERVAL
+      columnOrder = primitive == PrimitiveTypeName.INT96 || logicalTypeAnnotation instanceof LogicalTypeAnnotation.IntervalLogicalTypeAnnotation
         ? ColumnOrder.undefined()
         : ColumnOrder.typeDefined();
     }
@@ -494,35 +541,9 @@ private ColumnOrder requireValidColumnOrder(ColumnOrder columnOrder) {
       Preconditions.checkArgument(columnOrder.getColumnOrderName() == ColumnOrderName.UNDEFINED,
           "The column order {} is not supported by INT96", columnOrder);
     }
-    if (getOriginalType() != null) {
-      // Explicitly listing all the logical types to avoid having unsupported column orders new types accidentally
-      switch (getOriginalType()) {
-        case INT_8:
-        case INT_16:
-        case INT_32:
-        case INT_64:
-        case UINT_8:
-        case UINT_16:
-        case UINT_32:
-        case UINT_64:
-        case UTF8:
-        case DECIMAL:
-        case DATE:
-        case TIME_MILLIS:
-        case TIME_MICROS:
-        case TIMESTAMP_MILLIS:
-        case TIMESTAMP_MICROS:
-        case ENUM:
-        case JSON:
-        case BSON:
-          // Currently any available column order is valid
-          break;
-        case INTERVAL:
-        default:
-          Preconditions.checkArgument(columnOrder.getColumnOrderName() == ColumnOrderName.UNDEFINED,
-              "The column order {} is not supported by {} ({})", columnOrder, primitive, getOriginalType());
-          break;
-      }
+    if (getLogicalTypeAnnotation() != null) {
+      Preconditions.checkArgument(getLogicalTypeAnnotation().isValidColumnOrder(columnOrder),
+        "The column order {} is not supported by {} ({})", columnOrder, primitive, getLogicalTypeAnnotation());
     }
     return columnOrder;
   }
@@ -533,7 +554,7 @@ private ColumnOrder requireValidColumnOrder(ColumnOrder columnOrder) {
    */
   @Override
   public PrimitiveType withId(int id) {
-    return new PrimitiveType(getRepetition(), primitive, length, getName(), getOriginalType(), decimalMeta, new ID(id),
+    return new PrimitiveType(getRepetition(), primitive, length, getName(), getLogicalTypeAnnotation(), new ID(id),
         columnOrder);
   }
 
@@ -712,7 +733,7 @@ protected Type union(Type toMerge, boolean strict) {
     if (strict) {
       // Can't merge primitive fields of different type names or different original types
       if (!primitive.equals(toMerge.asPrimitiveType().getPrimitiveTypeName()) ||
-          getOriginalType() != toMerge.getOriginalType()) {
+        !Objects.equals(getLogicalTypeAnnotation(), toMerge.getLogicalTypeAnnotation())) {
         reportSchemaMergeError(toMerge);
       }
 
@@ -734,7 +755,7 @@ protected Type union(Type toMerge, boolean strict) {
       builder.length(length);
     }
 
-    return builder.as(getOriginalType()).named(getName());
+    return builder.as(getLogicalTypeAnnotation()).named(getName());
   }
 
   /**
@@ -747,7 +768,7 @@ protected Type union(Type toMerge, boolean strict) {
    */
   @SuppressWarnings("unchecked")
   public <T> PrimitiveComparator<T> comparator() {
-    return (PrimitiveComparator<T>) getPrimitiveTypeName().comparator(getOriginalType());
+    return (PrimitiveComparator<T>) getPrimitiveTypeName().comparator(getLogicalTypeAnnotation());
   }
 
   /**
@@ -762,7 +783,7 @@ public ColumnOrder columnOrder() {
    */
   @SuppressWarnings("unchecked")
   public PrimitiveStringifier stringifier() {
-    OriginalType originalType = getOriginalType();
-    return originalType == null ? PrimitiveStringifier.DEFAULT_STRINGIFIER : originalType.stringifier(this);
+    LogicalTypeAnnotation logicalTypeAnnotation = getLogicalTypeAnnotation();
+    return logicalTypeAnnotation == null ? PrimitiveStringifier.DEFAULT_STRINGIFIER : logicalTypeAnnotation.valueStringifier(this);
   }
 }
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java
index 165a5acea9..a1cd736580 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java
@@ -21,6 +21,7 @@
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
+import java.util.Optional;
 
 import org.apache.parquet.Preconditions;
 import org.apache.parquet.schema.ColumnOrder.ColumnOrderName;
@@ -441,16 +442,27 @@ protected PrimitiveType build(String name) {
 
       // validate type annotations and required metadata
       if (logicalTypeAnnotation != null) {
-        OriginalType originalType = logicalTypeAnnotation.toOriginalType();
-        switch (originalType) {
-          case UTF8:
-          case JSON:
-          case BSON:
-            Preconditions.checkState(
-                primitiveType == PrimitiveTypeName.BINARY,
-                originalType.toString() + " can only annotate binary fields");
-            break;
-          case DECIMAL:
+        logicalTypeAnnotation.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<Boolean>() {
+          @Override
+          public Optional<Boolean> visit(LogicalTypeAnnotation.StringLogicalTypeAnnotation stringLogicalType) {
+            checkBinaryPrimitiveType(stringLogicalType);
+            return Optional.of(true);
+          }
+
+          @Override
+          public Optional<Boolean> visit(LogicalTypeAnnotation.JsonLogicalTypeAnnotation jsonLogicalType) {
+            checkBinaryPrimitiveType(jsonLogicalType);
+            return Optional.of(true);
+          }
+
+          @Override
+          public Optional<Boolean> visit(LogicalTypeAnnotation.BsonLogicalTypeAnnotation bsonLogicalType) {
+            checkBinaryPrimitiveType(bsonLogicalType);
+            return Optional.of(true);
+          }
+
+          @Override
+          public Optional<Boolean> visit(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) {
             Preconditions.checkState(
                 (primitiveType == PrimitiveTypeName.INT32) ||
                 (primitiveType == PrimitiveTypeName.INT64) ||
@@ -478,40 +490,89 @@ protected PrimitiveType build(String name) {
                   "FIXED(" + length + ") cannot store " + meta.getPrecision() +
                   " digits (max " + maxPrecision(length) + ")");
             }
-            break;
-          case DATE:
-          case TIME_MILLIS:
-          case UINT_8:
-          case UINT_16:
-          case UINT_32:
-          case INT_8:
-          case INT_16:
-          case INT_32:
-            Preconditions.checkState(primitiveType == PrimitiveTypeName.INT32,
-                originalType.toString() + " can only annotate INT32");
-            break;
-          case TIME_MICROS:
-          case TIMESTAMP_MILLIS:
-          case TIMESTAMP_MICROS:
-          case UINT_64:
-          case INT_64:
-            Preconditions.checkState(primitiveType == PrimitiveTypeName.INT64,
-                originalType.toString() + " can only annotate INT64");
-            break;
-          case INTERVAL:
+            return Optional.of(true);
+          }
+
+          @Override
+          public Optional<Boolean> visit(LogicalTypeAnnotation.DateLogicalTypeAnnotation dateLogicalType) {
+            checkInt32PrimitiveType(dateLogicalType); // date annotations are accepted on INT32 only
+            return Optional.of(true); // a present result keeps the orElseThrow chained on accept(...) from firing
+          }
+
+          @Override // TIME: the physical type that must be annotated depends on the unit
+          public Optional<Boolean> visit(LogicalTypeAnnotation.TimeLogicalTypeAnnotation timeLogicalType) {
+            LogicalTypeAnnotation.TimeUnit unit = timeLogicalType.getUnit();
+            switch (unit) {
+              case MILLIS: // millisecond times must annotate INT32
+                checkInt32PrimitiveType(timeLogicalType);
+                break;
+              case MICROS:
+              case NANOS: // microsecond and nanosecond times must annotate INT64
+                checkInt64PrimitiveType(timeLogicalType);
+                break;
+              default: // a validation failure, so report it with the type used for unsupported annotations
+                throw new IllegalStateException("Invalid time unit: " + unit);
+            }
+            return Optional.of(true); // a present result marks the annotation as handled
+          }
+
+          @Override // INT: the physical type that must be annotated depends on the bit width
+          public Optional<Boolean> visit(LogicalTypeAnnotation.IntLogicalTypeAnnotation intLogicalType) {
+            int bitWidth = intLogicalType.getBitWidth();
+            switch (bitWidth) {
+              case 8:
+              case 16:
+              case 32: // widths up to 32 bits must annotate INT32
+                checkInt32PrimitiveType(intLogicalType);
+                break;
+              case 64: // 64-bit integers must annotate INT64
+                checkInt64PrimitiveType(intLogicalType);
+                break;
+              default: // a validation failure, so report it with the type used for unsupported annotations
+                throw new IllegalStateException("Invalid bit width: " + bitWidth);
+            }
+            return Optional.of(true); // a present result marks the annotation as handled
+          }
+
+          @Override
+          public Optional<Boolean> visit(LogicalTypeAnnotation.TimestampLogicalTypeAnnotation timestampLogicalType) {
+            checkInt64PrimitiveType(timestampLogicalType); // timestamps are accepted on INT64 only, whatever the unit
+            return Optional.of(true); // a present result keeps the orElseThrow chained on accept(...) from firing
+          }
+
+          @Override
+          public Optional<Boolean> visit(LogicalTypeAnnotation.IntervalLogicalTypeAnnotation intervalLogicalType) {
             Preconditions.checkState(
                 (primitiveType == PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) &&
                 (length == 12),
                 "INTERVAL can only annotate FIXED_LEN_BYTE_ARRAY(12)");
-            break;
-          case ENUM:
+            return Optional.of(true);
+          }
+
+          @Override
+          public Optional<Boolean> visit(LogicalTypeAnnotation.EnumLogicalTypeAnnotation enumLogicalType) {
             Preconditions.checkState(
                 primitiveType == PrimitiveTypeName.BINARY,
                 "ENUM can only annotate binary fields");
-            break;
-          default:
-            throw new IllegalStateException(originalType + " can not be applied to a primitive type");
-        }
+            return Optional.of(true);
+          }
+
+          // Rejects the annotation (via checkState) unless the field being built is BINARY.
+          private void checkBinaryPrimitiveType(LogicalTypeAnnotation annotation) {
+            String message = annotation.toString() + " can only annotate binary fields";
+            Preconditions.checkState(primitiveType == PrimitiveTypeName.BINARY, message);
+          }
+
+          private void checkInt32PrimitiveType(LogicalTypeAnnotation annotation) { // INT32-only annotations
+            String message = annotation.toString() + " can only annotate INT32";
+            Preconditions.checkState(primitiveType == PrimitiveTypeName.INT32, message);
+          }
+
+          private void checkInt64PrimitiveType(LogicalTypeAnnotation annotation) { // INT64-only annotations
+            String message = annotation.toString() + " can only annotate INT64";
+            Preconditions.checkState(primitiveType == PrimitiveTypeName.INT64, message);
+          }
+        }).orElseThrow(() -> new IllegalStateException(logicalTypeAnnotation + " can not be applied to a primitive type"));
       }
 
       if (newLogicalTypeSet) {
@@ -531,7 +592,7 @@ private static long maxPrecision(int numBytes) {
 
     protected DecimalMetadata decimalMetadata() {
       DecimalMetadata meta = null;
-      if (OriginalType.DECIMAL == getOriginalType()) {
+      if (logicalTypeAnnotation instanceof LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) {
         LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalType = (LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) logicalTypeAnnotation;
         if (newLogicalTypeSet) {
           if (scaleAlreadySet) {
diff --git a/parquet-column/src/test/java/org/apache/parquet/column/impl/TestColumnReaderImpl.java b/parquet-column/src/test/java/org/apache/parquet/column/impl/TestColumnReaderImpl.java
index d2d78c43d1..35fddaf0b0 100644
--- a/parquet-column/src/test/java/org/apache/parquet/column/impl/TestColumnReaderImpl.java
+++ b/parquet-column/src/test/java/org/apache/parquet/column/impl/TestColumnReaderImpl.java
@@ -65,10 +65,10 @@ public void test() throws Exception {
     for (int i = 0; i < rows; i++) {
       columnWriterV2.write(Binary.fromString("bar" + i % 10), 0, 0);
       if ((i + 1) % 1000 == 0) {
-        columnWriterV2.writePage(i);
+        columnWriterV2.writePage();
       }
     }
-    columnWriterV2.writePage(rows);
+    columnWriterV2.writePage();
     columnWriterV2.finalizeColumnChunk();
     List<DataPage> pages = pageWriter.getPages();
     int valueCount = 0;
@@ -103,10 +103,10 @@ public void testOptional() throws Exception {
     for (int i = 0; i < rows; i++) {
       columnWriterV2.writeNull(0, 0);
       if ((i + 1) % 1000 == 0) {
-        columnWriterV2.writePage(i);
+        columnWriterV2.writePage();
       }
     }
-    columnWriterV2.writePage(rows);
+    columnWriterV2.writePage();
     columnWriterV2.finalizeColumnChunk();
     List<DataPage> pages = pageWriter.getPages();
     int valueCount = 0;
diff --git a/parquet-column/src/test/java/org/apache/parquet/column/mem/TestMemColumn.java b/parquet-column/src/test/java/org/apache/parquet/column/mem/TestMemColumn.java
index c855339c59..e5db38c945 100644
--- a/parquet-column/src/test/java/org/apache/parquet/column/mem/TestMemColumn.java
+++ b/parquet-column/src/test/java/org/apache/parquet/column/mem/TestMemColumn.java
@@ -20,12 +20,10 @@
 
 import static org.junit.Assert.assertEquals;
 
-import org.apache.parquet.column.ParquetProperties;
-import org.junit.Test;
-
 import org.apache.parquet.column.ColumnDescriptor;
 import org.apache.parquet.column.ColumnReader;
 import org.apache.parquet.column.ColumnWriter;
+import org.apache.parquet.column.ParquetProperties;
 import org.apache.parquet.column.impl.ColumnReadStoreImpl;
 import org.apache.parquet.column.impl.ColumnWriteStoreV1;
 import org.apache.parquet.column.page.mem.MemPageStore;
@@ -33,6 +31,7 @@
 import org.apache.parquet.io.api.Binary;
 import org.apache.parquet.schema.MessageType;
 import org.apache.parquet.schema.MessageTypeParser;
+import org.junit.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -47,6 +46,7 @@ public void testMemColumn() throws Exception {
     ColumnWriteStoreV1 memColumnsStore = newColumnWriteStoreImpl(memPageStore);
     ColumnWriter columnWriter = memColumnsStore.getColumnWriter(path);
     columnWriter.write(42l, 0, 0);
+    memColumnsStore.endRecord();
     memColumnsStore.flush();
 
     ColumnReader columnReader = getColumnReader(memPageStore, path, schema);
@@ -85,6 +85,7 @@ public void testMemColumnBinary() throws Exception {
 
     ColumnWriter columnWriter = memColumnsStore.getColumnWriter(path);
     columnWriter.write(Binary.fromString("42"), 0, 0);
+    memColumnsStore.endRecord();
     memColumnsStore.flush();
 
     ColumnReader columnReader = getColumnReader(memPageStore, path, mt);
@@ -108,6 +109,7 @@ public void testMemColumnSeveralPages() throws Exception {
     ColumnWriter columnWriter = memColumnsStore.getColumnWriter(path);
     for (int i = 0; i < 2000; i++) {
       columnWriter.write(42l, 0, 0);
+      memColumnsStore.endRecord();
     }
     memColumnsStore.flush();
 
@@ -136,12 +138,16 @@ public void testMemColumnSeveralPagesRepeated() throws Exception {
       int r = rs[i % rs.length];
       int d = ds[i % ds.length];
       LOG.debug("write i: {}", i);
+      if (i != 0 && r == 0) {
+        memColumnsStore.endRecord();
+      }
       if (d == 2) {
         columnWriter.write((long)i, r, d);
       } else {
         columnWriter.writeNull(r, d);
       }
     }
+    memColumnsStore.endRecord();
     memColumnsStore.flush();
 
     ColumnReader columnReader = getColumnReader(memPageStore, path, mt);
diff --git a/parquet-column/src/test/java/org/apache/parquet/column/page/mem/MemPageWriter.java b/parquet-column/src/test/java/org/apache/parquet/column/page/mem/MemPageWriter.java
index be3a0f9cb4..706b00110d 100644
--- a/parquet-column/src/test/java/org/apache/parquet/column/page/mem/MemPageWriter.java
+++ b/parquet-column/src/test/java/org/apache/parquet/column/page/mem/MemPageWriter.java
@@ -56,6 +56,12 @@ public void writePage(BytesInput bytesInput, int valueCount, Statistics statisti
     LOG.debug("page written for {} bytes and {} records", bytesInput.size(), valueCount);
   }
 
+  @Override
+  public void writePage(BytesInput bytesInput, int valueCount, int rowCount, Statistics<?> statistics,
+      Encoding rlEncoding, Encoding dlEncoding, Encoding valuesEncoding) throws IOException {
+    writePage(bytesInput, valueCount, statistics, rlEncoding, dlEncoding, valuesEncoding); // rowCount is not forwarded
+  }
+
   @Override
   public void writePageV2(int rowCount, int nullCount, int valueCount,
       BytesInput repetitionLevels, BytesInput definitionLevels,
diff --git a/parquet-column/src/test/java/org/apache/parquet/column/values/bitpacking/TestBitPackingColumn.java b/parquet-column/src/test/java/org/apache/parquet/column/values/bitpacking/TestBitPackingColumn.java
index 867af2876d..3ca3d0898d 100644
--- a/parquet-column/src/test/java/org/apache/parquet/column/values/bitpacking/TestBitPackingColumn.java
+++ b/parquet-column/src/test/java/org/apache/parquet/column/values/bitpacking/TestBitPackingColumn.java
@@ -183,6 +183,22 @@ private void validateEncodeDecode(int bitLength, int[] vals, String expected) th
       }
       LOG.debug("result: {}", TestBitPacking.toString(result));
       assertArrayEquals(type + " result: " + TestBitPacking.toString(result), vals, result);
+
+      // Test skipping
+      r.initFromPage(vals.length, ByteBufferInputStream.wrap(ByteBuffer.wrap(bytes)));
+      for (int i = 0; i < vals.length; i += 2) {
+        assertEquals(vals[i], r.readInteger());
+        r.skip();
+      }
+
+      // Test n-skipping
+      r.initFromPage(vals.length, ByteBufferInputStream.wrap(ByteBuffer.wrap(bytes)));
+      int skipCount;
+      for (int i = 0; i < vals.length; i += skipCount + 1) {
+        skipCount = (vals.length - i) / 2;
+        assertEquals(vals[i], r.readInteger());
+        r.skip(skipCount);
+      }
     }
   }
 
diff --git a/parquet-column/src/test/java/org/apache/parquet/column/values/bloomfilter/TestBlockSplitBloomFilter.java b/parquet-column/src/test/java/org/apache/parquet/column/values/bloomfilter/TestBlockSplitBloomFilter.java
index 542b9cd25a..0f85195706 100644
--- a/parquet-column/src/test/java/org/apache/parquet/column/values/bloomfilter/TestBlockSplitBloomFilter.java
+++ b/parquet-column/src/test/java/org/apache/parquet/column/values/bloomfilter/TestBlockSplitBloomFilter.java
@@ -16,46 +16,39 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-
 package org.apache.parquet.column.values.bloomfilter;
 
-
-  import java.io.File;
-  import java.io.FileInputStream;
-  import java.io.FileOutputStream;
-  import java.io.IOException;
-  import java.nio.ByteBuffer;
-  import java.nio.ByteOrder;
-  import java.util.ArrayList;
-  import java.util.List;
-  import java.util.Random;
-
-  import jdk.nashorn.internal.ir.Block;
-  import org.apache.parquet.column.values.RandomStr;
-  import org.apache.parquet.io.api.Binary;
-  import org.junit.Rule;
-  import org.junit.Test;
-  import org.junit.rules.TemporaryFolder;
-
-  import static org.junit.Assert.assertEquals;
-  import static org.junit.Assert.assertTrue;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+import org.apache.parquet.column.values.RandomStr;
+import org.apache.parquet.io.api.Binary;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 
 public class TestBlockSplitBloomFilter {
-
   @Test
   public void testConstructor () throws IOException {
     BloomFilter bloomFilter1 = new BlockSplitBloomFilter(0);
     assertEquals(bloomFilter1.getBitsetSize(), BlockSplitBloomFilter.MINIMUM_BLOOM_FILTER_BYTES);
-
     BloomFilter bloomFilter2 = new BlockSplitBloomFilter(256 * 1024 * 1024);
     assertEquals(bloomFilter2.getBitsetSize(), BlockSplitBloomFilter.MAXIMUM_BLOOM_FILTER_BYTES);
-
     BloomFilter bloomFilter3 = new BlockSplitBloomFilter(1000);
     assertEquals(bloomFilter3.getBitsetSize(), 1024);
   }
 
   @Rule
   public final TemporaryFolder temp = new TemporaryFolder();
+
   /*
    * This test is used to test basic operations including inserting, finding and
    * serializing and de-serializing.
@@ -73,11 +66,9 @@ public void testBasic () throws IOException {
     FileOutputStream fileOutputStream = new FileOutputStream(testFile);
     bloomFilter.writeTo(fileOutputStream);
     fileOutputStream.close();
-
     FileInputStream fileInputStream = new FileInputStream(testFile);
 
     byte[] value = new byte[4];
-
     fileInputStream.read(value);
     int length = ByteBuffer.wrap(value).order(ByteOrder.LITTLE_ENDIAN).getInt();
     assertEquals(length, 1024);
@@ -93,7 +84,6 @@ public void testBasic () throws IOException {
     byte[] bitset = new byte[length];
     fileInputStream.read(bitset);
     bloomFilter = new BlockSplitBloomFilter(bitset);
-
     for(int i = 0; i < testStrings.length; i++) {
       assertTrue(bloomFilter.find(bloomFilter.hash(Binary.fromString(testStrings[i]))));
     }
@@ -122,7 +112,7 @@ public void testFPP() throws IOException {
         exist ++;
       }
     }
-
+    
     // The exist should be probably less than 1000 according FPP 0.01.
     assertTrue(exist < totalCount * FPP);
   }
diff --git a/parquet-column/src/test/java/org/apache/parquet/column/values/delta/DeltaBinaryPackingValuesWriterForIntegerTest.java b/parquet-column/src/test/java/org/apache/parquet/column/values/delta/DeltaBinaryPackingValuesWriterForIntegerTest.java
index df99e3c740..c69e0ff9c1 100644
--- a/parquet-column/src/test/java/org/apache/parquet/column/values/delta/DeltaBinaryPackingValuesWriterForIntegerTest.java
+++ b/parquet-column/src/test/java/org/apache/parquet/column/values/delta/DeltaBinaryPackingValuesWriterForIntegerTest.java
@@ -212,6 +212,23 @@ public void shouldSkip() throws IOException {
     }
   }
 
+  // Reads one value, then skip(n)s with n = half of the values not yet visited, until all are consumed.
+  @Test
+  public void shouldSkipN() throws IOException {
+    int[] data = new int[5 * blockSize + 1];
+    for (int idx = 0; idx < data.length; idx++) {
+      data[idx] = idx * 32;
+    }
+    writeData(data);
+    reader = new DeltaBinaryPackingValuesReader();
+    reader.initFromPage(data.length, writer.getBytes().toInputStream()); // pass the real value count
+    for (int pos = 0, toSkip; pos < data.length; pos += toSkip + 1) {
+      toSkip = (data.length - pos) / 2; // reaches 0 on the last value, so nothing is skipped past the end
+      assertEquals(data[pos], reader.readInteger());
+      reader.skip(toSkip);
+    }
+  }
+
   @Test
   public void shouldReset() throws IOException {
     shouldReadWriteWhenDataIsNotAlignedWithBlock();
diff --git a/parquet-column/src/test/java/org/apache/parquet/column/values/delta/DeltaBinaryPackingValuesWriterForLongTest.java b/parquet-column/src/test/java/org/apache/parquet/column/values/delta/DeltaBinaryPackingValuesWriterForLongTest.java
index 65ac819e8c..ca12bbdb82 100644
--- a/parquet-column/src/test/java/org/apache/parquet/column/values/delta/DeltaBinaryPackingValuesWriterForLongTest.java
+++ b/parquet-column/src/test/java/org/apache/parquet/column/values/delta/DeltaBinaryPackingValuesWriterForLongTest.java
@@ -211,6 +211,23 @@ public void shouldSkip() throws IOException {
     }
   }
 
+  // Reads one value, then skip(n)s with n = half of the values not yet visited, until all are consumed.
+  @Test
+  public void shouldSkipN() throws IOException {
+    long[] data = new long[5 * blockSize + 1];
+    for (int idx = 0; idx < data.length; idx++) {
+      data[idx] = idx * 32;
+    }
+    writeData(data);
+    reader = new DeltaBinaryPackingValuesReader();
+    reader.initFromPage(data.length, writer.getBytes().toInputStream()); // pass the real value count
+    for (int pos = 0, toSkip; pos < data.length; pos += toSkip + 1) {
+      toSkip = (data.length - pos) / 2; // reaches 0 on the last value, so nothing is skipped past the end
+      assertEquals(data[pos], reader.readLong());
+      reader.skip(toSkip);
+    }
+  }
+
   @Test
   public void shouldReset() throws IOException {
     shouldReadWriteWhenDataIsNotAlignedWithBlock();
diff --git a/parquet-column/src/test/java/org/apache/parquet/column/values/deltalengthbytearray/TestDeltaLengthByteArray.java b/parquet-column/src/test/java/org/apache/parquet/column/values/deltalengthbytearray/TestDeltaLengthByteArray.java
index d214a88980..6c974307b7 100644
--- a/parquet-column/src/test/java/org/apache/parquet/column/values/deltalengthbytearray/TestDeltaLengthByteArray.java
+++ b/parquet-column/src/test/java/org/apache/parquet/column/values/deltalengthbytearray/TestDeltaLengthByteArray.java
@@ -64,6 +64,30 @@ public void testRandomStrings() throws IOException {
     }
   }
 
+  @Test
+  public void testSkipWithRandomStrings() throws IOException {
+    DeltaLengthByteArrayValuesWriter valuesWriter = getDeltaLengthByteArrayValuesWriter();
+    String[] samples = Utils.getRandomStringSamples(1000, 32);
+    Utils.writeData(valuesWriter, samples);
+
+    // Single-value skip: assert the even positions and skip() over the odd ones.
+    DeltaLengthByteArrayValuesReader singleSkipReader = new DeltaLengthByteArrayValuesReader();
+    singleSkipReader.initFromPage(samples.length, valuesWriter.getBytes().toInputStream());
+    for (int pos = 0; pos < samples.length; pos += 2) {
+      Assert.assertEquals(Binary.fromString(samples[pos]), singleSkipReader.readBytes());
+      singleSkipReader.skip();
+    }
+
+    // Multi-value skip on a fresh reader: after each read, skip(n) with n = half of the values not yet visited.
+    DeltaLengthByteArrayValuesReader bulkSkipReader = new DeltaLengthByteArrayValuesReader();
+    bulkSkipReader.initFromPage(samples.length, valuesWriter.getBytes().toInputStream());
+    for (int pos = 0, toSkip; pos < samples.length; pos += toSkip + 1) {
+      toSkip = (samples.length - pos) / 2;
+      Assert.assertEquals(Binary.fromString(samples[pos]), bulkSkipReader.readBytes());
+      bulkSkipReader.skip(toSkip);
+    }
+  }
+
   @Test
   public void testLengths() throws IOException {
     DeltaLengthByteArrayValuesWriter writer = getDeltaLengthByteArrayValuesWriter();
diff --git a/parquet-column/src/test/java/org/apache/parquet/column/values/deltastrings/TestDeltaByteArray.java b/parquet-column/src/test/java/org/apache/parquet/column/values/deltastrings/TestDeltaByteArray.java
index c13a3a2b87..a5a22a8dbf 100644
--- a/parquet-column/src/test/java/org/apache/parquet/column/values/deltastrings/TestDeltaByteArray.java
+++ b/parquet-column/src/test/java/org/apache/parquet/column/values/deltastrings/TestDeltaByteArray.java
@@ -58,6 +58,13 @@ public void testRandomStringsWithSkip() throws Exception {
     assertReadWriteWithSkip(writer, reader, randvalues);
   }
 
+  // Writes randvalues with a DeltaByteArrayWriter and reads them back through the skip(n) helper.
+  @Test
+  public void testRandomStringsWithSkipN() throws Exception {
+    DeltaByteArrayWriter deltaWriter = new DeltaByteArrayWriter(64 * 1024, 64 * 1024, new DirectByteBufferAllocator());
+    assertReadWriteWithSkipN(deltaWriter, new DeltaByteArrayReader(), randvalues);
+  }
+
   @Test
   public void testLengths() throws IOException {
     DeltaByteArrayWriter writer = new DeltaByteArrayWriter(64 * 1024, 64 * 1024, new DirectByteBufferAllocator());
@@ -99,6 +106,18 @@ private void assertReadWriteWithSkip(DeltaByteArrayWriter writer, DeltaByteArray
     }
   }
 
+  // Writes vals, then reads them back alternating one readBytes() with a skip(n) over half of the unvisited values.
+  private void assertReadWriteWithSkipN(DeltaByteArrayWriter writer, DeltaByteArrayReader reader, String[] vals) throws Exception {
+    Utils.writeData(writer, vals);
+    reader.initFromPage(vals.length, writer.getBytes().toInputStream());
+
+    for (int pos = 0, toSkip; pos < vals.length; pos += toSkip + 1) {
+      toSkip = (vals.length - pos) / 2;
+      Assert.assertEquals(Binary.fromString(vals[pos]), reader.readBytes());
+      reader.skip(toSkip);
+    }
+  }
+
   @Test
   public void testWriterReset() throws Exception {
     DeltaByteArrayWriter writer = new DeltaByteArrayWriter(64 * 1024, 64 * 1024, new DirectByteBufferAllocator());
diff --git a/parquet-column/src/test/java/org/apache/parquet/column/values/dictionary/TestDictionary.java b/parquet-column/src/test/java/org/apache/parquet/column/values/dictionary/TestDictionary.java
index ef2b7215dd..ba3f9034ad 100644
--- a/parquet-column/src/test/java/org/apache/parquet/column/values/dictionary/TestDictionary.java
+++ b/parquet-column/src/test/java/org/apache/parquet/column/values/dictionary/TestDictionary.java
@@ -99,6 +99,47 @@ public void testBinaryDictionary() throws IOException {
     checkDistinct(COUNT, bytes3, cr2, "c");
   }
 
+  @Test
+  public void testSkipInBinaryDictionary() throws Exception {
+    ValuesWriter cw = newPlainBinaryDictionaryValuesWriter(1000, 10000);
+    writeRepeated(100, cw, "a"); // read back below as "a" + i % 10
+    writeDistinct(100, cw, "b"); // read back below as "b" + i
+    assertEquals(PLAIN_DICTIONARY, cw.getEncoding());
+
+    // Test skip and skip-n with dictionary encoding
+    ByteBufferInputStream stream = cw.getBytes().toInputStream();
+    DictionaryValuesReader cr = initDicReader(cw, BINARY);
+    cr.initFromPage(200, stream); // 200 = the 100 "a" values plus the 100 "b" values
+    for (int i = 0; i < 100; i += 2) { // single skip(): assert the even positions, skip the odd ones
+      assertEquals(Binary.fromString("a" + i % 10), cr.readBytes());
+      cr.skip();
+    }
+    int skipCount; // also reused by the last loop of this test
+    for (int i = 0; i < 100; i += skipCount + 1) { // skip(n): n is half of the "b" values not yet visited
+      skipCount = (100 - i) / 2;
+      assertEquals(Binary.fromString("b" + i), cr.readBytes());
+      cr.skip(skipCount);
+    }
+
+    // Ensure fallback: after 1000 more distinct values the writer must report PLAIN
+    writeDistinct(1000, cw, "c");
+    assertEquals(PLAIN, cw.getEncoding());
+
+    // Test skip and skip-n with plain encoding (after fallback)
+    ValuesReader plainReader = new BinaryPlainValuesReader();
+    plainReader.initFromPage(1200, cw.getBytes().toInputStream()); // 1200 = 200 earlier values + 1000 "c" values
+    plainReader.skip(200); // step over the "a" and "b" values written before the fallback
+    for (int i = 0; i < 100; i += 2) { // single skip() over "c0".."c99"
+      assertEquals("c" + i, plainReader.readBytes().toStringUsingUTF8());
+      plainReader.skip();
+    }
+    for (int i = 100; i < 1000; i += skipCount + 1) { // continues at "c100", where the loop above stopped
+      skipCount = (1000 - i) / 2;
+      assertEquals(Binary.fromString("c" + i), plainReader.readBytes());
+      plainReader.skip(skipCount);
+    }
+  }
+
   @Test
   public void testBinaryDictionaryFallBack() throws IOException {
     int slabSize = 100;
@@ -234,6 +275,22 @@ private void roundTripLong(FallbackValuesWriter<PlainLongDictionaryValuesWriter,
     for (long i = 0; i < 100; i++) {
       assertEquals(i, reader.readLong());
     }
+
+    // Test skip with plain encoding
+    reader.initFromPage(100, cw.getBytes().toInputStream());
+    for (int i = 0; i < 100; i += 2) {
+      assertEquals(i, reader.readLong());
+      reader.skip();
+    }
+
+    // Test skip-n with plain encoding
+    reader.initFromPage(100, cw.getBytes().toInputStream());
+    int skipCount;
+    for (int i = 0; i < 100; i += skipCount + 1) {
+      skipCount = (100 - i) / 2;
+      assertEquals(i, reader.readLong());
+      reader.skip(skipCount);
+    }
   }
 
   @Test
@@ -305,6 +362,22 @@ private void roundTripDouble(FallbackValuesWriter<PlainDoubleDictionaryValuesWri
     for (double i = 0; i < 100; i++) {
       assertEquals(i, reader.readDouble(), 0.00001);
     }
+
+    // Test skip with plain encoding
+    reader.initFromPage(100, cw.getBytes().toInputStream());
+    for (int i = 0; i < 100; i += 2) {
+      assertEquals(i, reader.readDouble(), 0.0);
+      reader.skip();
+    }
+
+    // Test skip-n with plain encoding
+    reader.initFromPage(100, cw.getBytes().toInputStream());
+    int skipCount;
+    for (int i = 0; i < 100; i += skipCount + 1) {
+      skipCount = (100 - i) / 2;
+      assertEquals(i, reader.readDouble(), 0.0);
+      reader.skip(skipCount);
+    }
   }
 
   @Test
@@ -376,6 +449,22 @@ private void roundTripInt(FallbackValuesWriter<PlainIntegerDictionaryValuesWrite
     for (int i = 0; i < 100; i++) {
       assertEquals(i, reader.readInteger());
     }
+
+    // Test skip with plain encoding
+    reader.initFromPage(100, cw.getBytes().toInputStream());
+    for (int i = 0; i < 100; i += 2) {
+      assertEquals(i, reader.readInteger());
+      reader.skip();
+    }
+
+    // Test skip-n with plain encoding
+    reader.initFromPage(100, cw.getBytes().toInputStream());
+    int skipCount;
+    for (int i = 0; i < 100; i += skipCount + 1) {
+      skipCount = (100 - i) / 2;
+      assertEquals(i, reader.readInteger());
+      reader.skip(skipCount);
+    }
   }
 
   @Test
@@ -447,6 +536,22 @@ private void roundTripFloat(FallbackValuesWriter<PlainFloatDictionaryValuesWrite
     for (float i = 0; i < 100; i++) {
       assertEquals(i, reader.readFloat(), 0.00001);
     }
+
+    // Test skip with plain encoding
+    reader.initFromPage(100, cw.getBytes().toInputStream());
+    for (int i = 0; i < 100; i += 2) {
+      assertEquals(i, reader.readFloat(), 0.0f);
+      reader.skip();
+    }
+
+    // Test skip-n with plain encoding
+    reader.initFromPage(100, cw.getBytes().toInputStream());
+    int skipCount;
+    for (int i = 0; i < 100; i += skipCount + 1) {
+      skipCount = (100 - i) / 2;
+      assertEquals(i, reader.readFloat(), 0.0f);
+      reader.skip(skipCount);
+    }
   }
 
   @Test
diff --git a/parquet-column/src/test/java/org/apache/parquet/filter2/predicate/TestValidTypeMap.java b/parquet-column/src/test/java/org/apache/parquet/filter2/predicate/TestValidTypeMap.java
index d44136998d..6e19dcadca 100644
--- a/parquet-column/src/test/java/org/apache/parquet/filter2/predicate/TestValidTypeMap.java
+++ b/parquet-column/src/test/java/org/apache/parquet/filter2/predicate/TestValidTypeMap.java
@@ -1,4 +1,4 @@
-/* 
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -28,7 +28,6 @@
 import org.apache.parquet.filter2.predicate.Operators.FloatColumn;
 import org.apache.parquet.filter2.predicate.Operators.IntColumn;
 import org.apache.parquet.filter2.predicate.Operators.LongColumn;
-import org.apache.parquet.schema.OriginalType;
 import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
 
 import static org.junit.Assert.assertEquals;
diff --git a/parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestBinaryTruncator.java b/parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestBinaryTruncator.java
new file mode 100644
index 0000000000..c3e3d85749
--- /dev/null
+++ b/parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestBinaryTruncator.java
@@ -0,0 +1,285 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.internal.column.columnindex;
+
+import static org.apache.parquet.schema.OriginalType.BSON;
+import static org.apache.parquet.schema.OriginalType.DECIMAL;
+import static org.apache.parquet.schema.OriginalType.ENUM;
+import static org.apache.parquet.schema.OriginalType.INTERVAL;
+import static org.apache.parquet.schema.OriginalType.JSON;
+import static org.apache.parquet.schema.OriginalType.UTF8;
+import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
+import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY;
+import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT96;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertSame;
+import static org.junit.Assert.assertTrue;
+
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CodingErrorAction;
+import java.nio.charset.StandardCharsets;
+import java.util.Comparator;
+import java.util.Random;
+
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.PrimitiveStringifier;
+import org.apache.parquet.schema.PrimitiveType;
+import org.apache.parquet.schema.Types;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Tests for {@link BinaryTruncator}
+ */
+public class TestBinaryTruncator {
+
+  private static final Logger LOG = LoggerFactory.getLogger(TestBinaryTruncator.class);
+  private static final PrimitiveStringifier HEXA_STRINGIFIER = Types.required(BINARY)
+      .named("dummy_type").stringifier();
+  private static final Random RANDOM = new Random(42); // fixed seed to keep the drawn lengths reproducible
+  private static final CharsetDecoder UTF8_DECODER = StandardCharsets.UTF_8.newDecoder();
+  static {
+    UTF8_DECODER.onMalformedInput(CodingErrorAction.REPORT); // invalid input shall fail the decoding
+    UTF8_DECODER.onUnmappableCharacter(CodingErrorAction.REPORT);
+  }
+
+  // The highest characters that UTF-8 encodes on 1, 2, 3 and 4 bytes (the 4 bytes one is given as a surrogate pair)
+  private static final String UTF8_1BYTE_MAX_CHAR = "\u007F";
+  private static final String UTF8_2BYTES_MAX_CHAR = "\u07FF";
+  private static final String UTF8_3BYTES_MAX_CHAR = "\uFFFF";
+  private static final String UTF8_4BYTES_MAX_CHAR = "\uDBFF\uDFFF";
+
+  @Test
+  public void testNonStringTruncate() {
+    PrimitiveType decimalType = Types.required(BINARY).as(DECIMAL).precision(10).scale(2).named("test_binary_decimal");
+    BinaryTruncator decimalTruncator = BinaryTruncator.getTruncator(decimalType);
+    Binary high = binary(0xFF, 0xFE, 0xFD, 0xFC, 0xFB, 0xFA);
+    assertEquals(high, decimalTruncator.truncateMin(high, 2)); // decimal values are expected back untruncated
+    Binary low = binary(0x01, 0x02, 0x03, 0x04, 0x05, 0x06);
+    assertEquals(low, decimalTruncator.truncateMax(low, 2));
+  }
+
+  @Test
+  public void testContractNonStringTypes() {
+    boolean strict = false; // actual truncation is not required for these types
+    testTruncator(Types.required(FIXED_LEN_BYTE_ARRAY).length(8).as(DECIMAL).precision(18).scale(4)
+        .named("test_fixed_decimal"), strict);
+    testTruncator(Types.required(FIXED_LEN_BYTE_ARRAY).length(12).as(INTERVAL).named("test_fixed_interval"), strict);
+    testTruncator(Types.required(BINARY).as(DECIMAL).precision(10).scale(2).named("test_binary_decimal"), strict);
+    testTruncator(Types.required(INT96).named("test_int96"), strict);
+  }
+
+  @Test
+  public void testStringTruncate() {
+    BinaryTruncator truncator = BinaryTruncator.getTruncator(Types.required(BINARY).as(UTF8).named("test_utf8"));
+
+    // Truncate 1 byte characters: min is the plain prefix, max is the prefix with its last character incremented
+    assertEquals(Binary.fromString("abc"), truncator.truncateMin(Binary.fromString("abcdef"), 3));
+    assertEquals(Binary.fromString("abd"), truncator.truncateMax(Binary.fromString("abcdef"), 3));
+
+    // Truncate 1-2 bytes characters; the target length (9) is "inside" a UTF-8 character -> cut before that character
+    assertEquals(Binary.fromString("árvízt"), truncator.truncateMin(Binary.fromString("árvíztűrő"), 9));
+    assertEquals(Binary.fromString("árvízu"), truncator.truncateMax(Binary.fromString("árvíztűrő"), 9));
+
+    // Truncate highest UTF-8 values -> unable to increment, so truncateMax is expected to give back the whole input
+    assertEquals(
+        Binary.fromString(
+            UTF8_1BYTE_MAX_CHAR
+                + UTF8_2BYTES_MAX_CHAR),
+        truncator.truncateMin(Binary.fromString(
+            UTF8_1BYTE_MAX_CHAR
+                + UTF8_2BYTES_MAX_CHAR
+                + UTF8_3BYTES_MAX_CHAR
+                + UTF8_4BYTES_MAX_CHAR),
+            5));
+    assertEquals(
+        Binary.fromString(
+            UTF8_1BYTE_MAX_CHAR
+                + UTF8_2BYTES_MAX_CHAR
+                + UTF8_3BYTES_MAX_CHAR
+                + UTF8_4BYTES_MAX_CHAR),
+        truncator.truncateMax(Binary.fromString(
+            UTF8_1BYTE_MAX_CHAR
+                + UTF8_2BYTES_MAX_CHAR
+                + UTF8_3BYTES_MAX_CHAR
+                + UTF8_4BYTES_MAX_CHAR),
+            5));
+
+    // Truncate highest UTF-8 values at the end -> increment the first possible character ("a" -> "b" here)
+    assertEquals(
+        Binary.fromString(
+            UTF8_1BYTE_MAX_CHAR
+                + UTF8_2BYTES_MAX_CHAR
+                + "b"
+                + UTF8_3BYTES_MAX_CHAR),
+        truncator.truncateMax(Binary.fromString(
+            UTF8_1BYTE_MAX_CHAR
+                + UTF8_2BYTES_MAX_CHAR
+                + "a"
+                + UTF8_3BYTES_MAX_CHAR
+                + UTF8_4BYTES_MAX_CHAR),
+            10));
+
+    // Truncate invalid UTF-8 values -> truncate without validity check (the expected values are byte-wise results)
+    assertEquals(binary(0xFF, 0xFE, 0xFD), truncator.truncateMin(binary(0xFF, 0xFE, 0xFD, 0xFC, 0xFB, 0xFA), 3));
+    assertEquals(binary(0xFF, 0xFE, 0xFE), truncator.truncateMax(binary(0xFF, 0xFE, 0xFD, 0xFC, 0xFB, 0xFA), 3));
+    assertEquals(binary(0xFF, 0xFE, 0xFE, 0x00, 0x00), truncator.truncateMax(binary(0xFF, 0xFE, 0xFD, 0xFF, 0xFF, 0xFF), 5));
+  }
+
+  @Test
+  public void testContractStringTypes() {
+    for (PrimitiveType stringLikeType : new PrimitiveType[] { Types.required(BINARY).named("test_binary"),
+        Types.required(BINARY).as(UTF8).named("test_utf8"), Types.required(BINARY).as(ENUM).named("test_enum"),
+        Types.required(BINARY).as(JSON).named("test_json"), Types.required(BINARY).as(BSON).named("test_bson"),
+        Types.required(FIXED_LEN_BYTE_ARRAY).length(5).named("test_fixed") }) {
+      testTruncator(stringLikeType, true); // strict: actual truncation is required for these types
+    }
+  }
+
+  /**
+   * Checks the contract of the truncator of the specified type on a fixed set of values.
+   * The order of the checks is kept as is: {@code checkContract} draws the target lengths from the shared, seeded
+   * {@code RANDOM}, so reordering would change the lengths being tested.
+   *
+   * @param type   the type to get the truncator and the comparator for
+   * @param strict whether the truncator of the type is required to actually truncate the values
+   */
+  private void testTruncator(PrimitiveType type, boolean strict) {
+    BinaryTruncator underTest = BinaryTruncator.getTruncator(type);
+    Comparator<Binary> order = type.comparator();
+
+    // Values expected to be truncatable for both min and max (if strict)
+    checkContract(underTest, order, Binary.fromString("aaaaaaaaaa"), strict, strict);
+    checkContract(underTest, order, Binary.fromString("árvíztűrő tükörfúrógép"), strict, strict);
+    checkContract(underTest, order, Binary.fromString("aaaaaaaaaa" + UTF8_3BYTES_MAX_CHAR), strict, strict);
+    checkContract(underTest, order, Binary.fromString("a" + UTF8_3BYTES_MAX_CHAR + UTF8_1BYTE_MAX_CHAR), strict,
+        strict);
+
+    // A value that is not valid UTF-8
+    byte[] notUtf8 = { (byte) 0xFE, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, (byte) 0xFF };
+    checkContract(underTest, order, Binary.fromConstantByteArray(notUtf8), strict, strict);
+
+    // Edge case: zero length -> unable to truncate
+    checkContract(underTest, order, Binary.fromString(""), false, false);
+
+    // Edge case: containing only UTF-8 max characters -> unable to truncate for max
+    String onlyMaxChars = UTF8_1BYTE_MAX_CHAR + UTF8_4BYTES_MAX_CHAR + UTF8_3BYTES_MAX_CHAR + UTF8_4BYTES_MAX_CHAR
+        + UTF8_2BYTES_MAX_CHAR + UTF8_3BYTES_MAX_CHAR + UTF8_3BYTES_MAX_CHAR + UTF8_1BYTE_MAX_CHAR
+        + UTF8_2BYTES_MAX_CHAR + UTF8_3BYTES_MAX_CHAR + UTF8_4BYTES_MAX_CHAR;
+    checkContract(underTest, order, Binary.fromString(onlyMaxChars), strict, false);
+
+    // Edge case: non-UTF-8; max bytes -> unable to truncate for max
+    Binary onlyMaxBytes = binary(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
+    checkContract(underTest, order, onlyMaxBytes, strict, false);
+  }
+
+  // Checks the contract of truncator for value with target lengths equal to, above and below the length of value
+  // strict means actual truncation is required and the truncated value is a valid UTF-8 string
+  private void checkContract(BinaryTruncator truncator, Comparator<Binary> comparator, Binary value, boolean strictMin,
+      boolean strictMax) {
+    final int fullLength = value.length();
+    final int shorter = fullLength - 1;
+    // Edge cases: returning the original value if no truncation is required
+    assertSame(value, truncator.truncateMin(value, fullLength));
+    assertSame(value, truncator.truncateMax(value, fullLength));
+    assertSame(value, truncator.truncateMin(value, random(fullLength + 1, 2 * fullLength + 1)));
+    assertSame(value, truncator.truncateMax(value, random(fullLength + 1, 2 * fullLength + 1)));
+
+    if (shorter >= 1) {
+      checkMinContract(truncator, comparator, value, shorter, strictMin);
+      checkMaxContract(truncator, comparator, value, shorter, strictMax);
+      checkMinContract(truncator, comparator, value, random(1, shorter), strictMin);
+      checkMaxContract(truncator, comparator, value, random(1, shorter), strictMax);
+    }
+
+    // Edge case: possible to truncate min value to 0 length if original value is not empty
+    checkMinContract(truncator, comparator, value, 0, strictMin);
+    // Edge case: impossible to truncate max value to 0 length -> returning the original value
+    assertSame(value, truncator.truncateMax(value, 0));
+  }
+
+  private void checkMinContract(BinaryTruncator truncator, Comparator<Binary> comparator, Binary value, int length,
+      boolean strict) {
+    Binary truncated = truncator.truncateMin(value, length); // the lower bound candidate to be validated
+    LOG.debug("\"{}\" --truncMin({})--> \"{}\" [{}]", value.toStringUsingUTF8(), length, truncated.toStringUsingUTF8(),
+        HEXA_STRINGIFIER.stringify(truncated));
+    assertTrue("truncatedMin(value) should be <= than value", comparator.compare(truncated, value) <= 0);
+    assertFalse("length of truncateMin(value) should not be > than the length of value",
+        truncated.length() > value.length());
+    if (isValidUtf8(value)) { // truncating a valid UTF-8 value shall not make it invalid
+      checkValidUtf8(truncated);
+    }
+    if (strict) { // actual truncation is required
+      assertTrue("length of truncateMin(value) should be < than the length of value",
+          truncated.length() < value.length());
+    }
+  }
+
+  private void checkMaxContract(BinaryTruncator truncator, Comparator<Binary> comparator, Binary value, int length,
+      boolean strict) {
+    Binary truncated = truncator.truncateMax(value, length); // the upper bound candidate to be validated
+    LOG.debug("\"{}\" --truncMax({})--> \"{}\" [{}]", value.toStringUsingUTF8(), length, truncated.toStringUsingUTF8(),
+        HEXA_STRINGIFIER.stringify(truncated));
+    assertTrue("truncatedMax(value) should be >= than value", comparator.compare(truncated, value) >= 0);
+    assertFalse("length of truncateMax(value) should not be > than the length of value",
+        truncated.length() > value.length());
+    if (isValidUtf8(value)) { // truncating a valid UTF-8 value shall not make it invalid
+      checkValidUtf8(truncated);
+    }
+    if (strict) { // actual truncation is required
+      assertTrue("length of truncateMax(value) should be < than the length of value",
+          truncated.length() < value.length());
+    }
+  }
+
+  private static boolean isValidUtf8(Binary binary) {
+    try {
+      UTF8_DECODER.decode(binary.toByteBuffer()); // only the outcome matters, the decoded text is dropped
+    } catch (CharacterCodingException malformed) {
+      return false;
+    }
+    return true;
+  }
+
+  private static void checkValidUtf8(Binary binary) {
+    try {
+      UTF8_DECODER.decode(binary.toByteBuffer()); // the decoded text is not needed, only the potential failure
+    } catch (CharacterCodingException decodingFailure) {
+      throw new AssertionError("Truncated value should be a valid UTF-8 string", decodingFailure);
+    }
+  }
+
+  private static int random(int min, int max) { // both bounds are inclusive
+    return min + RANDOM.nextInt(max - min + 1);
+  }
+
+  private static Binary binary(int... unsignedBytes) {
+    byte[] bytes = new byte[unsignedBytes.length];
+    int index = 0;
+    for (int unsigned : unsignedBytes) {
+      assert (unsigned & ~0xFF) == 0; // each value has to fit into a single unsigned byte
+      bytes[index++] = (byte) unsigned;
+    }
+    return Binary.fromConstantByteArray(bytes);
+  }
+
+}
diff --git a/parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestBoundaryOrder.java b/parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestBoundaryOrder.java
new file mode 100644
index 0000000000..3d2a924217
--- /dev/null
+++ b/parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestBoundaryOrder.java
@@ -0,0 +1,487 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.internal.column.columnindex;
+
+import java.nio.ByteBuffer;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.PrimitiveIterator;
+import java.util.Random;
+import java.util.function.Function;
+import java.util.stream.IntStream;
+
+import org.apache.parquet.column.statistics.Statistics;
+import org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder.ColumnIndexBase;
+import org.apache.parquet.schema.PrimitiveType;
+import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
+import org.apache.parquet.schema.Types;
+import org.junit.Assert;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import it.unimi.dsi.fastutil.ints.IntArrayList;
+import it.unimi.dsi.fastutil.ints.IntList;
+
+/**
+ * Tests the operator implementations in {@link BoundaryOrder}.
+ */
+public class TestBoundaryOrder {
+  private static class SpyValueComparatorBuilder extends ColumnIndexBase<Integer> {
+    private SpyValueComparatorBuilder() {
+      super(TYPE);
+    }
+
+    SpyValueComparator build(ColumnIndexBase<?>.ValueComparator comparator) { // wraps the comparator in a new spy
+      return new SpyValueComparator(comparator);
+    }
+
+    class SpyValueComparator extends ValueComparator { // counts the compares passed to the wrapped comparator
+      private final ColumnIndexBase<?>.ValueComparator wrapped;
+      private int comparisons;
+
+      SpyValueComparator(ColumnIndexBase<?>.ValueComparator delegate) {
+        wrapped = delegate;
+      }
+
+      int getCompareCount() {
+        return comparisons;
+      }
+
+      @Override
+      int arrayLength() {
+        return wrapped.arrayLength();
+      }
+
+      @Override
+      int translate(int arrayIndex) {
+        return wrapped.translate(arrayIndex);
+      }
+
+      @Override
+      int compareValueToMin(int arrayIndex) {
+        comparisons++;
+        return wrapped.compareValueToMin(arrayIndex);
+      }
+
+      @Override
+      int compareValueToMax(int arrayIndex) {
+        comparisons++;
+        return wrapped.compareValueToMax(arrayIndex);
+      }
+    }
+
+    @Override
+    ByteBuffer getMinValueAsBytes(int arrayIndex) {
+      throw new Error("Shall never be invoked");
+    }
+
+    @Override
+    ByteBuffer getMaxValueAsBytes(int arrayIndex) {
+      throw new Error("Shall never be invoked");
+    }
+
+    @Override
+    String getMinValueAsString(int arrayIndex) {
+      throw new Error("Shall never be invoked");
+    }
+
+    @Override
+    String getMaxValueAsString(int arrayIndex) {
+      throw new Error("Shall never be invoked");
+    }
+
+    @Override
+    <T extends Comparable<T>> org.apache.parquet.filter2.predicate.Statistics<T> createStats(int arrayIndex) {
+      throw new Error("Shall never be invoked");
+    }
+
+    @Override
+    ColumnIndexBase<Integer>.ValueComparator createValueComparator(Object value) {
+      throw new Error("Shall never be invoked");
+    }
+  }
+
+  private static class ExecStats { // time and compare count measurements of the reference and the tested operators
+    private long linearTime; // nanoseconds
+    private long binaryTime; // nanoseconds
+    private int linearCompareCount;
+    private int binaryCompareCount;
+    private int execCount; // number of measurements merged in by add()
+
+    IntList measureLinear(Function<ColumnIndexBase<?>.ValueComparator, PrimitiveIterator.OfInt> op,
+        ColumnIndexBase<?>.ValueComparator comparator) {
+      IntList list = new IntArrayList(comparator.arrayLength());
+      SpyValueComparatorBuilder.SpyValueComparator spyComparator = SPY_COMPARATOR_BUILDER.build(comparator);
+      long start = System.nanoTime();
+      op.apply(spyComparator).forEachRemaining((int value) -> list.add(value));
+      linearTime += System.nanoTime() - start; // accumulated just like the compare count below
+      linearCompareCount += spyComparator.getCompareCount();
+      return list;
+    }
+
+    IntList measureBinary(Function<ColumnIndexBase<?>.ValueComparator, PrimitiveIterator.OfInt> op,
+        ColumnIndexBase<?>.ValueComparator comparator) {
+      IntList list = new IntArrayList(comparator.arrayLength());
+      SpyValueComparatorBuilder.SpyValueComparator spyComparator = SPY_COMPARATOR_BUILDER.build(comparator);
+      long start = System.nanoTime();
+      op.apply(spyComparator).forEachRemaining((int value) -> list.add(value));
+      binaryTime += System.nanoTime() - start; // accumulated just like the compare count below
+      binaryCompareCount += spyComparator.getCompareCount();
+      return list;
+    }
+
+    void add(ExecStats stats) { // merges the measurements of one execution into this summary
+      linearTime += stats.linearTime;
+      linearCompareCount += stats.linearCompareCount;
+      binaryTime += stats.binaryTime;
+      binaryCompareCount += stats.binaryCompareCount;
+      ++execCount;
+    }
+
+    @Override
+    public String toString() { // the averages are per merged execution, so at least one add() call is expected
+      double linearMs = linearTime / 1_000_000.0;
+      double binaryMs = binaryTime / 1_000_000.0;
+      return String.format(
+          "Linear search: %.2fms (avg: %.6fms); number of compares: %d (avg: %d) [100.00%%]%n"
+              + "Binary search: %.2fms (avg: %.6fms); number of compares: %d (avg: %d) [%.2f%%]",
+          linearMs, linearMs / execCount, linearCompareCount, linearCompareCount / execCount,
+          binaryMs, binaryMs / execCount, binaryCompareCount, binaryCompareCount / execCount,
+          100.0 * binaryCompareCount / linearCompareCount);
+    }
+  }
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(TestBoundaryOrder.class);
+  private static final PrimitiveType TYPE = Types.required(PrimitiveTypeName.INT32).named("test_int32");
+  private static final int FROM = -15; // the lowest value in the fixed pages below
+  private static final int TO = 15; // the highest value in the fixed pages below
+  private static final ColumnIndexBase<?> ASCENDING;
+  private static final ColumnIndexBase<?> DESCENDING;
+  private static final int SINGLE_FROM = -1;
+  private static final int SINGLE_TO = 1;
+  private static final ColumnIndexBase<?> SINGLE; // built from one page only: [SINGLE_FROM, SINGLE_TO]
+  private static final Random RANDOM = new Random(42); // fixed seed
+  private static final int RAND_FROM = -2000;
+  private static final int RAND_TO = 2000;
+  private static final int RAND_COUNT = 2000; // number of pages added to each of the random indexes
+  private static final ColumnIndexBase<?> RAND_ASCENDING;
+  private static final ColumnIndexBase<?> RAND_DESCENDING;
+  private static final SpyValueComparatorBuilder SPY_COMPARATOR_BUILDER = new SpyValueComparatorBuilder();
+  static {
+    ColumnIndexBuilder builder = ColumnIndexBuilder.getBuilder(TYPE, Integer.MAX_VALUE);
+    builder.add(stats(FROM, -12));
+    builder.add(stats(-10, -8));
+    builder.add(stats(-8, -4));
+    builder.add(stats(-6, -4));
+    builder.add(stats(-6, -3));
+    builder.add(stats(-6, -3));
+    builder.add(stats(-6, -3));
+    builder.add(stats(0, 3));
+    builder.add(stats(3, 5));
+    builder.add(stats(3, 5));
+    builder.add(stats(5, 8));
+    builder.add(stats(10, TO));
+    ASCENDING = (ColumnIndexBase<?>) builder.build();
+
+    builder = ColumnIndexBuilder.getBuilder(TYPE, Integer.MAX_VALUE); // the same pages in reverse order
+    builder.add(stats(10, TO));
+    builder.add(stats(5, 8));
+    builder.add(stats(3, 5));
+    builder.add(stats(3, 5));
+    builder.add(stats(0, 3));
+    builder.add(stats(-6, -3));
+    builder.add(stats(-6, -3));
+    builder.add(stats(-6, -3));
+    builder.add(stats(-6, -4));
+    builder.add(stats(-8, -4));
+    builder.add(stats(-10, -8));
+    builder.add(stats(FROM, -12));
+    DESCENDING = (ColumnIndexBase<?>) builder.build();
+
+    builder = ColumnIndexBuilder.getBuilder(TYPE, Integer.MAX_VALUE);
+    builder.add(stats(SINGLE_FROM, SINGLE_TO));
+    SINGLE = (ColumnIndexBase<?>) builder.build();
+
+    builder = ColumnIndexBuilder.getBuilder(TYPE, Integer.MAX_VALUE);
+    for (PrimitiveIterator.OfInt it = IntStream.generate(() -> RANDOM.nextInt(RAND_TO - RAND_FROM + 1) + RAND_FROM)
+        .limit(RAND_COUNT * 2).sorted().iterator(); it.hasNext();) {
+      builder.add(stats(it.nextInt(), it.nextInt())); // two consecutive sorted values make up one page
+    }
+    RAND_ASCENDING = (ColumnIndexBase<?>) builder.build();
+
+    builder = ColumnIndexBuilder.getBuilder(TYPE, Integer.MAX_VALUE);
+    for (Iterator<Integer> it = IntStream.generate(() -> RANDOM.nextInt(RAND_TO - RAND_FROM + 1) + RAND_FROM)
+        .limit(RAND_COUNT * 2).mapToObj(Integer::valueOf).sorted(Collections.reverseOrder()).iterator(); it
+            .hasNext();) {
+      builder.add(stats(it.next(), it.next())); // the pairs arrive as (larger, smaller) here
+    }
+    RAND_DESCENDING = (ColumnIndexBase<?>) builder.build();
+  }
+
+  private static Statistics<?> stats(int min, int max) {
+    Statistics<?> pageStats = Statistics.createStats(TYPE); // fed with the two boundary values only
+    pageStats.updateStats(min);
+    pageStats.updateStats(max);
+    return pageStats;
+  }
+
+  // Runs both operators with the same comparator, requires them to give the same indexes and returns the
+  // measurements of the two executions
+  private static ExecStats validateOperator(String msg,
+      Function<ColumnIndexBase<?>.ValueComparator, PrimitiveIterator.OfInt> validatorOp,
+      Function<ColumnIndexBase<?>.ValueComparator, PrimitiveIterator.OfInt> actualOp,
+      ColumnIndexBase<?>.ValueComparator comparator) {
+    ExecStats measurements = new ExecStats();
+    IntList referenceIndexes = measurements.measureLinear(validatorOp, comparator);
+    IntList testedIndexes = measurements.measureBinary(actualOp, comparator);
+
+    Assert.assertEquals(msg, referenceIndexes, testedIndexes);
+    return measurements;
+  }
+
+  @Test
+  public void testEq() {
+    for (int i = FROM - 1; i <= TO + 1; ++i) { // from one below to one above the values of the fixed pages
+      validateOperator("Mismatching page indexes for value " + i + " with ASCENDING order",
+          BoundaryOrder.UNORDERED::eq, // gives the reference result
+          BoundaryOrder.ASCENDING::eq,
+          ASCENDING.createValueComparator(i));
+      validateOperator("Mismatching page indexes for value " + i + " with DESCENDING order",
+          BoundaryOrder.UNORDERED::eq,
+          BoundaryOrder.DESCENDING::eq,
+          DESCENDING.createValueComparator(i));
+    }
+    for (int i = SINGLE_FROM - 1; i <= SINGLE_TO + 1; ++i) { // the single page index is checked with both orders
+      ColumnIndexBase<?>.ValueComparator singleComparator = SINGLE.createValueComparator(i);
+      validateOperator("Mismatching page indexes for value " + i + " with ASCENDING order",
+          BoundaryOrder.UNORDERED::eq,
+          BoundaryOrder.ASCENDING::eq,
+          singleComparator);
+      validateOperator("Mismatching page indexes for value " + i + " with DESCENDING order",
+          BoundaryOrder.UNORDERED::eq,
+          BoundaryOrder.DESCENDING::eq,
+          singleComparator);
+    }
+    ExecStats stats = new ExecStats(); // sums up the measurements for the summary logged at the end
+    for (int i = RAND_FROM - 1; i <= RAND_TO + 1; ++i) {
+      stats.add(validateOperator("Mismatching page indexes for value " + i + " with ASCENDING order",
+          BoundaryOrder.UNORDERED::eq,
+          BoundaryOrder.ASCENDING::eq,
+          RAND_ASCENDING.createValueComparator(i)));
+      stats.add(validateOperator("Mismatching page indexes for value " + i + " with DESCENDING order",
+          BoundaryOrder.UNORDERED::eq,
+          BoundaryOrder.DESCENDING::eq,
+          RAND_DESCENDING.createValueComparator(i)));
+    }
+    LOGGER.info("Executed eq on random data (page count: {}, values searched: {}):\n{}", RAND_COUNT,
+        RAND_TO - RAND_FROM + 3, stats); // the loop covers RAND_FROM - 1 .. RAND_TO + 1, both inclusive
+  }
+
+  @Test
+  public void testGt() {
+    for (int i = FROM - 1; i <= TO + 1; ++i) { // from one below to one above the values of the fixed pages
+      validateOperator("Mismatching page indexes for value " + i + " with ASCENDING order",
+          BoundaryOrder.UNORDERED::gt, // gives the reference result
+          BoundaryOrder.ASCENDING::gt,
+          ASCENDING.createValueComparator(i));
+      validateOperator("Mismatching page indexes for value " + i + " with DESCENDING order",
+          BoundaryOrder.UNORDERED::gt,
+          BoundaryOrder.DESCENDING::gt,
+          DESCENDING.createValueComparator(i));
+    }
+    for (int i = SINGLE_FROM - 1; i <= SINGLE_TO + 1; ++i) { // the single page index is checked with both orders
+      ColumnIndexBase<?>.ValueComparator singleComparator = SINGLE.createValueComparator(i);
+      validateOperator("Mismatching page indexes for value " + i + " with ASCENDING order",
+          BoundaryOrder.UNORDERED::gt,
+          BoundaryOrder.ASCENDING::gt,
+          singleComparator);
+      validateOperator("Mismatching page indexes for value " + i + " with DESCENDING order",
+          BoundaryOrder.UNORDERED::gt,
+          BoundaryOrder.DESCENDING::gt,
+          singleComparator);
+    }
+    ExecStats stats = new ExecStats(); // sums up the measurements for the summary logged at the end
+    for (int i = RAND_FROM - 1; i <= RAND_TO + 1; ++i) {
+      stats.add(validateOperator("Mismatching page indexes for value " + i + " with ASCENDING order",
+          BoundaryOrder.UNORDERED::gt,
+          BoundaryOrder.ASCENDING::gt,
+          RAND_ASCENDING.createValueComparator(i)));
+      stats.add(validateOperator("Mismatching page indexes for value " + i + " with DESCENDING order",
+          BoundaryOrder.UNORDERED::gt,
+          BoundaryOrder.DESCENDING::gt,
+          RAND_DESCENDING.createValueComparator(i)));
+    }
+    LOGGER.info("Executed gt on random data (page count: {}, values searched: {}):\n{}", RAND_COUNT,
+        RAND_TO - RAND_FROM + 3, stats); // the loop covers RAND_FROM - 1 .. RAND_TO + 1, both inclusive
+  }
+
+  @Test
+  public void testGtEq() {
+    for (int i = FROM - 1; i <= TO + 1; ++i) { // from one below to one above the values of the fixed pages
+      validateOperator("Mismatching page indexes for value " + i + " with ASCENDING order",
+          BoundaryOrder.UNORDERED::gtEq, // gives the reference result
+          BoundaryOrder.ASCENDING::gtEq,
+          ASCENDING.createValueComparator(i));
+      validateOperator("Mismatching page indexes for value " + i + " with DESCENDING order",
+          BoundaryOrder.UNORDERED::gtEq,
+          BoundaryOrder.DESCENDING::gtEq,
+          DESCENDING.createValueComparator(i));
+    }
+    for (int i = SINGLE_FROM - 1; i <= SINGLE_TO + 1; ++i) { // the single page index is checked with both orders
+      ColumnIndexBase<?>.ValueComparator singleComparator = SINGLE.createValueComparator(i);
+      validateOperator("Mismatching page indexes for value " + i + " with ASCENDING order",
+          BoundaryOrder.UNORDERED::gtEq,
+          BoundaryOrder.ASCENDING::gtEq,
+          singleComparator);
+      validateOperator("Mismatching page indexes for value " + i + " with DESCENDING order",
+          BoundaryOrder.UNORDERED::gtEq,
+          BoundaryOrder.DESCENDING::gtEq,
+          singleComparator);
+    }
+    ExecStats stats = new ExecStats(); // sums up the measurements for the summary logged at the end
+    for (int i = RAND_FROM - 1; i <= RAND_TO + 1; ++i) {
+      stats.add(validateOperator("Mismatching page indexes for value " + i + " with ASCENDING order",
+          BoundaryOrder.UNORDERED::gtEq,
+          BoundaryOrder.ASCENDING::gtEq,
+          RAND_ASCENDING.createValueComparator(i)));
+      stats.add(validateOperator("Mismatching page indexes for value " + i + " with DESCENDING order",
+          BoundaryOrder.UNORDERED::gtEq,
+          BoundaryOrder.DESCENDING::gtEq,
+          RAND_DESCENDING.createValueComparator(i)));
+    }
+    LOGGER.info("Executed gtEq on random data (page count: {}, values searched: {}):\n{}", RAND_COUNT,
+        RAND_TO - RAND_FROM + 3, stats); // the loop covers RAND_FROM - 1 .. RAND_TO + 1, both inclusive
+  }
+
+  @Test
+  public void testLt() {
+    for (int i = FROM - 1; i <= TO + 1; ++i) { // from one below to one above the values of the fixed pages
+      validateOperator("Mismatching page indexes for value " + i + " with ASCENDING order",
+          BoundaryOrder.UNORDERED::lt, // gives the reference result
+          BoundaryOrder.ASCENDING::lt,
+          ASCENDING.createValueComparator(i));
+      validateOperator("Mismatching page indexes for value " + i + " with DESCENDING order",
+          BoundaryOrder.UNORDERED::lt,
+          BoundaryOrder.DESCENDING::lt,
+          DESCENDING.createValueComparator(i));
+    }
+    for (int i = SINGLE_FROM - 1; i <= SINGLE_TO + 1; ++i) { // the single page index is checked with both orders
+      ColumnIndexBase<?>.ValueComparator singleComparator = SINGLE.createValueComparator(i);
+      validateOperator("Mismatching page indexes for value " + i + " with ASCENDING order",
+          BoundaryOrder.UNORDERED::lt,
+          BoundaryOrder.ASCENDING::lt,
+          singleComparator);
+      validateOperator("Mismatching page indexes for value " + i + " with DESCENDING order",
+          BoundaryOrder.UNORDERED::lt,
+          BoundaryOrder.DESCENDING::lt,
+          singleComparator);
+    }
+    ExecStats stats = new ExecStats(); // sums up the measurements for the summary logged at the end
+    for (int i = RAND_FROM - 1; i <= RAND_TO + 1; ++i) {
+      stats.add(validateOperator("Mismatching page indexes for value " + i + " with ASCENDING order",
+          BoundaryOrder.UNORDERED::lt,
+          BoundaryOrder.ASCENDING::lt,
+          RAND_ASCENDING.createValueComparator(i)));
+      stats.add(validateOperator("Mismatching page indexes for value " + i + " with DESCENDING order",
+          BoundaryOrder.UNORDERED::lt,
+          BoundaryOrder.DESCENDING::lt,
+          RAND_DESCENDING.createValueComparator(i)));
+    }
+    LOGGER.info("Executed lt on random data (page count: {}, values searched: {}):\n{}", RAND_COUNT,
+        RAND_TO - RAND_FROM + 3, stats); // the loop covers RAND_FROM - 1 .. RAND_TO + 1, both inclusive
+  }
+
+  @Test
+  public void testLtEq() {
+    for (int i = FROM - 1; i <= TO + 1; ++i) { // from one below to one above the values of the fixed pages
+      validateOperator("Mismatching page indexes for value " + i + " with ASCENDING order",
+          BoundaryOrder.UNORDERED::ltEq, // gives the reference result
+          BoundaryOrder.ASCENDING::ltEq,
+          ASCENDING.createValueComparator(i));
+      validateOperator("Mismatching page indexes for value " + i + " with DESCENDING order",
+          BoundaryOrder.UNORDERED::ltEq,
+          BoundaryOrder.DESCENDING::ltEq,
+          DESCENDING.createValueComparator(i));
+    }
+    for (int i = SINGLE_FROM - 1; i <= SINGLE_TO + 1; ++i) { // the single page index is checked with both orders
+      ColumnIndexBase<?>.ValueComparator singleComparator = SINGLE.createValueComparator(i);
+      validateOperator("Mismatching page indexes for value " + i + " with ASCENDING order",
+          BoundaryOrder.UNORDERED::ltEq,
+          BoundaryOrder.ASCENDING::ltEq,
+          singleComparator);
+      validateOperator("Mismatching page indexes for value " + i + " with DESCENDING order",
+          BoundaryOrder.UNORDERED::ltEq,
+          BoundaryOrder.DESCENDING::ltEq,
+          singleComparator);
+    }
+    ExecStats stats = new ExecStats(); // sums up the measurements for the summary logged at the end
+    for (int i = RAND_FROM - 1; i <= RAND_TO + 1; ++i) {
+      stats.add(validateOperator("Mismatching page indexes for value " + i + " with ASCENDING order",
+          BoundaryOrder.UNORDERED::ltEq,
+          BoundaryOrder.ASCENDING::ltEq,
+          RAND_ASCENDING.createValueComparator(i)));
+      stats.add(validateOperator("Mismatching page indexes for value " + i + " with DESCENDING order",
+          BoundaryOrder.UNORDERED::ltEq,
+          BoundaryOrder.DESCENDING::ltEq,
+          RAND_DESCENDING.createValueComparator(i)));
+    }
+    LOGGER.info("Executed ltEq on random data (page count: {}, values searched: {}):\n{}", RAND_COUNT,
+        RAND_TO - RAND_FROM + 3, stats); // the loop covers RAND_FROM - 1 .. RAND_TO + 1, both inclusive
+  }
+
+  @Test
+  public void testNotEq() {
+    for (int i = FROM - 1; i <= TO + 1; ++i) { // from one below to one above the values of the fixed pages
+      validateOperator("Mismatching page indexes for value " + i + " with ASCENDING order",
+          BoundaryOrder.UNORDERED::notEq, // gives the reference result
+          BoundaryOrder.ASCENDING::notEq,
+          ASCENDING.createValueComparator(i));
+      validateOperator("Mismatching page indexes for value " + i + " with DESCENDING order",
+          BoundaryOrder.UNORDERED::notEq,
+          BoundaryOrder.DESCENDING::notEq,
+          DESCENDING.createValueComparator(i));
+    }
+    for (int i = FROM - 1; i <= TO + 1; ++i) { // a superset of SINGLE_FROM - 1 .. SINGLE_TO + 1; kept as it was
+      ColumnIndexBase<?>.ValueComparator singleComparator = SINGLE.createValueComparator(i);
+      validateOperator("Mismatching page indexes for value " + i + " with ASCENDING order",
+          BoundaryOrder.UNORDERED::notEq,
+          BoundaryOrder.ASCENDING::notEq,
+          singleComparator);
+      validateOperator("Mismatching page indexes for value " + i + " with DESCENDING order",
+          BoundaryOrder.UNORDERED::notEq,
+          BoundaryOrder.DESCENDING::notEq,
+          singleComparator);
+    }
+    ExecStats stats = new ExecStats(); // sums up the measurements for the summary logged at the end
+    for (int i = RAND_FROM - 1; i <= RAND_TO + 1; ++i) {
+      stats.add(validateOperator("Mismatching page indexes for value " + i + " with ASCENDING order",
+          BoundaryOrder.UNORDERED::notEq,
+          BoundaryOrder.ASCENDING::notEq,
+          RAND_ASCENDING.createValueComparator(i)));
+      stats.add(validateOperator("Mismatching page indexes for value " + i + " with DESCENDING order",
+          BoundaryOrder.UNORDERED::notEq,
+          BoundaryOrder.DESCENDING::notEq,
+          RAND_DESCENDING.createValueComparator(i)));
+    }
+    LOGGER.info("Executed notEq on random data (page count: {}, values searched: {}):\n{}", RAND_COUNT,
+        RAND_TO - RAND_FROM + 3, stats); // the loop covers RAND_FROM - 1 .. RAND_TO + 1, both inclusive
+  }
+
+}
diff --git a/parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestColumnIndexBuilder.java b/parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestColumnIndexBuilder.java
new file mode 100644
index 0000000000..5a3947c980
--- /dev/null
+++ b/parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestColumnIndexBuilder.java
@@ -0,0 +1,1546 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.internal.column.columnindex;
+
+import static java.util.Arrays.asList;
+import static org.apache.parquet.filter2.predicate.FilterApi.binaryColumn;
+import static org.apache.parquet.filter2.predicate.FilterApi.booleanColumn;
+import static org.apache.parquet.filter2.predicate.FilterApi.doubleColumn;
+import static org.apache.parquet.filter2.predicate.FilterApi.eq;
+import static org.apache.parquet.filter2.predicate.FilterApi.floatColumn;
+import static org.apache.parquet.filter2.predicate.FilterApi.gt;
+import static org.apache.parquet.filter2.predicate.FilterApi.gtEq;
+import static org.apache.parquet.filter2.predicate.FilterApi.intColumn;
+import static org.apache.parquet.filter2.predicate.FilterApi.longColumn;
+import static org.apache.parquet.filter2.predicate.FilterApi.lt;
+import static org.apache.parquet.filter2.predicate.FilterApi.ltEq;
+import static org.apache.parquet.filter2.predicate.FilterApi.notEq;
+import static org.apache.parquet.filter2.predicate.FilterApi.userDefined;
+import static org.apache.parquet.filter2.predicate.LogicalInverter.invert;
+import static org.apache.parquet.schema.OriginalType.DECIMAL;
+import static org.apache.parquet.schema.OriginalType.UINT_8;
+import static org.apache.parquet.schema.OriginalType.UTF8;
+import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
+import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BOOLEAN;
+import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.DOUBLE;
+import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FLOAT;
+import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;
+import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64;
+import static org.hamcrest.CoreMatchers.instanceOf;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertThat;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.math.BigDecimal;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+
+import org.apache.parquet.bytes.BytesUtils;
+import org.apache.parquet.column.statistics.Statistics;
+import org.apache.parquet.filter2.predicate.FilterPredicate;
+import org.apache.parquet.filter2.predicate.Operators.BinaryColumn;
+import org.apache.parquet.filter2.predicate.Operators.BooleanColumn;
+import org.apache.parquet.filter2.predicate.Operators.DoubleColumn;
+import org.apache.parquet.filter2.predicate.Operators.FloatColumn;
+import org.apache.parquet.filter2.predicate.Operators.IntColumn;
+import org.apache.parquet.filter2.predicate.Operators.LongColumn;
+import org.apache.parquet.filter2.predicate.UserDefinedPredicate;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.PrimitiveType;
+import org.apache.parquet.schema.Types;
+import org.junit.Test;
+
+/**
+ * Tests for {@link ColumnIndexBuilder}.
+ */
+public class TestColumnIndexBuilder {
+
+  /** User-defined predicate for tests: keeps null values and values equal to {@code decimalBinary("0.0")}. */
+  public static class BinaryDecimalIsNullOrZeroUdp extends UserDefinedPredicate<Binary> {
+    private static final Binary ZERO = decimalBinary("0.0");
+    @Override
+    public boolean keep(Binary value) {
+      return value == null || value.equals(ZERO);
+    }
+    @Override
+    public boolean canDrop(org.apache.parquet.filter2.predicate.Statistics<Binary> statistics) {
+      Comparator<Binary> order = statistics.getComparator();
+      boolean minAboveZero = order.compare(statistics.getMin(), ZERO) > 0;
+      return minAboveZero || order.compare(statistics.getMax(), ZERO) < 0;
+    }
+    @Override
+    public boolean inverseCanDrop(org.apache.parquet.filter2.predicate.Statistics<Binary> statistics) {
+      Comparator<Binary> order = statistics.getComparator();
+      boolean minIsZero = order.compare(statistics.getMin(), ZERO) == 0;
+      return minIsZero && order.compare(statistics.getMax(), ZERO) == 0;
+    }
+  }
+
+  /** User-defined predicate for tests: keeps non-null, non-empty binaries whose first byte is {@code 'B'}. */
+  public static class BinaryUtf8StartsWithB extends UserDefinedPredicate<Binary> {
+    private static final Binary B = stringBinary("B");
+    private static final Binary C = stringBinary("C");
+    @Override
+    public boolean keep(Binary value) {
+      return value != null && value.length() > 0 && value.getBytesUnsafe()[0] == 'B';
+    }
+    @Override
+    public boolean canDrop(org.apache.parquet.filter2.predicate.Statistics<Binary> statistics) {
+      Comparator<Binary> order = statistics.getComparator();
+      boolean minAtOrAfterC = order.compare(statistics.getMin(), C) >= 0;
+      return minAtOrAfterC || order.compare(statistics.getMax(), B) < 0;
+    }
+    @Override
+    public boolean inverseCanDrop(org.apache.parquet.filter2.predicate.Statistics<Binary> statistics) {
+      Comparator<Binary> order = statistics.getComparator();
+      boolean minAtOrAfterB = order.compare(statistics.getMin(), B) >= 0;
+      return minAtOrAfterB && order.compare(statistics.getMax(), C) < 0;
+    }
+  }
+
+  /** User-defined predicate for tests: keeps null values and {@code true}. */
+  public static class BooleanIsTrueOrNull extends UserDefinedPredicate<Boolean> {
+    @Override
+    public boolean keep(Boolean value) {
+      return value == null || value.booleanValue();
+    }
+    @Override
+    public boolean canDrop(org.apache.parquet.filter2.predicate.Statistics<Boolean> statistics) {
+      Comparator<Boolean> order = statistics.getComparator();
+      return order.compare(statistics.getMax(), Boolean.TRUE) != 0;
+    }
+    @Override
+    public boolean inverseCanDrop(org.apache.parquet.filter2.predicate.Statistics<Boolean> statistics) {
+      return statistics.getComparator().compare(statistics.getMin(), Boolean.TRUE) == 0;
+    }
+  }
+
+  /** User-defined predicate for tests: keeps non-null doubles that are equal to their own floor. */
+  public static class DoubleIsInteger extends UserDefinedPredicate<Double> {
+    @Override
+    public boolean keep(Double value) {
+      return value != null && Math.floor(value) == value;
+    }
+    @Override
+    public boolean canDrop(org.apache.parquet.filter2.predicate.Statistics<Double> statistics) {
+      double low = statistics.getMin();
+      double high = statistics.getMax();
+      Comparator<Double> order = statistics.getComparator();
+      double lowFloor = Math.floor(low);
+      return order.compare(lowFloor, Math.floor(high)) == 0 && order.compare(lowFloor, low) != 0
+          && order.compare(Math.floor(high), high) != 0;
+    }
+    @Override
+    public boolean inverseCanDrop(org.apache.parquet.filter2.predicate.Statistics<Double> statistics) {
+      double low = statistics.getMin();
+      double high = statistics.getMax();
+      Comparator<Double> order = statistics.getComparator();
+      return order.compare(low, high) == 0 && order.compare(Math.floor(low), low) == 0;
+    }
+  }
+
+  /** User-defined predicate for tests: keeps non-null floats that are equal to their own floor. */
+  public static class FloatIsInteger extends UserDefinedPredicate<Float> {
+    /** {@link Math#floor(double)} narrowed back to float so the result can be handed to the Float comparator. */
+    private static float floor(float value) {
+      return (float) Math.floor(value);
+    }
+    @Override
+    public boolean keep(Float value) {
+      return value != null && Math.floor(value) == value;
+    }
+    @Override
+    public boolean canDrop(org.apache.parquet.filter2.predicate.Statistics<Float> statistics) {
+      float low = statistics.getMin();
+      float high = statistics.getMax();
+      Comparator<Float> order = statistics.getComparator();
+      float lowFloor = floor(low);
+      return order.compare(lowFloor, floor(high)) == 0 && order.compare(lowFloor, low) != 0
+          && order.compare(floor(high), high) != 0;
+    }
+    @Override
+    public boolean inverseCanDrop(org.apache.parquet.filter2.predicate.Statistics<Float> statistics) {
+      float low = statistics.getMin();
+      float high = statistics.getMax();
+      Comparator<Float> order = statistics.getComparator();
+      return order.compare(low, high) == 0 && order.compare(floor(low), low) == 0;
+    }
+  }
+
+  /** User-defined predicate for tests: keeps non-null ints divisible by 3; drop checks use only the page min/max. */
+  public static class IntegerIsDivisableWith3 extends UserDefinedPredicate<Integer> {
+    @Override
+    public boolean keep(Integer value) {
+      return value != null && value % 3 == 0;
+    }
+    /** Droppable only if min and max lie strictly inside one bucket (3k, 3k + 3), which holds no multiple of 3. */
+    @Override
+    public boolean canDrop(org.apache.parquet.filter2.predicate.Statistics<Integer> statistics) {
+      int min = statistics.getMin();
+      int max = statistics.getMax();
+      return min % 3 != 0 && max % 3 != 0 && Math.floorDiv(min, 3) == Math.floorDiv(max, 3);
+    }
+    /** The inverted predicate can drop a page only if min equals max and that value is a multiple of 3. */
+    @Override
+    public boolean inverseCanDrop(org.apache.parquet.filter2.predicate.Statistics<Integer> statistics) {
+      int min = statistics.getMin();
+      return min == statistics.getMax() && min % 3 == 0;
+    }
+  }
+
+  /** User-defined predicate for tests: keeps non-null longs divisible by 3; drop checks use only the page min/max. */
+  public static class LongIsDivisableWith3 extends UserDefinedPredicate<Long> {
+    @Override
+    public boolean keep(Long value) {
+      return value != null && value % 3 == 0;
+    }
+    /** Droppable only if min and max lie strictly inside one bucket (3k, 3k + 3), which holds no multiple of 3. */
+    @Override
+    public boolean canDrop(org.apache.parquet.filter2.predicate.Statistics<Long> statistics) {
+      long min = statistics.getMin();
+      long max = statistics.getMax();
+      return min % 3 != 0 && max % 3 != 0 && Math.floorDiv(min, 3L) == Math.floorDiv(max, 3L);
+    }
+    /** The inverted predicate can drop a page only if min equals max and that value is a multiple of 3. */
+    @Override
+    public boolean inverseCanDrop(org.apache.parquet.filter2.predicate.Statistics<Long> statistics) {
+      long min = statistics.getMin();
+      return min == statistics.getMax() && min % 3 == 0;
+    }
+  }
+
+  @Test
+  public void testBuildBinaryDecimal() { // BINARY/DECIMAL pages: checks the built index and filtering per boundary order
+    PrimitiveType type = Types.required(BINARY).as(DECIMAL).precision(12).scale(2).named("test_binary_decimal");
+    ColumnIndexBuilder builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
+    assertThat(builder, instanceOf(BinaryColumnIndexBuilder.class));
+    assertNull(builder.build()); // nothing added yet, so no column index is expected
+    BinaryColumn col = binaryColumn("test_col"); // column used only to build the filter predicates below
+
+    StatsBuilder sb = new StatsBuilder(); // scenario 1: page bounds in no particular order
+    builder.add(sb.stats(type, null, null)); // one page per add(); each null argument shows up in the null counts below
+    builder.add(sb.stats(type, decimalBinary("-0.17"), decimalBinary("1234567890.12")));
+    builder.add(sb.stats(type, decimalBinary("-234.23"), null, null, null));
+    builder.add(sb.stats(type, null, null, null));
+    builder.add(sb.stats(type, decimalBinary("-9999293.23"), decimalBinary("2348978.45")));
+    builder.add(sb.stats(type, null, null, null, null));
+    builder.add(sb.stats(type, null, null));
+    builder.add(sb.stats(type, decimalBinary("87656273")));
+    assertEquals(8, builder.getPageCount());
+    assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize()); // both builders must agree on the min/max size
+    ColumnIndex columnIndex = builder.build();
+    assertEquals(BoundaryOrder.UNORDERED, columnIndex.getBoundaryOrder());
+    assertCorrectNullCounts(columnIndex, 2, 0, 3, 3, 0, 4, 2, 0);
+    assertCorrectNullPages(columnIndex, true, false, false, true, false, true, true, false);
+    assertCorrectValues(columnIndex.getMaxValues(),
+        null,
+        decimalBinary("1234567890.12"),
+        decimalBinary("-234.23"),
+        null,
+        decimalBinary("2348978.45"),
+        null,
+        null,
+        decimalBinary("87656273"));
+    assertCorrectValues(columnIndex.getMinValues(),
+        null,
+        decimalBinary("-0.17"),
+        decimalBinary("-234.23"),
+        null,
+        decimalBinary("-9999293.23"),
+        null,
+        null,
+        decimalBinary("87656273"));
+    assertCorrectFiltering(columnIndex, eq(col, decimalBinary("0.0")), 1, 4); // trailing ints: presumably the expected page indexes
+    assertCorrectFiltering(columnIndex, eq(col, null), 0, 2, 3, 5, 6);
+    assertCorrectFiltering(columnIndex, notEq(col, decimalBinary("87656273")), 0, 1, 2, 3, 4, 5, 6);
+    assertCorrectFiltering(columnIndex, notEq(col, null), 1, 2, 4, 7);
+    assertCorrectFiltering(columnIndex, gt(col, decimalBinary("2348978.45")), 1);
+    assertCorrectFiltering(columnIndex, gtEq(col, decimalBinary("2348978.45")), 1, 4);
+    assertCorrectFiltering(columnIndex, lt(col, decimalBinary("-234.23")), 4);
+    assertCorrectFiltering(columnIndex, ltEq(col, decimalBinary("-234.23")), 2, 4);
+    assertCorrectFiltering(columnIndex, userDefined(col, BinaryDecimalIsNullOrZeroUdp.class), 0, 1, 2, 3, 4, 5, 6);
+    assertCorrectFiltering(columnIndex, invert(userDefined(col, BinaryDecimalIsNullOrZeroUdp.class)), 1, 2, 4, 7);
+
+    builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE); // scenario 2: same checks, ascending page bounds
+    sb = new StatsBuilder();
+    builder.add(sb.stats(type, null, null, null, null));
+    builder.add(sb.stats(type, decimalBinary("-9999293.23"), decimalBinary("-234.23")));
+    builder.add(sb.stats(type, decimalBinary("-0.17"), decimalBinary("87656273")));
+    builder.add(sb.stats(type, null, null));
+    builder.add(sb.stats(type, decimalBinary("87656273")));
+    builder.add(sb.stats(type, null, null));
+    builder.add(sb.stats(type, decimalBinary("1234567890.12"), null, null, null));
+    builder.add(sb.stats(type, null, null, null));
+    assertEquals(8, builder.getPageCount());
+    assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
+    columnIndex = builder.build();
+    assertEquals(BoundaryOrder.ASCENDING, columnIndex.getBoundaryOrder());
+    assertCorrectNullCounts(columnIndex, 4, 0, 0, 2, 0, 2, 3, 3);
+    assertCorrectNullPages(columnIndex, true, false, false, true, false, true, false, true);
+    assertCorrectValues(columnIndex.getMaxValues(),
+        null,
+        decimalBinary("-234.23"),
+        decimalBinary("87656273"),
+        null,
+        decimalBinary("87656273"),
+        null,
+        decimalBinary("1234567890.12"),
+        null);
+    assertCorrectValues(columnIndex.getMinValues(),
+        null,
+        decimalBinary("-9999293.23"),
+        decimalBinary("-0.17"),
+        null,
+        decimalBinary("87656273"),
+        null,
+        decimalBinary("1234567890.12"),
+        null);
+    assertCorrectFiltering(columnIndex, eq(col, decimalBinary("87656273")), 2, 4);
+    assertCorrectFiltering(columnIndex, eq(col, null), 0, 3, 5, 6, 7);
+    assertCorrectFiltering(columnIndex, notEq(col, decimalBinary("87656273")), 0, 1, 2, 3, 5, 6, 7);
+    assertCorrectFiltering(columnIndex, notEq(col, null), 1, 2, 4, 6);
+    assertCorrectFiltering(columnIndex, gt(col, decimalBinary("87656273")), 6);
+    assertCorrectFiltering(columnIndex, gtEq(col, decimalBinary("87656273")), 2, 4, 6);
+    assertCorrectFiltering(columnIndex, lt(col, decimalBinary("-0.17")), 1);
+    assertCorrectFiltering(columnIndex, ltEq(col, decimalBinary("-0.17")), 1, 2);
+    assertCorrectFiltering(columnIndex, userDefined(col, BinaryDecimalIsNullOrZeroUdp.class), 0, 2, 3, 5, 6, 7);
+    assertCorrectFiltering(columnIndex, invert(userDefined(col, BinaryDecimalIsNullOrZeroUdp.class)), 1, 2, 4, 6);
+
+    builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE); // scenario 3: same checks, descending page bounds
+    sb = new StatsBuilder();
+    builder.add(sb.stats(type, null, null, null));
+    builder.add(sb.stats(type, null, null));
+    builder.add(sb.stats(type, decimalBinary("1234567890.12"), null, null, null));
+    builder.add(sb.stats(type, null, null, null, null));
+    builder.add(sb.stats(type, decimalBinary("1234567890.12"), decimalBinary("87656273")));
+    builder.add(sb.stats(type, decimalBinary("987656273"), decimalBinary("-0.17")));
+    builder.add(sb.stats(type, null, null));
+    builder.add(sb.stats(type, decimalBinary("-234.23"), decimalBinary("-9999293.23")));
+    assertEquals(8, builder.getPageCount());
+    assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
+    columnIndex = builder.build();
+    assertEquals(BoundaryOrder.DESCENDING, columnIndex.getBoundaryOrder());
+    assertCorrectNullCounts(columnIndex, 3, 2, 3, 4, 0, 0, 2, 0);
+    assertCorrectNullPages(columnIndex, true, true, false, true, false, false, true, false);
+    assertCorrectValues(columnIndex.getMaxValues(),
+        null,
+        null,
+        decimalBinary("1234567890.12"),
+        null,
+        decimalBinary("1234567890.12"),
+        decimalBinary("987656273"),
+        null,
+        decimalBinary("-234.23"));
+    assertCorrectValues(columnIndex.getMinValues(),
+        null,
+        null,
+        decimalBinary("1234567890.12"),
+        null,
+        decimalBinary("87656273"),
+        decimalBinary("-0.17"),
+        null,
+        decimalBinary("-9999293.23"));
+    assertCorrectFiltering(columnIndex, eq(col, decimalBinary("1234567890.12")), 2, 4);
+    assertCorrectFiltering(columnIndex, eq(col, null), 0, 1, 2, 3, 6);
+    assertCorrectFiltering(columnIndex, notEq(col, decimalBinary("0.0")), 0, 1, 2, 3, 4, 5, 6, 7);
+    assertCorrectFiltering(columnIndex, notEq(col, null), 2, 4, 5, 7);
+    assertCorrectFiltering(columnIndex, gt(col, decimalBinary("1234567890.12"))); // no indexes listed: no page expected
+    assertCorrectFiltering(columnIndex, gtEq(col, decimalBinary("1234567890.12")), 2, 4);
+    assertCorrectFiltering(columnIndex, lt(col, decimalBinary("-0.17")), 7);
+    assertCorrectFiltering(columnIndex, ltEq(col, decimalBinary("-0.17")), 5, 7);
+    assertCorrectFiltering(columnIndex, userDefined(col, BinaryDecimalIsNullOrZeroUdp.class), 0, 1, 2, 3, 5, 6);
+    assertCorrectFiltering(columnIndex, invert(userDefined(col, BinaryDecimalIsNullOrZeroUdp.class)), 2, 4, 5, 7);
+  }
+
+  @Test
+  public void testBuildBinaryUtf8() { // BINARY/UTF8 pages: checks the built index and filtering per boundary order
+    PrimitiveType type = Types.required(BINARY).as(UTF8).named("test_binary_utf8");
+    ColumnIndexBuilder builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
+    assertThat(builder, instanceOf(BinaryColumnIndexBuilder.class));
+    assertNull(builder.build()); // nothing added yet, so no column index is expected
+    BinaryColumn col = binaryColumn("test_col"); // column used only to build the filter predicates below
+
+    StatsBuilder sb = new StatsBuilder(); // scenario 1: page bounds in no particular order
+    builder.add(sb.stats(type, null, null)); // one page per add(); each null argument shows up in the null counts below
+    builder.add(sb.stats(type, stringBinary("Jeltz"), stringBinary("Slartibartfast"), null, null));
+    builder.add(sb.stats(type, null, null, null, null, null));
+    builder.add(sb.stats(type, null, null));
+    builder.add(sb.stats(type, stringBinary("Beeblebrox"), stringBinary("Prefect")));
+    builder.add(sb.stats(type, stringBinary("Dent"), stringBinary("Trilian"), null));
+    builder.add(sb.stats(type, stringBinary("Beeblebrox")));
+    builder.add(sb.stats(type, null, null));
+    assertEquals(8, builder.getPageCount());
+    assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize()); // both builders must agree on the min/max size
+    ColumnIndex columnIndex = builder.build();
+    assertEquals(BoundaryOrder.UNORDERED, columnIndex.getBoundaryOrder());
+    assertCorrectNullCounts(columnIndex, 2, 2, 5, 2, 0, 1, 0, 2);
+    assertCorrectNullPages(columnIndex, true, false, true, true, false, false, false, true);
+    assertCorrectValues(columnIndex.getMaxValues(),
+        null,
+        stringBinary("Slartibartfast"),
+        null,
+        null,
+        stringBinary("Prefect"),
+        stringBinary("Trilian"),
+        stringBinary("Beeblebrox"),
+        null);
+    assertCorrectValues(columnIndex.getMinValues(),
+        null,
+        stringBinary("Jeltz"),
+        null,
+        null,
+        stringBinary("Beeblebrox"),
+        stringBinary("Dent"),
+        stringBinary("Beeblebrox"),
+        null);
+    assertCorrectFiltering(columnIndex, eq(col, stringBinary("Marvin")), 1, 4, 5); // trailing ints: presumably the expected page indexes
+    assertCorrectFiltering(columnIndex, eq(col, null), 0, 1, 2, 3, 5, 7);
+    assertCorrectFiltering(columnIndex, notEq(col, stringBinary("Beeblebrox")), 0, 1, 2, 3, 4, 5, 7);
+    assertCorrectFiltering(columnIndex, notEq(col, null), 1, 4, 5, 6);
+    assertCorrectFiltering(columnIndex, gt(col, stringBinary("Prefect")), 1, 5);
+    assertCorrectFiltering(columnIndex, gtEq(col, stringBinary("Prefect")), 1, 4, 5);
+    assertCorrectFiltering(columnIndex, lt(col, stringBinary("Dent")), 4, 6);
+    assertCorrectFiltering(columnIndex, ltEq(col, stringBinary("Dent")), 4, 5, 6);
+    assertCorrectFiltering(columnIndex, userDefined(col, BinaryUtf8StartsWithB.class), 4, 6);
+    assertCorrectFiltering(columnIndex, invert(userDefined(col, BinaryUtf8StartsWithB.class)), 0, 1, 2, 3, 4, 5, 7);
+
+    builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE); // scenario 2: same checks, ascending page bounds
+    sb = new StatsBuilder();
+    builder.add(sb.stats(type, stringBinary("Beeblebrox"), stringBinary("Dent"), null, null));
+    builder.add(sb.stats(type, null, null));
+    builder.add(sb.stats(type, null, null, null, null, null));
+    builder.add(sb.stats(type, stringBinary("Dent"), stringBinary("Jeltz")));
+    builder.add(sb.stats(type, stringBinary("Dent"), stringBinary("Prefect"), null));
+    builder.add(sb.stats(type, null, null));
+    builder.add(sb.stats(type, stringBinary("Slartibartfast")));
+    builder.add(sb.stats(type, null, null));
+    assertEquals(8, builder.getPageCount());
+    assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
+    columnIndex = builder.build();
+    assertEquals(BoundaryOrder.ASCENDING, columnIndex.getBoundaryOrder());
+    assertCorrectNullCounts(columnIndex, 2, 2, 5, 0, 1, 2, 0, 2);
+    assertCorrectNullPages(columnIndex, false, true, true, false, false, true, false, true);
+    assertCorrectValues(columnIndex.getMaxValues(),
+        stringBinary("Dent"),
+        null,
+        null,
+        stringBinary("Jeltz"),
+        stringBinary("Prefect"),
+        null,
+        stringBinary("Slartibartfast"),
+        null);
+    assertCorrectValues(columnIndex.getMinValues(),
+        stringBinary("Beeblebrox"),
+        null,
+        null,
+        stringBinary("Dent"),
+        stringBinary("Dent"),
+        null,
+        stringBinary("Slartibartfast"),
+        null);
+    assertCorrectFiltering(columnIndex, eq(col, stringBinary("Jeltz")), 3, 4);
+    assertCorrectFiltering(columnIndex, eq(col, null), 0, 1, 2, 4, 5, 7);
+    assertCorrectFiltering(columnIndex, notEq(col, stringBinary("Slartibartfast")), 0, 1, 2, 3, 4, 5, 7);
+    assertCorrectFiltering(columnIndex, notEq(col, null), 0, 3, 4, 6);
+    assertCorrectFiltering(columnIndex, gt(col, stringBinary("Marvin")), 4, 6);
+    assertCorrectFiltering(columnIndex, gtEq(col, stringBinary("Marvin")), 4, 6);
+    assertCorrectFiltering(columnIndex, lt(col, stringBinary("Dent")), 0);
+    assertCorrectFiltering(columnIndex, ltEq(col, stringBinary("Dent")), 0, 3, 4);
+    assertCorrectFiltering(columnIndex, userDefined(col, BinaryUtf8StartsWithB.class), 0);
+    assertCorrectFiltering(columnIndex, invert(userDefined(col, BinaryUtf8StartsWithB.class)), 0, 1, 2, 3, 4, 5, 6, 7);
+
+    builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE); // scenario 3: same checks, descending page bounds
+    sb = new StatsBuilder();
+    builder.add(sb.stats(type, null, null));
+    builder.add(sb.stats(type, stringBinary("Slartibartfast")));
+    builder.add(sb.stats(type, null, null, null, null, null));
+    builder.add(sb.stats(type, stringBinary("Prefect"), stringBinary("Jeltz"), null));
+    builder.add(sb.stats(type, stringBinary("Dent"), stringBinary("Dent")));
+    builder.add(sb.stats(type, null, null));
+    builder.add(sb.stats(type, null, null));
+    builder.add(sb.stats(type, stringBinary("Dent"), stringBinary("Beeblebrox"), null, null));
+    assertEquals(8, builder.getPageCount());
+    assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
+    columnIndex = builder.build();
+    assertEquals(BoundaryOrder.DESCENDING, columnIndex.getBoundaryOrder());
+    assertCorrectNullCounts(columnIndex, 2, 0, 5, 1, 0, 2, 2, 2);
+    assertCorrectNullPages(columnIndex, true, false, true, false, false, true, true, false);
+    assertCorrectValues(columnIndex.getMaxValues(),
+        null,
+        stringBinary("Slartibartfast"),
+        null,
+        stringBinary("Prefect"),
+        stringBinary("Dent"),
+        null,
+        null,
+        stringBinary("Dent"));
+    assertCorrectValues(columnIndex.getMinValues(),
+        null,
+        stringBinary("Slartibartfast"),
+        null,
+        stringBinary("Jeltz"),
+        stringBinary("Dent"),
+        null,
+        null,
+        stringBinary("Beeblebrox"));
+    assertCorrectFiltering(columnIndex, eq(col, stringBinary("Marvin")), 3);
+    assertCorrectFiltering(columnIndex, eq(col, null), 0, 2, 3, 5, 6, 7);
+    assertCorrectFiltering(columnIndex, notEq(col, stringBinary("Dent")), 0, 1, 2, 3, 5, 6, 7);
+    assertCorrectFiltering(columnIndex, notEq(col, null), 1, 3, 4, 7);
+    assertCorrectFiltering(columnIndex, gt(col, stringBinary("Prefect")), 1);
+    assertCorrectFiltering(columnIndex, gtEq(col, stringBinary("Prefect")), 1, 3);
+    assertCorrectFiltering(columnIndex, lt(col, stringBinary("Marvin")), 3, 4, 7);
+    assertCorrectFiltering(columnIndex, ltEq(col, stringBinary("Marvin")), 3, 4, 7);
+    assertCorrectFiltering(columnIndex, userDefined(col, BinaryUtf8StartsWithB.class), 7);
+    assertCorrectFiltering(columnIndex, invert(userDefined(col, BinaryUtf8StartsWithB.class)), 0, 1, 2, 3, 4, 5, 6, 7);
+  }
+
+  @Test
+  public void testStaticBuildBinary() { // static build(): index assembled directly from ready-made page metadata
+    ColumnIndex columnIndex = ColumnIndexBuilder.build(
+        Types.required(BINARY).as(UTF8).named("test_binary_utf8"),
+        BoundaryOrder.ASCENDING,
+        asList(true, true, false, false, true, false, true, false), // null-page flags, checked by assertCorrectNullPages
+        asList(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L), // null counts per page; uppercase L so 1L cannot be misread as 11
+        toBBList( // min values, null for the null pages
+            null,
+            null,
+            stringBinary("Beeblebrox"),
+            stringBinary("Dent"),
+            null,
+            stringBinary("Jeltz"),
+            null,
+            stringBinary("Slartibartfast")),
+        toBBList( // max values, null for the null pages
+            null,
+            null,
+            stringBinary("Dent"),
+            stringBinary("Dent"),
+            null,
+            stringBinary("Prefect"),
+            null,
+            stringBinary("Slartibartfast")));
+    assertEquals(BoundaryOrder.ASCENDING, columnIndex.getBoundaryOrder()); // everything passed in must come back unchanged
+    assertCorrectNullCounts(columnIndex, 1, 2, 3, 4, 5, 6, 7, 8);
+    assertCorrectNullPages(columnIndex, true, true, false, false, true, false, true, false);
+    assertCorrectValues(columnIndex.getMaxValues(),
+        null,
+        null,
+        stringBinary("Dent"),
+        stringBinary("Dent"),
+        null,
+        stringBinary("Prefect"),
+        null,
+        stringBinary("Slartibartfast"));
+    assertCorrectValues(columnIndex.getMinValues(),
+        null,
+        null,
+        stringBinary("Beeblebrox"),
+        stringBinary("Dent"),
+        null,
+        stringBinary("Jeltz"),
+        null,
+        stringBinary("Slartibartfast"));
+  }
+
+  @Test
+  public void testFilterWithoutNullCounts() { // index built with a null null-count list: filtering must still work
+    ColumnIndex columnIndex = ColumnIndexBuilder.build(
+        Types.required(BINARY).as(UTF8).named("test_binary_utf8"),
+        BoundaryOrder.ASCENDING,
+        asList(true, true, false, false, true, false, true, false), // null-page flags, checked by assertCorrectNullPages
+        null, // no null counts supplied; getNullCounts() is asserted to be null below
+        toBBList( // min values, null for the null pages
+            null,
+            null,
+            stringBinary("Beeblebrox"),
+            stringBinary("Dent"),
+            null,
+            stringBinary("Jeltz"),
+            null,
+            stringBinary("Slartibartfast")),
+        toBBList( // max values, null for the null pages
+            null,
+            null,
+            stringBinary("Dent"),
+            stringBinary("Dent"),
+            null,
+            stringBinary("Prefect"),
+            null,
+            stringBinary("Slartibartfast")));
+    assertEquals(BoundaryOrder.ASCENDING, columnIndex.getBoundaryOrder());
+    assertNull(columnIndex.getNullCounts());
+    assertCorrectNullPages(columnIndex, true, true, false, false, true, false, true, false);
+    assertCorrectValues(columnIndex.getMaxValues(),
+        null,
+        null,
+        stringBinary("Dent"),
+        stringBinary("Dent"),
+        null,
+        stringBinary("Prefect"),
+        null,
+        stringBinary("Slartibartfast"));
+    assertCorrectValues(columnIndex.getMinValues(),
+        null,
+        null,
+        stringBinary("Beeblebrox"),
+        stringBinary("Dent"),
+        null,
+        stringBinary("Jeltz"),
+        null,
+        stringBinary("Slartibartfast"));
+
+    BinaryColumn col = binaryColumn("test_col");
+    assertCorrectFiltering(columnIndex, eq(col, stringBinary("Dent")), 2, 3);
+    assertCorrectFiltering(columnIndex, eq(col, null), 0, 1, 2, 3, 4, 5, 6, 7); // all eight pages expected for eq(null)
+    assertCorrectFiltering(columnIndex, notEq(col, stringBinary("Dent")), 0, 1, 2, 3, 4, 5, 6, 7);
+    assertCorrectFiltering(columnIndex, notEq(col, null), 2, 3, 5, 7); // exactly the pages not flagged as null pages
+    // NOTE(review): the decimal UDP is applied to a UTF8 column in the next two checks - confirm this is intentional
+    assertCorrectFiltering(columnIndex, userDefined(col, BinaryDecimalIsNullOrZeroUdp.class), 0, 1, 2, 3, 4, 5, 6, 7);
+    assertCorrectFiltering(columnIndex, invert(userDefined(col, BinaryDecimalIsNullOrZeroUdp.class)), 2, 3, 5, 7);
+  }
+
+  @Test
+  public void testBuildBoolean() { // BOOLEAN pages: checks the built index and filtering per boundary order
+    PrimitiveType type = Types.required(BOOLEAN).named("test_boolean");
+    ColumnIndexBuilder builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
+    assertThat(builder, instanceOf(BooleanColumnIndexBuilder.class));
+    assertNull(builder.build()); // nothing added yet, so no column index is expected
+    BooleanColumn col = booleanColumn("test_col"); // column used only to build the filter predicates below
+
+    builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE); // scenario 1: page bounds in no particular order
+    StatsBuilder sb = new StatsBuilder();
+    builder.add(sb.stats(type, false, true)); // one page per add(); each null argument shows up in the null counts below
+    builder.add(sb.stats(type, true, false, null)); // argument order is irrelevant: min/max asserted below are false/true
+    builder.add(sb.stats(type, true, true, null, null));
+    builder.add(sb.stats(type, null, null, null));
+    builder.add(sb.stats(type, false, false));
+    assertEquals(5, builder.getPageCount());
+    assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize()); // both builders must agree on the min/max size
+    ColumnIndex columnIndex = builder.build();
+    assertEquals(BoundaryOrder.UNORDERED, columnIndex.getBoundaryOrder());
+    assertCorrectNullCounts(columnIndex, 0, 1, 2, 3, 0);
+    assertCorrectNullPages(columnIndex, false, false, false, true, false);
+    assertCorrectValues(columnIndex.getMaxValues(), true, true, true, null, false);
+    assertCorrectValues(columnIndex.getMinValues(), false, false, true, null, false);
+    assertCorrectFiltering(columnIndex, eq(col, true), 0, 1, 2); // trailing ints: presumably the expected page indexes
+    assertCorrectFiltering(columnIndex, eq(col, null), 1, 2, 3);
+    assertCorrectFiltering(columnIndex, notEq(col, true), 0, 1, 2, 3, 4);
+    assertCorrectFiltering(columnIndex, notEq(col, null), 0, 1, 2, 4);
+    assertCorrectFiltering(columnIndex, userDefined(col, BooleanIsTrueOrNull.class), 0, 1, 2, 3);
+    assertCorrectFiltering(columnIndex, invert(userDefined(col, BooleanIsTrueOrNull.class)), 0, 1, 4);
+
+    builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE); // scenario 2: same checks, ascending page bounds
+    sb = new StatsBuilder();
+    builder.add(sb.stats(type, null, null));
+    builder.add(sb.stats(type, false, false));
+    builder.add(sb.stats(type, null, null, null));
+    builder.add(sb.stats(type, null, null, null, null));
+    builder.add(sb.stats(type, false, true, null));
+    builder.add(sb.stats(type, false, true, null, null));
+    builder.add(sb.stats(type, null, null, null));
+    assertEquals(7, builder.getPageCount());
+    assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
+    columnIndex = builder.build();
+    assertEquals(BoundaryOrder.ASCENDING, columnIndex.getBoundaryOrder());
+    assertCorrectNullCounts(columnIndex, 2, 0, 3, 4, 1, 2, 3);
+    assertCorrectNullPages(columnIndex, true, false, true, true, false, false, true);
+    assertCorrectValues(columnIndex.getMaxValues(), null, false, null, null, true, true, null);
+    assertCorrectValues(columnIndex.getMinValues(), null, false, null, null, false, false, null);
+    assertCorrectFiltering(columnIndex, eq(col, true), 4, 5);
+    assertCorrectFiltering(columnIndex, eq(col, null), 0, 2, 3, 4, 5, 6);
+    assertCorrectFiltering(columnIndex, notEq(col, true), 0, 1, 2, 3, 4, 5, 6);
+    assertCorrectFiltering(columnIndex, notEq(col, null), 1, 4, 5);
+    assertCorrectFiltering(columnIndex, userDefined(col, BooleanIsTrueOrNull.class), 0, 2, 3, 4, 5, 6);
+    assertCorrectFiltering(columnIndex, invert(userDefined(col, BooleanIsTrueOrNull.class)), 1, 4, 5);
+
+    builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE); // scenario 3: same checks, descending page bounds
+    sb = new StatsBuilder();
+    builder.add(sb.stats(type, null, null));
+    builder.add(sb.stats(type, true, true));
+    builder.add(sb.stats(type, null, null, null));
+    builder.add(sb.stats(type, null, null, null, null));
+    builder.add(sb.stats(type, true, false, null));
+    builder.add(sb.stats(type, false, false, null, null));
+    builder.add(sb.stats(type, null, null, null));
+    assertEquals(7, builder.getPageCount());
+    assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
+    columnIndex = builder.build();
+    assertEquals(BoundaryOrder.DESCENDING, columnIndex.getBoundaryOrder());
+    assertCorrectNullCounts(columnIndex, 2, 0, 3, 4, 1, 2, 3);
+    assertCorrectNullPages(columnIndex, true, false, true, true, false, false, true);
+    assertCorrectValues(columnIndex.getMaxValues(), null, true, null, null, true, false, null);
+    assertCorrectValues(columnIndex.getMinValues(), null, true, null, null, false, false, null);
+    assertCorrectFiltering(columnIndex, eq(col, true), 1, 4);
+    assertCorrectFiltering(columnIndex, eq(col, null), 0, 2, 3, 4, 5, 6);
+    assertCorrectFiltering(columnIndex, notEq(col, true), 0, 2, 3, 4, 5, 6);
+    assertCorrectFiltering(columnIndex, notEq(col, null), 1, 4, 5);
+    assertCorrectFiltering(columnIndex, userDefined(col, BooleanIsTrueOrNull.class), 0, 1, 2, 3, 4, 5, 6);
+    assertCorrectFiltering(columnIndex, invert(userDefined(col, BooleanIsTrueOrNull.class)), 4, 5);
+  }
+
+  @Test
+  public void testStaticBuildBoolean() { // a statically built BOOLEAN index must hand back the supplied data
+    PrimitiveType type = Types.required(BOOLEAN).named("test_boolean");
+    ColumnIndex index = ColumnIndexBuilder.build(
+        type, BoundaryOrder.DESCENDING,
+        asList(false, true, false, true, false, true), // null pages
+        asList(9L, 8L, 7L, 6L, 5L, 0L), // null counts
+        toBBList(false, null, false, null, true, null), // min values
+        toBBList(true, null, false, null, true, null)); // max values
+    assertEquals(BoundaryOrder.DESCENDING, index.getBoundaryOrder());
+    assertCorrectNullCounts(index, 9, 8, 7, 6, 5, 0);
+    assertCorrectNullPages(index, false, true, false, true, false, true);
+    assertCorrectValues(index.getMaxValues(), true, null, false, null, true, null);
+    assertCorrectValues(index.getMinValues(), false, null, false, null, true, null);
+  }
+
+  @Test
+  public void testBuildDouble() { // DOUBLE builder: UNORDERED, ASCENDING and DESCENDING page sequences
+    PrimitiveType type = Types.required(DOUBLE).named("test_double");
+    ColumnIndexBuilder indexBuilder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
+    assertThat(indexBuilder, instanceOf(DoubleColumnIndexBuilder.class));
+    assertNull(indexBuilder.build());
+    DoubleColumn column = doubleColumn("test_col");
+    // Pages whose min/max values are not sorted: the boundary order is expected to be UNORDERED.
+    StatsBuilder statsBuilder = new StatsBuilder();
+    indexBuilder.add(statsBuilder.stats(type, -4.2, -4.1));
+    indexBuilder.add(statsBuilder.stats(type, -11.7, 7.0, null));
+    indexBuilder.add(statsBuilder.stats(type, 2.2, 2.2, null, null));
+    indexBuilder.add(statsBuilder.stats(type, null, null, null));
+    indexBuilder.add(statsBuilder.stats(type, 1.9, 2.32));
+    indexBuilder.add(statsBuilder.stats(type, -21.0, 8.1));
+    assertEquals(6, indexBuilder.getPageCount());
+    assertEquals(statsBuilder.getMinMaxSize(), indexBuilder.getMinMaxSize());
+    ColumnIndex index = indexBuilder.build();
+    assertEquals(BoundaryOrder.UNORDERED, index.getBoundaryOrder());
+    assertCorrectNullCounts(index, 0, 1, 2, 3, 0, 0);
+    assertCorrectNullPages(index, false, false, false, true, false, false);
+    assertCorrectValues(index.getMaxValues(), -4.1, 7.0, 2.2, null, 2.32, 8.1);
+    assertCorrectValues(index.getMinValues(), -4.2, -11.7, 2.2, null, 1.9, -21.0);
+    assertCorrectFiltering(index, eq(column, 0.0), 1, 5);
+    assertCorrectFiltering(index, eq(column, null), 1, 2, 3);
+    assertCorrectFiltering(index, notEq(column, 2.2), 0, 1, 2, 3, 4, 5);
+    assertCorrectFiltering(index, notEq(column, null), 0, 1, 2, 4, 5);
+    assertCorrectFiltering(index, gt(column, 2.2), 1, 4, 5);
+    assertCorrectFiltering(index, gtEq(column, 2.2), 1, 2, 4, 5);
+    assertCorrectFiltering(index, lt(column, -4.2), 1, 5);
+    assertCorrectFiltering(index, ltEq(column, -4.2), 0, 1, 5);
+    assertCorrectFiltering(index, userDefined(column, DoubleIsInteger.class), 1, 4, 5);
+    assertCorrectFiltering(index, invert(userDefined(column, DoubleIsInteger.class)), 0, 1, 2, 3, 4, 5);
+    // Increasing min/max values interleaved with null-only pages: expected order is ASCENDING.
+    indexBuilder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
+    statsBuilder = new StatsBuilder();
+    indexBuilder.add(statsBuilder.stats(type, null, null));
+    indexBuilder.add(statsBuilder.stats(type, -532.3, -345.2, null, null));
+    indexBuilder.add(statsBuilder.stats(type, -234.7, -234.6, null));
+    indexBuilder.add(statsBuilder.stats(type, null, null));
+    indexBuilder.add(statsBuilder.stats(type, null, null, null));
+    indexBuilder.add(statsBuilder.stats(type, -234.6, 2.99999));
+    indexBuilder.add(statsBuilder.stats(type, null, null));
+    indexBuilder.add(statsBuilder.stats(type, 3.0, 42.83));
+    indexBuilder.add(statsBuilder.stats(type, null, null));
+    assertEquals(9, indexBuilder.getPageCount());
+    assertEquals(statsBuilder.getMinMaxSize(), indexBuilder.getMinMaxSize());
+    index = indexBuilder.build();
+    assertEquals(BoundaryOrder.ASCENDING, index.getBoundaryOrder());
+    assertCorrectNullCounts(index, 2, 2, 1, 2, 3, 0, 2, 0, 2);
+    assertCorrectNullPages(index, true, false, false, true, true, false, true, false, true);
+    assertCorrectValues(index.getMaxValues(), null, -345.2, -234.6, null, null, 2.99999, null, 42.83, null);
+    assertCorrectValues(index.getMinValues(), null, -532.3, -234.7, null, null, -234.6, null, 3.0, null);
+    assertCorrectFiltering(index, eq(column, 0.0), 5);
+    assertCorrectFiltering(index, eq(column, null), 0, 1, 2, 3, 4, 6, 8);
+    assertCorrectFiltering(index, notEq(column, 0.0), 0, 1, 2, 3, 4, 5, 6, 7, 8);
+    assertCorrectFiltering(index, notEq(column, null), 1, 2, 5, 7);
+    assertCorrectFiltering(index, gt(column, 2.99999), 7);
+    assertCorrectFiltering(index, gtEq(column, 2.99999), 5, 7);
+    assertCorrectFiltering(index, lt(column, -234.6), 1, 2);
+    assertCorrectFiltering(index, ltEq(column, -234.6), 1, 2, 5);
+    assertCorrectFiltering(index, userDefined(column, DoubleIsInteger.class), 1, 5, 7);
+    assertCorrectFiltering(index, invert(userDefined(column, DoubleIsInteger.class)), 0, 1, 2, 3, 4, 5, 6, 7, 8);
+    // Decreasing min/max values interleaved with null-only pages: expected order is DESCENDING.
+    indexBuilder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
+    statsBuilder = new StatsBuilder();
+    indexBuilder.add(statsBuilder.stats(type, null, null, null, null, null));
+    indexBuilder.add(statsBuilder.stats(type, 532.3, 345.2));
+    indexBuilder.add(statsBuilder.stats(type, null, null, null));
+    indexBuilder.add(statsBuilder.stats(type, 234.7, 234.6, null));
+    indexBuilder.add(statsBuilder.stats(type, null, null));
+    indexBuilder.add(statsBuilder.stats(type, 234.69, -2.99999));
+    indexBuilder.add(statsBuilder.stats(type, null, null));
+    indexBuilder.add(statsBuilder.stats(type, null, null));
+    indexBuilder.add(statsBuilder.stats(type, -3.0, -42.83));
+    assertEquals(9, indexBuilder.getPageCount());
+    assertEquals(statsBuilder.getMinMaxSize(), indexBuilder.getMinMaxSize());
+    index = indexBuilder.build();
+    assertEquals(BoundaryOrder.DESCENDING, index.getBoundaryOrder());
+    assertCorrectNullCounts(index, 5, 0, 3, 1, 2, 0, 2, 2, 0);
+    assertCorrectNullPages(index, true, false, true, false, true, false, true, true, false);
+    assertCorrectValues(index.getMaxValues(), null, 532.3, null, 234.7, null, 234.69, null, null, -3.0);
+    assertCorrectValues(index.getMinValues(), null, 345.2, null, 234.6, null, -2.99999, null, null, -42.83);
+    assertCorrectFiltering(index, eq(column, 234.6), 3, 5);
+    assertCorrectFiltering(index, eq(column, null), 0, 2, 3, 4, 6, 7);
+    assertCorrectFiltering(index, notEq(column, 2.2), 0, 1, 2, 3, 4, 5, 6, 7, 8);
+    assertCorrectFiltering(index, notEq(column, null), 1, 3, 5, 8);
+    assertCorrectFiltering(index, gt(column, 2.2), 1, 3, 5);
+    assertCorrectFiltering(index, gtEq(column, 234.69), 1, 3, 5);
+    assertCorrectFiltering(index, lt(column, -2.99999), 8);
+    assertCorrectFiltering(index, ltEq(column, -2.99999), 5, 8);
+    assertCorrectFiltering(index, userDefined(column, DoubleIsInteger.class), 1, 5, 8);
+    assertCorrectFiltering(index, invert(userDefined(column, DoubleIsInteger.class)), 0, 1, 2, 3, 4, 5, 6, 7, 8);
+  }
+
+  @Test
+  public void testBuildDoubleZeroNaN() { // signed-zero and NaN handling of the DOUBLE builder
+    PrimitiveType type = Types.required(DOUBLE).named("test_double");
+    ColumnIndexBuilder indexBuilder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
+    StatsBuilder statsBuilder = new StatsBuilder();
+    indexBuilder.add(statsBuilder.stats(type, -1.0, -0.0));
+    indexBuilder.add(statsBuilder.stats(type, 0.0, 1.0));
+    indexBuilder.add(statsBuilder.stats(type, 1.0, 100.0));
+    ColumnIndex index = indexBuilder.build();
+    assertCorrectValues(index.getMinValues(), -1.0, -0.0, 1.0); // a zero min is expected as -0.0
+    assertCorrectValues(index.getMaxValues(), 0.0, 1.0, 100.0); // a zero max is expected as 0.0
+    // A page whose statistics contain NaN: build() is expected to return null (no index).
+    indexBuilder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
+    indexBuilder.add(statsBuilder.stats(type, -1.0, -0.0));
+    indexBuilder.add(statsBuilder.stats(type, 0.0, Double.NaN));
+    indexBuilder.add(statsBuilder.stats(type, 1.0, 100.0));
+    assertNull(indexBuilder.build());
+  }
+
+  @Test
+  public void testStaticBuildDouble() { // a statically built DOUBLE index must hand back the supplied data
+    PrimitiveType type = Types.required(DOUBLE).named("test_double");
+    ColumnIndex index = ColumnIndexBuilder.build(
+        type, BoundaryOrder.UNORDERED,
+        asList(false, false, false, false, false, false), // null pages
+        asList(0L, 1L, 2L, 3L, 4L, 5L), // null counts
+        toBBList(-1.0, -2.0, -3.0, -4.0, -5.0, -6.0), // min values
+        toBBList(1.0, 2.0, 3.0, 4.0, 5.0, 6.0)); // max values
+    assertEquals(BoundaryOrder.UNORDERED, index.getBoundaryOrder());
+    assertCorrectNullCounts(index, 0, 1, 2, 3, 4, 5);
+    assertCorrectNullPages(index, false, false, false, false, false, false);
+    assertCorrectValues(index.getMaxValues(), 1.0, 2.0, 3.0, 4.0, 5.0, 6.0);
+    assertCorrectValues(index.getMinValues(), -1.0, -2.0, -3.0, -4.0, -5.0, -6.0);
+  }
+
+  @Test
+  public void testBuildFloat() { // FLOAT builder: UNORDERED, ASCENDING and DESCENDING page sequences
+    PrimitiveType type = Types.required(FLOAT).named("test_float");
+    ColumnIndexBuilder indexBuilder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
+    assertThat(indexBuilder, instanceOf(FloatColumnIndexBuilder.class));
+    assertNull(indexBuilder.build());
+    FloatColumn column = floatColumn("test_col");
+    // Pages whose min/max values are not sorted: the boundary order is expected to be UNORDERED.
+    StatsBuilder statsBuilder = new StatsBuilder();
+    indexBuilder.add(statsBuilder.stats(type, -4.2f, -4.1f));
+    indexBuilder.add(statsBuilder.stats(type, -11.7f, 7.0f, null));
+    indexBuilder.add(statsBuilder.stats(type, 2.2f, 2.2f, null, null));
+    indexBuilder.add(statsBuilder.stats(type, null, null, null));
+    indexBuilder.add(statsBuilder.stats(type, 1.9f, 2.32f));
+    indexBuilder.add(statsBuilder.stats(type, -21.0f, 8.1f));
+    assertEquals(6, indexBuilder.getPageCount());
+    assertEquals(statsBuilder.getMinMaxSize(), indexBuilder.getMinMaxSize());
+    ColumnIndex index = indexBuilder.build();
+    assertEquals(BoundaryOrder.UNORDERED, index.getBoundaryOrder());
+    assertCorrectNullCounts(index, 0, 1, 2, 3, 0, 0);
+    assertCorrectNullPages(index, false, false, false, true, false, false);
+    assertCorrectValues(index.getMaxValues(), -4.1f, 7.0f, 2.2f, null, 2.32f, 8.1f);
+    assertCorrectValues(index.getMinValues(), -4.2f, -11.7f, 2.2f, null, 1.9f, -21.0f);
+    assertCorrectFiltering(index, eq(column, 0.0f), 1, 5);
+    assertCorrectFiltering(index, eq(column, null), 1, 2, 3);
+    assertCorrectFiltering(index, notEq(column, 2.2f), 0, 1, 2, 3, 4, 5);
+    assertCorrectFiltering(index, notEq(column, null), 0, 1, 2, 4, 5);
+    assertCorrectFiltering(index, gt(column, 2.2f), 1, 4, 5);
+    assertCorrectFiltering(index, gtEq(column, 2.2f), 1, 2, 4, 5);
+    assertCorrectFiltering(index, lt(column, 0.0f), 0, 1, 5);
+    assertCorrectFiltering(index, ltEq(column, 1.9f), 0, 1, 4, 5);
+    assertCorrectFiltering(index, userDefined(column, FloatIsInteger.class), 1, 4, 5);
+    assertCorrectFiltering(index, invert(userDefined(column, FloatIsInteger.class)), 0, 1, 2, 3, 4, 5);
+    // Increasing min/max values interleaved with null-only pages: expected order is ASCENDING.
+    indexBuilder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
+    statsBuilder = new StatsBuilder();
+    indexBuilder.add(statsBuilder.stats(type, null, null));
+    indexBuilder.add(statsBuilder.stats(type, -532.3f, -345.2f, null, null));
+    indexBuilder.add(statsBuilder.stats(type, -300.6f, -234.7f, null));
+    indexBuilder.add(statsBuilder.stats(type, null, null));
+    indexBuilder.add(statsBuilder.stats(type, null, null, null));
+    indexBuilder.add(statsBuilder.stats(type, -234.6f, 2.99999f));
+    indexBuilder.add(statsBuilder.stats(type, null, null));
+    indexBuilder.add(statsBuilder.stats(type, 3.0f, 42.83f));
+    indexBuilder.add(statsBuilder.stats(type, null, null));
+    assertEquals(9, indexBuilder.getPageCount());
+    assertEquals(statsBuilder.getMinMaxSize(), indexBuilder.getMinMaxSize());
+    index = indexBuilder.build();
+    assertEquals(BoundaryOrder.ASCENDING, index.getBoundaryOrder());
+    assertCorrectNullCounts(index, 2, 2, 1, 2, 3, 0, 2, 0, 2);
+    assertCorrectNullPages(index, true, false, false, true, true, false, true, false, true);
+    assertCorrectValues(index.getMaxValues(), null, -345.2f, -234.7f, null, null, 2.99999f, null, 42.83f, null);
+    assertCorrectValues(index.getMinValues(), null, -532.3f, -300.6f, null, null, -234.6f, null, 3.0f, null);
+    assertCorrectFiltering(index, eq(column, 0.0f), 5);
+    assertCorrectFiltering(index, eq(column, null), 0, 1, 2, 3, 4, 6, 8);
+    assertCorrectFiltering(index, notEq(column, 2.2f), 0, 1, 2, 3, 4, 5, 6, 7, 8);
+    assertCorrectFiltering(index, notEq(column, null), 1, 2, 5, 7);
+    assertCorrectFiltering(index, gt(column, 2.2f), 5, 7);
+    assertCorrectFiltering(index, gtEq(column, -234.7f), 2, 5, 7);
+    assertCorrectFiltering(index, lt(column, -234.6f), 1, 2);
+    assertCorrectFiltering(index, ltEq(column, -234.6f), 1, 2, 5);
+    assertCorrectFiltering(index, userDefined(column, FloatIsInteger.class), 1, 2, 5, 7);
+    assertCorrectFiltering(index, invert(userDefined(column, FloatIsInteger.class)), 0, 1, 2, 3, 4, 5, 6, 7, 8);
+    // Decreasing min/max values interleaved with null-only pages: expected order is DESCENDING.
+    indexBuilder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
+    statsBuilder = new StatsBuilder();
+    indexBuilder.add(statsBuilder.stats(type, null, null, null, null, null));
+    indexBuilder.add(statsBuilder.stats(type, 532.3f, 345.2f));
+    indexBuilder.add(statsBuilder.stats(type, null, null, null));
+    indexBuilder.add(statsBuilder.stats(type, 234.7f, 234.6f, null));
+    indexBuilder.add(statsBuilder.stats(type, null, null));
+    indexBuilder.add(statsBuilder.stats(type, 234.6f, -2.99999f));
+    indexBuilder.add(statsBuilder.stats(type, null, null));
+    indexBuilder.add(statsBuilder.stats(type, null, null));
+    indexBuilder.add(statsBuilder.stats(type, -3.0f, -42.83f));
+    assertEquals(9, indexBuilder.getPageCount());
+    assertEquals(statsBuilder.getMinMaxSize(), indexBuilder.getMinMaxSize());
+    index = indexBuilder.build();
+    assertEquals(BoundaryOrder.DESCENDING, index.getBoundaryOrder());
+    assertCorrectNullCounts(index, 5, 0, 3, 1, 2, 0, 2, 2, 0);
+    assertCorrectNullPages(index, true, false, true, false, true, false, true, true, false);
+    assertCorrectValues(index.getMaxValues(), null, 532.3f, null, 234.7f, null, 234.6f, null, null, -3.0f);
+    assertCorrectValues(index.getMinValues(), null, 345.2f, null, 234.6f, null, -2.99999f, null, null, -42.83f);
+    assertCorrectFiltering(index, eq(column, 234.65f), 3);
+    assertCorrectFiltering(index, eq(column, null), 0, 2, 3, 4, 6, 7);
+    assertCorrectFiltering(index, notEq(column, 2.2f), 0, 1, 2, 3, 4, 5, 6, 7, 8);
+    assertCorrectFiltering(index, notEq(column, null), 1, 3, 5, 8);
+    assertCorrectFiltering(index, gt(column, 2.2f), 1, 3, 5);
+    assertCorrectFiltering(index, gtEq(column, 2.2f), 1, 3, 5);
+    assertCorrectFiltering(index, lt(column, 0.0f), 5, 8);
+    assertCorrectFiltering(index, ltEq(column, 0.0f), 5, 8);
+    assertCorrectFiltering(index, userDefined(column, FloatIsInteger.class), 1, 5, 8);
+    assertCorrectFiltering(index, invert(userDefined(column, FloatIsInteger.class)), 0, 1, 2, 3, 4, 5, 6, 7, 8);
+  }
+
+  @Test
+  public void testBuildFloatZeroNaN() { // signed-zero and NaN handling of the FLOAT builder
+    PrimitiveType type = Types.required(FLOAT).named("test_float");
+    ColumnIndexBuilder indexBuilder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
+    StatsBuilder statsBuilder = new StatsBuilder();
+    indexBuilder.add(statsBuilder.stats(type, -1.0f, -0.0f));
+    indexBuilder.add(statsBuilder.stats(type, 0.0f, 1.0f));
+    indexBuilder.add(statsBuilder.stats(type, 1.0f, 100.0f));
+    ColumnIndex index = indexBuilder.build();
+    assertCorrectValues(index.getMinValues(), -1.0f, -0.0f, 1.0f); // a zero min is expected as -0.0f
+    assertCorrectValues(index.getMaxValues(), 0.0f, 1.0f, 100.0f); // a zero max is expected as 0.0f
+    // A page whose statistics contain NaN: build() is expected to return null (no index).
+    indexBuilder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
+    indexBuilder.add(statsBuilder.stats(type, -1.0f, -0.0f));
+    indexBuilder.add(statsBuilder.stats(type, 0.0f, Float.NaN));
+    indexBuilder.add(statsBuilder.stats(type, 1.0f, 100.0f));
+    assertNull(indexBuilder.build());
+  }
+
+  @Test
+  public void testStaticBuildFloat() { // a statically built FLOAT index must hand back the supplied data
+    PrimitiveType type = Types.required(FLOAT).named("test_float");
+    ColumnIndex index = ColumnIndexBuilder.build(
+        type, BoundaryOrder.ASCENDING,
+        asList(true, true, true, false, false, false), // null pages
+        asList(9L, 8L, 7L, 6L, 0L, 0L), // null counts
+        toBBList(null, null, null, -3.0f, -2.0f, 0.1f), // min values
+        toBBList(null, null, null, -2.0f, 0.0f, 6.0f)); // max values
+    assertEquals(BoundaryOrder.ASCENDING, index.getBoundaryOrder());
+    assertCorrectNullCounts(index, 9, 8, 7, 6, 0, 0);
+    assertCorrectNullPages(index, true, true, true, false, false, false);
+    assertCorrectValues(index.getMaxValues(), null, null, null, -2.0f, 0.0f, 6.0f);
+    assertCorrectValues(index.getMinValues(), null, null, null, -3.0f, -2.0f, 0.1f);
+  }
+
+  @Test
+  public void testBuildInt32() { // INT32 builder: UNORDERED, ASCENDING and DESCENDING page sequences
+    PrimitiveType type = Types.required(INT32).named("test_int32");
+    ColumnIndexBuilder indexBuilder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
+    assertThat(indexBuilder, instanceOf(IntColumnIndexBuilder.class));
+    assertNull(indexBuilder.build());
+    IntColumn column = intColumn("test_col");
+    // Pages whose min/max values are not sorted: the boundary order is expected to be UNORDERED.
+    StatsBuilder statsBuilder = new StatsBuilder();
+    indexBuilder.add(statsBuilder.stats(type, -4, 10));
+    indexBuilder.add(statsBuilder.stats(type, -11, 7, null));
+    indexBuilder.add(statsBuilder.stats(type, 2, 2, null, null));
+    indexBuilder.add(statsBuilder.stats(type, null, null, null));
+    indexBuilder.add(statsBuilder.stats(type, 1, 2));
+    indexBuilder.add(statsBuilder.stats(type, -21, 8));
+    assertEquals(6, indexBuilder.getPageCount());
+    assertEquals(statsBuilder.getMinMaxSize(), indexBuilder.getMinMaxSize());
+    ColumnIndex index = indexBuilder.build();
+    assertEquals(BoundaryOrder.UNORDERED, index.getBoundaryOrder());
+    assertCorrectNullCounts(index, 0, 1, 2, 3, 0, 0);
+    assertCorrectNullPages(index, false, false, false, true, false, false);
+    assertCorrectValues(index.getMaxValues(), 10, 7, 2, null, 2, 8);
+    assertCorrectValues(index.getMinValues(), -4, -11, 2, null, 1, -21);
+    assertCorrectFiltering(index, eq(column, 2), 0, 1, 2, 4, 5);
+    assertCorrectFiltering(index, eq(column, null), 1, 2, 3);
+    assertCorrectFiltering(index, notEq(column, 2), 0, 1, 2, 3, 4, 5);
+    assertCorrectFiltering(index, notEq(column, null), 0, 1, 2, 4, 5);
+    assertCorrectFiltering(index, gt(column, 2), 0, 1, 5);
+    assertCorrectFiltering(index, gtEq(column, 2), 0, 1, 2, 4, 5);
+    assertCorrectFiltering(index, lt(column, 2), 0, 1, 4, 5);
+    assertCorrectFiltering(index, ltEq(column, 2), 0, 1, 2, 4, 5);
+    assertCorrectFiltering(index, userDefined(column, IntegerIsDivisableWith3.class), 0, 1, 5);
+    assertCorrectFiltering(index, invert(userDefined(column, IntegerIsDivisableWith3.class)), 0, 1, 2, 3, 4, 5);
+    // Increasing min/max values interleaved with null-only pages: expected order is ASCENDING.
+    indexBuilder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
+    statsBuilder = new StatsBuilder();
+    indexBuilder.add(statsBuilder.stats(type, null, null));
+    indexBuilder.add(statsBuilder.stats(type, -532, -345, null, null));
+    indexBuilder.add(statsBuilder.stats(type, -500, -42, null));
+    indexBuilder.add(statsBuilder.stats(type, null, null));
+    indexBuilder.add(statsBuilder.stats(type, null, null, null));
+    indexBuilder.add(statsBuilder.stats(type, -42, 2));
+    indexBuilder.add(statsBuilder.stats(type, null, null));
+    indexBuilder.add(statsBuilder.stats(type, 3, 42));
+    indexBuilder.add(statsBuilder.stats(type, null, null));
+    assertEquals(9, indexBuilder.getPageCount());
+    assertEquals(statsBuilder.getMinMaxSize(), indexBuilder.getMinMaxSize());
+    index = indexBuilder.build();
+    assertEquals(BoundaryOrder.ASCENDING, index.getBoundaryOrder());
+    assertCorrectNullCounts(index, 2, 2, 1, 2, 3, 0, 2, 0, 2);
+    assertCorrectNullPages(index, true, false, false, true, true, false, true, false, true);
+    assertCorrectValues(index.getMaxValues(), null, -345, -42, null, null, 2, null, 42, null);
+    assertCorrectValues(index.getMinValues(), null, -532, -500, null, null, -42, null, 3, null);
+    assertCorrectFiltering(index, eq(column, 2), 5);
+    assertCorrectFiltering(index, eq(column, null), 0, 1, 2, 3, 4, 6, 8);
+    assertCorrectFiltering(index, notEq(column, 2), 0, 1, 2, 3, 4, 5, 6, 7, 8);
+    assertCorrectFiltering(index, notEq(column, null), 1, 2, 5, 7);
+    assertCorrectFiltering(index, gt(column, 2), 7);
+    assertCorrectFiltering(index, gtEq(column, 2), 5, 7);
+    assertCorrectFiltering(index, lt(column, 2), 1, 2, 5);
+    assertCorrectFiltering(index, ltEq(column, 2), 1, 2, 5);
+    assertCorrectFiltering(index, userDefined(column, IntegerIsDivisableWith3.class), 1, 2, 5, 7);
+    assertCorrectFiltering(index, invert(userDefined(column, IntegerIsDivisableWith3.class)), 0, 1, 2, 3, 4, 5, 6, 7,
+        8);
+    // Decreasing min/max values interleaved with null-only pages: expected order is DESCENDING.
+    indexBuilder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
+    statsBuilder = new StatsBuilder();
+    indexBuilder.add(statsBuilder.stats(type, null, null, null, null, null));
+    indexBuilder.add(statsBuilder.stats(type, 532, 345));
+    indexBuilder.add(statsBuilder.stats(type, null, null, null));
+    indexBuilder.add(statsBuilder.stats(type, 234, 42, null));
+    indexBuilder.add(statsBuilder.stats(type, null, null));
+    indexBuilder.add(statsBuilder.stats(type, 42, -2));
+    indexBuilder.add(statsBuilder.stats(type, null, null));
+    indexBuilder.add(statsBuilder.stats(type, null, null));
+    indexBuilder.add(statsBuilder.stats(type, -3, -42));
+    assertEquals(9, indexBuilder.getPageCount());
+    assertEquals(statsBuilder.getMinMaxSize(), indexBuilder.getMinMaxSize());
+    index = indexBuilder.build();
+    assertEquals(BoundaryOrder.DESCENDING, index.getBoundaryOrder());
+    assertCorrectNullCounts(index, 5, 0, 3, 1, 2, 0, 2, 2, 0);
+    assertCorrectNullPages(index, true, false, true, false, true, false, true, true, false);
+    assertCorrectValues(index.getMaxValues(), null, 532, null, 234, null, 42, null, null, -3);
+    assertCorrectValues(index.getMinValues(), null, 345, null, 42, null, -2, null, null, -42);
+    assertCorrectFiltering(index, eq(column, 2), 5);
+    assertCorrectFiltering(index, eq(column, null), 0, 2, 3, 4, 6, 7);
+    assertCorrectFiltering(index, notEq(column, 2), 0, 1, 2, 3, 4, 5, 6, 7, 8);
+    assertCorrectFiltering(index, notEq(column, null), 1, 3, 5, 8);
+    assertCorrectFiltering(index, gt(column, 2), 1, 3, 5);
+    assertCorrectFiltering(index, gtEq(column, 2), 1, 3, 5);
+    assertCorrectFiltering(index, lt(column, 2), 5, 8);
+    assertCorrectFiltering(index, ltEq(column, 2), 5, 8);
+    assertCorrectFiltering(index, userDefined(column, IntegerIsDivisableWith3.class), 1, 3, 5, 8);
+    assertCorrectFiltering(index, invert(userDefined(column, IntegerIsDivisableWith3.class)), 0, 1, 2, 3, 4, 5, 6, 7,
+        8);
+  }
+
+  @Test
+  public void testStaticBuildInt32() { // a statically built INT32 index must hand back the supplied data
+    PrimitiveType type = Types.required(INT32).named("test_int32");
+    ColumnIndex index = ColumnIndexBuilder.build(
+        type, BoundaryOrder.DESCENDING,
+        asList(false, false, false, true, true, true), // null pages
+        asList(0L, 10L, 0L, 3L, 5L, 7L), // null counts
+        toBBList(10, 8, 6, null, null, null), // min values (larger than the maxes; only the round trip is checked)
+        toBBList(9, 7, 5, null, null, null)); // max values
+    assertEquals(BoundaryOrder.DESCENDING, index.getBoundaryOrder());
+    assertCorrectNullCounts(index, 0, 10, 0, 3, 5, 7);
+    assertCorrectNullPages(index, false, false, false, true, true, true);
+    assertCorrectValues(index.getMaxValues(), 9, 7, 5, null, null, null);
+    assertCorrectValues(index.getMinValues(), 10, 8, 6, null, null, null);
+  }
+
+  @Test
+  public void testBuildUInt8() { // INT32 annotated as UINT_8: UNORDERED, ASCENDING and DESCENDING page sequences
+    PrimitiveType type = Types.required(INT32).as(UINT_8).named("test_uint8");
+    ColumnIndexBuilder indexBuilder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
+    assertThat(indexBuilder, instanceOf(IntColumnIndexBuilder.class));
+    assertNull(indexBuilder.build());
+    IntColumn column = intColumn("test_col");
+    // Pages whose min/max values are not sorted: the boundary order is expected to be UNORDERED.
+    StatsBuilder statsBuilder = new StatsBuilder();
+    indexBuilder.add(statsBuilder.stats(type, 4, 10));
+    indexBuilder.add(statsBuilder.stats(type, 11, 17, null));
+    indexBuilder.add(statsBuilder.stats(type, 2, 2, null, null));
+    indexBuilder.add(statsBuilder.stats(type, null, null, null));
+    indexBuilder.add(statsBuilder.stats(type, 1, 0xFF));
+    indexBuilder.add(statsBuilder.stats(type, 0xEF, 0xFA));
+    assertEquals(6, indexBuilder.getPageCount());
+    assertEquals(statsBuilder.getMinMaxSize(), indexBuilder.getMinMaxSize());
+    ColumnIndex index = indexBuilder.build();
+    assertEquals(BoundaryOrder.UNORDERED, index.getBoundaryOrder());
+    assertCorrectNullCounts(index, 0, 1, 2, 3, 0, 0);
+    assertCorrectNullPages(index, false, false, false, true, false, false);
+    assertCorrectValues(index.getMaxValues(), 10, 17, 2, null, 0xFF, 0xFA);
+    assertCorrectValues(index.getMinValues(), 4, 11, 2, null, 1, 0xEF);
+    assertCorrectFiltering(index, eq(column, 2), 2, 4);
+    assertCorrectFiltering(index, eq(column, null), 1, 2, 3);
+    assertCorrectFiltering(index, notEq(column, 2), 0, 1, 2, 3, 4, 5);
+    assertCorrectFiltering(index, notEq(column, null), 0, 1, 2, 4, 5);
+    assertCorrectFiltering(index, gt(column, 2), 0, 1, 4, 5);
+    assertCorrectFiltering(index, gtEq(column, 2), 0, 1, 2, 4, 5);
+    assertCorrectFiltering(index, lt(column, 0xEF), 0, 1, 2, 4);
+    assertCorrectFiltering(index, ltEq(column, 0xEF), 0, 1, 2, 4, 5);
+    assertCorrectFiltering(index, userDefined(column, IntegerIsDivisableWith3.class), 0, 1, 4, 5);
+    assertCorrectFiltering(index, invert(userDefined(column, IntegerIsDivisableWith3.class)), 0, 1, 2, 3, 4, 5);
+    // Increasing min/max values interleaved with null-only pages: expected order is ASCENDING.
+    indexBuilder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
+    statsBuilder = new StatsBuilder();
+    indexBuilder.add(statsBuilder.stats(type, null, null));
+    indexBuilder.add(statsBuilder.stats(type, 0, 0, null, null));
+    indexBuilder.add(statsBuilder.stats(type, 0, 42, null));
+    indexBuilder.add(statsBuilder.stats(type, null, null));
+    indexBuilder.add(statsBuilder.stats(type, null, null, null));
+    indexBuilder.add(statsBuilder.stats(type, 42, 0xEE));
+    indexBuilder.add(statsBuilder.stats(type, null, null));
+    indexBuilder.add(statsBuilder.stats(type, 0xEF, 0xFF));
+    indexBuilder.add(statsBuilder.stats(type, null, null));
+    assertEquals(9, indexBuilder.getPageCount());
+    assertEquals(statsBuilder.getMinMaxSize(), indexBuilder.getMinMaxSize());
+    index = indexBuilder.build();
+    assertEquals(BoundaryOrder.ASCENDING, index.getBoundaryOrder());
+    assertCorrectNullCounts(index, 2, 2, 1, 2, 3, 0, 2, 0, 2);
+    assertCorrectNullPages(index, true, false, false, true, true, false, true, false, true);
+    assertCorrectValues(index.getMaxValues(), null, 0, 42, null, null, 0xEE, null, 0xFF, null);
+    assertCorrectValues(index.getMinValues(), null, 0, 0, null, null, 42, null, 0xEF, null);
+    assertCorrectFiltering(index, eq(column, 2), 2);
+    assertCorrectFiltering(index, eq(column, null), 0, 1, 2, 3, 4, 6, 8);
+    assertCorrectFiltering(index, notEq(column, 2), 0, 1, 2, 3, 4, 5, 6, 7, 8);
+    assertCorrectFiltering(index, notEq(column, null), 1, 2, 5, 7);
+    assertCorrectFiltering(index, gt(column, 0xEE), 7);
+    assertCorrectFiltering(index, gtEq(column, 0xEE), 5, 7);
+    assertCorrectFiltering(index, lt(column, 42), 1, 2);
+    assertCorrectFiltering(index, ltEq(column, 42), 1, 2, 5);
+    assertCorrectFiltering(index, userDefined(column, IntegerIsDivisableWith3.class), 1, 2, 5, 7);
+    assertCorrectFiltering(index, invert(userDefined(column, IntegerIsDivisableWith3.class)), 0, 1, 2, 3, 4, 5, 6, 7,
+        8);
+    // Decreasing min/max values interleaved with null-only pages: expected order is DESCENDING.
+    indexBuilder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
+    statsBuilder = new StatsBuilder();
+    indexBuilder.add(statsBuilder.stats(type, null, null, null, null, null));
+    indexBuilder.add(statsBuilder.stats(type, 0xFF, 0xFF));
+    indexBuilder.add(statsBuilder.stats(type, null, null, null));
+    indexBuilder.add(statsBuilder.stats(type, 0xEF, 0xEA, null));
+    indexBuilder.add(statsBuilder.stats(type, null, null));
+    indexBuilder.add(statsBuilder.stats(type, 0xEE, 42));
+    indexBuilder.add(statsBuilder.stats(type, null, null));
+    indexBuilder.add(statsBuilder.stats(type, null, null));
+    indexBuilder.add(statsBuilder.stats(type, 41, 0));
+    assertEquals(9, indexBuilder.getPageCount());
+    assertEquals(statsBuilder.getMinMaxSize(), indexBuilder.getMinMaxSize());
+    index = indexBuilder.build();
+    assertEquals(BoundaryOrder.DESCENDING, index.getBoundaryOrder());
+    assertCorrectNullCounts(index, 5, 0, 3, 1, 2, 0, 2, 2, 0);
+    assertCorrectNullPages(index, true, false, true, false, true, false, true, true, false);
+    assertCorrectValues(index.getMaxValues(), null, 0xFF, null, 0xEF, null, 0xEE, null, null, 41);
+    assertCorrectValues(index.getMinValues(), null, 0xFF, null, 0xEA, null, 42, null, null, 0);
+    assertCorrectFiltering(index, eq(column, 0xAB), 5);
+    assertCorrectFiltering(index, eq(column, null), 0, 2, 3, 4, 6, 7);
+    assertCorrectFiltering(index, notEq(column, 0xFF), 0, 2, 3, 4, 5, 6, 7, 8);
+    assertCorrectFiltering(index, notEq(column, null), 1, 3, 5, 8);
+    assertCorrectFiltering(index, gt(column, 0xFF));
+    assertCorrectFiltering(index, gtEq(column, 0xFF), 1);
+    assertCorrectFiltering(index, lt(column, 42), 8);
+    assertCorrectFiltering(index, ltEq(column, 42), 5, 8);
+    assertCorrectFiltering(index, userDefined(column, IntegerIsDivisableWith3.class), 1, 3, 5, 8);
+    assertCorrectFiltering(index, invert(userDefined(column, IntegerIsDivisableWith3.class)), 0, 2, 3, 4, 5, 6, 7,
+        8);
+  }
+
+  @Test
+  public void testBuildInt64() {
+    PrimitiveType type = Types.required(INT64).named("test_int64");
+    ColumnIndexBuilder builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
+    assertThat(builder, instanceOf(LongColumnIndexBuilder.class));
+    assertNull(builder.build()); // no pages added yet, so the test expects no index
+    LongColumn col = longColumn("test_col");
+    // Case 1: min/max values of consecutive pages follow no order; the test expects UNORDERED.
+    StatsBuilder sb = new StatsBuilder(); // each stats(...) call describes one page; null = one null value
+    builder.add(sb.stats(type, -4l, 10l));
+    builder.add(sb.stats(type, -11l, 7l, null));
+    builder.add(sb.stats(type, 2l, 2l, null, null));
+    builder.add(sb.stats(type, null, null, null));
+    builder.add(sb.stats(type, 1l, 2l));
+    builder.add(sb.stats(type, -21l, 8l));
+    assertEquals(6, builder.getPageCount());
+    assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
+    ColumnIndex columnIndex = builder.build();
+    assertEquals(BoundaryOrder.UNORDERED, columnIndex.getBoundaryOrder());
+    assertCorrectNullCounts(columnIndex, 0l, 1l, 2l, 3l, 0l, 0l);
+    assertCorrectNullPages(columnIndex, false, false, false, true, false, false);
+    assertCorrectValues(columnIndex.getMaxValues(), 10l, 7l, 2l, null, 2l, 8l);
+    assertCorrectValues(columnIndex.getMinValues(), -4l, -11l, 2l, null, 1l, -21l);
+    assertCorrectFiltering(columnIndex, eq(col, 0l), 0, 1, 5);
+    assertCorrectFiltering(columnIndex, eq(col, null), 1, 2, 3);
+    assertCorrectFiltering(columnIndex, notEq(col, 0l), 0, 1, 2, 3, 4, 5);
+    assertCorrectFiltering(columnIndex, notEq(col, null), 0, 1, 2, 4, 5);
+    assertCorrectFiltering(columnIndex, gt(col, 2l), 0, 1, 5);
+    assertCorrectFiltering(columnIndex, gtEq(col, 2l), 0, 1, 2, 4, 5);
+    assertCorrectFiltering(columnIndex, lt(col, -21l));
+    assertCorrectFiltering(columnIndex, ltEq(col, -21l), 5);
+    assertCorrectFiltering(columnIndex, userDefined(col, LongIsDivisableWith3.class), 0, 1, 5);
+    assertCorrectFiltering(columnIndex, invert(userDefined(col, LongIsDivisableWith3.class)), 0, 1, 2, 3, 4, 5);
+    // Case 2: min and max never decrease across the non-null pages; the test expects ASCENDING.
+    builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
+    sb = new StatsBuilder();
+    builder.add(sb.stats(type, null, null));
+    builder.add(sb.stats(type, -532l, -345l, null, null));
+    builder.add(sb.stats(type, -234l, -42l, null));
+    builder.add(sb.stats(type, null, null));
+    builder.add(sb.stats(type, null, null, null));
+    builder.add(sb.stats(type, -42l, 2l));
+    builder.add(sb.stats(type, null, null));
+    builder.add(sb.stats(type, -3l, 42l));
+    builder.add(sb.stats(type, null, null));
+    assertEquals(9, builder.getPageCount());
+    assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
+    columnIndex = builder.build();
+    assertEquals(BoundaryOrder.ASCENDING, columnIndex.getBoundaryOrder());
+    assertCorrectNullCounts(columnIndex, 2, 2, 1, 2, 3, 0, 2, 0, 2);
+    assertCorrectNullPages(columnIndex, true, false, false, true, true, false, true, false, true);
+    assertCorrectValues(columnIndex.getMaxValues(), null, -345l, -42l, null, null, 2l, null, 42l, null);
+    assertCorrectValues(columnIndex.getMinValues(), null, -532l, -234l, null, null, -42l, null, -3l, null);
+    assertCorrectFiltering(columnIndex, eq(col, -42l), 2, 5);
+    assertCorrectFiltering(columnIndex, eq(col, null), 0, 1, 2, 3, 4, 6, 8);
+    assertCorrectFiltering(columnIndex, notEq(col, -42l), 0, 1, 2, 3, 4, 5, 6, 7, 8);
+    assertCorrectFiltering(columnIndex, notEq(col, null), 1, 2, 5, 7);
+    assertCorrectFiltering(columnIndex, gt(col, 2l), 7);
+    assertCorrectFiltering(columnIndex, gtEq(col, 2l), 5, 7);
+    assertCorrectFiltering(columnIndex, lt(col, -42l), 1, 2);
+    assertCorrectFiltering(columnIndex, ltEq(col, -42l), 1, 2, 5);
+    assertCorrectFiltering(columnIndex, userDefined(col, LongIsDivisableWith3.class), 1, 2, 5, 7);
+    assertCorrectFiltering(columnIndex, invert(userDefined(col, LongIsDivisableWith3.class)), 0, 1, 2, 3, 4, 5, 6, 7,
+        8);
+    // Case 3: min and max never increase across the non-null pages; the test expects DESCENDING.
+    builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
+    sb = new StatsBuilder();
+    builder.add(sb.stats(type, null, null, null, null, null));
+    builder.add(sb.stats(type, 532l, 345l));
+    builder.add(sb.stats(type, null, null, null));
+    builder.add(sb.stats(type, 234l, 42l, null));
+    builder.add(sb.stats(type, null, null));
+    builder.add(sb.stats(type, 42l, -2l));
+    builder.add(sb.stats(type, null, null));
+    builder.add(sb.stats(type, null, null));
+    builder.add(sb.stats(type, -3l, -42l));
+    assertEquals(9, builder.getPageCount());
+    assertEquals(sb.getMinMaxSize(), builder.getMinMaxSize());
+    columnIndex = builder.build();
+    assertEquals(BoundaryOrder.DESCENDING, columnIndex.getBoundaryOrder());
+    assertCorrectNullCounts(columnIndex, 5, 0, 3, 1, 2, 0, 2, 2, 0);
+    assertCorrectNullPages(columnIndex, true, false, true, false, true, false, true, true, false);
+    assertCorrectValues(columnIndex.getMaxValues(), null, 532l, null, 234l, null, 42l, null, null, -3l);
+    assertCorrectValues(columnIndex.getMinValues(), null, 345l, null, 42l, null, -2l, null, null, -42l);
+    assertCorrectFiltering(columnIndex, eq(col, 0l), 5);
+    assertCorrectFiltering(columnIndex, eq(col, null), 0, 2, 3, 4, 6, 7);
+    assertCorrectFiltering(columnIndex, notEq(col, 0l), 0, 1, 2, 3, 4, 5, 6, 7, 8);
+    assertCorrectFiltering(columnIndex, notEq(col, null), 1, 3, 5, 8);
+    assertCorrectFiltering(columnIndex, gt(col, 2l), 1, 3, 5);
+    assertCorrectFiltering(columnIndex, gtEq(col, 2l), 1, 3, 5);
+    assertCorrectFiltering(columnIndex, lt(col, -42l));
+    assertCorrectFiltering(columnIndex, ltEq(col, -42l), 8);
+    assertCorrectFiltering(columnIndex, userDefined(col, LongIsDivisableWith3.class), 1, 3, 5, 8);
+    assertCorrectFiltering(columnIndex, invert(userDefined(col, LongIsDivisableWith3.class)), 0, 1, 2, 3, 4, 5, 6, 7,
+        8);
+  }
+
+  @Test
+  public void testStaticBuildInt64() {
+    // The static factory receives ready-made page lists (null flags, null counts, min and max buffers);
+    // the getters of the resulting index must hand the same content back.
+    PrimitiveType int64Type = Types.required(INT64).named("test_int64");
+    List<ByteBuffer> minBuffers = toBBList(null, 2l, null, 4l, null, 9l);
+    List<ByteBuffer> maxBuffers = toBBList(null, 3l, null, 15l, null, 10l);
+    ColumnIndex columnIndex = ColumnIndexBuilder.build(int64Type, BoundaryOrder.UNORDERED,
+        asList(true, false, true, false, true, false), asList(1l, 2l, 3l, 4l, 5l, 6l), minBuffers, maxBuffers);
+    assertEquals(BoundaryOrder.UNORDERED, columnIndex.getBoundaryOrder());
+    assertCorrectNullCounts(columnIndex, 1, 2, 3, 4, 5, 6);
+    assertCorrectNullPages(columnIndex, true, false, true, false, true, false);
+    assertCorrectValues(columnIndex.getMaxValues(), null, 3l, null, 15l, null, 10l);
+    assertCorrectValues(columnIndex.getMinValues(), null, 2l, null, 4l, null, 9l);
+  }
+
+  @Test
+  public void testNoOpBuilder() {
+    ColumnIndexBuilder noOp = ColumnIndexBuilder.getNoOpBuilder(); // expected to discard every add(...)
+    StatsBuilder statsFactory = new StatsBuilder();
+    noOp.add(statsFactory.stats(Types.required(BINARY).as(UTF8).named("test_binary_utf8"),
+        stringBinary("Jeltz"), stringBinary("Slartibartfast"), null, null));
+    noOp.add(statsFactory.stats(Types.required(BOOLEAN).named("test_boolean"), true, true, null, null));
+    noOp.add(statsFactory.stats(Types.required(DOUBLE).named("test_double"), null, null, null));
+    noOp.add(statsFactory.stats(Types.required(INT32).named("test_int32"), null, null));
+    noOp.add(statsFactory.stats(Types.required(INT64).named("test_int64"), -234l, -42l, null));
+    assertEquals(0, noOp.getPageCount());
+    assertEquals(0, noOp.getMinMaxSize());
+    assertNull(noOp.build());
+  }
+
+  /**
+   * Converts the given binaries to byte buffers, keeping their order.
+   * A {@code null} element is represented by an empty (zero-capacity) buffer.
+   */
+  private static List<ByteBuffer> toBBList(Binary... values) {
+    List<ByteBuffer> result = new ArrayList<>(values.length);
+    for (Binary element : values) {
+      result.add(element == null ? ByteBuffer.allocate(0) : element.toByteBuffer());
+    }
+    return result;
+  }
+
+  /**
+   * Converts the given booleans to byte buffers (via BytesUtils.booleanToBytes), keeping their order.
+   * A {@code null} element is represented by an empty (zero-capacity) buffer.
+   */
+  private static List<ByteBuffer> toBBList(Boolean... values) {
+    List<ByteBuffer> result = new ArrayList<>(values.length);
+    for (Boolean element : values) {
+      result.add(element == null ? ByteBuffer.allocate(0) : ByteBuffer.wrap(BytesUtils.booleanToBytes(element)));
+    }
+    return result;
+  }
+
+  /**
+   * Converts doubles to buffers of their raw long bits (BytesUtils.longToBytes); null becomes an empty buffer.
+   */
+  private static List<ByteBuffer> toBBList(Double... values) {
+    List<ByteBuffer> result = new ArrayList<>(values.length);
+    for (Double element : values) {
+      result.add(element == null ? ByteBuffer.allocate(0)
+          : ByteBuffer.wrap(BytesUtils.longToBytes(Double.doubleToLongBits(element))));
+    }
+    return result;
+  }
+
+  /**
+   * Converts floats to buffers of their raw int bits (BytesUtils.intToBytes); null becomes an empty buffer.
+   */
+  private static List<ByteBuffer> toBBList(Float... values) {
+    List<ByteBuffer> result = new ArrayList<>(values.length);
+    for (Float element : values) {
+      result.add(element == null ? ByteBuffer.allocate(0)
+          : ByteBuffer.wrap(BytesUtils.intToBytes(Float.floatToIntBits(element))));
+    }
+    return result;
+  }
+
+  /**
+   * Converts the given ints to byte buffers (via BytesUtils.intToBytes), keeping their order.
+   * A {@code null} element is represented by an empty (zero-capacity) buffer.
+   */
+  private static List<ByteBuffer> toBBList(Integer... values) {
+    List<ByteBuffer> result = new ArrayList<>(values.length);
+    for (Integer element : values) {
+      result.add(element == null ? ByteBuffer.allocate(0) : ByteBuffer.wrap(BytesUtils.intToBytes(element)));
+    }
+    return result;
+  }
+
+  /**
+   * Converts the given longs to byte buffers (via BytesUtils.longToBytes), keeping their order.
+   * A {@code null} element is represented by an empty (zero-capacity) buffer.
+   */
+  private static List<ByteBuffer> toBBList(Long... values) {
+    List<ByteBuffer> result = new ArrayList<>(values.length);
+    for (Long element : values) {
+      result.add(element == null ? ByteBuffer.allocate(0) : ByteBuffer.wrap(BytesUtils.longToBytes(element)));
+    }
+    return result;
+  }
+
+  private static Binary decimalBinary(String num) { // encodes only the unscaled value; the scale is dropped
+    return Binary.fromConstantByteArray(new BigDecimal(num).unscaledValue().toByteArray());
+  }
+
+  private static Binary stringBinary(String str) { // shorthand for Binary.fromString
+    return Binary.fromString(str);
+  }
+
+  /** Asserts page by page that each buffer holds the expected bytes; a null expectation means empty buffer. */
+  private static void assertCorrectValues(List<ByteBuffer> values, Binary... expectedValues) {
+    assertEquals(expectedValues.length, values.size());
+    for (int i = 0; i < expectedValues.length; ++i) {
+      ByteBuffer buf = values.get(i);
+      if (expectedValues[i] == null) {
+        assertFalse("The byte buffer should be empty for null pages", buf.hasRemaining());
+        continue;
+      }
+      assertArrayEquals("Invalid value for page " + i, expectedValues[i].getBytesUnsafe(), buf.array());
+    }
+  }
+
+  /** Asserts page by page that each buffer holds the expected 1-byte boolean; null means an empty buffer. */
+  private static void assertCorrectValues(List<ByteBuffer> values, Boolean... expectedValues) {
+    assertEquals(expectedValues.length, values.size());
+    for (int i = 0; i < expectedValues.length; ++i) {
+      ByteBuffer buf = values.get(i);
+      if (expectedValues[i] == null) {
+        assertFalse("The byte buffer should be empty for null pages", buf.hasRemaining());
+        continue;
+      }
+      assertEquals("The byte buffer should be 1 byte long for boolean", 1, buf.remaining());
+      assertEquals("Invalid value for page " + i, expectedValues[i].booleanValue(), buf.get(0) != 0);
+    }
+  }
+
+  /** Asserts page by page that each buffer holds the expected 8-byte double; null means an empty buffer. */
+  private static void assertCorrectValues(List<ByteBuffer> values, Double... expectedValues) {
+    assertEquals(expectedValues.length, values.size());
+    for (int i = 0; i < expectedValues.length; ++i) {
+      ByteBuffer buf = values.get(i);
+      if (expectedValues[i] == null) {
+        assertFalse("The byte buffer should be empty for null pages", buf.hasRemaining());
+        continue;
+      }
+      assertEquals("The byte buffer should be 8 bytes long for double", 8, buf.remaining());
+      assertTrue("Invalid value for page " + i, Double.compare(expectedValues[i].doubleValue(), buf.getDouble(0)) == 0);
+    }
+  }
+
+  /** Asserts each page's 4-byte float using Float.compare; a null expectation means an empty buffer. */
+  private static void assertCorrectValues(List<ByteBuffer> values, Float... expectedValues) {
+    assertEquals(expectedValues.length, values.size());
+    for (int i = 0; i < expectedValues.length; ++i) {
+      ByteBuffer buf = values.get(i);
+      if (expectedValues[i] == null) {
+        assertFalse("The byte buffer should be empty for null pages", buf.hasRemaining());
+        continue;
+      }
+      assertEquals("The byte buffer should be 4 bytes long for float", 4, buf.remaining()); // message said "double"
+      assertTrue("Invalid value for page " + i, Float.compare(expectedValues[i].floatValue(), buf.getFloat(0)) == 0);
+    }
+  }
+
+  /** Asserts page by page that each buffer holds the expected 4-byte int; null means an empty buffer. */
+  private static void assertCorrectValues(List<ByteBuffer> values, Integer... expectedValues) {
+    assertEquals(expectedValues.length, values.size());
+    for (int i = 0; i < expectedValues.length; ++i) {
+      ByteBuffer buf = values.get(i);
+      if (expectedValues[i] == null) {
+        assertFalse("The byte buffer should be empty for null pages", buf.hasRemaining());
+        continue;
+      }
+      assertEquals("The byte buffer should be 4 bytes long for int32", 4, buf.remaining());
+      assertEquals("Invalid value for page " + i, expectedValues[i].intValue(), buf.getInt(0));
+    }
+  }
+
+  /** Asserts each page's INT64 value (compared as a full long, not truncated to int); null means empty buffer. */
+  private static void assertCorrectValues(List<ByteBuffer> values, Long... expectedValues) {
+    assertEquals(expectedValues.length, values.size());
+    for (int i = 0; i < expectedValues.length; ++i) {
+      ByteBuffer buf = values.get(i);
+      if (expectedValues[i] == null) {
+        assertFalse("The byte buffer should be empty for null pages", buf.hasRemaining());
+        continue;
+      }
+      assertEquals("The byte buffer should be 8 bytes long for int64", 8, buf.remaining());
+      assertEquals("Invalid value for page " + i, expectedValues[i].longValue(), buf.getLong(0));
+    }
+  }
+
+  private static void assertCorrectNullCounts(ColumnIndex columnIndex, long... expectedNullCounts) {
+    List<Long> actualCounts = columnIndex.getNullCounts(); // compared entry by entry, one per page
+    assertEquals(expectedNullCounts.length, actualCounts.size());
+    for (int page = 0; page < expectedNullCounts.length; ++page) {
+      assertEquals("Invalid null count at page " + page, expectedNullCounts[page], actualCounts.get(page).longValue());
+    }
+  }
+
+  private static void assertCorrectNullPages(ColumnIndex columnIndex, boolean... expectedNullPages) {
+    List<Boolean> actualFlags = columnIndex.getNullPages(); // compared entry by entry, one per page
+    assertEquals(expectedNullPages.length, actualFlags.size());
+    for (int page = 0; page < expectedNullPages.length; ++page) {
+      assertEquals("Invalid null pages at page " + page, expectedNullPages[page], actualFlags.get(page).booleanValue());
+    }
+  }
+
+  private static class StatsBuilder { // creates per-page Statistics; a null value counts as one null
+    private long minMaxSize; // summed byte length of min and max over all stats holding a non-null value
+
+    Statistics<?> stats(PrimitiveType type, Object... values) {
+      Statistics<?> pageStats = Statistics.createStats(type);
+      for (Object element : values) {
+        if (element != null) {
+          switch (type.getPrimitiveTypeName()) {
+            case INT32:
+              pageStats.updateStats((int) element);
+              break;
+            case INT64:
+              pageStats.updateStats((long) element);
+              break;
+            case FLOAT:
+              pageStats.updateStats((float) element);
+              break;
+            case DOUBLE:
+              pageStats.updateStats((double) element);
+              break;
+            case BOOLEAN:
+              pageStats.updateStats((boolean) element);
+              break;
+            case BINARY:
+            case FIXED_LEN_BYTE_ARRAY:
+            case INT96:
+              pageStats.updateStats((Binary) element);
+              break;
+            default:
+              fail("Unsupported value type for stats: " + element.getClass());
+          }
+        } else {
+          pageStats.incrementNumNulls();
+        }
+      }
+      if (pageStats.hasNonNullValue()) {
+        minMaxSize += pageStats.getMinBytes().length;
+        minMaxSize += pageStats.getMaxBytes().length;
+      }
+      return pageStats;
+    }
+
+    long getMinMaxSize() {
+      return minMaxSize;
+    }
+  }
+
+  private static void assertCorrectFiltering(ColumnIndex ci, FilterPredicate predicate, int... expectedIndexes) {
+    TestIndexIterator.assertEquals(predicate.accept(ci), expectedIndexes); // accept(ci) result vs. expected indexes
+  }
+}
diff --git a/parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestIndexIterator.java b/parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestIndexIterator.java
new file mode 100644
index 0000000000..d9047f26d4
--- /dev/null
+++ b/parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestIndexIterator.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.internal.column.columnindex;
+
+import static org.junit.Assert.assertArrayEquals;
+
+import java.util.Arrays;
+import java.util.PrimitiveIterator;
+
+import org.junit.Test;
+
+import it.unimi.dsi.fastutil.ints.IntArrayList;
+import it.unimi.dsi.fastutil.ints.IntList;
+
+/**
+ * Unit test for {@link IndexIterator}.
+ */
+public class TestIndexIterator {
+  @Test
+  public void testAll() {
+    assertEquals(IndexIterator.all(10), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9); // every index below 10
+  }
+
+  @Test
+  public void testFilter() {
+    assertEquals(IndexIterator.filter(30, i -> i % 3 == 0), 0, 3, 6, 9, 12, 15, 18, 21, 24, 27); // multiples of 3
+  }
+
+  @Test
+  public void testFilterTranslate() {
+    assertEquals(IndexIterator.filterTranslate(20, i -> i < 5, Math::negateExact), 0, -1, -2, -3, -4);
+  }
+
+  @Test
+  public void testRangeTranslate() {
+    assertEquals(IndexIterator.rangeTranslate(11, 18, n -> n - 10), 1, 2, 3, 4, 5, 6, 7, 8); // 11..18 shifted by -10
+  }
+
+  static void assertEquals(PrimitiveIterator.OfInt actualIt, int... expectedValues) {
+    IntList drained = new IntArrayList();
+    actualIt.forEachRemaining((int index) -> drained.add(index)); // consumes whatever the iterator has left
+    int[] actualValues = drained.toIntArray();
+    String message = "ExpectedValues: " + Arrays.toString(expectedValues)
+        + " ActualValues: " + Arrays.toString(actualValues);
+    assertArrayEquals(message, expectedValues, actualValues);
+  }
+}
diff --git a/parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestOffsetIndexBuilder.java b/parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestOffsetIndexBuilder.java
new file mode 100644
index 0000000000..1e1275c84f
--- /dev/null
+++ b/parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestOffsetIndexBuilder.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.internal.column.columnindex;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+import org.apache.parquet.internal.column.columnindex.OffsetIndex;
+import org.apache.parquet.internal.column.columnindex.OffsetIndexBuilder;
+import org.junit.Test;
+
+/**
+ * Tests for {@link OffsetIndexBuilder}.
+ */
+public class TestOffsetIndexBuilder {
+  @Test
+  public void testBuilderWithSizeAndRowCount() {
+    OffsetIndexBuilder builder = OffsetIndexBuilder.getBuilder();
+    assertNull(builder.build());
+    assertNull(builder.build(1234));
+    builder.add(1000, 10);
+    builder.add(2000, 19);
+    builder.add(3000, 27);
+    builder.add(1200, 9);
+    assertCorrectValues(builder.build(),
+        0, 1000, 0,
+        1000, 2000, 10,
+        3000, 3000, 29,
+        6000, 1200, 56);
+    assertCorrectValues(builder.build(10000),
+        10000, 1000, 0,
+        11000, 2000, 10,
+        13000, 3000, 29,
+        16000, 1200, 56);
+  }
+
+  @Test
+  public void testNoOpBuilderWithSizeAndRowCount() {
+    OffsetIndexBuilder builder = OffsetIndexBuilder.getNoOpBuilder();
+    builder.add(1, 2);
+    builder.add(3, 4);
+    builder.add(5, 6);
+    builder.add(7, 8);
+    assertNull(builder.build());
+    assertNull(builder.build(1000));
+  }
+
+  @Test
+  public void testBuilderWithOffsetSizeIndex() {
+    OffsetIndexBuilder builder = OffsetIndexBuilder.getBuilder();
+    assertNull(builder.build());
+    assertNull(builder.build(1234));
+    builder.add(1000, 10000, 0);
+    builder.add(22000, 12000, 100);
+    builder.add(48000, 22000, 211);
+    builder.add(90000, 30000, 361);
+    assertCorrectValues(builder.build(),
+        1000, 10000, 0,
+        22000, 12000, 100,
+        48000, 22000, 211,
+        90000, 30000, 361);
+    assertCorrectValues(builder.build(100000),
+        101000, 10000, 0,
+        122000, 12000, 100,
+        148000, 22000, 211,
+        190000, 30000, 361);
+  }
+
+  @Test
+  public void testNoOpBuilderWithOffsetSizeIndex() {
+    OffsetIndexBuilder builder = OffsetIndexBuilder.getNoOpBuilder();
+    builder.add(1, 2, 3);
+    builder.add(4, 5, 6);
+    builder.add(7, 8, 9);
+    builder.add(10, 11, 12);
+    assertNull(builder.build());
+    assertNull(builder.build(1000));
+  }
+
+  /**
+   * Verifies offset, compressed size and first row index of every page, plus the last row index: the row
+   * before the next page's first row, or 999 for the final page (a total of 1000 rows is passed in).
+   */
+  private void assertCorrectValues(OffsetIndex offsetIndex, long... offset_size_rowIndex_triplets) {
+    long[] t = offset_size_rowIndex_triplets;
+    assertEquals("Values must come in (offset, size, rowIndex) triplets", 0, t.length % 3);
+    int pageCount = t.length / 3;
+    assertEquals("Invalid pageCount", pageCount, offsetIndex.getPageCount());
+    for (int i = 0; i < pageCount; ++i) {
+      assertEquals("Invalid offsetIndex at page " + i, t[3 * i], offsetIndex.getOffset(i));
+      assertEquals("Invalid compressedPageSize at page " + i, t[3 * i + 1], offsetIndex.getCompressedPageSize(i));
+      assertEquals("Invalid firstRowIndex at page " + i, t[3 * i + 2], offsetIndex.getFirstRowIndex(i));
+      long expectedLastRowIndex = (i < pageCount - 1) ? (t[3 * i + 5] - 1) : 999;
+      assertEquals("Invalid lastRowIndex at page " + i, expectedLastRowIndex, offsetIndex.getLastRowIndex(i, 1000));
+    }
+  }
+}
diff --git a/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestColumnIndexFilter.java b/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestColumnIndexFilter.java
new file mode 100644
index 0000000000..ae27214582
--- /dev/null
+++ b/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestColumnIndexFilter.java
@@ -0,0 +1,464 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.internal.filter2.columnindex;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.apache.parquet.filter2.predicate.FilterApi.and;
+import static org.apache.parquet.filter2.predicate.FilterApi.binaryColumn;
+import static org.apache.parquet.filter2.predicate.FilterApi.booleanColumn;
+import static org.apache.parquet.filter2.predicate.FilterApi.doubleColumn;
+import static org.apache.parquet.filter2.predicate.FilterApi.eq;
+import static org.apache.parquet.filter2.predicate.FilterApi.gt;
+import static org.apache.parquet.filter2.predicate.FilterApi.gtEq;
+import static org.apache.parquet.filter2.predicate.FilterApi.intColumn;
+import static org.apache.parquet.filter2.predicate.FilterApi.lt;
+import static org.apache.parquet.filter2.predicate.FilterApi.ltEq;
+import static org.apache.parquet.filter2.predicate.FilterApi.notEq;
+import static org.apache.parquet.filter2.predicate.FilterApi.or;
+import static org.apache.parquet.filter2.predicate.FilterApi.userDefined;
+import static org.apache.parquet.filter2.predicate.LogicalInverter.invert;
+import static org.apache.parquet.internal.column.columnindex.BoundaryOrder.ASCENDING;
+import static org.apache.parquet.internal.column.columnindex.BoundaryOrder.DESCENDING;
+import static org.apache.parquet.internal.column.columnindex.BoundaryOrder.UNORDERED;
+import static org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.calculateRowRanges;
+import static org.apache.parquet.io.api.Binary.fromString;
+import static org.apache.parquet.schema.OriginalType.UTF8;
+import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
+import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.DOUBLE;
+import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;
+import static org.apache.parquet.schema.Types.optional;
+import static org.junit.Assert.assertArrayEquals;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.LongStream;
+
+import org.apache.parquet.bytes.BytesUtils;
+import org.apache.parquet.filter2.compat.FilterCompat;
+import org.apache.parquet.filter2.predicate.Statistics;
+import org.apache.parquet.filter2.predicate.UserDefinedPredicate;
+import org.apache.parquet.hadoop.metadata.ColumnPath;
+import org.apache.parquet.internal.column.columnindex.BoundaryOrder;
+import org.apache.parquet.internal.column.columnindex.ColumnIndex;
+import org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder;
+import org.apache.parquet.internal.column.columnindex.OffsetIndex;
+import org.apache.parquet.internal.column.columnindex.OffsetIndexBuilder;
+import org.apache.parquet.internal.column.columnindex.TestColumnIndexBuilder.BinaryUtf8StartsWithB;
+import org.apache.parquet.internal.column.columnindex.TestColumnIndexBuilder.DoubleIsInteger;
+import org.apache.parquet.internal.column.columnindex.TestColumnIndexBuilder.IntegerIsDivisableWith3;
+import org.apache.parquet.schema.PrimitiveType;
+import org.junit.Test;
+
+import it.unimi.dsi.fastutil.longs.LongArrayList;
+import it.unimi.dsi.fastutil.longs.LongList;
+
+/**
+ * Unit tests of {@link ColumnIndexFilter}
+ */
+public class TestColumnIndexFilter {
+  /** Fluent test helper: gathers per-page null flags, null counts and min/max buffers for a column index. */
+  private static class CIBuilder {
+    private static final ByteBuffer EMPTY = ByteBuffer.wrap(new byte[0]);
+    private final PrimitiveType type;
+    private final BoundaryOrder order;
+    private List<Boolean> nullPages = new ArrayList<>();
+    private List<Long> nullCounts = new ArrayList<>();
+    private List<ByteBuffer> minValues = new ArrayList<>();
+    private List<ByteBuffer> maxValues = new ArrayList<>();
+
+    CIBuilder(PrimitiveType type, BoundaryOrder order) {
+      this.type = type;
+      this.order = order;
+    }
+
+    /** Registers a null page; both its min and max are the shared empty buffer. */
+    CIBuilder addNullPage(long nullCount) {
+      return append(true, nullCount, EMPTY, EMPTY);
+    }
+
+    /** Registers a non-null page whose bounds are ints serialized with BytesUtils.intToBytes. */
+    CIBuilder addPage(long nullCount, int min, int max) {
+      return append(false, nullCount, ByteBuffer.wrap(BytesUtils.intToBytes(min)),
+          ByteBuffer.wrap(BytesUtils.intToBytes(max)));
+    }
+
+    /** Registers a non-null page whose bounds are the UTF-8 bytes of the given strings. */
+    CIBuilder addPage(long nullCount, String min, String max) {
+      return append(false, nullCount, ByteBuffer.wrap(min.getBytes(UTF_8)), ByteBuffer.wrap(max.getBytes(UTF_8)));
+    }
+
+    /** Registers a non-null page whose bounds are the raw long bits of the given doubles. */
+    CIBuilder addPage(long nullCount, double min, double max) {
+      return append(false, nullCount, ByteBuffer.wrap(BytesUtils.longToBytes(Double.doubleToLongBits(min))),
+          ByteBuffer.wrap(BytesUtils.longToBytes(Double.doubleToLongBits(max))));
+    }
+
+    ColumnIndex build() {
+      return ColumnIndexBuilder.build(type, order, nullPages, nullCounts, minValues, maxValues);
+    }
+
+    /** Appends one entry to each of the four parallel per-page lists and returns this builder. */
+    private CIBuilder append(boolean nullPage, long nullCount, ByteBuffer min, ByteBuffer max) {
+      nullPages.add(nullPage);
+      nullCounts.add(nullCount);
+      minValues.add(min);
+      maxValues.add(max);
+      return this;
+    }
+  }
+
+  /** Fluent test helper that builds an {@link OffsetIndex} from per-page row counts. */
+  private static class OIBuilder {
+    private final OffsetIndexBuilder delegate = OffsetIndexBuilder.getBuilder();
+    OIBuilder addPage(long rowCount) {
+      delegate.add(1234, rowCount); // the first argument is the same constant for every page
+      return this;
+    }
+
+    OffsetIndex build() {
+      return delegate.build();
+    }
+  }
+
+  /** User-defined predicate that keeps every Integer: canDrop is always false, inverseCanDrop always true. */
+  public static class AnyInt extends UserDefinedPredicate<Integer> {
+    @Override
+    public boolean keep(Integer value) {
+      return true;
+    }
+
+    @Override
+    public boolean canDrop(Statistics<Integer> statistics) {
+      return false;
+    }
+
+    @Override
+    public boolean inverseCanDrop(Statistics<Integer> statistics) {
+      return true;
+    }
+
+  }
+
+  /**
+   * <pre>
+   * row     column1        column2        column3        column4 (no column index)
+   *      ------0------  ------0------  ------0------  ------0------
+   * 0.   1              Zulu           2.03
+   *      ------1------  ------1------  ------1------  ------1------
+   * 1.   2              Yankee         4.67
+   * 2.   3              Xray           3.42
+   * 3.   4              Whiskey        8.71
+   *                     ------2------                 ------2------
+   * 4.   5              Victor         0.56
+   * 5.   6              Uniform        4.30
+   *                                    ------2------  ------3------
+   * 6.   null           null           null
+   *      ------2------                                ------4------
+   * 7.   7              Tango          3.50
+   *                     ------3------
+   * 8.   7              null           3.14
+   *      ------3------
+   * 9.   7              null           null
+   *                                    ------3------
+   * 10.  null           null           9.99
+   *                     ------4------
+   * 11.  8              Sierra         8.78
+   *                                                   ------5------
+   * 12.  9              Romeo          9.56
+   * 13.  10             Quebec         2.71
+   *      ------4------
+   * 14.  11             Papa           5.71
+   * 15.  12             Oscar          4.09
+   *                     ------5------  ------4------  ------6------
+   * 16.  13             November       null
+   * 17.  14             Mike           null
+   * 18.  15             Lima           0.36
+   * 19.  16             Kilo           2.94
+   * 20.  17             Juliett        4.23
+   *      ------5------  ------6------                 ------7------
+   * 21.  18             India          null
+   * 22.  19             Hotel          5.32
+   *                                    ------5------
+   * 23.  20             Golf           4.17
+   * 24.  21             Foxtrot        7.92
+   * 25.  22             Echo           7.95
+   *                                    ------6------
+   * 26.  23             Delta          null
+   *      ------6------
+   * 27.  24             Charlie        null
+   *                                                   ------8------
+   * 28.  25             Bravo          null
+   *                     ------7------
+   * 29.  26             Alfa           null
+   * </pre>
+   */
+  private static final long TOTAL_ROW_COUNT = 30;
+  private static final ColumnIndex COLUMN1_CI = new CIBuilder(optional(INT32).named("column1"), ASCENDING)
+      .addPage(0, 1, 1) // arguments of each addPage: nullCount, min, max
+      .addPage(1, 2, 6)
+      .addPage(0, 7, 7)
+      .addPage(1, 7, 10)
+      .addPage(0, 11, 17)
+      .addPage(0, 18, 23)
+      .addPage(0, 24, 26)
+      .build();
+  private static final OffsetIndex COLUMN1_OI = new OIBuilder()
+      .addPage(1) // argument of each addPage: row count of the page; the 7 pages add up to 30 rows
+      .addPage(6)
+      .addPage(2)
+      .addPage(5)
+      .addPage(7)
+      .addPage(6)
+      .addPage(3)
+      .build();
+  private static final ColumnIndex COLUMN2_CI = new CIBuilder(optional(BINARY).as(UTF8).named("column2"), DESCENDING)
+      .addPage(0, "Zulu", "Zulu") // arguments of each addPage: nullCount, min, max
+      .addPage(0, "Whiskey", "Yankee")
+      .addPage(1, "Tango", "Victor")
+      .addNullPage(3) // null page with a null count of 3
+      .addPage(0, "Oscar", "Sierra")
+      .addPage(0, "Juliett", "November")
+      .addPage(0, "Bravo", "India")
+      .addPage(0, "Alfa", "Alfa")
+      .build();
+  private static final OffsetIndex COLUMN2_OI = new OIBuilder()
+      .addPage(1) // argument of each addPage: row count of the page; the 8 pages add up to 30 rows
+      .addPage(3)
+      .addPage(4)
+      .addPage(3)
+      .addPage(5)
+      .addPage(5)
+      .addPage(8)
+      .addPage(1)
+      .build();
+  private static final ColumnIndex COLUMN3_CI = new CIBuilder(optional(DOUBLE).named("column3"), UNORDERED)
+      .addPage(0, 2.03, 2.03)
+      .addPage(0, 0.56, 8.71)
+      .addPage(2, 3.14, 3.50)
+      .addPage(0, 2.71, 9.99)
+      .addPage(3, 0.36, 5.32)
+      .addPage(0, 4.17, 7.95)
+      .addNullPage(4)
+      .build();
+  private static final OffsetIndex COLUMN3_OI = new OIBuilder()
+      .addPage(1)
+      .addPage(5)
+      .addPage(4)
+      .addPage(6)
+      .addPage(7)
+      .addPage(3)
+      .addPage(4)
+      .build();
+  private static final ColumnIndex COLUMN4_CI = null; // column4 only gets an offset index (next field), no column index
+  private static final OffsetIndex COLUMN4_OI = new OIBuilder()
+      .addPage(1)
+      .addPage(3)
+      .addPage(2)
+      .addPage(1)
+      .addPage(5)
+      .addPage(4)
+      .addPage(5)
+      .addPage(7)
+      .addPage(2)
+      .build();
+  private static final ColumnIndexStore STORE = new ColumnIndexStore() { // serves the COLUMNn_CI / COLUMNn_OI fixtures
+    @Override
+    public ColumnIndex getColumnIndex(ColumnPath column) {
+      switch (column.toDotString()) {
+        case "column1":
+          return COLUMN1_CI;
+        case "column2":
+          return COLUMN2_CI;
+        case "column3":
+          return COLUMN3_CI;
+        case "column4":
+          return COLUMN4_CI; // null: column4 has no column index
+        default:
+          return null; // any other column has no column index either
+      }
+    }
+
+    @Override
+    public OffsetIndex getOffsetIndex(ColumnPath column) {
+      switch (column.toDotString()) {
+        case "column1":
+          return COLUMN1_OI;
+        case "column2":
+          return COLUMN2_OI;
+        case "column3":
+          return COLUMN3_OI;
+        case "column4":
+          return COLUMN4_OI;
+        default:
+          throw new MissingOffsetIndexException(column); // unlike a missing column index, this is reported as an error
+      }
+    }
+  };
+
+  /** Collects the result of {@code ColumnPath.fromDotString} for every given name into a new {@link HashSet}. */
+  private static Set<ColumnPath> paths(String... columns) {
+    Set<ColumnPath> result = new HashSet<>();
+    // Arrays.asList is only a view over the varargs array, so nothing is copied before the conversion
+    Arrays.asList(columns).forEach(name -> result.add(ColumnPath.fromDotString(name)));
+    return result;
+  }
+
+  /** Asserts that {@code ranges} iterates exactly the rows 0 (inclusive) to rowCount (exclusive), in order. */
+  private static void assertAllRows(RowRanges ranges, long rowCount) {
+    long[] expected = LongStream.range(0, rowCount).toArray();
+    LongList actual = new LongArrayList();
+    ranges.iterator().forEachRemaining((long row) -> actual.add(row));
+    assertArrayEquals(new LongArrayList(expected) + " != " + actual, expected, actual.toLongArray());
+  }
+
+  private static void assertRows(RowRanges ranges, long... expectedRows) {
+    LongList seen = new LongArrayList(); // rows in the order the iterator yields them
+    ranges.iterator().forEachRemaining((long row) -> seen.add(row));
+    assertArrayEquals(Arrays.toString(expectedRows) + " != " + seen, expectedRows, seen.toLongArray());
+  }
+
+  @Test
+  public void testFiltering() {
+    Set<ColumnPath> paths = paths("column1", "column2", "column3", "column4");
+
+    assertAllRows(
+        calculateRowRanges(FilterCompat.get(
+            userDefined(intColumn("column1"), AnyInt.class)), STORE, paths, TOTAL_ROW_COUNT),
+        TOTAL_ROW_COUNT);
+    assertRows(calculateRowRanges(FilterCompat.get(
+        and(
+            and(
+                eq(intColumn("column1"), null),
+                eq(binaryColumn("column2"), null)),
+            and(
+                eq(doubleColumn("column3"), null),
+                eq(booleanColumn("column4"), null)))), // STORE has no column index for column4 (COLUMN4_CI is null)
+        STORE, paths, TOTAL_ROW_COUNT),
+        6, 9);
+    assertRows(calculateRowRanges(FilterCompat.get(
+        and(
+            and(
+                notEq(intColumn("column1"), null),
+                notEq(binaryColumn("column2"), null)),
+            and(
+                notEq(doubleColumn("column3"), null),
+                notEq(booleanColumn("column4"), null)))),
+        STORE, paths, TOTAL_ROW_COUNT),
+        0, 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25);
+    assertRows(calculateRowRanges(FilterCompat.get(
+        or(
+            and(
+                lt(intColumn("column1"), 20),
+                gtEq(binaryColumn("column2"), fromString("Quebec"))),
+            and(
+                gt(doubleColumn("column3"), 5.32),
+                ltEq(binaryColumn("column4"), fromString("XYZ"))))), // NOTE(review): column4 is boolean above, binary here - confirm
+        STORE, paths, TOTAL_ROW_COUNT),
+        0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 23, 24, 25);
+    assertRows(calculateRowRanges(FilterCompat.get(
+        and(
+            and(
+                gtEq(intColumn("column1"), 7),
+                gt(binaryColumn("column2"), fromString("India"))),
+            and(
+                eq(doubleColumn("column3"), null),
+                notEq(binaryColumn("column4"), null)))),
+        STORE, paths, TOTAL_ROW_COUNT),
+        7, 16, 17, 18, 19, 20);
+    assertRows(calculateRowRanges(FilterCompat.get(
+        and(
+            or(
+                invert(userDefined(intColumn("column1"), AnyInt.class)),
+                eq(binaryColumn("column2"), fromString("Echo"))),
+            eq(doubleColumn("column3"), 6.0))),
+        STORE, paths, TOTAL_ROW_COUNT),
+        23, 24, 25);
+    assertRows(calculateRowRanges(FilterCompat.get(
+        and(
+            userDefined(intColumn("column1"), IntegerIsDivisableWith3.class),
+            and(
+                userDefined(binaryColumn("column2"), BinaryUtf8StartsWithB.class),
+                userDefined(doubleColumn("column3"), DoubleIsInteger.class)))),
+        STORE, paths, TOTAL_ROW_COUNT),
+        21, 22, 23, 24, 25);
+    assertRows(calculateRowRanges(FilterCompat.get(
+        and(
+            and(
+                gtEq(intColumn("column1"), 7),
+                lt(intColumn("column1"), 11)), // the same column1 range is reused by the other tests of this class
+            and(
+                gt(binaryColumn("column2"), fromString("Romeo")),
+                ltEq(binaryColumn("column2"), fromString("Tango"))))),
+        STORE, paths, TOTAL_ROW_COUNT),
+        7, 11, 12, 13);
+  }
+
+  @Test
+  public void testFilteringOnMissingColumns() {
+    Set<ColumnPath> paths = paths("column1", "column2", "column3", "column4"); // "missing_column" is not listed
+
+    // Missing column filter is always true
+    assertAllRows(calculateRowRanges(FilterCompat.get(
+        notEq(intColumn("missing_column"), 0)),
+        STORE, paths, TOTAL_ROW_COUNT),
+        TOTAL_ROW_COUNT);
+    assertRows(calculateRowRanges(FilterCompat.get(
+        and(
+            and(
+                gtEq(intColumn("column1"), 7),
+                lt(intColumn("column1"), 11)),
+            eq(binaryColumn("missing_column"), null))),
+        STORE, paths, TOTAL_ROW_COUNT),
+        7, 8, 9, 10, 11, 12, 13);
+
+    // Missing column filter is always false
+    assertRows(calculateRowRanges(FilterCompat.get(
+        or(
+            and(
+                gtEq(intColumn("column1"), 7),
+                lt(intColumn("column1"), 11)),
+            notEq(binaryColumn("missing_column"), null))),
+        STORE, paths, TOTAL_ROW_COUNT),
+        7, 8, 9, 10, 11, 12, 13);
+    assertRows(calculateRowRanges(FilterCompat.get(
+        gt(intColumn("missing_column"), 0)),
+        STORE, paths, TOTAL_ROW_COUNT)); // no expected rows are passed: the result has to be empty
+  }
+
+  @Test
+  public void testFilteringWithMissingOffsetIndex() {
+    Set<ColumnPath> paths = paths("column1", "column2", "column3", "column4", "column_wo_oi");
+
+    assertAllRows(calculateRowRanges(FilterCompat.get(
+        and(
+            and(
+                gtEq(intColumn("column1"), 7),
+                lt(intColumn("column1"), 11)),
+            and(
+                gt(binaryColumn("column2"), fromString("Romeo")),
+                ltEq(binaryColumn("column_wo_oi"), fromString("Tango"))))), // STORE.getOffsetIndex throws for this column
+        STORE, paths, TOTAL_ROW_COUNT),
+        TOTAL_ROW_COUNT); // expectation: every row is kept although the other predicates are selective
+  }
+
+}
diff --git a/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestRowRanges.java b/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestRowRanges.java
new file mode 100644
index 0000000000..71b8844990
--- /dev/null
+++ b/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestRowRanges.java
@@ -0,0 +1,155 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.internal.filter2.columnindex;
+
+import static org.apache.parquet.internal.filter2.columnindex.RowRanges.intersection;
+import static org.apache.parquet.internal.filter2.columnindex.RowRanges.union;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.util.Arrays;
+import java.util.PrimitiveIterator;
+
+import org.apache.parquet.internal.column.columnindex.OffsetIndexBuilder;
+import org.junit.Test;
+
+import it.unimi.dsi.fastutil.longs.LongArrayList;
+import it.unimi.dsi.fastutil.longs.LongList;
+
+/**
+ * Unit test for {@link RowRanges}
+ */
+public class TestRowRanges {
+  /**
+   * Builds a {@link RowRanges} out of inclusive {@code from, to} row index pairs. Each pair is registered in an
+   * offset index as one entry for {@code from} followed by a filler entry for {@code to + 1}; only the
+   * even-numbered entries (the requested ranges) are then selected as pages.
+   *
+   * @param rowIndexes alternating first and last row indexes; passing no values yields {@link RowRanges#EMPTY}
+   * @return the row ranges created from the selected pages
+   * @throws IllegalArgumentException if an odd number of values is passed, i.e. the last pair is incomplete
+   */
+  private static RowRanges buildRanges(long... rowIndexes) {
+    if (rowIndexes.length == 0) {
+      return RowRanges.EMPTY;
+    }
+    if (rowIndexes.length % 2 != 0) {
+      // Fail with a clear message instead of an ArrayIndexOutOfBoundsException from the loop below
+      throw new IllegalArgumentException("Expected from/to pairs but got " + rowIndexes.length + " row indexes");
+    }
+    OffsetIndexBuilder builder = OffsetIndexBuilder.getBuilder();
+    for (int i = 0; i < rowIndexes.length; i += 2) {
+      // Only the third argument varies; presumably it is the first row index of the entry - confirm
+      builder.add(0, 0, rowIndexes[i]);
+      builder.add(0, 0, rowIndexes[i + 1] + 1);
+    }
+    // Select entries 0, 2, 4, ...; fully qualified so that the import block of the file stays untouched
+    PrimitiveIterator.OfInt pageIndexes =
+        java.util.stream.IntStream.range(0, rowIndexes.length / 2).map(pair -> 2 * pair).iterator();
+    return RowRanges.create(rowIndexes[rowIndexes.length - 1], pageIndexes, builder.build());
+  }
+
+  private static void assertAllRowsEqual(PrimitiveIterator.OfLong actualIt, long... expectedValues) {
+    LongList actualList = new LongArrayList(); // drain the iterator so that it can be compared as an array
+    actualIt.forEachRemaining((long value) -> actualList.add(value));
+    assertArrayEquals(Arrays.toString(expectedValues) + " != " + actualList, expectedValues, actualList.toLongArray());
+  }
+
+  @Test
+  public void testCreate() {
+    RowRanges ranges = buildRanges(
+        1, 2,
+        3, 4,
+        6, 7, // shares row 7 with the next pair; the expected rows below list 7 only once
+        7, 10,
+        15, 17);
+    assertAllRowsEqual(ranges.iterator(), 1, 2, 3, 4, 6, 7, 8, 9, 10, 15, 16, 17);
+    assertEquals(12, ranges.rowCount());
+    assertTrue(ranges.isOverlapping(4, 5));
+    assertFalse(ranges.isOverlapping(5, 5)); // row 5 lies in the gap between rows 4 and 6
+    assertTrue(ranges.isOverlapping(10, 14));
+    assertFalse(ranges.isOverlapping(11, 14)); // rows 11..14 lie in the gap between rows 10 and 15
+    assertFalse(ranges.isOverlapping(18, Long.MAX_VALUE));
+
+    ranges = RowRanges.createSingle(5); // expected to cover rows 0..4, see the next assertion
+    assertAllRowsEqual(ranges.iterator(), 0, 1, 2, 3, 4);
+    assertEquals(5, ranges.rowCount());
+    assertTrue(ranges.isOverlapping(0, 100));
+    assertFalse(ranges.isOverlapping(5, Long.MAX_VALUE));
+
+    ranges = RowRanges.EMPTY;
+    assertAllRowsEqual(ranges.iterator());
+    assertEquals(0, ranges.rowCount());
+    assertFalse(ranges.isOverlapping(0, Long.MAX_VALUE));
+  }
+
+  @Test
+  public void testUnion() {
+    RowRanges ranges1 = buildRanges( // inclusive from, to pairs
+        2, 5,
+        7, 9,
+        14, 14,
+        20, 24);
+    RowRanges ranges2 = buildRanges(
+        1, 2,
+        4, 5,
+        11, 12,
+        14, 15,
+        21, 22);
+    RowRanges empty = buildRanges(); // no pairs: buildRanges returns RowRanges.EMPTY
+    assertAllRowsEqual(union(ranges1, ranges2).iterator(), 1, 2, 3, 4, 5, 7, 8, 9, 11, 12, 14, 15, 20, 21, 22, 23, 24);
+    assertAllRowsEqual(union(ranges2, ranges1).iterator(), 1, 2, 3, 4, 5, 7, 8, 9, 11, 12, 14, 15, 20, 21, 22, 23, 24);
+    assertAllRowsEqual(union(ranges1, ranges1).iterator(), 2, 3, 4, 5, 7, 8, 9, 14, 20, 21, 22, 23, 24);
+    assertAllRowsEqual(union(ranges1, empty).iterator(), 2, 3, 4, 5, 7, 8, 9, 14, 20, 21, 22, 23, 24);
+    assertAllRowsEqual(union(empty, ranges1).iterator(), 2, 3, 4, 5, 7, 8, 9, 14, 20, 21, 22, 23, 24);
+    assertAllRowsEqual(union(ranges2, ranges2).iterator(), 1, 2, 4, 5, 11, 12, 14, 15, 21, 22);
+    assertAllRowsEqual(union(ranges2, empty).iterator(), 1, 2, 4, 5, 11, 12, 14, 15, 21, 22);
+    assertAllRowsEqual(union(empty, ranges2).iterator(), 1, 2, 4, 5, 11, 12, 14, 15, 21, 22);
+    assertAllRowsEqual(union(empty, empty).iterator()); // no expected values: the union has to be empty
+  }
+
+  @Test
+  public void testIntersection() {
+    RowRanges ranges1 = buildRanges( // inclusive from, to pairs
+        2, 5,
+        7, 9,
+        14, 14,
+        20, 24);
+    RowRanges ranges2 = buildRanges(
+        1, 2,
+        6, 7,
+        9, 9, // single-row range
+        11, 12,
+        14, 15,
+        21, 22);
+    RowRanges empty = buildRanges(); // no pairs: buildRanges returns RowRanges.EMPTY
+    assertAllRowsEqual(intersection(ranges1, ranges2).iterator(), 2, 7, 9, 14, 21, 22);
+    assertAllRowsEqual(intersection(ranges2, ranges1).iterator(), 2, 7, 9, 14, 21, 22);
+    assertAllRowsEqual(intersection(ranges1, ranges1).iterator(), 2, 3, 4, 5, 7, 8, 9, 14, 20, 21, 22, 23, 24);
+    assertAllRowsEqual(intersection(ranges1, empty).iterator()); // no expected values: the result has to be empty
+    assertAllRowsEqual(intersection(empty, ranges1).iterator());
+    assertAllRowsEqual(intersection(ranges2, ranges2).iterator(), 1, 2, 6, 7, 9, 11, 12, 14, 15, 21, 22);
+    assertAllRowsEqual(intersection(ranges2, empty).iterator());
+    assertAllRowsEqual(intersection(empty, ranges2).iterator());
+    assertAllRowsEqual(intersection(empty, empty).iterator());
+  }
+
+}
diff --git a/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java b/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java
index d8536012ba..fa200ab424 100644
--- a/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java
+++ b/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java
@@ -19,6 +19,7 @@
 package org.apache.parquet.parser;
 
 import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MILLIS;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.NANOS;
 import static org.apache.parquet.schema.LogicalTypeAnnotation.intType;
 import static org.apache.parquet.schema.LogicalTypeAnnotation.stringType;
 import static org.apache.parquet.schema.LogicalTypeAnnotation.timeType;
@@ -271,7 +272,9 @@ public void testTimeAnnotations() {
         "  required int64 timestamp (TIMESTAMP_MILLIS);" +
         "  required FIXED_LEN_BYTE_ARRAY(12) interval (INTERVAL);" +
         "  required int32 newTime (TIME(MILLIS,true));" +
+        "  required int64 nanoTime (TIME(NANOS,true));" +
         "  required int64 newTimestamp (TIMESTAMP(MILLIS,false));" +
+        "  required int64 nanoTimestamp (TIMESTAMP(NANOS,false));" +
         "}\n";
 
     MessageType parsed = MessageTypeParser.parseMessageType(message);
@@ -281,7 +284,9 @@ public void testTimeAnnotations() {
         .required(INT64).as(TIMESTAMP_MILLIS).named("timestamp")
         .required(FIXED_LEN_BYTE_ARRAY).length(12).as(INTERVAL).named("interval")
         .required(INT32).as(timeType(true, MILLIS)).named("newTime")
+        .required(INT64).as(timeType(true, NANOS)).named("nanoTime")
         .required(INT64).as(timestampType(false, MILLIS)).named("newTimestamp")
+        .required(INT64).as(timestampType(false, NANOS)).named("nanoTimestamp")
       .named("TimeMessage");
 
     assertEquals(expected, parsed);
diff --git a/parquet-column/src/test/java/org/apache/parquet/schema/TestMessageType.java b/parquet-column/src/test/java/org/apache/parquet/schema/TestMessageType.java
index 05619385bc..e511d4252f 100644
--- a/parquet-column/src/test/java/org/apache/parquet/schema/TestMessageType.java
+++ b/parquet-column/src/test/java/org/apache/parquet/schema/TestMessageType.java
@@ -148,7 +148,7 @@ public void testMergeSchema() {
       t9.union(t10);
       fail("moving from BINARY (UTF8) to BINARY");
     } catch (IncompatibleSchemaModificationException e) {
-      assertEquals("cannot merge original type null into UTF8", e.getMessage());
+      assertEquals("cannot merge logical type null into STRING", e.getMessage());
     }
 
     MessageType t11 = Types.buildMessage()
diff --git a/parquet-column/src/test/java/org/apache/parquet/schema/TestPrimitiveComparator.java b/parquet-column/src/test/java/org/apache/parquet/schema/TestPrimitiveComparator.java
index 3f9d6431b5..0bf3599419 100644
--- a/parquet-column/src/test/java/org/apache/parquet/schema/TestPrimitiveComparator.java
+++ b/parquet-column/src/test/java/org/apache/parquet/schema/TestPrimitiveComparator.java
@@ -23,6 +23,8 @@
 
 import java.math.BigInteger;
 import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
 
 import static org.apache.parquet.schema.PrimitiveComparator.BOOLEAN_COMPARATOR;
 import static org.apache.parquet.schema.PrimitiveComparator.DOUBLE_COMPARATOR;
@@ -249,6 +251,23 @@ public void testBinaryAsSignedIntegerComparator() {
             ByteBuffer.wrap(new BigInteger("9999999999999999999999999999999999999999").toByteArray())));
   }
 
+  @Test
+  public void testBinaryAsSignedIntegerComparatorWithEquals() {
+    // The same trailing byte behind one to five leading zero bytes: every pair has to compare as equal
+    List<Binary> sameValues = new ArrayList<>();
+    for (int length : new int[] { 3, 6, 4, 5, 2 }) {
+      byte[] bytes = new byte[length];
+      bytes[length - 1] = -108;
+      sameValues.add(Binary.fromConstantByteBuffer(ByteBuffer.wrap(bytes)));
+    }
+    for (Binary left : sameValues) {
+      for (Binary right : sameValues) {
+        assertEquals(String.format("Wrong result of comparison %s and %s", left, right),
+            0, BINARY_AS_SIGNED_INTEGER_COMPARATOR.compare(left, right));
+      }
+    }
+  }
+
   private <T> void testObjectComparator(PrimitiveComparator<T> comparator, T... valuesInAscendingOrder) {
     for (int i = 0; i < valuesInAscendingOrder.length; ++i) {
       for (int j = 0; j < valuesInAscendingOrder.length; ++j) {
diff --git a/parquet-column/src/test/java/org/apache/parquet/schema/TestPrimitiveStringifier.java b/parquet-column/src/test/java/org/apache/parquet/schema/TestPrimitiveStringifier.java
index 53045cfb8c..b5de4f850e 100644
--- a/parquet-column/src/test/java/org/apache/parquet/schema/TestPrimitiveStringifier.java
+++ b/parquet-column/src/test/java/org/apache/parquet/schema/TestPrimitiveStringifier.java
@@ -19,15 +19,26 @@
 package org.apache.parquet.schema;
 
 import static java.nio.charset.StandardCharsets.UTF_8;
+import static java.util.Arrays.asList;
 import static java.util.concurrent.TimeUnit.HOURS;
 import static java.util.concurrent.TimeUnit.MICROSECONDS;
 import static java.util.concurrent.TimeUnit.MILLISECONDS;
 import static java.util.concurrent.TimeUnit.MINUTES;
+import static java.util.concurrent.TimeUnit.NANOSECONDS;
 import static java.util.concurrent.TimeUnit.SECONDS;
 import static org.apache.parquet.schema.PrimitiveStringifier.DATE_STRINGIFIER;
 import static org.apache.parquet.schema.PrimitiveStringifier.DEFAULT_STRINGIFIER;
 import static org.apache.parquet.schema.PrimitiveStringifier.INTERVAL_STRINGIFIER;
+import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_MICROS_STRINGIFIER;
+import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_MICROS_UTC_STRINGIFIER;
+import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_MILLIS_STRINGIFIER;
+import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_MILLIS_UTC_STRINGIFIER;
+import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_NANOS_STRINGIFIER;
+import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_NANOS_UTC_STRINGIFIER;
+import static org.apache.parquet.schema.PrimitiveStringifier.TIME_NANOS_STRINGIFIER;
+import static org.apache.parquet.schema.PrimitiveStringifier.TIME_NANOS_UTC_STRINGIFIER;
 import static org.apache.parquet.schema.PrimitiveStringifier.TIME_STRINGIFIER;
+import static org.apache.parquet.schema.PrimitiveStringifier.TIME_UTC_STRINGIFIER;
 import static org.apache.parquet.schema.PrimitiveStringifier.UNSIGNED_STRINGIFIER;
 import static org.apache.parquet.schema.PrimitiveStringifier.UTF8_STRINGIFIER;
 import static org.junit.Assert.assertEquals;
@@ -35,7 +46,6 @@
 
 import java.math.BigInteger;
 import java.nio.ByteBuffer;
-import java.util.Arrays;
 import java.util.Calendar;
 import java.util.HashSet;
 import java.util.Set;
@@ -152,74 +162,124 @@ public void testDateStringifier() {
     assertEquals("2017-12-14", stringifier.stringify((int) MILLISECONDS.toDays(cal.getTimeInMillis())));
 
     cal.clear();
-    cal.set(1492, Calendar.AUGUST, 3);
-    assertEquals("1492-08-03", stringifier.stringify((int) MILLISECONDS.toDays(cal.getTimeInMillis())));
+    cal.set(1583, Calendar.AUGUST, 3);
+    assertEquals("1583-08-03", stringifier.stringify((int) MILLISECONDS.toDays(cal.getTimeInMillis())));
 
     checkThrowingUnsupportedException(stringifier, Integer.TYPE);
   }
 
   @Test
   public void testTimestampMillisStringifier() {
-    PrimitiveStringifier stringifier = PrimitiveStringifier.TIMESTAMP_MILLIS_STRINGIFIER;
+    for (PrimitiveStringifier stringifier : asList(TIMESTAMP_MILLIS_STRINGIFIER, TIMESTAMP_MILLIS_UTC_STRINGIFIER)) {
+      String timezoneAmendment = (stringifier == TIMESTAMP_MILLIS_STRINGIFIER ? "" : "+0000");
 
-    assertEquals("1970-01-01T00:00:00.000", stringifier.stringify(0l));
+      assertEquals(withZoneString("1970-01-01T00:00:00.000", timezoneAmendment), stringifier.stringify(0l));
 
-    Calendar cal = Calendar.getInstance(UTC);
-    cal.clear();
-    cal.set(2017, Calendar.DECEMBER, 15, 10, 9, 54);
-    cal.set(Calendar.MILLISECOND, 120);
-    assertEquals("2017-12-15T10:09:54.120", stringifier.stringify(cal.getTimeInMillis()));
+      Calendar cal = Calendar.getInstance(UTC);
+      cal.clear();
+      cal.set(2017, Calendar.DECEMBER, 15, 10, 9, 54);
+      cal.set(Calendar.MILLISECOND, 120);
+      assertEquals(withZoneString("2017-12-15T10:09:54.120", timezoneAmendment), stringifier.stringify(cal.getTimeInMillis()));
 
-    cal.clear();
-    cal.set(1948, Calendar.NOVEMBER, 23, 20, 19, 1);
-    cal.set(Calendar.MILLISECOND, 9);
-    assertEquals("1948-11-23T20:19:01.009", stringifier.stringify(cal.getTimeInMillis()));
+      cal.clear();
+      cal.set(1948, Calendar.NOVEMBER, 23, 20, 19, 1);
+      cal.set(Calendar.MILLISECOND, 9);
+      assertEquals(withZoneString("1948-11-23T20:19:01.009", timezoneAmendment), stringifier.stringify(cal.getTimeInMillis()));
 
-    checkThrowingUnsupportedException(stringifier, Long.TYPE);
+      checkThrowingUnsupportedException(stringifier, Long.TYPE);
+    }
   }
 
   @Test
   public void testTimestampMicrosStringifier() {
-    PrimitiveStringifier stringifier = PrimitiveStringifier.TIMESTAMP_MICROS_STRINGIFIER;
+    for (PrimitiveStringifier stringifier : asList(TIMESTAMP_MICROS_STRINGIFIER, TIMESTAMP_MICROS_UTC_STRINGIFIER)) {
+      String timezoneAmendment = (stringifier == TIMESTAMP_MICROS_STRINGIFIER ? "" : "+0000");
 
-    assertEquals("1970-01-01T00:00:00.000000", stringifier.stringify(0l));
+      assertEquals(withZoneString("1970-01-01T00:00:00.000000", timezoneAmendment), stringifier.stringify(0l));
 
-    Calendar cal = Calendar.getInstance(UTC);
-    cal.clear();
-    cal.set(2053, Calendar.JULY, 10, 22, 13, 24);
-    cal.set(Calendar.MILLISECOND, 84);
-    long micros = cal.getTimeInMillis() * 1000 + 900;
-    assertEquals("2053-07-10T22:13:24.084900", stringifier.stringify(micros));
+      Calendar cal = Calendar.getInstance(UTC);
+      cal.clear();
+      cal.set(2053, Calendar.JULY, 10, 22, 13, 24);
+      cal.set(Calendar.MILLISECOND, 84);
+      long micros = cal.getTimeInMillis() * 1000 + 900;
+      assertEquals(withZoneString("2053-07-10T22:13:24.084900", timezoneAmendment), stringifier.stringify(micros));
 
-    cal.clear();
-    cal.set(1848, Calendar.MARCH, 15, 9, 23, 59);
-    cal.set(Calendar.MILLISECOND, 765);
-    micros = cal.getTimeInMillis() * 1000 - 1;
-    assertEquals("1848-03-15T09:23:59.765001", stringifier.stringify(micros));
+      cal.clear();
+      cal.set(1848, Calendar.MARCH, 15, 9, 23, 59);
+      cal.set(Calendar.MILLISECOND, 765);
+      micros = cal.getTimeInMillis() * 1000 - 1;
+      assertEquals(withZoneString("1848-03-15T09:23:59.764999", timezoneAmendment), stringifier.stringify(micros));
+
+      checkThrowingUnsupportedException(stringifier, Long.TYPE);
+    }
+  }
 
-    checkThrowingUnsupportedException(stringifier, Long.TYPE);
+  @Test
+  public void testTimestampNanosStringifier() {
+    for (PrimitiveStringifier stringifier : asList(TIMESTAMP_NANOS_STRINGIFIER, TIMESTAMP_NANOS_UTC_STRINGIFIER)) {
+      String timezoneAmendment = (stringifier == TIMESTAMP_NANOS_STRINGIFIER ? "" : "+0000");
+
+      assertEquals(withZoneString("1970-01-01T00:00:00.000000000", timezoneAmendment), stringifier.stringify(0l));
+
+      Calendar cal = Calendar.getInstance(UTC);
+      cal.clear();
+      cal.set(2053, Calendar.JULY, 10, 22, 13, 24);
+      cal.set(Calendar.MILLISECOND, 84);
+      long nanos = cal.getTimeInMillis() * 1_000_000 + 536;
+      assertEquals(withZoneString("2053-07-10T22:13:24.084000536", timezoneAmendment), stringifier.stringify(nanos));
+
+      cal.clear();
+      cal.set(1848, Calendar.MARCH, 15, 9, 23, 59);
+      cal.set(Calendar.MILLISECOND, 765);
+      nanos = cal.getTimeInMillis() * 1_000_000 - 1;
+      assertEquals(withZoneString("1848-03-15T09:23:59.764999999", timezoneAmendment), stringifier.stringify(nanos));
+
+      checkThrowingUnsupportedException(stringifier, Long.TYPE);
+    }
   }
 
   @Test
   public void testTimeStringifier() {
-    PrimitiveStringifier stringifier = TIME_STRINGIFIER;
+    for (PrimitiveStringifier stringifier : asList(TIME_STRINGIFIER, TIME_UTC_STRINGIFIER)) {
+      String timezoneAmendment = (stringifier == TIME_STRINGIFIER ? "" : "+0000");
 
-    assertEquals("00:00:00.000", stringifier.stringify(0));
-    assertEquals("00:00:00.000000", stringifier.stringify(0l));
+      assertEquals(withZoneString("00:00:00.000", timezoneAmendment), stringifier.stringify(0));
+      assertEquals(withZoneString("00:00:00.000000", timezoneAmendment), stringifier.stringify(0l));
 
-    assertEquals("12:34:56.789", stringifier.stringify((int) convert(MILLISECONDS, 12, 34, 56, 789)));
-    assertEquals("12:34:56.789012", stringifier.stringify(convert(MICROSECONDS, 12, 34, 56, 789012)));
+      assertEquals(withZoneString("12:34:56.789", timezoneAmendment), stringifier.stringify((int) convert(MILLISECONDS, 12, 34, 56, 789)));
+      assertEquals(withZoneString("12:34:56.789012", timezoneAmendment), stringifier.stringify(convert(MICROSECONDS, 12, 34, 56, 789012)));
 
-    assertEquals("-12:34:56.789", stringifier.stringify((int) convert(MILLISECONDS, -12, -34, -56, -789)));
-    assertEquals("-12:34:56.789012", stringifier.stringify(convert(MICROSECONDS, -12, -34, -56, -789012)));
+      assertEquals(withZoneString("-12:34:56.789", timezoneAmendment), stringifier.stringify((int) convert(MILLISECONDS, -12, -34, -56, -789)));
+      assertEquals(withZoneString("-12:34:56.789012", timezoneAmendment), stringifier.stringify(convert(MICROSECONDS, -12, -34, -56, -789012)));
 
-    assertEquals("123:12:34.567", stringifier.stringify((int) convert(MILLISECONDS, 123, 12, 34, 567)));
-    assertEquals("12345:12:34.056789", stringifier.stringify(convert(MICROSECONDS, 12345, 12, 34, 56789)));
+      assertEquals(withZoneString("123:12:34.567", timezoneAmendment), stringifier.stringify((int) convert(MILLISECONDS, 123, 12, 34, 567)));
+      assertEquals(withZoneString("12345:12:34.056789", timezoneAmendment), stringifier.stringify(convert(MICROSECONDS, 12345, 12, 34, 56789)));
 
-    assertEquals("-123:12:34.567", stringifier.stringify((int) convert(MILLISECONDS, -123, -12, -34, -567)));
-    assertEquals("-12345:12:34.056789", stringifier.stringify(convert(MICROSECONDS, -12345, -12, -34, -56789)));
+      assertEquals(withZoneString("-123:12:34.567", timezoneAmendment), stringifier.stringify((int) convert(MILLISECONDS, -123, -12, -34, -567)));
+      assertEquals(withZoneString("-12345:12:34.056789", timezoneAmendment), stringifier.stringify(convert(MICROSECONDS, -12345, -12, -34, -56789)));
 
-    checkThrowingUnsupportedException(stringifier, Integer.TYPE, Long.TYPE);
+      checkThrowingUnsupportedException(stringifier, Integer.TYPE, Long.TYPE);
+    }
+  }
+
+  @Test
+  public void testTimeNanoStringifier() {
+    for (PrimitiveStringifier stringifier : asList(TIME_NANOS_STRINGIFIER, TIME_NANOS_UTC_STRINGIFIER)) {
+      String timezoneAmendment = (stringifier == TIME_NANOS_STRINGIFIER ? "" : "+0000"); // suffix expected from the UTC variant
+
+      assertEquals(withZoneString("00:00:00.000000000", timezoneAmendment), stringifier.stringify(0L));
+      // The last argument of convert() is the sub-second part; the expected strings show it zero-padded to nine digits
+      assertEquals(withZoneString("12:34:56.789012987", timezoneAmendment), stringifier.stringify(convert(NANOSECONDS, 12, 34, 56, 789012987)));
+      assertEquals(withZoneString("-12:34:56.000789012", timezoneAmendment), stringifier.stringify(convert(NANOSECONDS, -12, -34, -56, -789012)));
+      assertEquals(withZoneString("12345:12:34.000056789", timezoneAmendment), stringifier.stringify(convert(NANOSECONDS, 12345, 12, 34, 56789)));
+      assertEquals(withZoneString("-12345:12:34.000056789", timezoneAmendment), stringifier.stringify(convert(NANOSECONDS, -12345, -12, -34, -56789)));
+
+      checkThrowingUnsupportedException(stringifier, Integer.TYPE, Long.TYPE);
+    }
+  }
+
+  private String withZoneString(String expected, String zoneString) {
+    return expected + zoneString;
   }
 
   private long convert(TimeUnit unit, long hours, long minutes, long seconds, long rest) {
@@ -250,7 +310,7 @@ public void testDecimalStringifier() {
   }
 
   private void checkThrowingUnsupportedException(PrimitiveStringifier stringifier, Class<?>... excludes) {
-    Set<Class<?>> set = new HashSet<>(Arrays.asList(excludes));
+    Set<Class<?>> set = new HashSet<>(asList(excludes));
     if (!set.contains(Integer.TYPE)) {
       try {
         stringifier.stringify(0);
diff --git a/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuildersWithLogicalTypes.java b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuildersWithLogicalTypes.java
new file mode 100644
index 0000000000..fe13e604b6
--- /dev/null
+++ b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuildersWithLogicalTypes.java
@@ -0,0 +1,408 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.schema;
+
+import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
+import org.apache.parquet.schema.Type.Repetition;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.concurrent.Callable;
+
+import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MICROS;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MILLIS;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.NANOS;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.bsonType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.dateType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.decimalType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.intType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.jsonType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.stringType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.timeType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.timestampType;
+import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
+import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BOOLEAN;
+import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.DOUBLE;
+import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY;
+import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FLOAT;
+import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;
+import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64;
+import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT96;
+import static org.apache.parquet.schema.Type.Repetition.REQUIRED;
+
+public class TestTypeBuildersWithLogicalTypes {
+  @Test
+  public void testGroupTypeConstruction() {
+    PrimitiveType f1 = Types.required(BINARY).as(stringType()).named("f1");
+    PrimitiveType f2 = Types.required(INT32).named("f2");
+    PrimitiveType f3 = Types.optional(INT32).named("f3");
+    String name = "group";
+    for (Repetition repetition : Repetition.values()) {
+      GroupType expected = new GroupType(repetition, name,
+          f1,
+          new GroupType(repetition, "g1", f2, f3));
+      GroupType built = Types.buildGroup(repetition)
+          .addField(f1)
+          .group(repetition).addFields(f2, f3).named("g1")
+          .named(name);
+      Assert.assertEquals(expected, built);
+
+      switch (repetition) {
+        case REQUIRED:
+          built = Types.requiredGroup()
+              .addField(f1)
+              .requiredGroup().addFields(f2, f3).named("g1")
+              .named(name);
+          break;
+        case OPTIONAL:
+          built = Types.optionalGroup()
+              .addField(f1)
+              .optionalGroup().addFields(f2, f3).named("g1")
+              .named(name);
+          break;
+        case REPEATED:
+          built = Types.repeatedGroup()
+              .addField(f1)
+              .repeatedGroup().addFields(f2, f3).named("g1")
+              .named(name);
+          break;
+      }
+      Assert.assertEquals(expected, built);
+    }
+  }
+
+  @Test
+  public void testDecimalAnnotation() {
+    // int32 primitive type
+    MessageType expected = new MessageType("DecimalMessage",
+      new PrimitiveType(REQUIRED, INT32, 0, "aDecimal",
+        decimalType(2, 9), null));
+    MessageType builderType = Types.buildMessage()
+      .required(INT32)
+      .as(decimalType(2, 9))
+      .named("aDecimal")
+      .named("DecimalMessage");
+    Assert.assertEquals(expected, builderType);
+    // int64 primitive type
+    expected = new MessageType("DecimalMessage",
+      new PrimitiveType(REQUIRED, INT64, 0, "aDecimal",
+        decimalType(2, 18), null));
+    builderType = Types.buildMessage()
+      .required(INT64)
+      .as(decimalType(2, 18)).precision(18).scale(2)
+      .named("aDecimal")
+      .named("DecimalMessage");
+    Assert.assertEquals(expected, builderType);
+    // binary primitive type
+    expected = new MessageType("DecimalMessage",
+      new PrimitiveType(REQUIRED, BINARY, 0, "aDecimal",
+        decimalType(2, 9), null));
+    builderType = Types.buildMessage()
+      .required(BINARY).as(decimalType(2, 9))
+      .named("aDecimal")
+      .named("DecimalMessage");
+    Assert.assertEquals(expected, builderType);
+    // fixed primitive type
+    expected = new MessageType("DecimalMessage",
+      new PrimitiveType(REQUIRED, FIXED_LEN_BYTE_ARRAY, 4, "aDecimal",
+        decimalType(2, 9), null));
+    builderType = Types.buildMessage()
+      .required(FIXED_LEN_BYTE_ARRAY).length(4)
+      .as(decimalType(2, 9))
+      .named("aDecimal")
+      .named("DecimalMessage");
+    Assert.assertEquals(expected, builderType);
+  }
+
+  @Test
+  public void testDecimalAnnotationPrecisionScaleBound() {
+    assertThrows("Should reject scale greater than precision",
+        IllegalArgumentException.class, () -> Types.buildMessage()
+            .required(INT32).as(decimalType(4, 3))
+                .named("aDecimal")
+            .named("DecimalMessage"));
+    assertThrows("Should reject scale greater than precision",
+        IllegalArgumentException.class, () -> Types.buildMessage()
+            .required(INT64).as(decimalType(4, 3))
+                .named("aDecimal")
+            .named("DecimalMessage"));
+    assertThrows("Should reject scale greater than precision",
+        IllegalArgumentException.class, () -> Types.buildMessage()
+            .required(BINARY).as(decimalType(4, 3))
+                .named("aDecimal")
+            .named("DecimalMessage"));
+    assertThrows("Should reject scale greater than precision",
+        IllegalArgumentException.class, () -> Types.buildMessage()
+            .required(FIXED_LEN_BYTE_ARRAY).length(7)
+            .as(decimalType(4, 3))
+            .named("aDecimal")
+            .named("DecimalMessage")
+    );
+  }
+
+  @Test
+  public void testDecimalAnnotationLengthCheck() {
+    // maximum precision for 4 bytes is 9
+    assertThrows("should reject precision 10 with length 4",
+        IllegalStateException.class, () -> Types.required(FIXED_LEN_BYTE_ARRAY).length(4)
+            .as(decimalType(2, 10))
+            .named("aDecimal"));
+    assertThrows("should reject precision 10 with length 4",
+        IllegalStateException.class, () -> Types.required(INT32)
+            .as(decimalType(2, 10))
+            .named("aDecimal"));
+    // maximum precision for 8 bytes is 18
+    assertThrows("should reject precision 19 with length 8",
+        IllegalStateException.class, () -> Types.required(FIXED_LEN_BYTE_ARRAY).length(8)
+            .as(decimalType(4, 19))
+            .named("aDecimal"));
+    assertThrows("should reject precision 19 with length 8",
+        IllegalStateException.class, () -> Types.required(INT64).length(8)
+            .as(decimalType(4, 19))
+            .named("aDecimal")
+    );
+  }
+
+  @Test
+  public void testDECIMALAnnotationRejectsUnsupportedTypes() {
+    PrimitiveTypeName[] unsupported = new PrimitiveTypeName[]{
+        BOOLEAN, INT96, DOUBLE, FLOAT
+    };
+    for (final PrimitiveTypeName type : unsupported) {
+      assertThrows("Should reject non-binary type: " + type,
+          IllegalStateException.class, () -> Types.required(type)
+              .as(decimalType(2, 9))
+              .named("d"));
+    }
+  }
+
+  @Test
+  public void testBinaryAnnotations() {
+    // the builder result must equal the directly constructed BINARY type for each annotation
+    for (final LogicalTypeAnnotation annotation :
+        new LogicalTypeAnnotation[] {stringType(), jsonType(), bsonType()}) {
+      PrimitiveType viaConstructor = new PrimitiveType(REQUIRED, BINARY, "col", annotation);
+      PrimitiveType viaBuilder = Types.required(BINARY).as(annotation).named("col");
+      Assert.assertEquals(viaConstructor, viaBuilder);
+    }
+  }
+
+  @Test
+  public void testBinaryAnnotationsRejectsNonBinary() {
+    LogicalTypeAnnotation[] types = new LogicalTypeAnnotation[] {
+        stringType(), jsonType(), bsonType()};
+    for (final LogicalTypeAnnotation logicalType : types) {
+      PrimitiveTypeName[] nonBinary = new PrimitiveTypeName[]{
+          BOOLEAN, INT32, INT64, INT96, DOUBLE, FLOAT
+      };
+      for (final PrimitiveTypeName type : nonBinary) {
+        assertThrows("Should reject non-binary type: " + type,
+            IllegalStateException.class, () -> Types.required(type).as(logicalType).named("col"));
+      }
+      assertThrows("Should reject non-binary type: FIXED_LEN_BYTE_ARRAY",
+          IllegalStateException.class, () -> Types.required(FIXED_LEN_BYTE_ARRAY).length(1)
+              .as(logicalType).named("col"));
+    }
+  }
+
+  @Test
+  public void testInt32Annotations() {
+    // the builder result must equal the directly constructed INT32 type for each annotation
+    LogicalTypeAnnotation[] int32Annotations = {
+      dateType(), timeType(true, MILLIS), timeType(false, MILLIS),
+      intType(8, false), intType(16, false), intType(32, false),
+      intType(8, true), intType(16, true), intType(32, true)};
+    for (LogicalTypeAnnotation annotation : int32Annotations) {
+      PrimitiveType viaConstructor = new PrimitiveType(REQUIRED, INT32, "col", annotation);
+      Assert.assertEquals(viaConstructor, Types.required(INT32).as(annotation).named("col"));
+    }
+  }
+
+  @Test
+  public void testInt32AnnotationsRejectNonInt32() {
+    LogicalTypeAnnotation[] types = new LogicalTypeAnnotation[] {
+      dateType(), timeType(true, MILLIS), timeType(false, MILLIS),
+      intType(8, false), intType(16, false), intType(32, false),
+      intType(8, true), intType(16, true), intType(32, true)};
+    for (final LogicalTypeAnnotation logicalType : types) {
+      PrimitiveTypeName[] nonInt32 = new PrimitiveTypeName[]{
+          BOOLEAN, INT64, INT96, DOUBLE, FLOAT, BINARY
+      };
+      for (final PrimitiveTypeName type : nonInt32) {
+        assertThrows("Should reject non-int32 type: " + type,
+            IllegalStateException.class, () -> Types.required(type).as(logicalType).named("col"));
+      }
+      assertThrows("Should reject non-int32 type: FIXED_LEN_BYTE_ARRAY",
+          IllegalStateException.class, () -> Types.required(FIXED_LEN_BYTE_ARRAY).length(1)
+              .as(logicalType).named("col"));
+    }
+  }
+
+  @Test
+  public void testInt64Annotations() {
+    // the builder result must equal the directly constructed INT64 type for each annotation
+    LogicalTypeAnnotation[] int64Annotations = {
+      timeType(true, MICROS), timeType(false, MICROS),
+      timeType(true, NANOS), timeType(false, NANOS),
+      timestampType(true, MILLIS), timestampType(false, MILLIS),
+      timestampType(true, MICROS), timestampType(false, MICROS),
+      timestampType(true, NANOS), timestampType(false, NANOS),
+      intType(64, true), intType(64, false)};
+    for (LogicalTypeAnnotation annotation : int64Annotations) {
+      PrimitiveType viaConstructor = new PrimitiveType(REQUIRED, INT64, "col", annotation);
+      Assert.assertEquals(viaConstructor, Types.required(INT64).as(annotation).named("col"));
+    }
+  }
+
+  @Test
+  public void testInt64AnnotationsRejectNonInt64() {
+    LogicalTypeAnnotation[] types = new LogicalTypeAnnotation[] {
+      timeType(true, MICROS), timeType(false, MICROS),
+      timeType(true, NANOS), timeType(false, NANOS),
+      timestampType(true, MILLIS), timestampType(false, MILLIS),
+      timestampType(true, MICROS), timestampType(false, MICROS),
+      timestampType(true, NANOS), timestampType(false, NANOS),
+      intType(64, true), intType(64, false)};
+    for (final LogicalTypeAnnotation logicalType : types) {
+      PrimitiveTypeName[] nonInt64 = new PrimitiveTypeName[]{
+          BOOLEAN, INT32, INT96, DOUBLE, FLOAT, BINARY
+      };
+      for (final PrimitiveTypeName type : nonInt64) {
+        assertThrows("Should reject non-int64 type: " + type,
+            IllegalStateException.class, (Callable<Type>) () -> Types.required(type).as(logicalType).named("col"));
+      }
+      assertThrows("Should reject non-int64 type: FIXED_LEN_BYTE_ARRAY",
+          IllegalStateException.class, (Callable<Type>) () -> Types.required(FIXED_LEN_BYTE_ARRAY).length(1)
+              .as(logicalType).named("col"));
+    }
+  }
+
+  @Test
+  public void testIntervalAnnotationRejectsNonFixed() {
+    PrimitiveTypeName[] nonFixed = new PrimitiveTypeName[]{
+        BOOLEAN, INT32, INT64, INT96, DOUBLE, FLOAT, BINARY
+    };
+    for (final PrimitiveTypeName type : nonFixed) {
+      assertThrows("Should reject non-fixed type: " + type,
+          IllegalStateException.class, () -> Types.required(type)
+              .as(LogicalTypeAnnotation.IntervalLogicalTypeAnnotation.getInstance()).named("interval"));
+    }
+  }
+
+  @Test
+  public void testIntervalAnnotationRejectsNonFixed12() {
+    assertThrows("Should reject fixed with length != 12: " + 11,
+        IllegalStateException.class, () -> Types.required(FIXED_LEN_BYTE_ARRAY).length(11)
+            .as(LogicalTypeAnnotation.IntervalLogicalTypeAnnotation.getInstance()).named("interval"));
+  }
+
+  @Test
+  public void testTypeConstructionWithUnsupportedColumnOrder() {
+    assertThrows("Should reject type-defined column order for INT96", IllegalArgumentException.class,
+      () -> Types.optional(INT96).columnOrder(ColumnOrder.typeDefined()).named("int96_unsupported"));
+    assertThrows("Should reject type-defined column order for INTERVAL", IllegalArgumentException.class,
+      () -> Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(12)
+                .as(LogicalTypeAnnotation.IntervalLogicalTypeAnnotation.getInstance())
+                .columnOrder(ColumnOrder.typeDefined()).named("interval_unsupported"));
+  }
+
+  @Test
+  public void testDecimalLogicalType() {
+    PrimitiveType expected = new PrimitiveType(REQUIRED, BINARY, "aDecimal",
+      LogicalTypeAnnotation.decimalType(3, 4));
+    PrimitiveType actual = Types.required(BINARY)
+      .as(LogicalTypeAnnotation.decimalType(3, 4)).named("aDecimal");
+    Assert.assertEquals(expected, actual);
+  }
+
+  @Test
+  public void testDecimalLogicalTypeWithDeprecatedScale() {
+    PrimitiveType expected = new PrimitiveType(REQUIRED, BINARY, "aDecimal",
+      LogicalTypeAnnotation.decimalType(3, 4));
+    PrimitiveType actual = Types.required(BINARY)
+      .as(LogicalTypeAnnotation.decimalType(3, 4)).scale(3).named("aDecimal");
+    Assert.assertEquals(expected, actual);
+  }
+
+  @Test
+  public void testDecimalLogicalTypeWithDeprecatedPrecision() {
+    PrimitiveType expected = new PrimitiveType(REQUIRED, BINARY, "aDecimal",
+      LogicalTypeAnnotation.decimalType(3, 4));
+    PrimitiveType actual = Types.required(BINARY)
+      .as(LogicalTypeAnnotation.decimalType(3, 4)).precision(4).named("aDecimal");
+    Assert.assertEquals(expected, actual);
+  }
+
+  @Test
+  public void testTimestampLogicalTypeWithUTCParameter() {
+    PrimitiveType utcMillisExpected = new PrimitiveType(REQUIRED, INT64, "aTimestamp",
+      timestampType(true, MILLIS));
+    PrimitiveType nonUtcMillisExpected = new PrimitiveType(REQUIRED, INT64, "aTimestamp",
+      timestampType(false, MILLIS));
+    PrimitiveType utcMicrosExpected = new PrimitiveType(REQUIRED, INT64, "aTimestamp",
+      timestampType(true, MICROS));
+    PrimitiveType nonUtcMicrosExpected = new PrimitiveType(REQUIRED, INT64, "aTimestamp",
+      timestampType(false, MICROS));
+
+    PrimitiveType utcMillisActual = Types.required(INT64)
+      .as(timestampType(true, MILLIS)).named("aTimestamp");
+    PrimitiveType nonUtcMillisActual = Types.required(INT64)
+      .as(timestampType(false, MILLIS)).named("aTimestamp");
+    PrimitiveType utcMicrosActual = Types.required(INT64)
+      .as(timestampType(true, MICROS)).named("aTimestamp");
+    PrimitiveType nonUtcMicrosActual = Types.required(INT64)
+      .as(timestampType(false, MICROS)).named("aTimestamp");
+
+    Assert.assertEquals(utcMillisExpected, utcMillisActual);
+    Assert.assertEquals(nonUtcMillisExpected, nonUtcMillisActual);
+    Assert.assertEquals(utcMicrosExpected, utcMicrosActual);
+    Assert.assertEquals(nonUtcMicrosExpected, nonUtcMicrosActual);
+  }
+
+  @Test(expected = IllegalArgumentException.class)
+  public void testDecimalLogicalTypeWithDeprecatedScaleMismatch() {
+    Types.required(BINARY)
+      .as(LogicalTypeAnnotation.decimalType(3, 4))
+      .scale(4).named("aDecimal");
+  }
+
+  @Test(expected = IllegalArgumentException.class)
+  public void testDecimalLogicalTypeWithDeprecatedPrecisionMismatch() {
+    Types.required(BINARY)
+      .as(LogicalTypeAnnotation.decimalType(3, 4))
+      .precision(5).named("aDecimal");
+  }
+
+  /**
+   * Runs the callable and fails unless it throws an exception whose class is exactly {@code expected}.
+   * @param message A String message to describe this assertion
+   * @param expected An Exception class that the Callable should throw
+   * @param callable A Callable that is expected to throw the exception
+   */
+  public static void assertThrows(
+      String message, Class<? extends Exception> expected, Callable<?> callable) {
+    try {
+      callable.call();
+      Assert.fail("No exception was thrown (" + message + "), expected: " +
+          expected.getName());
+    } catch (Exception actual) {
+      Assert.assertEquals(message, expected, actual.getClass());
+    }
+  }
+}
diff --git a/parquet-common/pom.xml b/parquet-common/pom.xml
index e7b2446a65..1009628544 100644
--- a/parquet-common/pom.xml
+++ b/parquet-common/pom.xml
@@ -38,8 +38,8 @@
   <dependencies>
     <dependency>
       <groupId>org.apache.parquet</groupId>
-      <artifactId>parquet-format</artifactId>
-      <version>${parquet.format.version}</version>
+      <artifactId>parquet-format-structures</artifactId>
+      <version>${project.version}</version>
     </dependency>
 
     <dependency>
@@ -61,6 +61,12 @@
       <version>${slf4j.version}</version>
       <scope>test</scope>
     </dependency>
+
+    <dependency>
+      <groupId>org.apache.yetus</groupId>
+      <artifactId>audience-annotations</artifactId>
+      <version>0.7.0</version>
+    </dependency>
   </dependencies>
 
   <build>
diff --git a/parquet-format-structures/pom.xml b/parquet-format-structures/pom.xml
new file mode 100644
index 0000000000..e69cced3b2
--- /dev/null
+++ b/parquet-format-structures/pom.xml
@@ -0,0 +1,206 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  - Licensed to the Apache Software Foundation (ASF) under one
+  - or more contributor license agreements.  See the NOTICE file
+  - distributed with this work for additional information
+  - regarding copyright ownership.  The ASF licenses this file
+  - to you under the Apache License, Version 2.0 (the
+  - "License"); you may not use this file except in compliance
+  - with the License.  You may obtain a copy of the License at
+  -
+  -   http://www.apache.org/licenses/LICENSE-2.0
+  -
+  - Unless required by applicable law or agreed to in writing,
+  - software distributed under the License is distributed on an
+  - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  - KIND, either express or implied.  See the License for the
+  - specific language governing permissions and limitations
+  - under the License.
+  -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache.parquet</groupId>
+    <artifactId>parquet</artifactId>
+    <relativePath>../pom.xml</relativePath>
+    <version>1.10.1-SNAPSHOT</version>
+  </parent>
+
+  <artifactId>parquet-format-structures</artifactId>
+  <packaging>jar</packaging>
+
+  <name>Apache Parquet Format Structures</name>
+  <url>http://parquet.apache.org/</url>
+  <description>Parquet-mr related java classes to use the parquet-format thrift structures.</description>
+
+  <properties>
+    <parquet.thrift.path>${project.build.directory}/parquet-format-thrift</parquet.thrift.path>
+  </properties>
+
+  <build>
+    <plugins>
+      <!-- Getting the parquet-format thrift file -->
+       <plugin>
+         <groupId>org.apache.maven.plugins</groupId>
+         <artifactId>maven-dependency-plugin</artifactId>
+         <executions>
+           <execution>
+             <id>unpack</id>
+             <phase>generate-sources</phase>
+             <goals>
+               <goal>unpack</goal>
+             </goals>
+             <configuration>
+               <artifactItems>
+                 <artifactItem>
+                   <groupId>org.apache.parquet</groupId>
+                   <artifactId>parquet-format</artifactId>
+                   <version>${parquet.format.version}</version>
+                   <type>jar</type>
+                 </artifactItem>
+               </artifactItems>
+               <includes>parquet.thrift</includes>
+               <outputDirectory>${parquet.thrift.path}</outputDirectory>
+             </configuration>
+           </execution>
+         </executions>
+       </plugin>
+      <!-- thrift -->
+      <plugin>
+        <groupId>org.apache.thrift.tools</groupId>
+        <artifactId>maven-thrift-plugin</artifactId>
+        <version>0.1.11</version>
+        <configuration>
+          <thriftSourceRoot>${parquet.thrift.path}</thriftSourceRoot>
+          <thriftExecutable>${format.thrift.executable}</thriftExecutable>
+        </configuration>
+        <executions>
+          <execution>
+            <id>thrift-sources</id>
+            <phase>generate-sources</phase>
+            <goals>
+              <goal>compile</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-shade-plugin</artifactId>
+        <executions>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>shade</goal>
+            </goals>
+            <configuration>
+              <keepDependenciesWithProvidedScope>true</keepDependenciesWithProvidedScope>
+              <artifactSet>
+                <includes>
+                  <include>org.apache.thrift:libthrift</include>
+                </includes>
+              </artifactSet>
+              <filters>
+                <filter>
+                  <!-- Sigh. The Thrift jar contains its source -->
+                  <artifact>org.apache.thrift:libthrift</artifact>
+                  <excludes>
+                    <exclude>**/*.java</exclude>
+                    <exclude>META-INF/LICENSE.txt</exclude>
+                    <exclude>META-INF/NOTICE.txt</exclude>
+                  </excludes>
+                </filter>
+              </filters>
+              <relocations>
+                <relocation>
+                  <pattern>org.apache.thrift</pattern>
+                  <shadedPattern>${shade.prefix}.org.apache.thrift</shadedPattern>
+                </relocation>
+              </relocations>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+      <!-- Configure build/javadoc as well to support "mvn javadoc:javadoc" -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-javadoc-plugin</artifactId>
+        <configuration>
+          <!-- We have to turn off the javadoc check because thrift generates improper comments -->
+          <additionalparam>-Xdoclint:none</additionalparam>
+        </configuration>
+      </plugin>
+    </plugins>
+  </build>
+
+  <reporting>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-javadoc-plugin</artifactId>
+        <configuration>
+          <!-- We have to turn off the javadoc check because thrift generates improper comments -->
+          <additionalparam>-Xdoclint:none</additionalparam>
+        </configuration>
+      </plugin>
+    </plugins>
+  </reporting>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+      <version>${slf4j.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.thrift</groupId>
+      <artifactId>libthrift</artifactId>
+      <version>${format.thrift.version}</version>
+    </dependency>
+  </dependencies>
+
+  <profiles>
+    <profile> <!-- aborts the build when the thrift compiler used by this module reports an unexpected version -->
+      <activation>
+        <os>
+          <family>!windows</family>
+        </os>
+      </activation>
+      <id>UnixClassOS</id>
+      <build>
+        <plugins>
+          <plugin>
+            <groupId>org.codehaus.mojo</groupId>
+            <artifactId>exec-maven-plugin</artifactId>
+            <version>1.2.1</version>
+            <executions>
+              <execution>
+                <id>check-thrift-version</id>
+                <phase>generate-sources</phase>
+                <goals>
+                  <goal>exec</goal>
+                </goals>
+                <configuration>
+                  <executable>sh</executable>
+                  <workingDirectory>${basedir}</workingDirectory>
+                  <arguments>
+                    <argument>-c</argument>
+                    <argument>${format.thrift.executable} -version | fgrep 'Thrift version ${format.thrift.version}' &amp;&amp; exit 0;
+                      echo "=================================================================================";
+                      echo "========== [FATAL] Build is configured to require Thrift version ${format.thrift.version} ==========";
+                      echo -n "========== Currently installed: ";
+                      ${format.thrift.executable} -version;
+                      echo "=================================================================================";
+                      exit 1
+                    </argument>
+                  </arguments>
+                </configuration>
+              </execution>
+            </executions>
+          </plugin>
+        </plugins>
+      </build>
+    </profile>
+  </profiles>
+</project>
diff --git a/parquet-format-structures/src/main/java/org/apache/parquet/format/InterningProtocol.java b/parquet-format-structures/src/main/java/org/apache/parquet/format/InterningProtocol.java
new file mode 100644
index 0000000000..a405d4f879
--- /dev/null
+++ b/parquet-format-structures/src/main/java/org/apache/parquet/format/InterningProtocol.java
@@ -0,0 +1,231 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.parquet.format;
+
+import java.nio.ByteBuffer;
+
+import org.apache.thrift.TException;
+import org.apache.thrift.protocol.TField;
+import org.apache.thrift.protocol.TList;
+import org.apache.thrift.protocol.TMap;
+import org.apache.thrift.protocol.TMessage;
+import org.apache.thrift.protocol.TProtocol;
+import org.apache.thrift.protocol.TSet;
+import org.apache.thrift.protocol.TStruct;
+import org.apache.thrift.transport.TTransport;
+
+/**
+ * TProtocol wrapper that forwards every call to a delegate and interns the strings returned by readString().
+ */
+public class InterningProtocol extends TProtocol {
+
+  private final TProtocol delegate;
+
+  public InterningProtocol(TProtocol delegate) {
+    super(delegate.getTransport());
+    this.delegate = delegate;
+  }
+
+  public TTransport getTransport() {
+    return delegate.getTransport();
+  }
+
+  public void writeMessageBegin(TMessage message) throws TException {
+    delegate.writeMessageBegin(message);
+  }
+
+  public void writeMessageEnd() throws TException {
+    delegate.writeMessageEnd();
+  }
+
+  public int hashCode() {
+    return delegate.hashCode();
+  }
+
+  public void writeStructBegin(TStruct struct) throws TException {
+    delegate.writeStructBegin(struct);
+  }
+
+  public void writeStructEnd() throws TException {
+    delegate.writeStructEnd();
+  }
+
+  public void writeFieldBegin(TField field) throws TException {
+    delegate.writeFieldBegin(field);
+  }
+
+  public void writeFieldEnd() throws TException {
+    delegate.writeFieldEnd();
+  }
+
+  public void writeFieldStop() throws TException {
+    delegate.writeFieldStop();
+  }
+
+  public void writeMapBegin(TMap map) throws TException {
+    delegate.writeMapBegin(map);
+  }
+
+  public void writeMapEnd() throws TException {
+    delegate.writeMapEnd();
+  }
+
+  public void writeListBegin(TList list) throws TException {
+    delegate.writeListBegin(list);
+  }
+
+  public void writeListEnd() throws TException {
+    delegate.writeListEnd();
+  }
+
+  public void writeSetBegin(TSet set) throws TException {
+    delegate.writeSetBegin(set);
+  }
+
+  public void writeSetEnd() throws TException {
+    delegate.writeSetEnd();
+  }
+
+  public void writeBool(boolean b) throws TException {
+    delegate.writeBool(b);
+  }
+
+  public void writeByte(byte b) throws TException {
+    delegate.writeByte(b);
+  }
+
+  public void writeI16(short i16) throws TException {
+    delegate.writeI16(i16);
+  }
+
+  public void writeI32(int i32) throws TException {
+    delegate.writeI32(i32);
+  }
+
+  public void writeI64(long i64) throws TException {
+    delegate.writeI64(i64);
+  }
+
+  public void writeDouble(double dub) throws TException {
+    delegate.writeDouble(dub);
+  }
+
+  public void writeString(String str) throws TException {
+    delegate.writeString(str);
+  }
+
+  public void writeBinary(ByteBuffer buf) throws TException {
+    delegate.writeBinary(buf);
+  }
+
+  public TMessage readMessageBegin() throws TException {
+    return delegate.readMessageBegin();
+  }
+
+  public void readMessageEnd() throws TException {
+    delegate.readMessageEnd();
+  }
+
+  public TStruct readStructBegin() throws TException {
+    return delegate.readStructBegin();
+  }
+
+  public void readStructEnd() throws TException {
+    delegate.readStructEnd();
+  }
+
+  public TField readFieldBegin() throws TException {
+    return delegate.readFieldBegin();
+  }
+
+  public void readFieldEnd() throws TException {
+    delegate.readFieldEnd();
+  }
+
+  public TMap readMapBegin() throws TException {
+    return delegate.readMapBegin();
+  }
+
+  public void readMapEnd() throws TException {
+    delegate.readMapEnd();
+  }
+
+  public TList readListBegin() throws TException {
+    return delegate.readListBegin();
+  }
+
+  public void readListEnd() throws TException {
+    delegate.readListEnd();
+  }
+
+  public TSet readSetBegin() throws TException {
+    return delegate.readSetBegin();
+  }
+
+  public void readSetEnd() throws TException {
+    delegate.readSetEnd();
+  }
+
+  public boolean equals(Object obj) {
+    return delegate.equals(obj);
+  }
+
+  public boolean readBool() throws TException {
+    return delegate.readBool();
+  }
+
+  public byte readByte() throws TException {
+    return delegate.readByte();
+  }
+
+  public short readI16() throws TException {
+    return delegate.readI16();
+  }
+
+  public int readI32() throws TException {
+    return delegate.readI32();
+  }
+
+  public long readI64() throws TException {
+    return delegate.readI64();
+  }
+
+  public double readDouble() throws TException {
+    return delegate.readDouble();
+  }
+
+  public String readString() throws TException {
+    // this is where we intern the strings
+    return delegate.readString().intern();
+  }
+
+  public ByteBuffer readBinary() throws TException {
+    return delegate.readBinary();
+  }
+
+  public void reset() {
+    delegate.reset();
+  }
+
+  public String toString() {
+    return delegate.toString();
+  }
+
+}
diff --git a/parquet-format-structures/src/main/java/org/apache/parquet/format/LogicalTypes.java b/parquet-format-structures/src/main/java/org/apache/parquet/format/LogicalTypes.java
new file mode 100644
index 0000000000..7c63e41daf
--- /dev/null
+++ b/parquet-format-structures/src/main/java/org/apache/parquet/format/LogicalTypes.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.parquet.format;
+
+/**
+ * Convenience instances of logical type classes.
+ */
+public class LogicalTypes {
+  public static class TimeUnits { // TimeUnit values reused by the TIME_* and TIMESTAMP_* constants below
+    public static final TimeUnit MILLIS = TimeUnit.MILLIS(new MilliSeconds());
+    public static final TimeUnit MICROS = TimeUnit.MICROS(new MicroSeconds());
+  }
+  // DECIMAL takes caller-supplied parameters, so it is a factory method rather than a constant.
+  public static LogicalType DECIMAL(int scale, int precision) {
+    return LogicalType.DECIMAL(new DecimalType(scale, precision));
+  }
+  // Ready-made instances of the logical types that need no caller-supplied parameters.
+  public static final LogicalType UTF8 = LogicalType.STRING(new StringType());
+  public static final LogicalType MAP  = LogicalType.MAP(new MapType());
+  public static final LogicalType LIST = LogicalType.LIST(new ListType());
+  public static final LogicalType ENUM = LogicalType.ENUM(new EnumType());
+  public static final LogicalType DATE = LogicalType.DATE(new DateType());
+  public static final LogicalType TIME_MILLIS = LogicalType.TIME(new TimeType(true, TimeUnits.MILLIS)); // NOTE(review): 'true' is presumably isAdjustedToUTC — confirm
+  public static final LogicalType TIME_MICROS = LogicalType.TIME(new TimeType(true, TimeUnits.MICROS));
+  public static final LogicalType TIMESTAMP_MILLIS = LogicalType.TIMESTAMP(new TimestampType(true, TimeUnits.MILLIS));
+  public static final LogicalType TIMESTAMP_MICROS = LogicalType.TIMESTAMP(new TimestampType(true, TimeUnits.MICROS));
+  public static final LogicalType INT_8 = LogicalType.INTEGER(new IntType((byte) 8, true)); // NOTE(review): boolean is presumably isSigned (true for INT_*, false for UINT_*) — confirm
+  public static final LogicalType INT_16 = LogicalType.INTEGER(new IntType((byte) 16, true));
+  public static final LogicalType INT_32 = LogicalType.INTEGER(new IntType((byte) 32, true));
+  public static final LogicalType INT_64 = LogicalType.INTEGER(new IntType((byte) 64, true));
+  public static final LogicalType UINT_8 = LogicalType.INTEGER(new IntType((byte) 8, false));
+  public static final LogicalType UINT_16 = LogicalType.INTEGER(new IntType((byte) 16, false));
+  public static final LogicalType UINT_32 = LogicalType.INTEGER(new IntType((byte) 32, false));
+  public static final LogicalType UINT_64 = LogicalType.INTEGER(new IntType((byte) 64, false));
+  public static final LogicalType UNKNOWN = LogicalType.UNKNOWN(new NullType());
+  public static final LogicalType JSON = LogicalType.JSON(new JsonType());
+  public static final LogicalType BSON = LogicalType.BSON(new BsonType());
+}
diff --git a/parquet-format-structures/src/main/java/org/apache/parquet/format/Util.java b/parquet-format-structures/src/main/java/org/apache/parquet/format/Util.java
new file mode 100644
index 0000000000..d09d007a20
--- /dev/null
+++ b/parquet-format-structures/src/main/java/org/apache/parquet/format/Util.java
@@ -0,0 +1,236 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.parquet.format;
+
+import static org.apache.parquet.format.FileMetaData._Fields.CREATED_BY;
+import static org.apache.parquet.format.FileMetaData._Fields.KEY_VALUE_METADATA;
+import static org.apache.parquet.format.FileMetaData._Fields.NUM_ROWS;
+import static org.apache.parquet.format.FileMetaData._Fields.ROW_GROUPS;
+import static org.apache.parquet.format.FileMetaData._Fields.SCHEMA;
+import static org.apache.parquet.format.FileMetaData._Fields.VERSION;
+import static org.apache.parquet.format.event.Consumers.fieldConsumer;
+import static org.apache.parquet.format.event.Consumers.listElementsOf;
+import static org.apache.parquet.format.event.Consumers.listOf;
+import static org.apache.parquet.format.event.Consumers.struct;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.List;
+
+import org.apache.thrift.TBase;
+import org.apache.thrift.TException;
+import org.apache.thrift.protocol.TCompactProtocol;
+import org.apache.thrift.protocol.TProtocol;
+import org.apache.thrift.transport.TIOStreamTransport;
+
+import org.apache.parquet.format.event.Consumers.Consumer;
+import org.apache.parquet.format.event.Consumers.DelegatingFieldConsumer;
+import org.apache.parquet.format.event.EventBasedThriftReader;
+import org.apache.parquet.format.event.TypedConsumer.I32Consumer;
+import org.apache.parquet.format.event.TypedConsumer.I64Consumer;
+import org.apache.parquet.format.event.TypedConsumer.StringConsumer;
+
+/**
+ * Utility to read/write metadata
+ * We use the TCompactProtocol to serialize metadata
+ */
+public class Util {
+
+  public static void writeColumnIndex(ColumnIndex columnIndex, OutputStream to) throws IOException {
+    write(columnIndex, to);
+  }
+
+  public static ColumnIndex readColumnIndex(InputStream from) throws IOException {
+    return read(from, new ColumnIndex());
+  }
+
+  public static void writeOffsetIndex(OffsetIndex offsetIndex, OutputStream to) throws IOException {
+    write(offsetIndex, to);
+  }
+
+  public static OffsetIndex readOffsetIndex(InputStream from) throws IOException {
+    return read(from, new OffsetIndex());
+  }
+
+  public static void writePageHeader(PageHeader pageHeader, OutputStream to) throws IOException {
+    write(pageHeader, to);
+  }
+
+  public static PageHeader readPageHeader(InputStream from) throws IOException {
+    return read(from, new PageHeader());
+  }
+
+  public static void writeFileMetaData(org.apache.parquet.format.FileMetaData fileMetadata, OutputStream to) throws IOException {
+    write(fileMetadata, to);
+  }
+
+  public static FileMetaData readFileMetaData(InputStream from) throws IOException {
+    return read(from, new FileMetaData());
+  }
+  /**
+   * reads the meta data from the stream
+   * @param from the stream to read the metadata from
+   * @param skipRowGroups whether row groups should be skipped
+   * @return the resulting metadata
+   * @throws IOException if any I/O error occurs during the reading
+   */
+  public static FileMetaData readFileMetaData(InputStream from, boolean skipRowGroups) throws IOException {
+    final FileMetaData metadata = new FileMetaData();
+    if (!skipRowGroups) {
+      // complete read through the generic helper, row groups included
+      return read(from, metadata);
+    }
+    readFileMetaData(from, new DefaultFileMetaDataConsumer(metadata), true);
+    return metadata;
+  }
+
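+  /**
+   * Callbacks for reading file metadata in a streaming fashion: each method
+   * receives one piece of the metadata as it is decoded from the stream.
+   */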
+  public static abstract class FileMetaDataConsumer {
+    abstract public void setVersion(int version);
+    abstract public void setSchema(List<SchemaElement> schema);
+    abstract public void setNumRows(long numRows);
+    abstract public void addRowGroup(RowGroup rowGroup);
+    abstract public void addKeyValueMetaData(KeyValue kv);
+    abstract public void setCreatedBy(String createdBy);
+  }
+
+  /**
+   * Default consumer that copies every value it receives straight into the
+   * {@link FileMetaData} instance passed to its constructor.
+   */
+  public static final class DefaultFileMetaDataConsumer extends FileMetaDataConsumer {
+    private final FileMetaData md;
+
+    public DefaultFileMetaDataConsumer(FileMetaData md) {
+      this.md = md;
+    }
+
+    @Override
+    public void setVersion(int version) {
+      md.setVersion(version);
+    }
+
+    @Override
+    public void setSchema(List<SchemaElement> schema) {
+      md.setSchema(schema);
+    }
+
+    @Override
+    public void setNumRows(long numRows) {
+      md.setNum_rows(numRows);
+    }
+
+    @Override
+    public void setCreatedBy(String createdBy) {
+      md.setCreated_by(createdBy);
+    }
+
+    @Override
+    public void addRowGroup(RowGroup rowGroup) {
+      md.addToRow_groups(rowGroup);
+    }
+
+    @Override
+    public void addKeyValueMetaData(KeyValue kv) {
+      md.addToKey_value_metadata(kv);
+    }
+  }
+
+  public static void readFileMetaData(InputStream from, FileMetaDataConsumer consumer) throws IOException {
+    readFileMetaData(from, consumer, false);
+  }
+
+  public static void readFileMetaData(InputStream from, final FileMetaDataConsumer consumer, boolean skipRowGroups) throws IOException {
+    // handlers for the fields that are always forwarded to the consumer
+    final DelegatingFieldConsumer always = fieldConsumer()
+    .onField(VERSION, new I32Consumer() {
+      @Override
+      public void consume(int version) {
+        consumer.setVersion(version);
+      }
+    }).onField(SCHEMA, listOf(SchemaElement.class, new Consumer<List<SchemaElement>>() {
+      @Override
+      public void consume(List<SchemaElement> elements) {
+        consumer.setSchema(elements);
+      }
+    })).onField(NUM_ROWS, new I64Consumer() {
+      @Override
+      public void consume(long numRows) {
+        consumer.setNumRows(numRows);
+      }
+    }).onField(KEY_VALUE_METADATA, listElementsOf(struct(KeyValue.class, new Consumer<KeyValue>() {
+      @Override
+      public void consume(KeyValue entry) {
+        consumer.addKeyValueMetaData(entry);
+      }
+    }))).onField(CREATED_BY, new StringConsumer() {
+      @Override
+      public void consume(String createdBy) {
+        consumer.setCreatedBy(createdBy);
+      }
+    });
+    // row groups are only decoded on request; otherwise the default (skipping) handler discards them
+    final DelegatingFieldConsumer fields = skipRowGroups ? always
+        : always.onField(ROW_GROUPS, listElementsOf(struct(RowGroup.class, new Consumer<RowGroup>() {
+          @Override
+          public void consume(RowGroup group) {
+            consumer.addRowGroup(group);
+          }
+        })));
+    try {
+      new EventBasedThriftReader(protocol(from)).readStruct(fields);
+    } catch (TException e) {
+      throw new IOException("can not read FileMetaData: " + e.getMessage(), e);
+    }
+  }
+
+  private static TProtocol protocol(OutputStream to) {
+    return protocol(new TIOStreamTransport(to));
+  }
+
+  private static TProtocol protocol(InputStream from) {
+    return protocol(new TIOStreamTransport(from));
+  }
+
+  private static InterningProtocol protocol(TIOStreamTransport t) {
+    return new InterningProtocol(new TCompactProtocol(t));
+  }
+
+  private static <T extends TBase<?,?>> T read(InputStream from, T tbase) throws IOException {
+    try {
+      tbase.read(protocol(from));
+      return tbase;
+    } catch (TException e) {
+      throw new IOException("can not read " + tbase.getClass() + ": " + e.getMessage(), e);
+    }
+  }
+
+  private static void write(TBase<?, ?> tbase, OutputStream to) throws IOException {
+    try {
+      tbase.write(protocol(to));
+    } catch (TException e) { // same message shape as read(): type and cause, not the whole stringified struct
+      throw new IOException("can not write " + tbase.getClass() + ": " + e.getMessage(), e);
+    }
+  }
+}
diff --git a/parquet-format-structures/src/main/java/org/apache/parquet/format/event/Consumers.java b/parquet-format-structures/src/main/java/org/apache/parquet/format/event/Consumers.java
new file mode 100644
index 0000000000..ef87997e7a
--- /dev/null
+++ b/parquet-format-structures/src/main/java/org/apache/parquet/format/event/Consumers.java
@@ -0,0 +1,193 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.format.event;
+
+import static java.util.Collections.unmodifiableMap;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.thrift.TBase;
+import org.apache.thrift.TException;
+import org.apache.thrift.TFieldIdEnum;
+import org.apache.thrift.protocol.TList;
+import org.apache.thrift.protocol.TProtocol;
+import org.apache.thrift.protocol.TProtocolUtil;
+
+import org.apache.parquet.format.event.Consumers.Consumer;
+import org.apache.parquet.format.event.TypedConsumer.ListConsumer;
+import org.apache.parquet.format.event.TypedConsumer.StructConsumer;
+
+/**
+ * Entry point for reading thrift in a streaming fashion
+ */
+public class Consumers {
+
+  /**
+   * To consume objects coming from a DelegatingFieldConsumer
+   *
+   * @param <T> the type of consumed objects
+   */
+  public static interface Consumer<T> {
+    void consume(T t);
+  }
+
+  /**
+   * Delegates reading the field to TypedConsumers.
+   * There is one TypedConsumer per thrift type.
+   * use {@link #onField(TFieldIdEnum, TypedConsumer)} et al. to consume specific thrift fields.
+   * @see Consumers#fieldConsumer()
+   */
+  public static class DelegatingFieldConsumer implements FieldConsumer {
+    /** handler for every field id that has no entry in {@code handlersById} */
+    private final FieldConsumer fallback;
+    /** read-only view of the registered handlers, keyed by thrift field id */
+    private final Map<Short, TypedConsumer> handlersById;
+
+    private DelegatingFieldConsumer() {
+      this(new SkippingFieldConsumer());
+    }
+
+    private DelegatingFieldConsumer(FieldConsumer fallback) {
+      this(fallback, Collections.<Short, TypedConsumer>emptyMap());
+    }
+
+    private DelegatingFieldConsumer(FieldConsumer fallback, Map<Short, TypedConsumer> handlersById) {
+      this.handlersById = unmodifiableMap(handlersById);
+      this.fallback = fallback;
+    }
+
+    public DelegatingFieldConsumer onField(TFieldIdEnum e, TypedConsumer typedConsumer) {
+      // this instance is left untouched; the extended handler table goes into a new instance
+      Map<Short, TypedConsumer> extended = new HashMap<Short, TypedConsumer>(handlersById);
+      extended.put(e.getThriftFieldId(), typedConsumer);
+      return new DelegatingFieldConsumer(fallback, extended);
+    }
+
+    @Override
+    public void consumeField(TProtocol protocol, EventBasedThriftReader reader, short id, byte type) throws TException {
+      final TypedConsumer handler = handlersById.get(id);
+      if (handler == null) {
+        fallback.consumeField(protocol, reader, id, type);
+        return;
+      }
+      handler.read(protocol, reader, type);
+    }
+  }
+
+  /**
+   * call onField on the resulting DelegatingFieldConsumer to handle individual fields
+   * @return a new DelegatingFieldConsumer
+   */
+  public static DelegatingFieldConsumer fieldConsumer() {
+    return new DelegatingFieldConsumer();
+  }
+
+  /**
+   * To consume a list of elements
+   * @param c the class of the list content
+   * @param consumer the consumer that will receive the list
+   * @param <T> the type of the list content
+   * @return a ListConsumer that can be passed to the DelegatingFieldConsumer
+   */
+  public static <T extends TBase<T,? extends TFieldIdEnum>> ListConsumer listOf(Class<T> c, final Consumer<List<T>> consumer) {
+    // gathers the decoded elements of the list that is currently being read
+    class ElementCollector implements Consumer<T> {
+      List<T> elements;
+      @Override
+      public void consume(T element) {
+        elements.add(element);
+      }
+    }
+    final ElementCollector collector = new ElementCollector();
+    return new DelegatingListElementsConsumer(struct(c, collector)) {
+      @Override
+      public void consumeList(TProtocol protocol, EventBasedThriftReader reader, TList tList) throws TException {
+        collector.elements = new ArrayList<T>(); // fresh list for every list that is read
+        super.consumeList(protocol, reader, tList);
+        consumer.consume(collector.elements);
+      }
+    };
+  }
+
+  /**
+   * To consume list elements one by one
+   * @param consumer the consumer that will read the elements
+   * @return a ListConsumer that can be passed to the DelegatingFieldConsumer
+   */
+  public static ListConsumer listElementsOf(TypedConsumer consumer) {
+    return new DelegatingListElementsConsumer(consumer);
+  }
+
+  public static <T extends TBase<T,? extends TFieldIdEnum>> StructConsumer struct(final Class<T> c, final Consumer<T> consumer) {
+    return new TBaseStructConsumer<T>(c, consumer);
+  }
+}
+
+class SkippingFieldConsumer implements FieldConsumer {
+  @Override
+  public void consumeField(TProtocol protocol, EventBasedThriftReader reader, short id, byte type) throws TException {
+    TProtocolUtil.skip(protocol, type);
+  }
+}
+
+/** ListConsumer that forwards the reading of every list element to a single TypedConsumer. */
+class DelegatingListElementsConsumer extends ListConsumer {
+  /** reads each element of the list; assigned once in the constructor, hence final */
+  private final TypedConsumer elementConsumer;
+  protected DelegatingListElementsConsumer(TypedConsumer consumer) {
+    this.elementConsumer = consumer;
+  }
+
+  @Override
+  public void consumeElement(TProtocol protocol, EventBasedThriftReader reader, byte elemType) throws TException {
+    elementConsumer.read(protocol, reader, elemType);
+  }
+}
+/** StructConsumer that reads each struct into a new instance of {@code c} and hands it to a Consumer. */
+class TBaseStructConsumer<T extends TBase<T, ? extends TFieldIdEnum>> extends StructConsumer {
+
+  private final Class<T> c;
+  private final Consumer<T> consumer; // assigned once in the constructor, hence final like c
+
+  public TBaseStructConsumer(Class<T> c, Consumer<T> consumer) {
+    this.c = c;
+    this.consumer = consumer;
+  }
+
+  @Override
+  public void consumeStruct(TProtocol protocol, EventBasedThriftReader reader) throws TException {
+    T o = newObject();
+    o.read(protocol); // the object decodes itself from the protocol; the event reader is not involved
+    consumer.consume(o);
+  }
+
+  protected T newObject() {
+    try {
+      return c.newInstance(); // requires an accessible no-argument constructor on c
+    } catch (InstantiationException e) {
+      throw new RuntimeException(c.getName(), e);
+    } catch (IllegalAccessException e) {
+      throw new RuntimeException(c.getName(), e);
+    }
+  }
+}
\ No newline at end of file
diff --git a/parquet-format-structures/src/main/java/org/apache/parquet/format/event/EventBasedThriftReader.java b/parquet-format-structures/src/main/java/org/apache/parquet/format/event/EventBasedThriftReader.java
new file mode 100644
index 0000000000..2fb9cf651f
--- /dev/null
+++ b/parquet-format-structures/src/main/java/org/apache/parquet/format/event/EventBasedThriftReader.java
@@ -0,0 +1,126 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.format.event;
+
+import org.apache.thrift.TException;
+import org.apache.thrift.protocol.TField;
+import org.apache.thrift.protocol.TList;
+import org.apache.thrift.protocol.TMap;
+import org.apache.thrift.protocol.TProtocol;
+import org.apache.thrift.protocol.TSet;
+import org.apache.thrift.protocol.TType;
+
+import org.apache.parquet.format.event.TypedConsumer.ListConsumer;
+import org.apache.parquet.format.event.TypedConsumer.MapConsumer;
+import org.apache.parquet.format.event.TypedConsumer.SetConsumer;
+
+/**
+ * Event based reader for Thrift
+ */
+public final class EventBasedThriftReader {
+
+  private final TProtocol protocol;
+
+  /**
+   * @param protocol the protocol to read from
+   */
+  public EventBasedThriftReader(TProtocol protocol) {
+    this.protocol = protocol;
+  }
+
+  /**
+   * reads a Struct from the underlying protocol and passes the field events to the FieldConsumer
+   * @param c the field consumer
+   * @throws TException if any thrift related error occurs during the reading
+   */
+  public void readStruct(FieldConsumer c) throws TException {
+    protocol.readStructBegin();
+    readStructContent(c);
+    protocol.readStructEnd();
+  }
+
+  /**
+   * reads the content of a struct (fields) from the underlying protocol and passes the events to c
+   * @param c the field consumer
+   * @throws TException if any thrift related error occurs during the reading
+   */
+  public void readStructContent(FieldConsumer c) throws TException {
+    TField field = protocol.readFieldBegin();
+    while (field.type != TType.STOP) {
+      c.consumeField(protocol, this, field.id, field.type);
+      // every readFieldBegin must be paired with readFieldEnd (a no-op for the
+      // compact and binary protocols, required by e.g. the JSON protocol)
+      protocol.readFieldEnd();
+      field = protocol.readFieldBegin();
+    }
+  }
+
+  /**
+   * reads the set content (elements) from the underlying protocol and passes the events to the set event consumer
+   * @param eventConsumer the consumer
+   * @param tSet the set descriptor
+   * @throws TException if any thrift related error occurs during the reading
+   */
+  public void readSetContent(SetConsumer eventConsumer, TSet tSet)
+      throws TException {
+    for (int i = 0; i < tSet.size; i++) {
+      eventConsumer.consumeElement(protocol, this, tSet.elemType);
+    }
+  }
+
+  /**
+   * reads the map content (key values) from the underlying protocol and passes the events to the map event consumer
+   * @param eventConsumer the consumer
+   * @param tMap the map descriptor
+   * @throws TException if any thrift related error occurs during the reading
+   */
+  public void readMapContent(MapConsumer eventConsumer, TMap tMap)
+      throws TException {
+    for (int i = 0; i < tMap.size; i++) {
+      eventConsumer.consumeEntry(protocol, this, tMap.keyType, tMap.valueType);
+    }
+  }
+
+  /**
+   * reads a key-value pair
+   * @param keyType the type of the key
+   * @param keyConsumer the consumer for the key
+   * @param valueType the type of the value
+   * @param valueConsumer the consumer for the value
+   * @throws TException if any thrift related error occurs during the reading
+   */
+  public void readMapEntry(byte keyType, TypedConsumer keyConsumer, byte valueType, TypedConsumer valueConsumer)
+      throws TException {
+    keyConsumer.read(protocol, this, keyType);
+    valueConsumer.read(protocol, this, valueType);
+  }
+
+  /**
+   * reads the list content (elements) from the underlying protocol and passes the events to the list event consumer
+   * @param eventConsumer the consumer
+   * @param tList the list descriptor
+   * @throws TException if any thrift related error occurs during the reading
+   */
+  public void readListContent(ListConsumer eventConsumer, TList tList)
+      throws TException {
+    for (int i = 0; i < tList.size; i++) {
+      eventConsumer.consumeElement(protocol, this, tList.elemType);
+    }
+  }
+}
\ No newline at end of file
diff --git a/parquet-format-structures/src/main/java/org/apache/parquet/format/event/FieldConsumer.java b/parquet-format-structures/src/main/java/org/apache/parquet/format/event/FieldConsumer.java
new file mode 100644
index 0000000000..6656934b6c
--- /dev/null
+++ b/parquet-format-structures/src/main/java/org/apache/parquet/format/event/FieldConsumer.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.format.event;
+
+import org.apache.thrift.TException;
+import org.apache.thrift.protocol.TProtocol;
+
+/**
+ * To receive Thrift field events
+ */
+public interface FieldConsumer {
+
+  /**
+   * called by the EventBasedThriftReader when reading a field from a Struct
+   * @param protocol the underlying protocol
+   * @param eventBasedThriftReader the reader to delegate to further calls.
+   * @param id the id of the field
+   * @param type the type of the field
+   * @throws TException if any thrift related error occurs during the reading
+   */
+  public void consumeField(TProtocol protocol, EventBasedThriftReader eventBasedThriftReader, short id, byte type) throws TException;
+
+}
\ No newline at end of file
diff --git a/parquet-format-structures/src/main/java/org/apache/parquet/format/event/TypedConsumer.java b/parquet-format-structures/src/main/java/org/apache/parquet/format/event/TypedConsumer.java
new file mode 100644
index 0000000000..734449f5ed
--- /dev/null
+++ b/parquet-format-structures/src/main/java/org/apache/parquet/format/event/TypedConsumer.java
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.format.event;
+
+import static org.apache.thrift.protocol.TType.BOOL;
+import static org.apache.thrift.protocol.TType.BYTE;
+import static org.apache.thrift.protocol.TType.DOUBLE;
+import static org.apache.thrift.protocol.TType.I16;
+import static org.apache.thrift.protocol.TType.I32;
+import static org.apache.thrift.protocol.TType.I64;
+import static org.apache.thrift.protocol.TType.LIST;
+import static org.apache.thrift.protocol.TType.MAP;
+import static org.apache.thrift.protocol.TType.SET;
+import static org.apache.thrift.protocol.TType.STRING;
+import static org.apache.thrift.protocol.TType.STRUCT;
+
+import org.apache.thrift.TException;
+import org.apache.thrift.protocol.TList;
+import org.apache.thrift.protocol.TMap;
+import org.apache.thrift.protocol.TProtocol;
+import org.apache.thrift.protocol.TSet;
+
+/**
+ * receive thrift events of a given type
+ */
+abstract public class TypedConsumer {
+
+  abstract public static class DoubleConsumer extends TypedConsumer {
+    protected DoubleConsumer() { super(DOUBLE); }
+    @Override
+    final void read(TProtocol protocol, EventBasedThriftReader reader) throws TException {
+      this.consume(protocol.readDouble());
+    }
+    abstract public void consume(double value);
+  }
+
+  abstract public static class ByteConsumer extends TypedConsumer {
+    protected ByteConsumer() { super(BYTE); }
+    @Override
+    final void read(TProtocol protocol, EventBasedThriftReader reader) throws TException {
+      this.consume(protocol.readByte());
+    }
+    abstract public void consume(byte value);
+  }
+
+  abstract public static class BoolConsumer extends TypedConsumer {
+    protected BoolConsumer() { super(BOOL); }
+    @Override
+    final void read(TProtocol protocol, EventBasedThriftReader reader) throws TException {
+      this.consume(protocol.readBool());
+    }
+    abstract public void consume(boolean value);
+  }
+
+  abstract public static class I32Consumer extends TypedConsumer {
+    protected I32Consumer() { super(I32); }
+    @Override
+    final void read(TProtocol protocol, EventBasedThriftReader reader) throws TException {
+      this.consume(protocol.readI32());
+    }
+    abstract public void consume(int value);
+  }
+
+  abstract public static class I64Consumer extends TypedConsumer {
+    protected I64Consumer() { super(I64); }
+    @Override final void read(TProtocol protocol, EventBasedThriftReader reader) throws TException { // annotated like the sibling consumers
+      this.consume(protocol.readI64());
+    }
+    abstract public void consume(long value);
+  }
+
+  abstract public static class I16Consumer extends TypedConsumer {
+    protected I16Consumer() { super(I16); }
+    @Override
+    final void read(TProtocol protocol, EventBasedThriftReader reader) throws TException {
+      this.consume(protocol.readI16());
+    }
+    abstract public void consume(short value);
+  }
+
+  abstract public static class StringConsumer extends TypedConsumer {
+    protected StringConsumer() { super(STRING); }
+    @Override
+    final void read(TProtocol protocol, EventBasedThriftReader reader) throws TException {
+      this.consume(protocol.readString());
+    }
+    abstract public void consume(String value);
+  }
+
+  abstract public static class StructConsumer extends TypedConsumer {
+    protected StructConsumer() { super(STRUCT); }
+    @Override
+    final void read(TProtocol protocol, EventBasedThriftReader reader) throws TException {
+      this.consumeStruct(protocol, reader);
+    }
+    /**
+     * can either delegate to the reader or read the struct from the protocol
+     * reader.readStruct(fieldConsumer);
+     * @param protocol the underlying protocol
+     * @param reader the reader to delegate to
+     * @throws TException if any thrift related error occurs during the reading
+     */
+    abstract public void consumeStruct(TProtocol protocol, EventBasedThriftReader reader) throws TException;
+  }
+
+  abstract public static class ListConsumer extends TypedConsumer {
+    protected ListConsumer() { super(LIST); }
+    @Override
+    final void read(TProtocol protocol, EventBasedThriftReader reader) throws TException {
+      this.consumeList(protocol, reader, protocol.readListBegin());
+      protocol.readListEnd();
+    }
+    public void consumeList(TProtocol protocol, EventBasedThriftReader reader, TList tList) throws TException {
+      reader.readListContent(this, tList);
+    }
+    /**
+     * can either delegate to the reader or read the element from the protocol
+     * @param protocol the underlying protocol
+     * @param reader the reader to delegate to
+     * @param elemType the type of the element
+     * @throws TException if any thrift related error occurs during the reading
+     */
+    abstract public void consumeElement(TProtocol protocol, EventBasedThriftReader reader, byte elemType) throws TException;
+  }
+
+  abstract public static class SetConsumer extends TypedConsumer {
+    protected SetConsumer() { super(SET); }
+    @Override
+    final void read(TProtocol protocol, EventBasedThriftReader reader) throws TException {
+      this.consumeSet(protocol, reader, protocol.readSetBegin());
+      protocol.readSetEnd();
+    }
+    public void consumeSet(TProtocol protocol, EventBasedThriftReader reader, TSet tSet) throws TException {
+      reader.readSetContent(this, tSet);
+    }
+    /**
+     * can either delegate to the reader or read the set from the protocol
+     * @param protocol the underlying protocol
+     * @param reader the reader to delegate to
+     * @param elemType the type of the element
+     * @throws TException if any thrift related error occurs during the reading
+     */
+    abstract public void consumeElement(
+        TProtocol protocol, EventBasedThriftReader reader,
+        byte elemType) throws TException;
+  }
+
+  abstract public static class MapConsumer extends TypedConsumer {
+    protected MapConsumer() { super(MAP); }
+    @Override
+    final void read(TProtocol protocol, EventBasedThriftReader reader)
+        throws TException {
+      this.consumeMap(protocol, reader , protocol.readMapBegin());
+      protocol.readMapEnd();
+    }
+    public void consumeMap(TProtocol protocol, EventBasedThriftReader reader, TMap tMap) throws TException {
+      reader.readMapContent(this, tMap);
+    }
+    /**
+     * can either delegate to the reader or read the map entry from the protocol
+     * @param protocol the underlying protocol
+     * @param reader the reader to delegate to
+     * @param keyType the type of the key
+     * @param valueType the type of the value
+     * @throws TException if any thrift related error occurs during the reading
+     */
+    abstract public void consumeEntry(
+        TProtocol protocol, EventBasedThriftReader reader,
+        byte keyType, byte valueType) throws TException;
+  }
+
+  public final byte type;
+
+  private TypedConsumer(byte type) {
+    this.type = type;
+  }
+
+  final public void read(TProtocol protocol, EventBasedThriftReader reader, byte type) throws TException {
+    if (this.type == type) {
+      this.read(protocol, reader);
+      return;
+    }
+    // the stream carries a different thrift type than the one this consumer handles
+    throw new TException(
+        "Incorrect type in stream. " + "Expected " + this.type + " but got " + type);
+  }
+
+  abstract void read(TProtocol protocol, EventBasedThriftReader reader) throws TException;
+}
\ No newline at end of file
diff --git a/parquet-format-structures/src/test/java/org/apache/parquet/format/TestUtil.java b/parquet-format-structures/src/test/java/org/apache/parquet/format/TestUtil.java
new file mode 100644
index 0000000000..1adf0998fb
--- /dev/null
+++ b/parquet-format-structures/src/test/java/org/apache/parquet/format/TestUtil.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.format;
+
+import static java.util.Arrays.asList;
+import static junit.framework.Assert.assertEquals;
+import static junit.framework.Assert.assertNull;
+import static org.apache.parquet.format.Util.readFileMetaData;
+import static org.apache.parquet.format.Util.writeFileMetaData;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+
+import org.junit.Test;
+
+import org.apache.parquet.format.Util.DefaultFileMetaDataConsumer;
+public class TestUtil {
+  // Round-trips a FileMetaData through every readFileMetaData overload; reads passing true are expected to omit row groups.
+  @Test
+  public void testReadFileMetadata() throws Exception {
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    FileMetaData md = new FileMetaData(
+        1,
+        asList(new SchemaElement("foo")),
+        10,
+        asList(
+            new RowGroup(
+                asList(
+                    new ColumnChunk(0),
+                    new ColumnChunk(1)
+                    ),
+                10,
+                5),
+            new RowGroup(
+                asList(
+                    new ColumnChunk(2),
+                    new ColumnChunk(3)
+                    ),
+                11,
+                5)
+        )
+    );
+    writeFileMetaData(md , baos);
+    FileMetaData md2 = readFileMetaData(in(baos));
+    FileMetaData md3 = new FileMetaData();
+    readFileMetaData(in(baos), new DefaultFileMetaDataConsumer(md3));
+    FileMetaData md4 = new FileMetaData();
+    readFileMetaData(in(baos), new DefaultFileMetaDataConsumer(md4), true);
+    FileMetaData md5 = readFileMetaData(in(baos), true);
+    FileMetaData md6 = readFileMetaData(in(baos), false);
+    assertEquals(md, md2);
+    assertEquals(md, md3);
+    assertNull(md4.getRow_groups());
+    assertNull(md5.getRow_groups());
+    assertEquals(md4, md5);
+    md4.setRow_groups(md.getRow_groups());
+    md5.setRow_groups(md.getRow_groups());
+    assertEquals(md, md4);
+    assertEquals(md, md5);
+    assertEquals(md4, md5);
+    assertEquals(md, md6);
+  }
+  // Wraps a copy of the bytes written to baos in a new stream, so every read starts from the first byte.
+  private ByteArrayInputStream in(ByteArrayOutputStream baos) {
+    return new ByteArrayInputStream(baos.toByteArray());
+  }
+}
diff --git a/parquet-hadoop/pom.xml b/parquet-hadoop/pom.xml
index 98972a2357..8d31f7dd03 100644
--- a/parquet-hadoop/pom.xml
+++ b/parquet-hadoop/pom.xml
@@ -43,8 +43,8 @@
     </dependency>
     <dependency>
       <groupId>org.apache.parquet</groupId>
-      <artifactId>parquet-format</artifactId>
-      <version>${parquet.format.version}</version>
+      <artifactId>parquet-format-structures</artifactId>
+      <version>${project.version}</version>
     </dependency>
     <dependency>
       <groupId>org.apache.hadoop</groupId>
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/HadoopReadOptions.java b/parquet-hadoop/src/main/java/org/apache/parquet/HadoopReadOptions.java
index b8f481e8a7..4f5c78adb2 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/HadoopReadOptions.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/HadoopReadOptions.java
@@ -28,6 +28,7 @@
 
 import java.util.Map;
 
+import static org.apache.parquet.hadoop.ParquetInputFormat.COLUMN_INDEX_FILTERING_ENABLED;
 import static org.apache.parquet.hadoop.ParquetInputFormat.DICTIONARY_FILTERING_ENABLED;
 import static org.apache.parquet.hadoop.ParquetInputFormat.RECORD_FILTERING_ENABLED;
 import static org.apache.parquet.hadoop.ParquetInputFormat.STATS_FILTERING_ENABLED;
@@ -43,6 +44,7 @@ private HadoopReadOptions(boolean useSignedStringMinMax,
                             boolean useStatsFilter,
                             boolean useDictionaryFilter,
                             boolean useRecordFilter,
+                            boolean useColumnIndexFilter,
                             FilterCompat.Filter recordFilter,
                             MetadataFilter metadataFilter,
                             CompressionCodecFactory codecFactory,
@@ -51,8 +53,8 @@ private HadoopReadOptions(boolean useSignedStringMinMax,
                             Map<String, String> properties,
                             Configuration conf) {
     super(
-        useSignedStringMinMax, useStatsFilter, useDictionaryFilter, useRecordFilter, recordFilter,
-        metadataFilter, codecFactory, allocator, maxAllocationSize, properties
+        useSignedStringMinMax, useStatsFilter, useDictionaryFilter, useRecordFilter, useColumnIndexFilter,
+        recordFilter, metadataFilter, codecFactory, allocator, maxAllocationSize, properties
     );
     this.conf = conf;
   }
@@ -83,6 +85,7 @@ public Builder(Configuration conf) {
       useDictionaryFilter(conf.getBoolean(DICTIONARY_FILTERING_ENABLED, true));
       useStatsFilter(conf.getBoolean(STATS_FILTERING_ENABLED, true));
       useRecordFilter(conf.getBoolean(RECORD_FILTERING_ENABLED, true));
+      useColumnIndexFilter(conf.getBoolean(COLUMN_INDEX_FILTERING_ENABLED, true));
       withCodecFactory(HadoopCodecs.newFactory(conf, 0));
       withRecordFilter(getFilter(conf));
       withMaxAllocationInBytes(conf.getInt(ALLOCATION_SIZE, 8388608));
@@ -95,7 +98,7 @@ public Builder(Configuration conf) {
     @Override
     public ParquetReadOptions build() {
       return new HadoopReadOptions(
-          useSignedStringMinMax, useStatsFilter, useDictionaryFilter, useRecordFilter,
+          useSignedStringMinMax, useStatsFilter, useDictionaryFilter, useRecordFilter, useColumnIndexFilter,
           recordFilter, metadataFilter, codecFactory, allocator, maxAllocationSize, properties,
           conf);
     }
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/ParquetReadOptions.java b/parquet-hadoop/src/main/java/org/apache/parquet/ParquetReadOptions.java
index 4ef24601c9..846d3bd809 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/ParquetReadOptions.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/ParquetReadOptions.java
@@ -38,12 +38,14 @@ public class ParquetReadOptions {
   private static final boolean RECORD_FILTERING_ENABLED_DEFAULT = true;
   private static final boolean STATS_FILTERING_ENABLED_DEFAULT = true;
   private static final boolean DICTIONARY_FILTERING_ENABLED_DEFAULT = true;
+  private static final boolean COLUMN_INDEX_FILTERING_ENABLED_DEFAULT = true;
   private static final int ALLOCATION_SIZE_DEFAULT = 8388608; // 8MB
 
   private final boolean useSignedStringMinMax;
   private final boolean useStatsFilter;
   private final boolean useDictionaryFilter;
   private final boolean useRecordFilter;
+  private final boolean useColumnIndexFilter;
   private final FilterCompat.Filter recordFilter;
   private final ParquetMetadataConverter.MetadataFilter metadataFilter;
   private final CompressionCodecFactory codecFactory;
@@ -55,6 +57,7 @@ public class ParquetReadOptions {
                      boolean useStatsFilter,
                      boolean useDictionaryFilter,
                      boolean useRecordFilter,
+                     boolean useColumnIndexFilter,
                      FilterCompat.Filter recordFilter,
                      ParquetMetadataConverter.MetadataFilter metadataFilter,
                      CompressionCodecFactory codecFactory,
@@ -65,6 +68,7 @@ public class ParquetReadOptions {
     this.useStatsFilter = useStatsFilter;
     this.useDictionaryFilter = useDictionaryFilter;
     this.useRecordFilter = useRecordFilter;
+    this.useColumnIndexFilter = useColumnIndexFilter;
     this.recordFilter = recordFilter;
     this.metadataFilter = metadataFilter;
     this.codecFactory = codecFactory;
@@ -89,6 +93,10 @@ public boolean useRecordFilter() {
     return useRecordFilter;
   }
 
+  public boolean useColumnIndexFilter() {
+    return useColumnIndexFilter;
+  }
+
   public FilterCompat.Filter getRecordFilter() {
     return recordFilter;
   }
@@ -134,6 +142,7 @@ public static class Builder {
     protected boolean useStatsFilter = STATS_FILTERING_ENABLED_DEFAULT;
     protected boolean useDictionaryFilter = DICTIONARY_FILTERING_ENABLED_DEFAULT;
     protected boolean useRecordFilter = RECORD_FILTERING_ENABLED_DEFAULT;
+    protected boolean useColumnIndexFilter = COLUMN_INDEX_FILTERING_ENABLED_DEFAULT;
     protected FilterCompat.Filter recordFilter = null;
     protected ParquetMetadataConverter.MetadataFilter metadataFilter = NO_FILTER;
     // the page size parameter isn't used when only using the codec factory to get decompressors
@@ -182,6 +191,15 @@ public Builder useRecordFilter() {
       return this;
     }
 
+    public Builder useColumnIndexFilter(boolean useColumnIndexFilter) {
+      this.useColumnIndexFilter = useColumnIndexFilter;
+      return this;
+    }
+
+    public Builder useColumnIndexFilter() {
+      return useColumnIndexFilter(true);
+    }
+
     public Builder withRecordFilter(FilterCompat.Filter rowGroupFilter) {
       this.recordFilter = rowGroupFilter;
       return this;
@@ -239,7 +257,7 @@ public Builder copy(ParquetReadOptions options) {
 
     public ParquetReadOptions build() {
       return new ParquetReadOptions(
-          useSignedStringMinMax, useStatsFilter, useDictionaryFilter, useRecordFilter,
+          useSignedStringMinMax, useStatsFilter, useDictionaryFilter, useRecordFilter, useColumnIndexFilter,
           recordFilter, metadataFilter, codecFactory, allocator, maxAllocationSize, properties);
     }
   }
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
index 7f2a766a47..468ae0277f 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
@@ -18,6 +18,10 @@
  */
 package org.apache.parquet.format.converter;
 
+import static java.util.Optional.empty;
+
+import static java.util.Optional.empty;
+import static java.util.Optional.of;
 import static org.apache.parquet.format.Util.readFileMetaData;
 import static org.apache.parquet.format.Util.writePageHeader;
 
@@ -52,6 +56,7 @@
 import org.apache.parquet.format.MapType;
 import org.apache.parquet.format.MicroSeconds;
 import org.apache.parquet.format.MilliSeconds;
+import org.apache.parquet.format.NanoSeconds;
 import org.apache.parquet.format.NullType;
 import org.apache.parquet.format.PageEncodingStats;
 import org.apache.parquet.format.StringType;
@@ -59,7 +64,9 @@
 import org.apache.parquet.format.TimeUnit;
 import org.apache.parquet.format.TimestampType;
 import org.apache.parquet.hadoop.metadata.ColumnPath;
+import org.apache.parquet.format.BoundaryOrder;
 import org.apache.parquet.format.ColumnChunk;
+import org.apache.parquet.format.ColumnIndex;
 import org.apache.parquet.format.ColumnMetaData;
 import org.apache.parquet.format.ColumnOrder;
 import org.apache.parquet.format.ConvertedType;
@@ -70,7 +77,9 @@
 import org.apache.parquet.format.FieldRepetitionType;
 import org.apache.parquet.format.FileMetaData;
 import org.apache.parquet.format.KeyValue;
+import org.apache.parquet.format.OffsetIndex;
 import org.apache.parquet.format.PageHeader;
+import org.apache.parquet.format.PageLocation;
 import org.apache.parquet.format.PageType;
 import org.apache.parquet.format.RowGroup;
 import org.apache.parquet.format.SchemaElement;
@@ -82,6 +91,9 @@
 import org.apache.parquet.hadoop.metadata.CompressionCodecName;
 import org.apache.parquet.column.EncodingStats;
 import org.apache.parquet.hadoop.metadata.ParquetMetadata;
+import org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder;
+import org.apache.parquet.internal.column.columnindex.OffsetIndexBuilder;
+import org.apache.parquet.internal.hadoop.metadata.IndexReference;
 import org.apache.parquet.io.ParquetDecodingException;
 import org.apache.parquet.schema.ColumnOrder.ColumnOrderName;
 import org.apache.parquet.schema.GroupType;
@@ -248,7 +260,7 @@ LogicalType convertToLogicalType(LogicalTypeAnnotation logicalTypeAnnotation) {
   }
 
   ConvertedType convertToConvertedType(LogicalTypeAnnotation logicalTypeAnnotation) {
-    return logicalTypeAnnotation.accept(CONVERTED_TYPE_CONVERTER_VISITOR).get();
+    return logicalTypeAnnotation.accept(CONVERTED_TYPE_CONVERTER_VISITOR).orElse(null);
   }
 
   static org.apache.parquet.format.TimeUnit convertUnit(LogicalTypeAnnotation.TimeUnit unit) {
@@ -257,6 +269,8 @@ static org.apache.parquet.format.TimeUnit convertUnit(LogicalTypeAnnotation.Time
         return org.apache.parquet.format.TimeUnit.MICROS(new MicroSeconds());
       case MILLIS:
         return org.apache.parquet.format.TimeUnit.MILLIS(new MilliSeconds());
+      case NANOS:
+        return TimeUnit.NANOS(new NanoSeconds());
       default:
         throw new RuntimeException("Unknown time unit " + unit);
     }
@@ -264,161 +278,165 @@ static org.apache.parquet.format.TimeUnit convertUnit(LogicalTypeAnnotation.Time
 
   private static class ConvertedTypeConverterVisitor implements LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<ConvertedType> {
     @Override
-    public Optional<ConvertedType> visit(LogicalTypeAnnotation.StringLogicalTypeAnnotation logicalTypeAnnotation) {
-      return Optional.of(ConvertedType.UTF8);
+    public Optional<ConvertedType> visit(LogicalTypeAnnotation.StringLogicalTypeAnnotation stringLogicalType) {
+      return of(ConvertedType.UTF8);
     }
 
     @Override
-    public Optional<ConvertedType> visit(LogicalTypeAnnotation.MapLogicalTypeAnnotation logicalTypeAnnotation) {
-      return Optional.of(ConvertedType.MAP);
+    public Optional<ConvertedType> visit(LogicalTypeAnnotation.MapLogicalTypeAnnotation mapLogicalType) {
+      return of(ConvertedType.MAP);
     }
 
     @Override
-    public Optional<ConvertedType> visit(LogicalTypeAnnotation.ListLogicalTypeAnnotation logicalTypeAnnotation) {
-      return Optional.of(ConvertedType.LIST);
+    public Optional<ConvertedType> visit(LogicalTypeAnnotation.ListLogicalTypeAnnotation listLogicalType) {
+      return of(ConvertedType.LIST);
     }
 
     @Override
-    public Optional<ConvertedType> visit(LogicalTypeAnnotation.EnumLogicalTypeAnnotation logicalTypeAnnotation) {
-      return Optional.of(ConvertedType.ENUM);
+    public Optional<ConvertedType> visit(LogicalTypeAnnotation.EnumLogicalTypeAnnotation enumLogicalType) {
+      return of(ConvertedType.ENUM);
     }
 
     @Override
-    public Optional<ConvertedType> visit(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation logicalTypeAnnotation) {
-      return Optional.of(ConvertedType.DECIMAL);
+    public Optional<ConvertedType> visit(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) {
+      return of(ConvertedType.DECIMAL);
     }
 
     @Override
-    public Optional<ConvertedType> visit(LogicalTypeAnnotation.DateLogicalTypeAnnotation logicalTypeAnnotation) {
-      return Optional.of(ConvertedType.DATE);
+    public Optional<ConvertedType> visit(LogicalTypeAnnotation.DateLogicalTypeAnnotation dateLogicalType) {
+      return of(ConvertedType.DATE);
     }
 
     @Override
-    public Optional<ConvertedType> visit(LogicalTypeAnnotation.TimeLogicalTypeAnnotation logicalTypeAnnotation) {
-      switch (logicalTypeAnnotation.getUnit()) {
+    public Optional<ConvertedType> visit(LogicalTypeAnnotation.TimeLogicalTypeAnnotation timeLogicalType) {
+      switch (timeLogicalType.getUnit()) {
         case MILLIS:
-          return Optional.of(ConvertedType.TIME_MILLIS);
+          return of(ConvertedType.TIME_MILLIS);
         case MICROS:
-          return Optional.of(ConvertedType.TIME_MICROS);
+          return of(ConvertedType.TIME_MICROS);
+        case NANOS:
+          return empty();
         default:
-          throw new RuntimeException("Unknown converted type for " + logicalTypeAnnotation.toOriginalType());
+          throw new RuntimeException("Unknown converted type for " + timeLogicalType.toOriginalType());
       }
     }
 
     @Override
-    public Optional<ConvertedType> visit(LogicalTypeAnnotation.TimestampLogicalTypeAnnotation logicalTypeAnnotation) {
-      switch (logicalTypeAnnotation.getUnit()) {
+    public Optional<ConvertedType> visit(LogicalTypeAnnotation.TimestampLogicalTypeAnnotation timestampLogicalType) {
+      switch (timestampLogicalType.getUnit()) {
         case MICROS:
-          return Optional.of(ConvertedType.TIMESTAMP_MICROS);
+          return of(ConvertedType.TIMESTAMP_MICROS);
         case MILLIS:
-          return Optional.of(ConvertedType.TIMESTAMP_MILLIS);
+          return of(ConvertedType.TIMESTAMP_MILLIS);
+        case NANOS:
+          return empty();
         default:
-          throw new RuntimeException("Unknown converted type for " + logicalTypeAnnotation.toOriginalType());
+          throw new RuntimeException("Unknown converted type for " + timestampLogicalType.toOriginalType());
       }
     }
 
     @Override
-    public Optional<ConvertedType> visit(LogicalTypeAnnotation.IntLogicalTypeAnnotation logicalTypeAnnotation) {
-      boolean signed = logicalTypeAnnotation.isSigned();
-      switch (logicalTypeAnnotation.getBitWidth()) {
+    public Optional<ConvertedType> visit(LogicalTypeAnnotation.IntLogicalTypeAnnotation intLogicalType) {
+      boolean signed = intLogicalType.isSigned();
+      switch (intLogicalType.getBitWidth()) {
         case 8:
-          return Optional.of(signed ? ConvertedType.INT_8 : ConvertedType.UINT_8);
+          return of(signed ? ConvertedType.INT_8 : ConvertedType.UINT_8);
         case 16:
-          return Optional.of(signed ? ConvertedType.INT_16 : ConvertedType.UINT_16);
+          return of(signed ? ConvertedType.INT_16 : ConvertedType.UINT_16);
         case 32:
-          return Optional.of(signed ? ConvertedType.INT_32 : ConvertedType.UINT_32);
+          return of(signed ? ConvertedType.INT_32 : ConvertedType.UINT_32);
         case 64:
-          return Optional.of(signed ? ConvertedType.INT_64 : ConvertedType.UINT_64);
+          return of(signed ? ConvertedType.INT_64 : ConvertedType.UINT_64);
         default:
-          throw new RuntimeException("Unknown original type " + logicalTypeAnnotation.toOriginalType());
+          throw new RuntimeException("Unknown original type " + intLogicalType.toOriginalType());
       }
     }
 
     @Override
-    public Optional<ConvertedType> visit(LogicalTypeAnnotation.JsonLogicalTypeAnnotation logicalTypeAnnotation) {
-      return Optional.of(ConvertedType.JSON);
+    public Optional<ConvertedType> visit(LogicalTypeAnnotation.JsonLogicalTypeAnnotation jsonLogicalType) {
+      return of(ConvertedType.JSON);
     }
 
     @Override
-    public Optional<ConvertedType> visit(LogicalTypeAnnotation.BsonLogicalTypeAnnotation logicalTypeAnnotation) {
-      return Optional.of(ConvertedType.BSON);
+    public Optional<ConvertedType> visit(LogicalTypeAnnotation.BsonLogicalTypeAnnotation bsonLogicalType) {
+      return of(ConvertedType.BSON);
     }
 
     @Override
-    public Optional<ConvertedType> visit(LogicalTypeAnnotation.IntervalLogicalTypeAnnotation logicalTypeAnnotation) {
-      return Optional.of(ConvertedType.INTERVAL);
+    public Optional<ConvertedType> visit(LogicalTypeAnnotation.IntervalLogicalTypeAnnotation intervalLogicalType) {
+      return of(ConvertedType.INTERVAL);
     }
 
     @Override
-    public Optional<ConvertedType> visit(LogicalTypeAnnotation.MapKeyValueTypeAnnotation logicalTypeAnnotation) {
-      return Optional.of(ConvertedType.MAP_KEY_VALUE);
+    public Optional<ConvertedType> visit(LogicalTypeAnnotation.MapKeyValueTypeAnnotation mapKeyValueLogicalType) {
+      return of(ConvertedType.MAP_KEY_VALUE);
     }
   }
 
   private static class LogicalTypeConverterVisitor implements LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<LogicalType> {
     @Override
-    public Optional<LogicalType> visit(LogicalTypeAnnotation.StringLogicalTypeAnnotation logicalTypeAnnotation) {
-      return Optional.of(LogicalType.STRING(new StringType()));
+    public Optional<LogicalType> visit(LogicalTypeAnnotation.StringLogicalTypeAnnotation stringLogicalType) {
+      return of(LogicalType.STRING(new StringType()));
     }
 
     @Override
-    public Optional<LogicalType> visit(LogicalTypeAnnotation.MapLogicalTypeAnnotation logicalTypeAnnotation) {
-      return Optional.of(LogicalType.MAP(new MapType()));
+    public Optional<LogicalType> visit(LogicalTypeAnnotation.MapLogicalTypeAnnotation mapLogicalType) {
+      return of(LogicalType.MAP(new MapType()));
     }
 
     @Override
-    public Optional<LogicalType> visit(LogicalTypeAnnotation.ListLogicalTypeAnnotation logicalTypeAnnotation) {
-      return Optional.of(LogicalType.LIST(new ListType()));
+    public Optional<LogicalType> visit(LogicalTypeAnnotation.ListLogicalTypeAnnotation listLogicalType) {
+      return of(LogicalType.LIST(new ListType()));
     }
 
     @Override
-    public Optional<LogicalType> visit(LogicalTypeAnnotation.EnumLogicalTypeAnnotation logicalTypeAnnotation) {
-      return Optional.of(LogicalType.ENUM(new EnumType()));
+    public Optional<LogicalType> visit(LogicalTypeAnnotation.EnumLogicalTypeAnnotation enumLogicalType) {
+      return of(LogicalType.ENUM(new EnumType()));
     }
 
     @Override
-    public Optional<LogicalType> visit(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation logicalTypeAnnotation) {
-      return Optional.of(LogicalType.DECIMAL(new DecimalType(logicalTypeAnnotation.getScale(), logicalTypeAnnotation.getPrecision())));
+    public Optional<LogicalType> visit(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) {
+      return of(LogicalType.DECIMAL(new DecimalType(decimalLogicalType.getScale(), decimalLogicalType.getPrecision())));
     }
 
     @Override
-    public Optional<LogicalType> visit(LogicalTypeAnnotation.DateLogicalTypeAnnotation logicalTypeAnnotation) {
-      return Optional.of(LogicalType.DATE(new DateType()));
+    public Optional<LogicalType> visit(LogicalTypeAnnotation.DateLogicalTypeAnnotation dateLogicalType) {
+      return of(LogicalType.DATE(new DateType()));
     }
 
     @Override
-    public Optional<LogicalType> visit(LogicalTypeAnnotation.TimeLogicalTypeAnnotation logicalTypeAnnotation) {
-      return Optional.of(LogicalType.TIME(new TimeType(logicalTypeAnnotation.isAdjustedToUTC(), convertUnit(logicalTypeAnnotation.getUnit()))));
+    public Optional<LogicalType> visit(LogicalTypeAnnotation.TimeLogicalTypeAnnotation timeLogicalType) {
+      return of(LogicalType.TIME(new TimeType(timeLogicalType.isAdjustedToUTC(), convertUnit(timeLogicalType.getUnit()))));
     }
 
     @Override
-    public Optional<LogicalType> visit(LogicalTypeAnnotation.TimestampLogicalTypeAnnotation logicalTypeAnnotation) {
-      return Optional.of(LogicalType.TIMESTAMP(new TimestampType(logicalTypeAnnotation.isAdjustedToUTC(), convertUnit(logicalTypeAnnotation.getUnit()))));
+    public Optional<LogicalType> visit(LogicalTypeAnnotation.TimestampLogicalTypeAnnotation timestampLogicalType) {
+      return of(LogicalType.TIMESTAMP(new TimestampType(timestampLogicalType.isAdjustedToUTC(), convertUnit(timestampLogicalType.getUnit()))));
     }
 
     @Override
-    public Optional<LogicalType> visit(LogicalTypeAnnotation.IntLogicalTypeAnnotation logicalTypeAnnotation) {
-      return Optional.of(LogicalType.INTEGER(new IntType((byte) logicalTypeAnnotation.getBitWidth(), logicalTypeAnnotation.isSigned())));
+    public Optional<LogicalType> visit(LogicalTypeAnnotation.IntLogicalTypeAnnotation intLogicalType) {
+      return of(LogicalType.INTEGER(new IntType((byte) intLogicalType.getBitWidth(), intLogicalType.isSigned())));
     }
 
     @Override
-    public Optional<LogicalType> visit(LogicalTypeAnnotation.JsonLogicalTypeAnnotation logicalTypeAnnotation) {
-      return Optional.of(LogicalType.JSON(new JsonType()));
+    public Optional<LogicalType> visit(LogicalTypeAnnotation.JsonLogicalTypeAnnotation jsonLogicalType) {
+      return of(LogicalType.JSON(new JsonType()));
     }
 
     @Override
-    public Optional<LogicalType> visit(LogicalTypeAnnotation.BsonLogicalTypeAnnotation logicalTypeAnnotation) {
-      return Optional.of(LogicalType.BSON(new BsonType()));
+    public Optional<LogicalType> visit(LogicalTypeAnnotation.BsonLogicalTypeAnnotation bsonLogicalType) {
+      return of(LogicalType.BSON(new BsonType()));
     }
 
     @Override
-    public Optional<LogicalType> visit(LogicalTypeAnnotation.IntervalLogicalTypeAnnotation logicalTypeAnnotation) {
-      return Optional.of(LogicalType.UNKNOWN(new NullType()));
+    public Optional<LogicalType> visit(LogicalTypeAnnotation.IntervalLogicalTypeAnnotation intervalLogicalType) {
+      return of(LogicalType.UNKNOWN(new NullType()));
     }
 
     @Override
-    public Optional<LogicalType> visit(LogicalTypeAnnotation.MapKeyValueTypeAnnotation logicalTypeAnnotation) {
-      return Optional.of(LogicalType.UNKNOWN(new NullType()));
+    public Optional<LogicalType> visit(LogicalTypeAnnotation.MapKeyValueTypeAnnotation mapKeyValueLogicalType) {
+      return of(LogicalType.UNKNOWN(new NullType()));
     }
   }
 
@@ -449,6 +467,17 @@ private void addRowGroup(ParquetMetadata parquetMetadata, List<RowGroup> rowGrou
 //      columnChunk.meta_data.index_page_offset = ;
 //      columnChunk.meta_data.key_value_metadata = ; // nothing yet
 
+      IndexReference columnIndexRef = columnMetaData.getColumnIndexReference();
+      if (columnIndexRef != null) {
+        columnChunk.setColumn_index_offset(columnIndexRef.getOffset());
+        columnChunk.setColumn_index_length(columnIndexRef.getLength());
+      }
+      IndexReference offsetIndexRef = columnMetaData.getOffsetIndexReference();
+      if (offsetIndexRef != null) {
+        columnChunk.setOffset_index_offset(offsetIndexRef.getOffset());
+        columnChunk.setOffset_index_length(offsetIndexRef.getLength());
+      }
+
       parquetColumns.add(columnChunk);
     }
     RowGroup rowGroup = new RowGroup(parquetColumns, block.getTotalByteSize(), block.getRowCount());
@@ -670,9 +699,11 @@ enum SortOrder {
     UNKNOWN
   }
 
-  private static final Set<OriginalType> STRING_TYPES = Collections
+  private static final Set<Class> STRING_TYPES = Collections
       .unmodifiableSet(new HashSet<>(Arrays.asList(
-          OriginalType.UTF8, OriginalType.ENUM, OriginalType.JSON
+        LogicalTypeAnnotation.StringLogicalTypeAnnotation.class,
+        LogicalTypeAnnotation.EnumLogicalTypeAnnotation.class,
+        LogicalTypeAnnotation.JsonLogicalTypeAnnotation.class
       )));
 
   /**
@@ -689,10 +720,10 @@ private boolean overrideSortOrderToSigned(PrimitiveType type) {
     // even if the override is set, only return stats for string-ish types
     // a null type annotation is considered string-ish because some writers
     // failed to use the UTF8 annotation.
-    OriginalType annotation = type.getOriginalType();
+    LogicalTypeAnnotation annotation = type.getLogicalTypeAnnotation();
     return useSignedStringMinMax &&
         PrimitiveTypeName.BINARY == type.getPrimitiveTypeName() &&
-        (annotation == null || STRING_TYPES.contains(annotation));
+        (annotation == null || STRING_TYPES.contains(annotation.getClass()));
   }
 
   /**
@@ -719,36 +750,76 @@ private static SortOrder defaultSortOrder(PrimitiveTypeName primitive) {
    * @return the "correct" sort order of the type that applications assume
    */
   private static SortOrder sortOrder(PrimitiveType primitive) {
-    OriginalType annotation = primitive.getOriginalType();
+    LogicalTypeAnnotation annotation = primitive.getLogicalTypeAnnotation();
     if (annotation != null) {
-      switch (annotation) {
-        case INT_8:
-        case INT_16:
-        case INT_32:
-        case INT_64:
-        case DATE:
-        case TIME_MICROS:
-        case TIME_MILLIS:
-        case TIMESTAMP_MICROS:
-        case TIMESTAMP_MILLIS:
-          return SortOrder.SIGNED;
-        case UINT_8:
-        case UINT_16:
-        case UINT_32:
-        case UINT_64:
-        case ENUM:
-        case UTF8:
-        case BSON:
-        case JSON:
-          return SortOrder.UNSIGNED;
-        case DECIMAL:
-        case LIST:
-        case MAP:
-        case MAP_KEY_VALUE:
-        case INTERVAL:
-          return SortOrder.UNKNOWN;
-      }
+      return annotation.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<SortOrder>() {
+        @Override
+        public Optional<SortOrder> visit(LogicalTypeAnnotation.IntLogicalTypeAnnotation intLogicalType) {
+          return intLogicalType.isSigned() ? of(SortOrder.SIGNED) : of(SortOrder.UNSIGNED);
+        }
+
+        @Override
+        public Optional<SortOrder> visit(LogicalTypeAnnotation.IntervalLogicalTypeAnnotation intervalLogicalType) {
+          return of(SortOrder.UNKNOWN);
+        }
+
+        @Override
+        public Optional<SortOrder> visit(LogicalTypeAnnotation.DateLogicalTypeAnnotation dateLogicalType) {
+          return of(SortOrder.SIGNED);
+        }
+
+        @Override
+        public Optional<SortOrder> visit(LogicalTypeAnnotation.EnumLogicalTypeAnnotation enumLogicalType) {
+          return of(SortOrder.UNSIGNED);
+        }
+
+        @Override
+        public Optional<SortOrder> visit(LogicalTypeAnnotation.BsonLogicalTypeAnnotation bsonLogicalType) {
+          return of(SortOrder.UNSIGNED);
+        }
+
+        @Override
+        public Optional<SortOrder> visit(LogicalTypeAnnotation.JsonLogicalTypeAnnotation jsonLogicalType) {
+          return of(SortOrder.UNSIGNED);
+        }
+
+        @Override
+        public Optional<SortOrder> visit(LogicalTypeAnnotation.StringLogicalTypeAnnotation stringLogicalType) {
+          return of(SortOrder.UNSIGNED);
+        }
+
+        @Override
+        public Optional<SortOrder> visit(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) {
+          return of(SortOrder.UNKNOWN);
+        }
+
+        @Override
+        public Optional<SortOrder> visit(LogicalTypeAnnotation.MapKeyValueTypeAnnotation mapKeyValueLogicalType) {
+          return of(SortOrder.UNKNOWN);
+        }
+
+        @Override
+        public Optional<SortOrder> visit(LogicalTypeAnnotation.MapLogicalTypeAnnotation mapLogicalType) {
+          return of(SortOrder.UNKNOWN);
+        }
+
+        @Override
+        public Optional<SortOrder> visit(LogicalTypeAnnotation.ListLogicalTypeAnnotation listLogicalType) {
+          return of(SortOrder.UNKNOWN);
+        }
+
+        @Override
+        public Optional<SortOrder> visit(LogicalTypeAnnotation.TimeLogicalTypeAnnotation timeLogicalType) {
+          return of(SortOrder.SIGNED);
+        }
+
+        @Override
+        public Optional<SortOrder> visit(LogicalTypeAnnotation.TimestampLogicalTypeAnnotation timestampLogicalType) {
+          return of(SortOrder.SIGNED);
+        }
+      }).orElse(defaultSortOrder(primitive.getPrimitiveTypeName()));
     }
+
     return defaultSortOrder(primitive.getPrimitiveTypeName());
   }
 
@@ -894,6 +965,8 @@ private LogicalTypeAnnotation.TimeUnit convertTimeUnit(TimeUnit unit) {
         return LogicalTypeAnnotation.TimeUnit.MICROS;
       case MILLIS:
         return LogicalTypeAnnotation.TimeUnit.MILLIS;
+      case NANOS:
+        return LogicalTypeAnnotation.TimeUnit.NANOS;
       default:
         throw new RuntimeException("Unknown time unit " + unit);
     }
@@ -1117,6 +1190,8 @@ public ParquetMetadata fromParquetMetadata(FileMetaData parquetMetadata) throws
               metaData.num_values,
               metaData.total_compressed_size,
               metaData.total_uncompressed_size);
+          column.setColumnIndexReference(toColumnIndexReference(columnChunk));
+          column.setOffsetIndexReference(toOffsetIndexReference(columnChunk));
           // TODO
           // index_page_offset
           // key_value_metadata
@@ -1138,6 +1213,20 @@ public ParquetMetadata fromParquetMetadata(FileMetaData parquetMetadata) throws
         blocks);
   }
 
+  private static IndexReference toColumnIndexReference(ColumnChunk columnChunk) {
+    // A reference can be built only when both the offset and the length of the column index are set
+    boolean located = columnChunk.isSetColumn_index_offset() && columnChunk.isSetColumn_index_length();
+    return !located ? null
+        : new IndexReference(columnChunk.getColumn_index_offset(), columnChunk.getColumn_index_length());
+  }
+
+  private static IndexReference toOffsetIndexReference(ColumnChunk columnChunk) {
+    // A reference can be built only when both the offset and the length of the offset index are set
+    boolean located = columnChunk.isSetOffset_index_offset() && columnChunk.isSetOffset_index_length();
+    return !located ? null
+        : new IndexReference(columnChunk.getOffset_index_offset(), columnChunk.getOffset_index_length());
+  }
+
   private static ColumnPath getPath(ColumnMetaData metaData) {
     String[] path = metaData.path_in_schema.toArray(new String[metaData.path_in_schema.size()]);
     return ColumnPath.get(path);
@@ -1332,4 +1421,78 @@ public void writeDictionaryPageHeader(
     writePageHeader(pageHeader, to);
   }
 
+  private static BoundaryOrder toParquetBoundaryOrder(
+      org.apache.parquet.internal.column.columnindex.BoundaryOrder boundaryOrder) {
+    switch (boundaryOrder) { // each internal constant maps to the same-named BoundaryOrder constant
+      case ASCENDING:
+        return BoundaryOrder.ASCENDING;
+      case DESCENDING:
+        return BoundaryOrder.DESCENDING;
+      case UNORDERED:
+        return BoundaryOrder.UNORDERED;
+      default: // reached only for a constant that has no mapping above
+        throw new IllegalArgumentException("Unsupported boundary order: " + boundaryOrder);
+    }
+  }
+
+  private static org.apache.parquet.internal.column.columnindex.BoundaryOrder fromParquetBoundaryOrder(
+      BoundaryOrder boundaryOrder) {
+    switch (boundaryOrder) { // each constant maps to the same-named constant of the internal BoundaryOrder
+      case ASCENDING:
+        return org.apache.parquet.internal.column.columnindex.BoundaryOrder.ASCENDING;
+      case DESCENDING:
+        return org.apache.parquet.internal.column.columnindex.BoundaryOrder.DESCENDING;
+      case UNORDERED:
+        return org.apache.parquet.internal.column.columnindex.BoundaryOrder.UNORDERED;
+      default: // reached only for a constant that has no mapping above
+        throw new IllegalArgumentException("Unsupported boundary order: " + boundaryOrder);
+    }
+  }
+
+  public static ColumnIndex toParquetColumnIndex(PrimitiveType type,
+      org.apache.parquet.internal.column.columnindex.ColumnIndex columnIndex) {
+    // Nothing is converted without a source index or without min/max statistics support for the type
+    boolean convertible = isMinMaxStatsSupported(type) && columnIndex != null;
+    if (!convertible) {
+      return null;
+    }
+    ColumnIndex converted = new ColumnIndex(
+        columnIndex.getNullPages(), columnIndex.getMinValues(), columnIndex.getMaxValues(),
+        toParquetBoundaryOrder(columnIndex.getBoundaryOrder()));
+    converted.setNull_counts(columnIndex.getNullCounts());
+    return converted;
+  }
+
+  public static org.apache.parquet.internal.column.columnindex.ColumnIndex fromParquetColumnIndex(PrimitiveType type,
+      ColumnIndex parquetColumnIndex) {
+    if (isMinMaxStatsSupported(type)) {
+      // Rebuild the internal representation from the boundary order and the value lists of the read index
+      return ColumnIndexBuilder.build(type,
+          fromParquetBoundaryOrder(parquetColumnIndex.getBoundary_order()),
+          parquetColumnIndex.getNull_pages(), parquetColumnIndex.getNull_counts(),
+          parquetColumnIndex.getMin_values(), parquetColumnIndex.getMax_values());
+    }
+    // Types without min/max statistics support get no column index
+    return null;
+  }
+
+  public static OffsetIndex toParquetOffsetIndex(org.apache.parquet.internal.column.columnindex.OffsetIndex offsetIndex) {
+    // One PageLocation is emitted per page, in page order
+    final int pageCount = offsetIndex.getPageCount();
+    List<PageLocation> locations = new ArrayList<>(pageCount);
+    for (int page = 0; page < pageCount; ++page) {
+      locations.add(new PageLocation(offsetIndex.getOffset(page), offsetIndex.getCompressedPageSize(page),
+          offsetIndex.getFirstRowIndex(page)));
+    }
+    return new OffsetIndex(locations);
+  }
+
+  public static org.apache.parquet.internal.column.columnindex.OffsetIndex fromParquetOffsetIndex(
+      OffsetIndex parquetOffsetIndex) {
+    OffsetIndexBuilder builder = OffsetIndexBuilder.getBuilder();
+    // Replay every page location into the builder, keeping the order of the list
+    parquetOffsetIndex.getPage_locations().forEach(location ->
+        builder.add(location.getOffset(), location.getCompressed_page_size(), location.getFirst_row_index()));
+    return builder.build();
+  }
 }
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/BloomFilterDataReader.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/BloomFilterDataReader.java
index 6b861e55c5..96e258fe40 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/BloomFilterDataReader.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/BloomFilterDataReader.java
@@ -17,11 +17,9 @@
  * under the License.
  */
 package org.apache.parquet.hadoop;
-
 import java.io.IOException;
 import java.util.HashMap;
 import java.util.Map;
-
 import org.apache.parquet.Strings;
 import org.apache.parquet.column.ColumnDescriptor;
 import org.apache.parquet.column.values.bloomfilter.BloomFilter;
@@ -29,18 +27,15 @@
 import org.apache.parquet.hadoop.metadata.BlockMetaData;
 import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
 import org.apache.parquet.io.ParquetDecodingException;
-
 /**
  * A {@link BloomFilterReader} implementation that reads Bloom filter data from
  * an open {@link ParquetFileReader}.
  *
  */
-
 public class BloomFilterDataReader implements BloomFilterReader {
   private final ParquetFileReader reader;
   private final Map<String, ColumnChunkMetaData> columns;
   private final Map<String, BloomFilter> cache = new HashMap<>();
-
   public BloomFilterDataReader(ParquetFileReader fileReader, BlockMetaData block) {
     this.reader = fileReader;
     this.columns = new HashMap<>();
@@ -48,7 +43,6 @@ public BloomFilterDataReader(ParquetFileReader fileReader, BlockMetaData block)
       columns.put(column.getPath().toDotString(), column);
     }
   }
-
   @Override
   public BloomFilter readBloomFilter(ColumnDescriptor descriptor) {
     String dotPath = Strings.join(descriptor.getPath(), ".");
@@ -57,11 +51,9 @@ public BloomFilter readBloomFilter(ColumnDescriptor descriptor) {
       throw new ParquetDecodingException(
         "Cannot load Bloom filter data, unknown column: " + dotPath);
     }
-
     if (cache.containsKey(dotPath)) {
       return cache.get(dotPath);
     }
-
     try {
       synchronized (cache) {
         if (!cache.containsKey(dotPath)) {
@@ -70,7 +62,6 @@ public BloomFilter readBloomFilter(ColumnDescriptor descriptor) {
           cache.put(dotPath, bloomFilter);
         }
       }
-
       return cache.get(dotPath);
     } catch (IOException e) {
       throw new ParquetDecodingException(
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageReadStore.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageReadStore.java
index 37dfd6d394..0dc71e0743 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageReadStore.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageReadStore.java
@@ -18,13 +18,17 @@
  */
 package org.apache.parquet.hadoop;
 
+import static org.apache.parquet.Ints.checkedCast;
+
 import java.io.IOException;
 import java.util.HashMap;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
-
+import java.util.Optional;
+import java.util.PrimitiveIterator;
 import org.apache.parquet.Ints;
+import org.apache.parquet.bytes.BytesInput;
 import org.apache.parquet.column.ColumnDescriptor;
 import org.apache.parquet.column.page.DataPage;
 import org.apache.parquet.column.page.DataPageV1;
@@ -33,9 +37,9 @@
 import org.apache.parquet.column.page.DictionaryPageReadStore;
 import org.apache.parquet.column.page.PageReadStore;
 import org.apache.parquet.column.page.PageReader;
-import org.apache.parquet.compression.CompressionCodecFactory;
 import org.apache.parquet.compression.CompressionCodecFactory.BytesInputDecompressor;
-import org.apache.parquet.hadoop.CodecFactory.BytesDecompressor;
+import org.apache.parquet.internal.column.columnindex.OffsetIndex;
+import org.apache.parquet.internal.filter2.columnindex.RowRanges;
 import org.apache.parquet.io.ParquetDecodingException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -62,8 +66,13 @@ static final class ColumnChunkPageReader implements PageReader {
     private final long valueCount;
     private final List<DataPage> compressedPages;
     private final DictionaryPage compressedDictionaryPage;
+    // null means no page synchronization is required; firstRowIndex will not be returned by the pages
+    private final OffsetIndex offsetIndex;
+    private final long rowCount;
+    private int pageIndex = 0;
 
-    ColumnChunkPageReader(BytesInputDecompressor decompressor, List<DataPage> compressedPages, DictionaryPage compressedDictionaryPage) {
+    ColumnChunkPageReader(BytesInputDecompressor decompressor, List<DataPage> compressedPages,
+        DictionaryPage compressedDictionaryPage, OffsetIndex offsetIndex, long rowCount) {
       this.decompressor = decompressor;
       this.compressedPages = new LinkedList<DataPage>(compressedPages);
       this.compressedDictionaryPage = compressedDictionaryPage;
@@ -72,6 +81,8 @@ static final class ColumnChunkPageReader implements PageReader {
         count += p.getValueCount();
       }
       this.valueCount = count;
+      this.offsetIndex = offsetIndex;
+      this.rowCount = rowCount;
     }
 
     @Override
@@ -85,18 +96,34 @@ public DataPage readPage() {
         return null;
       }
       DataPage compressedPage = compressedPages.remove(0);
+      final int currentPageIndex = pageIndex++;
       return compressedPage.accept(new DataPage.Visitor<DataPage>() {
         @Override
         public DataPage visit(DataPageV1 dataPageV1) {
           try {
-            return new DataPageV1(
-                decompressor.decompress(dataPageV1.getBytes(), dataPageV1.getUncompressedSize()),
-                dataPageV1.getValueCount(),
-                dataPageV1.getUncompressedSize(),
-                dataPageV1.getStatistics(),
-                dataPageV1.getRlEncoding(),
-                dataPageV1.getDlEncoding(),
-                dataPageV1.getValueEncoding());
+            BytesInput decompressed = decompressor.decompress(dataPageV1.getBytes(), dataPageV1.getUncompressedSize());
+            if (offsetIndex == null) {
+              return new DataPageV1(
+                  decompressed,
+                  dataPageV1.getValueCount(),
+                  dataPageV1.getUncompressedSize(),
+                  dataPageV1.getStatistics(),
+                  dataPageV1.getRlEncoding(),
+                  dataPageV1.getDlEncoding(),
+                  dataPageV1.getValueEncoding());
+            } else {
+              long firstRowIndex = offsetIndex.getFirstRowIndex(currentPageIndex);
+              return new DataPageV1(
+                  decompressed,
+                  dataPageV1.getValueCount(),
+                  dataPageV1.getUncompressedSize(),
+                  firstRowIndex,
+                  checkedCast(offsetIndex.getLastRowIndex(currentPageIndex, rowCount) - firstRowIndex + 1),
+                  dataPageV1.getStatistics(),
+                  dataPageV1.getRlEncoding(),
+                  dataPageV1.getDlEncoding(),
+                  dataPageV1.getValueEncoding());
+            }
           } catch (IOException e) {
             throw new ParquetDecodingException("could not decompress page", e);
           }
@@ -105,23 +132,49 @@ public DataPage visit(DataPageV1 dataPageV1) {
         @Override
         public DataPage visit(DataPageV2 dataPageV2) {
           if (!dataPageV2.isCompressed()) {
-            return dataPageV2;
+            if (offsetIndex == null) {
+              return dataPageV2;
+            } else {
+              return DataPageV2.uncompressed(
+                  dataPageV2.getRowCount(),
+                  dataPageV2.getNullCount(),
+                  dataPageV2.getValueCount(),
+                  offsetIndex.getFirstRowIndex(currentPageIndex),
+                  dataPageV2.getRepetitionLevels(),
+                  dataPageV2.getDefinitionLevels(),
+                  dataPageV2.getDataEncoding(),
+                  dataPageV2.getData(),
+                  dataPageV2.getStatistics());
+            }
           }
           try {
             int uncompressedSize = Ints.checkedCast(
                 dataPageV2.getUncompressedSize()
-                - dataPageV2.getDefinitionLevels().size()
-                - dataPageV2.getRepetitionLevels().size());
-            return DataPageV2.uncompressed(
-                dataPageV2.getRowCount(),
-                dataPageV2.getNullCount(),
-                dataPageV2.getValueCount(),
-                dataPageV2.getRepetitionLevels(),
-                dataPageV2.getDefinitionLevels(),
-                dataPageV2.getDataEncoding(),
-                decompressor.decompress(dataPageV2.getData(), uncompressedSize),
-                dataPageV2.getStatistics()
-                );
+                    - dataPageV2.getDefinitionLevels().size()
+                    - dataPageV2.getRepetitionLevels().size());
+            BytesInput decompressed = decompressor.decompress(dataPageV2.getData(), uncompressedSize);
+            if (offsetIndex == null) {
+              return DataPageV2.uncompressed(
+                  dataPageV2.getRowCount(),
+                  dataPageV2.getNullCount(),
+                  dataPageV2.getValueCount(),
+                  dataPageV2.getRepetitionLevels(),
+                  dataPageV2.getDefinitionLevels(),
+                  dataPageV2.getDataEncoding(),
+                  decompressed,
+                  dataPageV2.getStatistics());
+            } else {
+              return DataPageV2.uncompressed(
+                  dataPageV2.getRowCount(),
+                  dataPageV2.getNullCount(),
+                  dataPageV2.getValueCount(),
+                  offsetIndex.getFirstRowIndex(currentPageIndex),
+                  dataPageV2.getRepetitionLevels(),
+                  dataPageV2.getDefinitionLevels(),
+                  dataPageV2.getDataEncoding(),
+                  decompressed,
+                  dataPageV2.getStatistics());
+            }
           } catch (IOException e) {
             throw new ParquetDecodingException("could not decompress page", e);
           }
@@ -147,9 +200,16 @@ public DictionaryPage readDictionaryPage() {
 
   private final Map<ColumnDescriptor, ColumnChunkPageReader> readers = new HashMap<ColumnDescriptor, ColumnChunkPageReader>();
   private final long rowCount;
+  private final RowRanges rowRanges;
 
   public ColumnChunkPageReadStore(long rowCount) {
     this.rowCount = rowCount;
+    rowRanges = null; // no row ranges: getRowIndexes() returns an empty Optional for this store
+  }
+
+  ColumnChunkPageReadStore(RowRanges rowRanges) {
+    this.rowRanges = rowRanges;
+    rowCount = rowRanges.rowCount(); // the row count is the one reported by the given ranges
   }
 
   @Override
@@ -170,6 +230,11 @@ public DictionaryPage readDictionaryPage(ColumnDescriptor descriptor) {
     return readers.get(descriptor).readDictionaryPage();
   }
 
+  @Override
+  public Optional<PrimitiveIterator.OfLong> getRowIndexes() {
+    return Optional.ofNullable(rowRanges).map(ranges -> ranges.iterator());
+  }
+
   void addColumn(ColumnDescriptor path, ColumnChunkPageReader reader) {
     if (readers.put(path, reader) != null) {
       throw new RuntimeException(path+ " was added twice");
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageWriteStore.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageWriteStore.java
index caa41fc7c0..85bdbdbd9b 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageWriteStore.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnChunkPageWriteStore.java
@@ -28,7 +28,6 @@
 import java.util.Set;
 
 import org.apache.parquet.bytes.BytesInput;
-import org.apache.parquet.bytes.ByteBufferAllocator;
 import org.apache.parquet.bytes.ConcatenatingByteArrayCollector;
 import org.apache.parquet.column.ColumnDescriptor;
 import org.apache.parquet.column.Encoding;
@@ -36,23 +35,22 @@
 import org.apache.parquet.column.page.PageWriteStore;
 import org.apache.parquet.column.page.PageWriter;
 import org.apache.parquet.column.statistics.Statistics;
-import org.apache.parquet.column.values.bloomfilter.BlockSplitBloomFilter;
-import org.apache.parquet.column.values.bloomfilter.BloomFilter;
-import org.apache.parquet.column.values.bloomfilter.BloomFilterWriteStore;
-import org.apache.parquet.column.values.bloomfilter.BloomFilterWriter;
 import org.apache.parquet.format.converter.ParquetMetadataConverter;
 import org.apache.parquet.hadoop.CodecFactory.BytesCompressor;
+import org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder;
+import org.apache.parquet.internal.column.columnindex.OffsetIndexBuilder;
 import org.apache.parquet.io.ParquetEncodingException;
 import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.bytes.ByteBufferAllocator;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-class ColumnChunkPageWriteStore implements PageWriteStore, BloomFilterWriteStore {
+class ColumnChunkPageWriteStore implements PageWriteStore {
   private static final Logger LOG = LoggerFactory.getLogger(ColumnChunkPageWriteStore.class);
 
   private static ParquetMetadataConverter parquetMetadataConverter = new ParquetMetadataConverter();
 
-  private static final class ColumnChunkPageWriter implements PageWriter, BloomFilterWriter {
+  private static final class ColumnChunkPageWriter implements PageWriter {
 
     private final ColumnDescriptor path;
     private final BytesCompressor compressor;
@@ -60,7 +58,6 @@ private static final class ColumnChunkPageWriter implements PageWriter, BloomFil
     private final ByteArrayOutputStream tempOutputStream = new ByteArrayOutputStream();
     private final ConcatenatingByteArrayCollector buf;
     private DictionaryPage dictionaryPage;
-    private BloomFilter bloomFilter;
 
     private long uncompressedLength;
     private long compressedLength;
@@ -72,21 +69,38 @@ private static final class ColumnChunkPageWriter implements PageWriter, BloomFil
     private Set<Encoding> dlEncodings = new HashSet<Encoding>();
     private List<Encoding> dataEncodings = new ArrayList<Encoding>();
 
+    private ColumnIndexBuilder columnIndexBuilder;
+    private OffsetIndexBuilder offsetIndexBuilder;
     private Statistics totalStatistics;
     private final ByteBufferAllocator allocator;
 
     private ColumnChunkPageWriter(ColumnDescriptor path,
                                   BytesCompressor compressor,
-                                  ByteBufferAllocator allocator) {
+                                  ByteBufferAllocator allocator,
+                                  int columnIndexTruncateLength) {
       this.path = path;
       this.compressor = compressor;
       this.allocator = allocator;
       this.buf = new ConcatenatingByteArrayCollector();
+      this.columnIndexBuilder = ColumnIndexBuilder.getBuilder(path.getPrimitiveType(), columnIndexTruncateLength);
+      this.offsetIndexBuilder = OffsetIndexBuilder.getBuilder(); // both builders receive one entry per written page
+    }
+
+    @Override
+    @Deprecated
+    public void writePage(BytesInput bytesInput, int valueCount, Statistics<?> statistics, Encoding rlEncoding,
+        Encoding dlEncoding, Encoding valuesEncoding) throws IOException {
+      // This overload receives no row count: switch both builders to their no-op variants so that no
+      // column/offset indexes will be written for this column chunk, then delegate with -1 as row count.
+      offsetIndexBuilder = OffsetIndexBuilder.getNoOpBuilder();
+      columnIndexBuilder = ColumnIndexBuilder.getNoOpBuilder();
+      writePage(bytesInput, valueCount, -1, statistics, rlEncoding, dlEncoding, valuesEncoding);
     }
 
     @Override
     public void writePage(BytesInput bytes,
                           int valueCount,
+                          int rowCount,
                           Statistics statistics,
                           Encoding rlEncoding,
                           Encoding dlEncoding,
@@ -126,6 +140,9 @@ public void writePage(BytesInput bytes,
         totalStatistics.mergeStatistics(statistics);
       }
 
+      columnIndexBuilder.add(statistics);
+      offsetIndexBuilder.add(toIntWithCheck(tempOutputStream.size() + compressedSize), rowCount);
+
       // by concatenating before collecting instead of collecting twice,
       // we only allocate one buffer to copy into instead of multiple.
       buf.collect(BytesInput.concat(BytesInput.from(tempOutputStream), compressedBytes));
@@ -171,6 +188,9 @@ public void writePageV2(
         totalStatistics.mergeStatistics(statistics);
       }
 
+      columnIndexBuilder.add(statistics);
+      offsetIndexBuilder.add(toIntWithCheck((long) tempOutputStream.size() + compressedSize), rowCount);
+
       // by concatenating before collecting instead of collecting twice,
       // we only allocate one buffer to copy into instead of multiple.
       buf.collect(
@@ -198,18 +218,20 @@ public long getMemSize() {
     }
 
     public void writeToFileWriter(ParquetFileWriter writer) throws IOException {
-      writer.startColumn(path, totalValueCount, compressor.getCodecName());
-      if (bloomFilter != null) {
-        writer.writeBloomFilter(bloomFilter);
-      }
-
-      if (dictionaryPage != null) {
-        writer.writeDictionaryPage(dictionaryPage);
-        // tracking the dictionary encoding is handled in writeDictionaryPage
-      }
-      writer.writeDataPages(buf, uncompressedLength, compressedLength, totalStatistics,
-          rlEncodings, dlEncodings, dataEncodings);
-      writer.endColumn();
+      writer.writeColumnChunk(
+          path,
+          totalValueCount,
+          compressor.getCodecName(),
+          dictionaryPage,
+          buf,
+          uncompressedLength,
+          compressedLength,
+          totalStatistics,
+          columnIndexBuilder,
+          offsetIndexBuilder,
+          rlEncodings,
+          dlEncodings,
+          dataEncodings);
       if (LOG.isDebugEnabled()) {
         LOG.debug(
             String.format(
@@ -247,20 +269,16 @@ public String memUsageString(String prefix) {
       return buf.memUsageString(prefix + " ColumnChunkPageWriter");
     }
 
-    @Override
-    public void writeBloomFilter(BloomFilter bloomFilter) {
-      this.bloomFilter = bloomFilter;
-    }
-
   }
 
   private final Map<ColumnDescriptor, ColumnChunkPageWriter> writers = new HashMap<ColumnDescriptor, ColumnChunkPageWriter>();
   private final MessageType schema;
 
-  public ColumnChunkPageWriteStore(BytesCompressor compressor, MessageType schema, ByteBufferAllocator allocator) {
+  public ColumnChunkPageWriteStore(BytesCompressor compressor, MessageType schema, ByteBufferAllocator allocator,
+      int columnIndexTruncateLength) { // the truncate length is forwarded unchanged to every page writer
     this.schema = schema;
     for (ColumnDescriptor path : schema.getColumns()) {
-      writers.put(path,  new ColumnChunkPageWriter(path, compressor, allocator));
+      writers.put(path, new ColumnChunkPageWriter(path, compressor, allocator, columnIndexTruncateLength));
     }
   }
 
@@ -269,11 +287,6 @@ public PageWriter getPageWriter(ColumnDescriptor path) {
     return writers.get(path);
   }
 
-  @Override
-  public BloomFilterWriter getBloomFilterWriter(ColumnDescriptor path) {
-    return writers.get(path);
-  }
-
   public void flushToFileWriter(ParquetFileWriter writer) throws IOException {
     for (ColumnDescriptor path : schema.getColumns()) {
       ColumnChunkPageWriter pageWriter = writers.get(path);
@@ -281,4 +294,9 @@ public void flushToFileWriter(ParquetFileWriter writer) throws IOException {
     }
   }
 
+  void flushToFileWriter(ColumnDescriptor path, ParquetFileWriter writer) throws IOException {
+    // Hands only the page writer registered for the given column over to the file writer
+    writers.get(path).writeToFileWriter(writer);
+  }
+
 }
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnIndexFilterUtils.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnIndexFilterUtils.java
new file mode 100644
index 0000000000..448515e2a9
--- /dev/null
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnIndexFilterUtils.java
@@ -0,0 +1,157 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.hadoop;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Formatter;
+import java.util.List;
+
+import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
+import org.apache.parquet.internal.column.columnindex.OffsetIndex;
+import org.apache.parquet.internal.filter2.columnindex.RowRanges;
+
+import it.unimi.dsi.fastutil.ints.IntArrayList;
+import it.unimi.dsi.fastutil.ints.IntList;
+
+/**
+ * Internal utility class that supports column-index-based filtering of pages and of the byte ranges to read.
+ */
+class ColumnIndexFilterUtils {
+  /** A contiguous byte range described by its start offset and its length. */
+  static class OffsetRange {
+    private final long offset;
+    private long length;
+
+    // The length is accepted as a long so that an offset difference can be passed without a narrowing cast
+    private OffsetRange(long offset, long length) {
+      this.offset = offset;
+      this.length = length;
+    }
+
+    long getOffset() {
+      return offset;
+    }
+
+    long getLength() {
+      return length;
+    }
+
+    /** Grows this range by {@code length} if {@code offset} is exactly its current end; returns whether it grew. */
+    private boolean extend(long offset, int length) {
+      if (this.offset + this.length != offset) {
+        return false;
+      }
+      this.length += length;
+      return true;
+    }
+  }
+
+  /** View of an {@link OffsetIndex} exposing only the pages whose original indexes are listed in indexMap. */
+  private static class FilteredOffsetIndex implements OffsetIndex {
+    private final OffsetIndex offsetIndex;
+    // Position in this filtered view -> page index in the wrapped offset index
+    private final int[] indexMap;
+
+    private FilteredOffsetIndex(OffsetIndex offsetIndex, int[] indexMap) {
+      this.offsetIndex = offsetIndex;
+      this.indexMap = indexMap;
+    }
+
+    @Override
+    public int getPageCount() {
+      return indexMap.length;
+    }
+
+    @Override
+    public long getOffset(int pageIndex) {
+      return offsetIndex.getOffset(indexMap[pageIndex]);
+    }
+
+    @Override
+    public int getCompressedPageSize(int pageIndex) {
+      return offsetIndex.getCompressedPageSize(indexMap[pageIndex]);
+    }
+
+    @Override
+    public long getFirstRowIndex(int pageIndex) {
+      return offsetIndex.getFirstRowIndex(indexMap[pageIndex]);
+    }
+
+    @Override
+    public long getLastRowIndex(int pageIndex, long totalRowCount) {
+      // The last row of a page is the row preceding the first row of the next page of the wrapped index
+      int nextIndex = indexMap[pageIndex] + 1;
+      return (nextIndex >= offsetIndex.getPageCount() ? totalRowCount : offsetIndex.getFirstRowIndex(nextIndex)) - 1;
+    }
+
+    // Lists every page of the wrapped index; pages hidden by this view are prefixed with "- "
+    @Override
+    public String toString() {
+      try (Formatter formatter = new Formatter()) {
+        formatter.format("%-12s  %20s  %16s  %20s\n", "", "offset", "compressed size", "first row index");
+        for (int i = 0, n = offsetIndex.getPageCount(); i < n; ++i) {
+          int index = Arrays.binarySearch(indexMap, i);
+          boolean isHidden = index < 0;
+          formatter.format("%spage-%-5d  %20d  %16d  %20d\n",
+              isHidden ? "- " : "  ", isHidden ? i : index,
+              offsetIndex.getOffset(i), offsetIndex.getCompressedPageSize(i), offsetIndex.getFirstRowIndex(i));
+        }
+        return formatter.toString();
+      }
+    }
+  }
+
+  /** Returns the filtered offset index containing only the pages which are overlapping with rowRanges. */
+  static OffsetIndex filterOffsetIndex(OffsetIndex offsetIndex, RowRanges rowRanges, long totalRowCount) {
+    IntList indexMap = new IntArrayList();
+    for (int i = 0, n = offsetIndex.getPageCount(); i < n; ++i) {
+      if (rowRanges.isOverlapping(offsetIndex.getFirstRowIndex(i), offsetIndex.getLastRowIndex(i, totalRowCount))) {
+        indexMap.add(i);
+      }
+    }
+    return new FilteredOffsetIndex(offsetIndex, indexMap.toIntArray());
+  }
+
+  /** Returns the byte ranges to read for the pages of offsetIndex, merging ranges that directly follow each other. */
+  static List<OffsetRange> calculateOffsetRanges(OffsetIndex offsetIndex, ColumnChunkMetaData cm, long firstPageOffset) {
+    List<OffsetRange> ranges = new ArrayList<>();
+    int n = offsetIndex.getPageCount();
+    if (n > 0) {
+      OffsetRange currentRange = null;
+      // Add a range for the dictionary page if required
+      long rowGroupOffset = cm.getStartingPos();
+      if (rowGroupOffset < firstPageOffset) {
+        // The distance is passed as a long; narrowing it to int could silently truncate it
+        currentRange = new OffsetRange(rowGroupOffset, firstPageOffset - rowGroupOffset);
+        ranges.add(currentRange);
+      }
+      for (int i = 0; i < n; ++i) {
+        long offset = offsetIndex.getOffset(i);
+        int length = offsetIndex.getCompressedPageSize(i);
+        // Open a new range unless this page starts exactly where the current range ends
+        if (currentRange == null || !currentRange.extend(offset, length)) {
+          currentRange = new OffsetRange(offset, length);
+          ranges.add(currentRange);
+        }
+      }
+    }
+    return ranges;
+  }
+}
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnIndexStoreImpl.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnIndexStoreImpl.java
new file mode 100644
index 0000000000..684c5f2114
--- /dev/null
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ColumnIndexStoreImpl.java
@@ -0,0 +1,155 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.hadoop;
+
+import static java.util.Collections.emptySet;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.parquet.hadoop.metadata.BlockMetaData;
+import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
+import org.apache.parquet.hadoop.metadata.ColumnPath;
+import org.apache.parquet.internal.column.columnindex.ColumnIndex;
+import org.apache.parquet.internal.column.columnindex.OffsetIndex;
+import org.apache.parquet.internal.filter2.columnindex.ColumnIndexStore;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Internal implementation of {@link ColumnIndexStore}.
+ */
+class ColumnIndexStoreImpl implements ColumnIndexStore {
+
+  private interface IndexStore {
+    ColumnIndex getColumnIndex();
+
+    OffsetIndex getOffsetIndex();
+  }
+
+  private class IndexStoreImpl implements IndexStore {
+    private final ColumnChunkMetaData meta;
+    private ColumnIndex columnIndex;
+    private boolean columnIndexRead;
+    private final OffsetIndex offsetIndex;
+
+    IndexStoreImpl(ColumnChunkMetaData meta) {
+      this.meta = meta;
+      OffsetIndex oi;
+      try {
+        oi = reader.readOffsetIndex(meta);
+      } catch (IOException e) {
+        // If the I/O issue still stands it will fail the reading later;
+        // otherwise we fail the filtering only with a missing offset index.
+        LOGGER.warn("Unable to read offset index for column {}", meta.getPath(), e);
+        oi = null;
+      }
+      if (oi == null) {
+        throw new MissingOffsetIndexException(meta.getPath());
+      }
+      offsetIndex = oi;
+    }
+
+    @Override
+    public ColumnIndex getColumnIndex() {
+      if (!columnIndexRead) {
+        try {
+          columnIndex = reader.readColumnIndex(meta);
+        } catch (IOException e) {
+          // If the I/O issue still stands it will fail the reading later;
+          // otherwise we fail the filtering only with a missing column index.
+          LOGGER.warn("Unable to read column index for column {}", meta.getPath(), e);
+        }
+        columnIndexRead = true;
+      }
+      return columnIndex;
+    }
+
+    @Override
+    public OffsetIndex getOffsetIndex() {
+      return offsetIndex;
+    }
+  }
+
+  private static final Logger LOGGER = LoggerFactory.getLogger(ColumnIndexStoreImpl.class);
+  // Used for columns that are not in this parquet file
+  private static final IndexStore MISSING_INDEX_STORE = new IndexStore() {
+    @Override
+    public ColumnIndex getColumnIndex() {
+      return null;
+    }
+
+    @Override
+    public OffsetIndex getOffsetIndex() {
+      return null;
+    }
+  };
+  private static final ColumnIndexStoreImpl EMPTY = new ColumnIndexStoreImpl(null, new BlockMetaData(), emptySet()) {
+    @Override
+    public ColumnIndex getColumnIndex(ColumnPath column) {
+      return null;
+    }
+
+    @Override
+    public OffsetIndex getOffsetIndex(ColumnPath column) {
+      throw new MissingOffsetIndexException(column);
+    }
+  };
+
+  private final ParquetFileReader reader;
+  private final Map<ColumnPath, IndexStore> store;
+
+  /*
+   * Creates a column index store for the columns in paths (the set of columns used for the projection). The offset
+   * indexes are read eagerly at creation; the column indexes are read lazily on first access.
+   */
+  static ColumnIndexStore create(ParquetFileReader reader, BlockMetaData block, Set<ColumnPath> paths) {
+    try {
+      return new ColumnIndexStoreImpl(reader, block, paths);
+    } catch (MissingOffsetIndexException e) {
+      return EMPTY;
+    }
+  }
+
+  private ColumnIndexStoreImpl(ParquetFileReader reader, BlockMetaData block, Set<ColumnPath> paths) {
+    // TODO[GS]: Offset index for every path will be required; pre-read the consecutive ones at once?
+    // TODO[GS]: Pre-read column index based on filter?
+    this.reader = reader;
+    Map<ColumnPath, IndexStore> store = new HashMap<>();
+    for (ColumnChunkMetaData column : block.getColumns()) {
+      ColumnPath path = column.getPath();
+      if (paths.contains(path)) {
+        store.put(path, new IndexStoreImpl(column));
+      }
+    }
+    this.store = store;
+  }
+
+  @Override
+  public ColumnIndex getColumnIndex(ColumnPath column) {
+    return store.getOrDefault(column, MISSING_INDEX_STORE).getColumnIndex();
+  }
+
+  @Override
+  public OffsetIndex getOffsetIndex(ColumnPath column) {
+    return store.getOrDefault(column, MISSING_INDEX_STORE).getOffsetIndex();
+  }
+}
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordReader.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordReader.java
index a048878693..e57f3cbcee 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordReader.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordReader.java
@@ -124,7 +124,7 @@ private void checkRead() throws IOException {
 
       LOG.info("at row " + current + ". reading next block");
       long t0 = System.currentTimeMillis();
-      PageReadStore pages = reader.readNextRowGroup();
+      PageReadStore pages = reader.readNextFilteredRowGroup();
       if (pages == null) {
         throw new IOException("expecting more rows but reached last block. Read " + current + " out of " + total);
       }
@@ -182,7 +182,7 @@ public void initialize(ParquetFileReader reader, ParquetReadOptions options) {
     this.columnCount = requestedSchema.getPaths().size();
     this.recordConverter = readSupport.prepareForRead(conf, fileMetadata, fileSchema, readContext);
     this.strictTypeChecking = options.isEnabled(STRICT_TYPE_CHECKING, true);
-    this.total = reader.getRecordCount();
+    this.total = reader.getFilteredRecordCount();
     this.unmaterializableRecordCounter = new UnmaterializableRecordCounter(options, total);
     this.filterRecords = options.useRecordFilter();
     reader.setRequestedSchema(requestedSchema);
@@ -204,7 +204,7 @@ public void initialize(ParquetFileReader reader, Configuration configuration)
     this.recordConverter = readSupport.prepareForRead(
         configuration, fileMetadata, fileSchema, readContext);
     this.strictTypeChecking = configuration.getBoolean(STRICT_TYPE_CHECKING, true);
-    this.total = reader.getRecordCount();
+    this.total = reader.getFilteredRecordCount();
     this.unmaterializableRecordCounter = new UnmaterializableRecordCounter(configuration, total);
     this.filterRecords = configuration.getBoolean(RECORD_FILTERING_ENABLED, true);
     reader.setRequestedSchema(requestedSchema);
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java
index d9e9b5e15e..d8af379d13 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java
@@ -20,7 +20,6 @@
 
 import static java.lang.Math.max;
 import static java.lang.Math.min;
-import static java.lang.String.format;
 import static org.apache.parquet.Preconditions.checkNotNull;
 
 import java.io.IOException;
@@ -102,7 +101,8 @@ public ParquetMetadata getFooter() {
   }
 
   private void initStore() {
-    pageStore = new ColumnChunkPageWriteStore(compressor, schema, props.getAllocator());
+    pageStore = new ColumnChunkPageWriteStore(compressor, schema, props.getAllocator(),
+        props.getColumnIndexTruncateLength());
     columnStore = props.newColumnWriteStore(schema, pageStore);
     MessageColumnIO columnIO = new ColumnIOFactory(validating).getColumnIO(schema);
     this.recordConsumer = columnIO.getRecordWriter(columnStore);
@@ -144,7 +144,7 @@ private void checkBlockSizeReached() throws IOException {
       // flush the row group if it is within ~2 records of the limit
       // it is much better to be slightly under size than to be over at all
       if (memSize > (nextRowGroupSize - 2 * recordSize)) {
-        LOG.info("mem size {} > {}: flushing {} records to disk.", memSize, nextRowGroupSize, recordCount);
+        LOG.debug("mem size {} > {}: flushing {} records to disk.", memSize, nextRowGroupSize, recordCount);
         flushRowGroupToStore();
         initStore();
         recordCountForNextMemCheck = min(max(MINIMUM_RECORD_COUNT_FOR_CHECK, recordCount / 2), MAXIMUM_RECORD_COUNT_FOR_CHECK);
@@ -162,7 +162,7 @@ private void checkBlockSizeReached() throws IOException {
   private void flushRowGroupToStore()
       throws IOException {
     recordConsumer.flush();
-    LOG.info("Flushing mem columnStore to file. allocated memory: {}", columnStore.getAllocatedSize());
+    LOG.debug("Flushing mem columnStore to file. allocated memory: {}", columnStore.getAllocatedSize());
     if (columnStore.getAllocatedSize() > (3 * rowGroupSizeThreshold)) {
       LOG.warn("Too much memory used: {}", columnStore.memUsageString());
     }
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java
index 3975bf9f48..8e76634901 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java
@@ -23,6 +23,8 @@
 import static org.apache.parquet.filter2.compat.RowGroupFilter.FilterLevel.STATISTICS;
 import static org.apache.parquet.format.converter.ParquetMetadataConverter.NO_FILTER;
 import static org.apache.parquet.format.converter.ParquetMetadataConverter.SKIP_ROW_GROUPS;
+import static org.apache.parquet.hadoop.ColumnIndexFilterUtils.calculateOffsetRanges;
+import static org.apache.parquet.hadoop.ColumnIndexFilterUtils.filterOffsetIndex;
 import static org.apache.parquet.hadoop.ParquetFileWriter.MAGIC;
 import static org.apache.parquet.hadoop.ParquetFileWriter.PARQUET_COMMON_METADATA_FILE;
 import static org.apache.parquet.hadoop.ParquetFileWriter.PARQUET_METADATA_FILE;
@@ -42,36 +44,38 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
+import java.util.Optional;
 import java.util.Set;
 import java.util.concurrent.Callable;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-
+import org.apache.parquet.HadoopReadOptions;
 import org.apache.parquet.ParquetReadOptions;
 import org.apache.parquet.bytes.ByteBufferInputStream;
 import org.apache.parquet.bytes.BytesInput;
 import org.apache.parquet.column.ColumnDescriptor;
 import org.apache.parquet.column.Encoding;
-import org.apache.parquet.column.page.DictionaryPageReadStore;
-import org.apache.parquet.column.values.bloomfilter.BlockSplitBloomFilter;
-import org.apache.parquet.compression.CompressionCodecFactory.BytesInputDecompressor;
-import org.apache.parquet.filter2.compat.FilterCompat;
-import org.apache.parquet.filter2.compat.RowGroupFilter;
-
 import org.apache.parquet.column.page.DataPage;
 import org.apache.parquet.column.page.DataPageV1;
 import org.apache.parquet.column.page.DataPageV2;
 import org.apache.parquet.column.page.DictionaryPage;
+import org.apache.parquet.column.page.DictionaryPageReadStore;
+import org.apache.parquet.column.page.PageReader;
 import org.apache.parquet.column.page.PageReadStore;
+import org.apache.parquet.column.values.bloomfilter.BlockSplitBloomFilter;
 import org.apache.parquet.column.values.bloomfilter.BloomFilter;
-import org.apache.parquet.hadoop.metadata.ColumnPath;
+import org.apache.parquet.compression.CompressionCodecFactory.BytesInputDecompressor;
+import org.apache.parquet.filter2.compat.FilterCompat;
+import org.apache.parquet.filter2.compat.RowGroupFilter;
 import org.apache.parquet.format.DataPageHeader;
 import org.apache.parquet.format.DataPageHeaderV2;
 import org.apache.parquet.format.DictionaryPageHeader;
@@ -80,19 +84,27 @@
 import org.apache.parquet.format.converter.ParquetMetadataConverter;
 import org.apache.parquet.format.converter.ParquetMetadataConverter.MetadataFilter;
 import org.apache.parquet.hadoop.ColumnChunkPageReadStore.ColumnChunkPageReader;
+import org.apache.parquet.hadoop.ColumnIndexFilterUtils.OffsetRange;
 import org.apache.parquet.hadoop.metadata.BlockMetaData;
 import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
+import org.apache.parquet.hadoop.metadata.ColumnPath;
 import org.apache.parquet.hadoop.metadata.FileMetaData;
 import org.apache.parquet.hadoop.metadata.ParquetMetadata;
 import org.apache.parquet.hadoop.util.HadoopInputFile;
-import org.apache.parquet.HadoopReadOptions;
 import org.apache.parquet.hadoop.util.HiddenFileFilter;
 import org.apache.parquet.hadoop.util.counters.BenchmarkCounter;
-import org.apache.parquet.io.ParquetDecodingException;
+import org.apache.parquet.internal.column.columnindex.ColumnIndex;
+import org.apache.parquet.internal.column.columnindex.OffsetIndex;
+import org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter;
+import org.apache.parquet.internal.filter2.columnindex.ColumnIndexStore;
+import org.apache.parquet.internal.filter2.columnindex.RowRanges;
+import org.apache.parquet.internal.hadoop.metadata.IndexReference;
 import org.apache.parquet.io.InputFile;
+import org.apache.parquet.io.ParquetDecodingException;
 import org.apache.parquet.io.SeekableInputStream;
 import org.apache.parquet.schema.MessageType;
 import org.apache.parquet.schema.PrimitiveType;
+import org.apache.yetus.audience.InterfaceAudience.Private;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -605,6 +617,8 @@ public static ParquetFileReader open(InputFile file, ParquetReadOptions options)
   private final Map<ColumnPath, ColumnDescriptor> paths = new HashMap<>();
   private final FileMetaData fileMetaData; // may be null
   private final List<BlockMetaData> blocks;
+  private final List<ColumnIndexStore> blockIndexStores;
+  private final List<RowRanges> blockRowRanges;
 
   // not final. in some cases, this may be lazily loaded for backward-compat.
   private ParquetMetadata footer;
@@ -646,6 +660,8 @@ public ParquetFileReader(
     this.f = file.newStream();
     this.options = HadoopReadOptions.builder(configuration).build();
     this.blocks = filterRowGroups(blocks);
+    this.blockIndexStores = listWithNulls(this.blocks.size());
+    this.blockRowRanges = listWithNulls(this.blocks.size());
     for (ColumnDescriptor col : columns) {
       paths.put(ColumnPath.get(col.getPath()), col);
     }
@@ -680,6 +696,8 @@ public ParquetFileReader(Configuration conf, Path file, ParquetMetadata footer)
     this.footer = footer;
     this.fileMetaData = footer.getFileMetaData();
     this.blocks = filterRowGroups(footer.getBlocks());
+    this.blockIndexStores = listWithNulls(this.blocks.size());
+    this.blockRowRanges = listWithNulls(this.blocks.size());
     for (ColumnDescriptor col : footer.getFileMetaData().getSchema().getColumns()) {
       paths.put(ColumnPath.get(col.getPath()), col);
     }
@@ -700,11 +718,17 @@ public ParquetFileReader(InputFile file, ParquetReadOptions options) throws IOEx
     }
     this.fileMetaData = footer.getFileMetaData();
     this.blocks = filterRowGroups(footer.getBlocks());
+    this.blockIndexStores = listWithNulls(this.blocks.size());
+    this.blockRowRanges = listWithNulls(this.blocks.size());
     for (ColumnDescriptor col : footer.getFileMetaData().getSchema().getColumns()) {
       paths.put(ColumnPath.get(col.getPath()), col);
     }
   }
 
+  private static <T> List<T> listWithNulls(int size) {
+    return Stream.generate(() -> (T) null).limit(size).collect(Collectors.toCollection(ArrayList<T>::new));
+  }
+
   public ParquetMetadata getFooter() {
     if (footer == null) {
       try {
@@ -732,6 +756,17 @@ public long getRecordCount() {
     return total;
   }
 
+  long getFilteredRecordCount() {
+    if (!options.useColumnIndexFilter()) {
+      return getRecordCount();
+    }
+    long total = 0;
+    for (int i = 0, n = blocks.size(); i < n; ++i) {
+      total += getRowRanges(i).rowCount();
+    }
+    return total;
+  }
+
   /**
    * @return the path for this file
    * @deprecated will be removed in 2.0.0; use {@link #getFile()} instead
@@ -794,30 +829,111 @@ public PageReadStore readNextRowGroup() throws IOException {
       throw new RuntimeException("Illegal row group of 0 rows");
     }
     this.currentRowGroup = new ColumnChunkPageReadStore(block.getRowCount());
-    // prepare the list of consecutive chunks to read them in one scan
-    List<ConsecutiveChunkList> allChunks = new ArrayList<ConsecutiveChunkList>();
-    ConsecutiveChunkList currentChunks = null;
+    // prepare the list of consecutive parts to read them in one scan
+    List<ConsecutivePartList> allParts = new ArrayList<ConsecutivePartList>();
+    ConsecutivePartList currentParts = null;
     for (ColumnChunkMetaData mc : block.getColumns()) {
       ColumnPath pathKey = mc.getPath();
       BenchmarkCounter.incrementTotalBytes(mc.getTotalSize());
       ColumnDescriptor columnDescriptor = paths.get(pathKey);
       if (columnDescriptor != null) {
         long startingPos = mc.getStartingPos();
-        // first chunk or not consecutive => new list
-        if (currentChunks == null || currentChunks.endPos() != startingPos) {
-          currentChunks = new ConsecutiveChunkList(startingPos);
-          allChunks.add(currentChunks);
+        // first part or not consecutive => new list
+        if (currentParts == null || currentParts.endPos() != startingPos) {
+          currentParts = new ConsecutivePartList(startingPos);
+          allParts.add(currentParts);
         }
-        currentChunks.addChunk(new ChunkDescriptor(columnDescriptor, mc, startingPos, (int)mc.getTotalSize()));
+        currentParts.addChunk(new ChunkDescriptor(columnDescriptor, mc, startingPos, (int)mc.getTotalSize()));
       }
     }
     // actually read all the chunks
-    for (ConsecutiveChunkList consecutiveChunks : allChunks) {
-      final List<Chunk> chunks = consecutiveChunks.readAll(f);
-      for (Chunk chunk : chunks) {
-        currentRowGroup.addColumn(chunk.descriptor.col, chunk.readAllPages());
+    ChunkListBuilder builder = new ChunkListBuilder();
+    for (ConsecutivePartList consecutiveChunks : allParts) {
+      consecutiveChunks.readAll(f, builder);
+    }
+    for (Chunk chunk : builder.build()) {
+      currentRowGroup.addColumn(chunk.descriptor.col, chunk.readAllPages());
+    }
+
+    // avoid re-reading bytes if the dictionary reader is used after this call
+    if (nextDictionaryReader != null) {
+      nextDictionaryReader.setRowGroup(currentRowGroup);
+    }
+
+    advanceToNextBlock();
+
+    return currentRowGroup;
+  }
+
+  /**
+   * Reads all the columns requested from the row group at the current file position. It may skip specific pages based
+   * on the column indexes according to the actual filter. As the rows are not aligned among the pages of the different
+   * columns, row synchronization might be required.
+   *
+   * @return the PageReadStore which can provide PageReaders for each column
+   * @throws IOException
+   *           if any I/O error occurs while reading
+   * @see PageReadStore#isInPageFilteringMode()
+   */
+  public PageReadStore readNextFilteredRowGroup() throws IOException {
+    if (currentBlock == blocks.size()) {
+      return null;
+    }
+    if (!options.useColumnIndexFilter()) {
+      return readNextRowGroup();
+    }
+    BlockMetaData block = blocks.get(currentBlock);
+    if (block.getRowCount() == 0) {
+      throw new RuntimeException("Illegal row group of 0 rows");
+    }
+    ColumnIndexStore ciStore = getColumnIndexStore(currentBlock);
+    RowRanges rowRanges = getRowRanges(currentBlock);
+    long rowCount = rowRanges.rowCount();
+    if (rowCount == 0) {
+      // There are no matching rows -> skipping this row-group
+      advanceToNextBlock();
+      return readNextFilteredRowGroup();
+    }
+    if (rowCount == block.getRowCount()) {
+      // All rows are matching -> fall back to the non-filtering path
+      return readNextRowGroup();
+    }
+
+    this.currentRowGroup = new ColumnChunkPageReadStore(rowRanges);
+    // prepare the list of consecutive parts to read them in one scan
+    ChunkListBuilder builder = new ChunkListBuilder();
+    List<ConsecutivePartList> allParts = new ArrayList<ConsecutivePartList>();
+    ConsecutivePartList currentParts = null;
+    for (ColumnChunkMetaData mc : block.getColumns()) {
+      ColumnPath pathKey = mc.getPath();
+      ColumnDescriptor columnDescriptor = paths.get(pathKey);
+      if (columnDescriptor != null) {
+        OffsetIndex offsetIndex = ciStore.getOffsetIndex(mc.getPath());
+
+        OffsetIndex filteredOffsetIndex = filterOffsetIndex(offsetIndex, rowRanges,
+            block.getRowCount());
+        for (OffsetRange range : calculateOffsetRanges(filteredOffsetIndex, mc, offsetIndex.getOffset(0))) {
+          BenchmarkCounter.incrementTotalBytes(range.getLength());
+          long startingPos = range.getOffset();
+          // first part or not consecutive => new list
+          if (currentParts == null || currentParts.endPos() != startingPos) {
+            currentParts = new ConsecutivePartList(startingPos);
+            allParts.add(currentParts);
+          }
+          ChunkDescriptor chunkDescriptor = new ChunkDescriptor(columnDescriptor, mc, startingPos,
+              (int) range.getLength());
+          currentParts.addChunk(chunkDescriptor);
+          builder.setOffsetIndex(chunkDescriptor, filteredOffsetIndex);
+        }
       }
     }
+    // actually read all the chunks
+    for (ConsecutivePartList consecutiveChunks : allParts) {
+      consecutiveChunks.readAll(f, builder);
+    }
+    for (Chunk chunk : builder.build()) {
+      currentRowGroup.addColumn(chunk.descriptor.col, chunk.readAllPages());
+    }
 
     // avoid re-reading bytes the dictionary reader is used after this call
     if (nextDictionaryReader != null) {
@@ -829,6 +945,25 @@ public PageReadStore readNextRowGroup() throws IOException {
     return currentRowGroup;
   }
 
+  private ColumnIndexStore getColumnIndexStore(int blockIndex) {
+    ColumnIndexStore ciStore = blockIndexStores.get(blockIndex);
+    if (ciStore == null) {
+      ciStore = ColumnIndexStoreImpl.create(this, blocks.get(blockIndex), paths.keySet());
+      blockIndexStores.set(blockIndex, ciStore);
+    }
+    return ciStore;
+  }
+
+  private RowRanges getRowRanges(int blockIndex) {
+    RowRanges rowRanges = blockRowRanges.get(blockIndex);
+    if (rowRanges == null) {
+      rowRanges = ColumnIndexFilter.calculateRowRanges(options.getRecordFilter(), getColumnIndexStore(blockIndex),
+          paths.keySet(), blocks.get(blockIndex).getRowCount());
+      blockRowRanges.set(blockIndex, rowRanges);
+    }
+    return rowRanges;
+  }
+
   public boolean skipNextRowGroup() {
     return advanceToNextBlock();
   }
@@ -863,9 +998,6 @@ public DictionaryPageReader getDictionaryReader(BlockMetaData block) {
     return new DictionaryPageReader(this, block);
   }
 
-  public BloomFilterDataReader getBloomFilterDataReader(BlockMetaData block) {
-    return new BloomFilterDataReader(this, block);
-  }
   /**
    * Reads and decompresses a dictionary page for the given column chunk.
    *
@@ -917,6 +1049,10 @@ private DictionaryPage readCompressedDictionary(
         converter.getEncoding(dictHeader.getEncoding()));
   }
 
+  public BloomFilterDataReader getBloomFilterDataReader(BlockMetaData block) {
+    return new BloomFilterDataReader(this, block);
+  }
+
   /**
    * Reads Bloom filter data for the given column chunk.
    *
@@ -926,26 +1062,55 @@ private DictionaryPage readCompressedDictionary(
    */
   public BloomFilter readBloomFilter(ColumnChunkMetaData meta) throws IOException {
     long bloomFilterOffset = meta.getBloomFilterOffset();
-
     if (bloomFilterOffset == Long.MAX_VALUE) return null;
     f.seek(bloomFilterOffset);
-
     // Read Bloom filter data header.
     byte[] bytes = new byte[BlockSplitBloomFilter.HEADER_SIZE];
     f.read(bytes);
     ByteBuffer bloomHeader = ByteBuffer.wrap(bytes);
     IntBuffer headerBuffer = bloomHeader.order(ByteOrder.LITTLE_ENDIAN).asIntBuffer();
     int numBytes = headerBuffer.get();
-
     BloomFilter.HashStrategy hash = BloomFilter.HashStrategy.values()[headerBuffer.get()];
     BloomFilter.Algorithm algorithm = BloomFilter.Algorithm.values()[headerBuffer.get()];
-
     byte[] bitset = new byte[numBytes];
     f.readFully(bitset);
-
     return new BlockSplitBloomFilter(bitset);
   }
 
+  /**
+   * @param column
+   *          the column chunk which the column index is to be returned for
+   * @return the column index for the specified column chunk or {@code null} if there is no index
+   * @throws IOException
+   *           if any I/O error occurs during reading the file
+   */
+  @Private
+  public ColumnIndex readColumnIndex(ColumnChunkMetaData column) throws IOException {
+    IndexReference ref = column.getColumnIndexReference();
+    if (ref == null) {
+      return null;
+    }
+    f.seek(ref.getOffset());
+    return ParquetMetadataConverter.fromParquetColumnIndex(column.getPrimitiveType(), Util.readColumnIndex(f));
+  }
+
+  /**
+   * @param column
+   *          the column chunk which the offset index is to be returned for
+   * @return the offset index for the specified column chunk or {@code null} if there is no index
+   * @throws IOException
+   *           if any I/O error occurs during reading the file
+   */
+  @Private
+  public OffsetIndex readOffsetIndex(ColumnChunkMetaData column) throws IOException {
+    IndexReference ref = column.getOffsetIndexReference();
+    if (ref == null) {
+      return null;
+    }
+    f.seek(ref.getOffset());
+    return ParquetMetadataConverter.fromParquetOffsetIndex(Util.readOffsetIndex(f));
+  }
+
   @Override
   public void close() throws IOException {
     try {
@@ -957,6 +1122,57 @@ public void close() throws IOException {
     }
   }
 
+  /*
+   * Builder to concatenate the buffers of the discontinuous parts for the same column. These parts are generated as a
+   * result of the column-index based filtering when some pages might be skipped at reading.
+   */
+  private class ChunkListBuilder {
+    private class ChunkData {
+      final List<ByteBuffer> buffers = new ArrayList<>();
+      OffsetIndex offsetIndex;
+    }
+
+    private final Map<ChunkDescriptor, ChunkData> map = new HashMap<>();
+    private ChunkDescriptor lastDescriptor;
+    private SeekableInputStream f;
+
+    void add(ChunkDescriptor descriptor, List<ByteBuffer> buffers, SeekableInputStream f) {
+      ChunkData data = map.get(descriptor);
+      if (data == null) {
+        data = new ChunkData();
+        map.put(descriptor, data);
+      }
+      data.buffers.addAll(buffers);
+
+      lastDescriptor = descriptor;
+      this.f = f;
+    }
+
+    void setOffsetIndex(ChunkDescriptor descriptor, OffsetIndex offsetIndex) {
+      ChunkData data = map.get(descriptor);
+      if (data == null) {
+        data = new ChunkData();
+        map.put(descriptor, data);
+      }
+      data.offsetIndex = offsetIndex;
+    }
+
+    List<Chunk> build() {
+      List<Chunk> chunks = new ArrayList<>();
+      for (Entry<ChunkDescriptor, ChunkData> entry : map.entrySet()) {
+        ChunkDescriptor descriptor = entry.getKey();
+        ChunkData data = entry.getValue();
+        if (descriptor.equals(lastDescriptor)) {
+          // because of a bug, the last chunk might be larger than descriptor.size
+          chunks.add(new WorkaroundChunk(lastDescriptor, data.buffers, f, data.offsetIndex));
+        } else {
+          chunks.add(new Chunk(descriptor, data.buffers, data.offsetIndex));
+        }
+      }
+      return chunks;
+    }
+  }
+
   /**
    * The data for a column chunk
    */
@@ -964,15 +1180,17 @@ private class Chunk {
 
     protected final ChunkDescriptor descriptor;
     protected final ByteBufferInputStream stream;
+    final OffsetIndex offsetIndex;
 
     /**
-     *
      * @param descriptor descriptor for the chunk
      * @param buffers ByteBuffers that contain the chunk
+     * @param offsetIndex the offset index for this column; might be null
      */
-    public Chunk(ChunkDescriptor descriptor, List<ByteBuffer> buffers) {
+    public Chunk(ChunkDescriptor descriptor, List<ByteBuffer> buffers, OffsetIndex offsetIndex) {
       this.descriptor = descriptor;
       this.stream = ByteBufferInputStream.wrap(buffers);
+      this.offsetIndex = offsetIndex;
     }
 
     protected PageHeader readPageHeader() throws IOException {
@@ -989,7 +1207,8 @@ public ColumnChunkPageReader readAllPages() throws IOException {
       PrimitiveType type = getFileMetaData().getSchema()
           .getType(descriptor.col.getPath()).asPrimitiveType();
       long valuesCountReadSoFar = 0;
-      while (valuesCountReadSoFar < descriptor.metadata.getValueCount()) {
+      int dataPageCountReadSoFar = 0;
+      while (hasMorePages(valuesCountReadSoFar, dataPageCountReadSoFar)) {
         PageHeader pageHeader = readPageHeader();
         int uncompressedPageSize = pageHeader.getUncompressed_page_size();
         int compressedPageSize = pageHeader.getCompressed_page_size();
@@ -999,8 +1218,8 @@ public ColumnChunkPageReader readAllPages() throws IOException {
             if (dictionaryPage != null) {
               throw new ParquetDecodingException("more than one dictionary page in column " + descriptor.col);
             }
-          DictionaryPageHeader dicHeader = pageHeader.getDictionary_page_header();
-          dictionaryPage =
+            DictionaryPageHeader dicHeader = pageHeader.getDictionary_page_header();
+            dictionaryPage =
                 new DictionaryPage(
                     this.readAsBytesInput(compressedPageSize),
                     uncompressedPageSize,
@@ -1024,6 +1243,7 @@ public ColumnChunkPageReader readAllPages() throws IOException {
                     converter.getEncoding(dataHeaderV1.getEncoding())
                     ));
             valuesCountReadSoFar += dataHeaderV1.getNum_values();
+            ++dataPageCountReadSoFar;
             break;
           case DATA_PAGE_V2:
             DataPageHeaderV2 dataHeaderV2 = pageHeader.getData_page_header_v2();
@@ -1045,6 +1265,7 @@ public ColumnChunkPageReader readAllPages() throws IOException {
                     dataHeaderV2.isIs_compressed()
                     ));
             valuesCountReadSoFar += dataHeaderV2.getNum_values();
+            ++dataPageCountReadSoFar;
             break;
           default:
             LOG.debug("skipping page of type {} of size {}", pageHeader.getType(), compressedPageSize);
@@ -1052,7 +1273,7 @@ public ColumnChunkPageReader readAllPages() throws IOException {
             break;
         }
       }
-      if (valuesCountReadSoFar != descriptor.metadata.getValueCount()) {
+      if (offsetIndex == null && valuesCountReadSoFar != descriptor.metadata.getValueCount()) {
         // Would be nice to have a CorruptParquetFileException or something as a subclass?
         throw new IOException(
             "Expected " + descriptor.metadata.getValueCount() + " values in column chunk at " +
@@ -1061,7 +1282,13 @@ public ColumnChunkPageReader readAllPages() throws IOException {
             + " pages ending at file offset " + (descriptor.fileOffset + stream.position()));
       }
       BytesInputDecompressor decompressor = options.getCodecFactory().getDecompressor(descriptor.metadata.getCodec());
-      return new ColumnChunkPageReader(decompressor, pagesInChunk, dictionaryPage);
+      return new ColumnChunkPageReader(decompressor, pagesInChunk, dictionaryPage, offsetIndex,
+          blocks.get(currentBlock).getRowCount());
+    }
+
+    private boolean hasMorePages(long valuesCountReadSoFar, int dataPageCountReadSoFar) {
+      return offsetIndex != null ? dataPageCountReadSoFar < offsetIndex.getPageCount() // index present: count pages
+          : valuesCountReadSoFar < descriptor.metadata.getValueCount(); // no index: count values
     }
 
     /**
@@ -1086,8 +1313,8 @@ private class WorkaroundChunk extends Chunk {
      * @param descriptor the descriptor of the chunk
      * @param f the file stream positioned at the end of this chunk
      */
-    private WorkaroundChunk(ChunkDescriptor descriptor, List<ByteBuffer> buffers, SeekableInputStream f) {
-      super(descriptor, buffers);
+    private WorkaroundChunk(ChunkDescriptor descriptor, List<ByteBuffer> buffers, SeekableInputStream f, OffsetIndex offsetIndex) {
+      super(descriptor, buffers, offsetIndex);
       this.f = f;
     }
 
@@ -1136,7 +1363,7 @@ public BytesInput readAsBytesInput(int size) throws IOException {
 
 
   /**
-   * information needed to read a column chunk
+   * Information needed to read a column chunk or a part of it.
    */
   private static class ChunkDescriptor {
 
@@ -1162,12 +1389,29 @@ private ChunkDescriptor(
       this.fileOffset = fileOffset;
       this.size = size;
     }
+
+    @Override
+    public int hashCode() {
+      return col.hashCode(); // only the column is hashed, matching equals(), which also compares col alone
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+      // Identity is checked first; otherwise two descriptors are equal iff they
+      // describe the same column (metadata, file offset and size are not compared).
+      if (this == obj) {
+        return true;
+      }
+      return obj instanceof ChunkDescriptor
+          && col.equals(((ChunkDescriptor) obj).col);
+    }
   }
 
   /**
-   * describes a list of consecutive column chunks to be read at once.
+   * Describes a list of consecutive parts to be read at once. A consecutive part may contain whole column chunks or
+   * only parts of them (some pages).
    */
-  private class ConsecutiveChunkList {
+  private class ConsecutivePartList {
 
     private final long offset;
     private int length;
@@ -1176,7 +1420,7 @@ private class ConsecutiveChunkList {
     /**
      * @param offset where the first chunk starts
      */
-    ConsecutiveChunkList(long offset) {
+    ConsecutivePartList(long offset) {
       this.offset = offset;
     }
 
@@ -1192,45 +1436,19 @@ public void addChunk(ChunkDescriptor descriptor) {
 
     /**
      * @param f file to read the chunks from
-     * @return the chunks
+     * @param builder used to build chunk list to read the pages for the different columns
      * @throws IOException if there is an error while reading from the stream
      */
-    public List<Chunk> readAll(SeekableInputStream f) throws IOException {
-      List<Chunk> result = new ArrayList<Chunk>(chunks.size());
-      f.seek(offset);
-
-      int fullAllocations = length / options.getMaxAllocationSize();
-      int lastAllocationSize = length % options.getMaxAllocationSize();
-
-      int numAllocations = fullAllocations + (lastAllocationSize > 0 ? 1 : 0);
-      List<ByteBuffer> buffers = new ArrayList<>(numAllocations);
-
-      for (int i = 0; i < fullAllocations; i += 1) {
-        buffers.add(options.getAllocator().allocate(options.getMaxAllocationSize()));
-      }
-
-      if (lastAllocationSize > 0) {
-        buffers.add(options.getAllocator().allocate(lastAllocationSize));
-      }
-
-      for (ByteBuffer buffer : buffers) {
-        f.readFully(buffer);
-        buffer.flip();
-      }
+    public void readAll(SeekableInputStream f, ChunkListBuilder builder) throws IOException {
+      List<ByteBuffer> buffers = readBlocks(f, offset, length);
 
       // report in a counter the data we just scanned
       BenchmarkCounter.incrementBytesRead(length);
       ByteBufferInputStream stream = ByteBufferInputStream.wrap(buffers);
       for (int i = 0; i < chunks.size(); i++) {
         ChunkDescriptor descriptor = chunks.get(i);
-        if (i < chunks.size() - 1) {
-          result.add(new Chunk(descriptor, stream.sliceBuffers(descriptor.size)));
-        } else {
-          // because of a bug, the last chunk might be larger than descriptor.size
-          result.add(new WorkaroundChunk(descriptor, stream.sliceBuffers(descriptor.size), f));
-        }
+        builder.add(descriptor, stream.sliceBuffers(descriptor.size), f);
       }
-      return result ;
     }
 
     /**
@@ -1242,4 +1460,72 @@ public long endPos() {
 
   }
 
+  /**
+   * Reads {@code length} bytes starting at {@code offset} into buffers of at most the configured max allocation size.
+   * @param f file to read the blocks from (it is repositioned to {@code offset})
+   * @return the ByteBuffer blocks, flipped so they are ready for reading
+   * @throws IOException if there is an error while reading from the stream
+   */
+  List<ByteBuffer> readBlocks(SeekableInputStream f, long offset, int length) throws IOException {
+    f.seek(offset);
+
+    final int maxChunk = options.getMaxAllocationSize();
+    final int wholeChunks = length / maxChunk;
+    final int tailSize = length % maxChunk;
+
+    List<ByteBuffer> buffers = new ArrayList<>(tailSize > 0 ? wholeChunks + 1 : wholeChunks);
+    for (int n = wholeChunks; n > 0; n--) {
+      buffers.add(options.getAllocator().allocate(maxChunk));
+    }
+    if (tailSize > 0) {
+      // the remainder gets its own, smaller buffer
+      buffers.add(options.getAllocator().allocate(tailSize));
+    }
+
+    for (ByteBuffer target : buffers) {
+      f.readFully(target);
+      target.flip();
+    }
+    return buffers;
+  }
+
+  Optional<PageReader> readColumnInBlock(int blockIndex, ColumnDescriptor columnDescriptor) {
+    BlockMetaData block = blocks.get(blockIndex);
+    if (block.getRowCount() == 0) {
+      throw new RuntimeException("Illegal row group of 0 rows");
+    }
+    // Empty when the row group has no chunk for this column; toIntExact fails fast instead of truncating sizes >2GB.
+    return findColumnByPath(block, columnDescriptor.getPath())
+      .map(column -> new ChunkDescriptor(columnDescriptor, column, column.getStartingPos(), Math.toIntExact(column.getTotalSize())))
+      .map(chunk -> readChunk(f, chunk));
+  }
+
+  private ColumnChunkPageReader readChunk(SeekableInputStream f, ChunkDescriptor descriptor) {
+    try {
+      ByteBufferInputStream stream = ByteBufferInputStream.wrap(readBlocks(f, descriptor.fileOffset, descriptor.size));
+      // no offset index (null): pages are read until the chunk's value count is reached
+      Chunk chunk = new WorkaroundChunk(descriptor, stream.sliceBuffers(descriptor.size), f, null);
+      return chunk.readAllPages();
+    } catch (IOException e) {
+      throw new java.io.UncheckedIOException(e); // still a RuntimeException, but keeps the I/O cause typed
+    }
+  }
+
+  /**
+   * Looks up the chunk of the given column in a row group; empty if the row group has no chunk with that path.
+   */
+  private Optional<ColumnChunkMetaData> findColumnByPath(BlockMetaData block, String[] path) {
+    return block.getColumns().stream()
+        .filter(candidate -> Arrays.equals(candidate.getPath().toArray(), path))
+        .findFirst();
+  }
+
+  public int blocksCount() {
+    return blocks.size(); // size of the blocks (row group metadata) list
+  }
+
+  public BlockMetaData getBlockMetaData(int blockIndex) {
+    return blocks.get(blockIndex); // metadata of the row group at that position in the blocks list
+  }
+
 }
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java
index 7c52b1b93f..0a13e543e4 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java
@@ -1,4 +1,4 @@
-/* 
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -19,18 +19,22 @@
 package org.apache.parquet.hadoop;
 
 import static org.apache.parquet.format.Util.writeFileMetaData;
+import static org.apache.parquet.format.converter.ParquetMetadataConverter.MAX_STATS_SIZE;
 import static org.apache.parquet.hadoop.ParquetWriter.DEFAULT_BLOCK_SIZE;
 import static org.apache.parquet.hadoop.ParquetWriter.MAX_PADDING_SIZE_DEFAULT;
 
 import java.io.IOException;
 import java.nio.charset.Charset;
 import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
+import java.util.Optional;
 import java.util.Set;
 
 import org.apache.hadoop.conf.Configuration;
@@ -41,16 +45,27 @@
 import org.apache.parquet.Preconditions;
 import org.apache.parquet.Strings;
 import org.apache.parquet.Version;
+import org.apache.parquet.bytes.ByteBufferAllocator;
 import org.apache.parquet.bytes.BytesInput;
 import org.apache.parquet.bytes.BytesUtils;
+import org.apache.parquet.bytes.HeapByteBufferAllocator;
 import org.apache.parquet.column.ColumnDescriptor;
+import org.apache.parquet.column.ColumnReader;
+import org.apache.parquet.column.ColumnWriteStore;
+import org.apache.parquet.column.ColumnWriter;
 import org.apache.parquet.column.Encoding;
 import org.apache.parquet.column.EncodingStats;
+import org.apache.parquet.column.ParquetProperties;
+import org.apache.parquet.column.impl.ColumnReadStoreImpl;
+import org.apache.parquet.column.impl.ColumnWriteStoreV1;
 import org.apache.parquet.column.page.DictionaryPage;
+import org.apache.parquet.column.page.PageReader;
 import org.apache.parquet.column.statistics.Statistics;
 import org.apache.parquet.column.values.bloomfilter.BloomFilter;
+import org.apache.parquet.example.DummyRecordConverter;
 import org.apache.parquet.hadoop.ParquetOutputFormat.JobSummaryLevel;
 import org.apache.parquet.hadoop.metadata.ColumnPath;
+import org.apache.parquet.format.Util;
 import org.apache.parquet.format.converter.ParquetMetadataConverter;
 import org.apache.parquet.hadoop.metadata.BlockMetaData;
 import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
@@ -58,8 +73,14 @@
 import org.apache.parquet.hadoop.metadata.FileMetaData;
 import org.apache.parquet.hadoop.metadata.GlobalMetaData;
 import org.apache.parquet.hadoop.metadata.ParquetMetadata;
+import org.apache.parquet.hadoop.util.BlocksCombiner;
 import org.apache.parquet.hadoop.util.HadoopOutputFile;
 import org.apache.parquet.hadoop.util.HadoopStreams;
+import org.apache.parquet.internal.column.columnindex.ColumnIndex;
+import org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder;
+import org.apache.parquet.internal.column.columnindex.OffsetIndex;
+import org.apache.parquet.internal.column.columnindex.OffsetIndexBuilder;
+import org.apache.parquet.internal.hadoop.metadata.IndexReference;
 import org.apache.parquet.io.InputFile;
 import org.apache.parquet.io.OutputFile;
 import org.apache.parquet.io.SeekableInputStream;
@@ -94,13 +115,22 @@ public static enum Mode {
   private final MessageType schema;
   private final PositionOutputStream out;
   private final AlignmentStrategy alignment;
+  private final int columnIndexTruncateLength;
 
   // file data
   private List<BlockMetaData> blocks = new ArrayList<BlockMetaData>();
 
+  // The column/offset indexes per block, per column chunk
+  private final List<List<ColumnIndex>> columnIndexes = new ArrayList<>();
+  private final List<List<OffsetIndex>> offsetIndexes = new ArrayList<>();
+
   // row group data
   private BlockMetaData currentBlock; // appended to by endColumn
 
+  // The column/offset indexes for the current block
+  private List<ColumnIndex> currentColumnIndexes;
+  private List<OffsetIndex> currentOffsetIndexes;
+
   // row group data set at the start of a row group
   private long currentRecordCount; // set in startBlock
 
@@ -110,6 +140,9 @@ public static enum Mode {
   private long uncompressedLength;
   private long compressedLength;
   private Statistics currentStatistics; // accumulated in writePage(s)
+  private ColumnIndexBuilder columnIndexBuilder;
+  private OffsetIndexBuilder offsetIndexBuilder;
+  private long firstPageOffset;
 
   // column chunk data set at the start of a column
   private CompressionCodecName currentChunkCodec; // set in startColumn
@@ -228,10 +261,27 @@ public ParquetFileWriter(Configuration configuration, MessageType schema,
    * @param rowGroupSize the row group size
    * @param maxPaddingSize the maximum padding
    * @throws IOException if the file can not be created
+   * @deprecated will be removed in 2.0.0
    */
+  @Deprecated
   public ParquetFileWriter(OutputFile file, MessageType schema, Mode mode,
                            long rowGroupSize, int maxPaddingSize)
       throws IOException {
+    this(file, schema, mode, rowGroupSize, maxPaddingSize,
+        ParquetProperties.DEFAULT_COLUMN_INDEX_TRUNCATE_LENGTH);
+  }
+  /**
+   * @param file OutputFile to create or overwrite
+   * @param schema the schema of the data
+   * @param mode file creation mode
+   * @param rowGroupSize the row group size
+   * @param maxPaddingSize the maximum padding
+   * @param columnIndexTruncateLength the length that the min/max values in column indexes are truncated to on a best-effort basis
+   * @throws IOException if the file can not be created
+   */
+  public ParquetFileWriter(OutputFile file, MessageType schema, Mode mode,
+                           long rowGroupSize, int maxPaddingSize, int columnIndexTruncateLength)
+      throws IOException {
     TypeUtil.checkValidWriteSchema(schema);
 
     this.schema = schema;
@@ -251,6 +301,7 @@ public ParquetFileWriter(OutputFile file, MessageType schema, Mode mode,
     }
 
     this.encodingStatsBuilder = new EncodingStats.Builder();
+    this.columnIndexTruncateLength = columnIndexTruncateLength;
   }
 
   /**
@@ -273,6 +324,8 @@ public ParquetFileWriter(OutputFile file, MessageType schema, Mode mode,
     this.out = HadoopStreams.wrap(
         fs.create(file, true, 8192, fs.getDefaultReplication(file), rowAndBlockSize));
     this.encodingStatsBuilder = new EncodingStats.Builder();
+    // no truncation is needed for testing
+    this.columnIndexTruncateLength = Integer.MAX_VALUE;
   }
   /**
    * start the file
@@ -298,6 +351,9 @@ public void startBlock(long recordCount) throws IOException {
 
     currentBlock = new BlockMetaData();
     currentRecordCount = recordCount;
+
+    currentColumnIndexes = new ArrayList<>();
+    currentOffsetIndexes = new ArrayList<>();
   }
 
   /**
@@ -322,6 +378,10 @@ public void startColumn(ColumnDescriptor descriptor,
     uncompressedLength = 0;
     // The statistics will be copied from the first one added at writeDataPage(s) so we have the correct typed one
     currentStatistics = null;
+
+    columnIndexBuilder = ColumnIndexBuilder.getBuilder(currentChunkType, columnIndexTruncateLength);
+    offsetIndexBuilder = OffsetIndexBuilder.getBuilder();
+    firstPageOffset = -1;
   }
 
   /**
@@ -379,6 +439,9 @@ public void writeDataPage(
       Encoding dlEncoding,
       Encoding valuesEncoding) throws IOException {
     state = state.write();
+    // We are unable to build indexes without rowCount so skip them for this column
+    offsetIndexBuilder = OffsetIndexBuilder.getNoOpBuilder();
+    columnIndexBuilder = ColumnIndexBuilder.getNoOpBuilder();
     long beforeHeader = out.getPos();
     LOG.debug("{}: write data page: {} values", beforeHeader, valueCount);
     int compressedPageSize = (int)bytes.size();
@@ -410,8 +473,50 @@ public void writeDataPage(
    * @param dlEncoding encoding of the definition level
    * @param valuesEncoding encoding of values
    * @throws IOException if there is an error while writing
+   * @deprecated this method does not support writing column indexes; Use
+   *             {@link #writeDataPage(int, int, BytesInput, Statistics, long, Encoding, Encoding, Encoding)} instead
+   */
+  @Deprecated
+  public void writeDataPage(
+      int valueCount, int uncompressedPageSize,
+      BytesInput bytes,
+      Statistics statistics,
+      Encoding rlEncoding,
+      Encoding dlEncoding,
+      Encoding valuesEncoding) throws IOException {
+    // This overload receives no rowCount, so indexes cannot be built: both builders become no-op ones for this column
+    offsetIndexBuilder = OffsetIndexBuilder.getNoOpBuilder();
+    columnIndexBuilder = ColumnIndexBuilder.getNoOpBuilder();
+    innerWriteDataPage(valueCount, uncompressedPageSize, bytes, statistics, rlEncoding, dlEncoding, valuesEncoding);
+  }
+
+  /**
+   * Writes a single data page and adds its written size and row count to the offset index builder of the column
+   * @param valueCount count of values
+   * @param uncompressedPageSize the size of the data once uncompressed
+   * @param bytes the compressed data for the page without header
+   * @param statistics the statistics of the page
+   * @param rowCount the number of rows in the page
+   * @param rlEncoding encoding of the repetition level
+   * @param dlEncoding encoding of the definition level
+   * @param valuesEncoding encoding of values
+   * @throws IOException if any I/O error occurs during writing the file
    */
   public void writeDataPage(
+      int valueCount, int uncompressedPageSize,
+      BytesInput bytes,
+      Statistics statistics,
+      long rowCount,
+      Encoding rlEncoding,
+      Encoding dlEncoding,
+      Encoding valuesEncoding) throws IOException {
+    long beforeHeader = out.getPos();
+    innerWriteDataPage(valueCount, uncompressedPageSize, bytes, statistics, rlEncoding, dlEncoding, valuesEncoding);
+
+    offsetIndexBuilder.add((int) (out.getPos() - beforeHeader), rowCount); // bytes written for this page (position delta) and its rows
+  }
+
+  private void innerWriteDataPage(
       int valueCount, int uncompressedPageSize,
       BytesInput bytes,
       Statistics statistics,
@@ -420,8 +525,11 @@ public void writeDataPage(
       Encoding valuesEncoding) throws IOException {
     state = state.write();
     long beforeHeader = out.getPos();
+    if (firstPageOffset == -1) {
+      firstPageOffset = beforeHeader;
+    }
     LOG.debug("{}: write data page: {} values", beforeHeader, valueCount);
-    int compressedPageSize = (int)bytes.size();
+    int compressedPageSize = (int) bytes.size();
     metadataConverter.writeDataPageHeader(
         uncompressedPageSize, compressedPageSize,
         valueCount,
@@ -443,6 +551,8 @@ public void writeDataPage(
       currentStatistics.mergeStatistics(statistics);
     }
 
+    columnIndexBuilder.add(statistics);
+
     encodingStatsBuilder.addDataEncoding(valuesEncoding);
     currentEncodings.add(rlEncoding);
     currentEncodings.add(dlEncoding);
@@ -450,25 +560,47 @@ public void writeDataPage(
   }
 
   /**
-   * writes a number of pages at once
-   * @param bytes bytes to be written including page headers
+   * Writes a column chunk at once
+   * @param descriptor the descriptor of the column
+   * @param valueCount the value count in this column
+   * @param compressionCodecName the name of the compression codec used for compressing the pages
+   * @param dictionaryPage the dictionary page for this column chunk (might be null)
+   * @param bytes the encoded pages including page headers to be written as is
    * @param uncompressedTotalPageSize total uncompressed size (without page headers)
    * @param compressedTotalPageSize total compressed size (without page headers)
+   * @param totalStats accumulated statistics for the column chunk
+   * @param columnIndexBuilder the builder object for the column index
+   * @param offsetIndexBuilder the builder object for the offset index
+   * @param rlEncodings the RL encodings used in this column chunk
+   * @param dlEncodings the DL encodings used in this column chunk
+   * @param dataEncodings the data encodings used in this column chunk
    * @throws IOException if there is an error while writing
    */
-  void writeDataPages(BytesInput bytes,
-                      long uncompressedTotalPageSize,
-                      long compressedTotalPageSize,
-                      Statistics totalStats,
-                      Set<Encoding> rlEncodings,
-                      Set<Encoding> dlEncodings,
-                      List<Encoding> dataEncodings) throws IOException {
+  void writeColumnChunk(ColumnDescriptor descriptor,
+      long valueCount,
+      CompressionCodecName compressionCodecName,
+      DictionaryPage dictionaryPage,
+      BytesInput bytes,
+      long uncompressedTotalPageSize,
+      long compressedTotalPageSize,
+      Statistics<?> totalStats,
+      ColumnIndexBuilder columnIndexBuilder,
+      OffsetIndexBuilder offsetIndexBuilder,
+      Set<Encoding> rlEncodings,
+      Set<Encoding> dlEncodings,
+      List<Encoding> dataEncodings) throws IOException {
+    startColumn(descriptor, valueCount, compressionCodecName);
+
     state = state.write();
+    if (dictionaryPage != null) {
+      writeDictionaryPage(dictionaryPage);
+    }
     LOG.debug("{}: write data pages", out.getPos());
     long headersSize = bytes.size() - compressedTotalPageSize;
     this.uncompressedLength += uncompressedTotalPageSize + headersSize;
     this.compressedLength += compressedTotalPageSize + headersSize;
     LOG.debug("{}: write data pages content", out.getPos());
+    firstPageOffset = out.getPos();
     bytes.writeAllTo(out);
     encodingStatsBuilder.addDataEncodings(dataEncodings);
     if (rlEncodings.isEmpty()) {
@@ -478,6 +610,11 @@ void writeDataPages(BytesInput bytes,
     currentEncodings.addAll(dlEncodings);
     currentEncodings.addAll(dataEncodings);
     currentStatistics = totalStats;
+
+    this.columnIndexBuilder = columnIndexBuilder;
+    this.offsetIndexBuilder = offsetIndexBuilder;
+
+    endColumn();
   }
 
   /**
@@ -487,6 +624,12 @@ void writeDataPages(BytesInput bytes,
   public void endColumn() throws IOException {
     state = state.endColumn();
     LOG.debug("{}: end column", out.getPos());
+    if (columnIndexBuilder.getMinMaxSize() > columnIndexBuilder.getPageCount() * MAX_STATS_SIZE) {
+      currentColumnIndexes.add(null);
+    } else {
+      currentColumnIndexes.add(columnIndexBuilder.build());
+    }
+    currentOffsetIndexes.add(offsetIndexBuilder.build(firstPageOffset));
     currentBlock.addColumn(ColumnChunkMetaData.get(
         currentChunkPath,
         currentChunkType,
@@ -503,6 +646,8 @@ public void endColumn() throws IOException {
     this.currentBlock.setTotalByteSize(currentBlock.getTotalByteSize() + uncompressedLength);
     this.uncompressedLength = 0;
     this.compressedLength = 0;
+    columnIndexBuilder = null;
+    offsetIndexBuilder = null;
   }
 
   /**
@@ -514,6 +659,10 @@ public void endBlock() throws IOException {
     LOG.debug("{}: end block", out.getPos());
     currentBlock.setRowCount(currentRecordCount);
     blocks.add(currentBlock);
+    columnIndexes.add(currentColumnIndexes);
+    offsetIndexes.add(currentOffsetIndexes);
+    currentColumnIndexes = null;
+    currentOffsetIndexes = null;
     currentBlock = null;
   }
 
@@ -532,6 +681,116 @@ public void appendFile(InputFile file) throws IOException {
     ParquetFileReader.open(file).appendTo(this);
   }
 
+  public int merge(List<InputFile> inputFiles, CodecFactory.BytesCompressor compressor, String createdBy, long maxBlockSize) throws IOException {
+    List<ParquetFileReader> readers = getReaders(inputFiles);
+    try {
+      ByteBufferAllocator allocator = new HeapByteBufferAllocator();
+      ColumnReadStoreImpl columnReadStore = new ColumnReadStoreImpl(null, new DummyRecordConverter(schema).getRootConverter(), schema, createdBy);
+      this.start();
+      List<BlocksCombiner.SmallBlocksUnion> largeBlocks = BlocksCombiner.combineLargeBlocks(readers, maxBlockSize);
+      for (BlocksCombiner.SmallBlocksUnion smallBlocks : largeBlocks) {
+        for (int columnIndex = 0; columnIndex < schema.getColumns().size(); columnIndex++) {
+          ColumnDescriptor path = schema.getColumns().get(columnIndex);
+          ColumnChunkPageWriteStore store = new ColumnChunkPageWriteStore(compressor, schema, allocator, ParquetProperties.DEFAULT_COLUMN_INDEX_TRUNCATE_LENGTH);
+          ColumnWriteStoreV1 columnWriteStoreV1 = new ColumnWriteStoreV1(schema, store, ParquetProperties.builder().build());
+          for (BlocksCombiner.SmallBlock smallBlock : smallBlocks.getBlocks()) {
+            ParquetFileReader parquetFileReader = smallBlock.getReader();
+            try {
+              Optional<PageReader> columnChunkPageReader = parquetFileReader.readColumnInBlock(smallBlock.getBlockIndex(), path);
+              ColumnWriter columnWriter = columnWriteStoreV1.getColumnWriter(path);
+              if (columnChunkPageReader.isPresent()) {
+                ColumnReader columnReader = columnReadStore.newMemColumnReader(path, columnChunkPageReader.get());
+                for (long i = 0; i < columnReader.getTotalValueCount(); i++) { // long: an int counter would overflow on huge chunks
+                  consumeTriplet(columnWriteStoreV1, columnWriter, columnReader);
+                }
+              } else {
+                MessageType inputFileSchema = parquetFileReader.getFileMetaData().getSchema();
+                String[] parentPath = getExisingParentPath(path, inputFileSchema);
+                int def = inputFileSchema.getMaxDefinitionLevel(parentPath);
+                int rep = inputFileSchema.getMaxRepetitionLevel(parentPath);
+                for (long i = 0; i < parquetFileReader.getBlockMetaData(smallBlock.getBlockIndex()).getRowCount(); i++) { // row count is a long
+                  columnWriter.writeNull(rep, def);
+                  if (def == 0) {
+                    // V1 pages also respect record boundaries so we have to mark them
+                    columnWriteStoreV1.endRecord();
+                  }
+                }
+              }
+            } catch (Exception e) { // NOTE(review): only logged; this block's values for the column may then be missing from the output
+              LOG.error("File {} is not readable", parquetFileReader.getFile(), e);
+            }
+          }
+          if (columnIndex == 0) { // the output block is started only once its first column has been buffered
+            this.startBlock(smallBlocks.getRowCount());
+          }
+          columnWriteStoreV1.flush();
+          store.flushToFileWriter(path, this);
+        }
+        this.endBlock();
+      }
+      this.end(Collections.emptyMap());
+    } finally {
+      BlocksCombiner.closeReaders(readers);
+    }
+    return 0;
+  }
+
+  private String[] getExisingParentPath(ColumnDescriptor path, MessageType inputFileSchema) {
+    String[] prefix = path.getPath().clone(); // longest candidate first: the full column path
+    while (prefix.length > 0 && !inputFileSchema.containsPath(prefix)) {
+      prefix = Arrays.copyOf(prefix, prefix.length - 1); // not in the input schema: retry with the parent path
+    }
+    return prefix;
+  }
+
+  private List<ParquetFileReader> getReaders(List<InputFile> inputFiles) throws IOException {
+    List<ParquetFileReader> readers = new ArrayList<>(inputFiles.size()); // one reader per input file, in input order
+    for (InputFile inputFile : inputFiles) {
+      readers.add(ParquetFileReader.open(inputFile)); // NOTE(review): if open() throws, readers opened so far are not closed here
+    }
+    return readers;
+  }
+
+  private void consumeTriplet(ColumnWriteStore columnWriteStore, ColumnWriter columnWriter, ColumnReader columnReader) {
+    int definitionLevel = columnReader.getCurrentDefinitionLevel();
+    int repetitionLevel = columnReader.getCurrentRepetitionLevel();
+    ColumnDescriptor column = columnReader.getDescriptor();
+    PrimitiveType type = column.getPrimitiveType();
+    if (definitionLevel < column.getMaxDefinitionLevel()) { // below the max definition level: only the levels are copied
+      columnWriter.writeNull(repetitionLevel, definitionLevel);
+    } else {
+      switch (type.getPrimitiveTypeName()) { // pick the typed getter/writer pair matching the column's primitive type
+        case INT32:
+          columnWriter.write(columnReader.getInteger(), repetitionLevel, definitionLevel);
+          break;
+        case INT64:
+          columnWriter.write(columnReader.getLong(), repetitionLevel, definitionLevel);
+          break;
+        case BINARY:
+        case FIXED_LEN_BYTE_ARRAY:
+        case INT96:
+          columnWriter.write(columnReader.getBinary(), repetitionLevel, definitionLevel);
+          break;
+        case BOOLEAN:
+          columnWriter.write(columnReader.getBoolean(), repetitionLevel, definitionLevel);
+          break;
+        case FLOAT:
+          columnWriter.write(columnReader.getFloat(), repetitionLevel, definitionLevel);
+          break;
+        case DOUBLE:
+          columnWriter.write(columnReader.getDouble(), repetitionLevel, definitionLevel);
+          break;
+        default:
+          throw new IllegalArgumentException("Unknown primitive type " + type);
+      }
+    }
+    columnReader.consume(); // done with the current triplet
+    if (repetitionLevel == 0) { // repetition level 0 is treated as a record boundary here
+      // V1 pages also respect record boundaries so we have to mark them
+      columnWriteStore.endRecord();
+    }
+  }
+
   /**
    * @param file a file stream to read from
    * @param rowGroups row groups to copy
@@ -626,6 +885,11 @@ public void appendRowGroup(SeekableInputStream from, BlockMetaData rowGroup,
         length = 0;
       }
 
+      // TODO: column/offset indexes are not copied
+      // (it would require seeking to the end of the file for each row group)
+      currentColumnIndexes.add(null);
+      currentOffsetIndexes.add(null);
+
       currentBlock.addColumn(ColumnChunkMetaData.get(
           chunk.getPath(),
           chunk.getPrimitiveType(),
@@ -693,12 +957,57 @@ private static void copy(SeekableInputStream from, PositionOutputStream to,
    */
   public void end(Map<String, String> extraMetaData) throws IOException {
     state = state.end();
+    serializeColumnIndexes(columnIndexes, blocks, out);
+    serializeOffsetIndexes(offsetIndexes, blocks, out);
     LOG.debug("{}: end", out.getPos());
     this.footer = new ParquetMetadata(new FileMetaData(schema, extraMetaData, Version.FULL_VERSION), blocks);
     serializeFooter(footer, out);
     out.close();
   }
 
+  private static void serializeColumnIndexes(
+      List<List<ColumnIndex>> columnIndexes,
+      List<BlockMetaData> blocks,
+      PositionOutputStream out) throws IOException {
+    LOG.debug("{}: column indexes", out.getPos());
+    for (int b = 0; b < blocks.size(); b++) {
+      List<ColumnChunkMetaData> chunks = blocks.get(b).getColumns();
+      List<ColumnIndex> indexesOfBlock = columnIndexes.get(b);
+      for (int c = 0; c < chunks.size(); c++) {
+        ColumnChunkMetaData chunk = chunks.get(c);
+        org.apache.parquet.format.ColumnIndex formatIndex =
+            ParquetMetadataConverter.toParquetColumnIndex(chunk.getPrimitiveType(), indexesOfBlock.get(c));
+        if (formatIndex != null) {
+          // Record offset and length of the serialized index on the chunk's metadata.
+          long start = out.getPos();
+          Util.writeColumnIndex(formatIndex, out);
+          chunk.setColumnIndexReference(new IndexReference(start, (int) (out.getPos() - start)));
+        }
+      }
+    }
+  }
+
+  private static void serializeOffsetIndexes(
+      List<List<OffsetIndex>> offsetIndexes,
+      List<BlockMetaData> blocks,
+      PositionOutputStream out) throws IOException {
+    LOG.debug("{}: offset indexes", out.getPos());
+    for (int b = 0; b < blocks.size(); b++) {
+      List<ColumnChunkMetaData> chunks = blocks.get(b).getColumns();
+      List<OffsetIndex> indexesOfBlock = offsetIndexes.get(b);
+      for (int c = 0; c < chunks.size(); c++) {
+        OffsetIndex index = indexesOfBlock.get(c);
+        if (index != null) {
+          // Record offset and length of the serialized index on the chunk's metadata.
+          ColumnChunkMetaData chunk = chunks.get(c);
+          long start = out.getPos();
+          Util.writeOffsetIndex(ParquetMetadataConverter.toParquetOffsetIndex(index), out);
+          chunk.setOffsetIndexReference(new IndexReference(start, (int) (out.getPos() - start)));
+        }
+      }
+    }
+  }
+
   private static void serializeFooter(ParquetMetadata footer, PositionOutputStream out) throws IOException {
     long footerIndex = out.getPos();
     org.apache.parquet.format.FileMetaData parquetMetadata = metadataConverter.toParquetMetadata(CURRENT_VERSION, footer);
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetInputFormat.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetInputFormat.java
index 3348ed8eb2..4d6f42c2b8 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetInputFormat.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetInputFormat.java
@@ -1,4 +1,4 @@
-/* 
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -129,6 +129,11 @@ public class ParquetInputFormat<T> extends FileInputFormat<Void, T> {
    */
   public static final String DICTIONARY_FILTERING_ENABLED = "parquet.filter.dictionary.enabled";
 
+  /**
+   * key to configure whether column index filtering of pages is enabled
+   */
+  public static final String COLUMN_INDEX_FILTERING_ENABLED = "parquet.filter.columnindex.enabled";
+
   /**
    * key to configure whether row group bloom filtering is enabled
    */
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetOutputFormat.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetOutputFormat.java
index ffcf5c6a32..0789bf50d4 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetOutputFormat.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetOutputFormat.java
@@ -1,4 +1,4 @@
-/*
+/* 
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- *
+ * 
  *   http://www.apache.org/licenses/LICENSE-2.0
- *
+ * 
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -143,9 +143,7 @@ public static enum JobSummaryLevel {
   public static final String MIN_ROW_COUNT_FOR_PAGE_SIZE_CHECK = "parquet.page.size.row.check.min";
   public static final String MAX_ROW_COUNT_FOR_PAGE_SIZE_CHECK = "parquet.page.size.row.check.max";
   public static final String ESTIMATE_PAGE_SIZE_CHECK = "parquet.page.size.check.estimate";
-  public static final String BLOOM_FILTER_COLUMN_NAMES = "parquet.bloom.filter.column.names";
-  public static final String BLOOM_FILTER_EXPECT_DISTINCT_NUMBERS = "parquet.bloom.filter.expected.distinct.numbers";
-  public static final String ENABLE_BLOOM_FILTER = "parquet.enable.bloom.filter";
+  public static final String COLUMN_INDEX_TRUNCATE_LENGTH = "parquet.columnindex.truncate.length";
 
   public static JobSummaryLevel getJobSummaryLevel(Configuration conf) {
     String level = conf.get(JOB_SUMMARY_LEVEL);
@@ -211,14 +209,6 @@ public static boolean getEnableDictionary(JobContext jobContext) {
     return getEnableDictionary(getConfiguration(jobContext));
   }
 
-  public static void setBloomFilterColumnNames(Job job, String names) {
-    getConfiguration(job).set(BLOOM_FILTER_COLUMN_NAMES, names);
-  }
-
-  public static String getBloomFilterColumnNames(JobContext jobContext) {
-    return getBloomFilterColumnNames(getConfiguration(jobContext));
-  }
-
   public static int getBlockSize(JobContext jobContext) {
     return getBlockSize(getConfiguration(jobContext));
   }
@@ -252,19 +242,6 @@ public static boolean getEnableDictionary(Configuration configuration) {
         ENABLE_DICTIONARY, ParquetProperties.DEFAULT_IS_DICTIONARY_ENABLED);
   }
 
-  public static String getBloomFilterColumnNames(Configuration conf) {
-    return conf.get(BLOOM_FILTER_COLUMN_NAMES);
-  }
-
-  public static boolean getEnableBloomFilter(Configuration configuration) {
-    return configuration.getBoolean(ENABLE_BLOOM_FILTER,
-        ParquetProperties.DEFAULT_BLOOM_FILTER_ENABLED);
-  }
-
-  public static String getBloomFilterExpectedDistinctNumbers(Configuration configuration) {
-    return configuration.get(BLOOM_FILTER_EXPECT_DISTINCT_NUMBERS);
-  }
-
   public static int getMinRowCountForPageSizeCheck(Configuration configuration) {
     return configuration.getInt(MIN_ROW_COUNT_FOR_PAGE_SIZE_CHECK,
         ParquetProperties.DEFAULT_MINIMUM_RECORD_COUNT_FOR_CHECK);
@@ -336,6 +313,18 @@ private static int getMaxPaddingSize(Configuration conf) {
     return conf.getInt(MAX_PADDING_BYTES, ParquetWriter.MAX_PADDING_SIZE_DEFAULT);
   }
 
+  public static void setColumnIndexTruncateLength(JobContext jobContext, int length) {
+    setColumnIndexTruncateLength(getConfiguration(jobContext), length);
+  }
+
+  public static void setColumnIndexTruncateLength(Configuration conf, int length) {
+    conf.setInt(COLUMN_INDEX_TRUNCATE_LENGTH, length);
+  }
+
+  private static int getColumnIndexTruncateLength(Configuration conf) {
+    return conf.getInt(COLUMN_INDEX_TRUNCATE_LENGTH, ParquetProperties.DEFAULT_COLUMN_INDEX_TRUNCATE_LENGTH);
+  }
+
   private WriteSupport<T> writeSupport;
   private ParquetOutputCommitter committer;
 
@@ -385,13 +374,12 @@ public RecordWriter<Void, T> getRecordWriter(Configuration conf, Path file, Comp
     ParquetProperties props = ParquetProperties.builder()
         .withPageSize(getPageSize(conf))
         .withDictionaryPageSize(getDictionaryPageSize(conf))
-        .withBloomFilterEnabled(getEnableBloomFilter(conf))
-        .withBloomFilterInfo(getBloomFilterColumnNames(conf), getBloomFilterExpectedDistinctNumbers(conf))
         .withDictionaryEncoding(getEnableDictionary(conf))
         .withWriterVersion(getWriterVersion(conf))
         .estimateRowCountForPageSizeCheck(getEstimatePageSizeCheck(conf))
         .withMinRowCountForPageSizeCheck(getMinRowCountForPageSizeCheck(conf))
         .withMaxRowCountForPageSizeCheck(getMaxRowCountForPageSizeCheck(conf))
+        .withColumnIndexTruncateLength(getColumnIndexTruncateLength(conf))
         .build();
 
     long blockSize = getLongBlockSize(conf);
@@ -409,14 +397,12 @@ public RecordWriter<Void, T> getRecordWriter(Configuration conf, Path file, Comp
       LOG.info("Page size checking is: {}", (props.estimateNextSizeCheck() ? "estimated" : "constant"));
       LOG.info("Min row count for page size check is: {}", props.getMinRowCountForPageSizeCheck());
       LOG.info("Max row count for page size check is: {}", props.getMaxRowCountForPageSizeCheck());
-      LOG.info("Parquet Bloom Filter is {}", props.isBloomFilterEnabled()? "on": "off");
-      LOG.info("Parquet Bloom filter column names are: {}", props.getBloomFilterExpectValues().keySet());
-      LOG.info("Parquet Bloom filter column expect distinct values are: {}", props.getBloomFilterExpectValues().values());
+      LOG.info("Truncate length for column indexes is: {}", props.getColumnIndexTruncateLength());
     }
 
     WriteContext init = writeSupport.init(conf);
     ParquetFileWriter w = new ParquetFileWriter(HadoopOutputFile.fromPath(file, conf),
-        init.getSchema(), Mode.CREATE, blockSize, maxPaddingSize);
+        init.getSchema(), Mode.CREATE, blockSize, maxPaddingSize, props.getColumnIndexTruncateLength());
     w.start();
 
     float maxLoad = conf.getFloat(ParquetOutputFormat.MEMORY_POOL_RATIO,
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetReader.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetReader.java
index d9b273bb94..de20808ff8 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetReader.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetReader.java
@@ -270,6 +270,16 @@ public Builder<T> useRecordFilter() {
       return this;
     }
 
+    public Builder<T> useColumnIndexFilter(boolean useColumnIndexFilter) {
+      optionsBuilder.useColumnIndexFilter(useColumnIndexFilter);
+      return this;
+    }
+
+    public Builder<T> useColumnIndexFilter() {
+      optionsBuilder.useColumnIndexFilter();
+      return this;
+    }
+
     public Builder<T> withFileRange(long start, long end) {
       optionsBuilder.withRange(start, end);
       return this;
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java
index a32df39a5d..5b0e4f82d1 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java
@@ -278,7 +278,7 @@ public ParquetWriter(Path file, Configuration conf, WriteSupport<T> writeSupport
     MessageType schema = writeContext.getSchema();
 
     ParquetFileWriter fileWriter = new ParquetFileWriter(
-        file, schema, mode, rowGroupSize, maxPaddingSize);
+        file, schema, mode, rowGroupSize, maxPaddingSize, encodingProps.getColumnIndexTruncateLength());
     fileWriter.start();
 
     this.codecFactory = new CodecFactory(conf, encodingProps.getPageSizeThreshold());
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java
index 9f476f6d07..c55225c176 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java
@@ -1,4 +1,4 @@
-/* 
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -24,9 +24,11 @@
 import org.apache.parquet.column.EncodingStats;
 import org.apache.parquet.column.statistics.BooleanStatistics;
 import org.apache.parquet.column.statistics.Statistics;
+import org.apache.parquet.internal.hadoop.metadata.IndexReference;
 import org.apache.parquet.schema.PrimitiveType;
 import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
 import org.apache.parquet.schema.Types;
+import org.apache.yetus.audience.InterfaceAudience.Private;
 
 /**
  * Column meta data for a block stored in the file footer and passed in the InputSplit
@@ -143,18 +145,18 @@ && positiveLongFitsInAnInt(totalUncompressedSize)) {
   }
 
   public static ColumnChunkMetaData get(
-      ColumnPath path,
-      PrimitiveType type,
-      CompressionCodecName codec,
-      EncodingStats encodingStats,
-      Set<Encoding> encodings,
-      Statistics statistics,
-      long firstDataPage,
-      long dictionaryPageOffset,
-      long bloomFilterDataOffset,
-      long valueCount,
-      long totalSize,
-      long totalUncompressedSize) {
+    ColumnPath path,
+    PrimitiveType type,
+    CompressionCodecName codec,
+    EncodingStats encodingStats,
+    Set<Encoding> encodings,
+    Statistics statistics,
+    long firstDataPage,
+    long dictionaryPageOffset,
+    long bloomFilterDataOffset,
+    long valueCount,
+    long totalSize,
+    long totalUncompressedSize) {
     // to save space we store those always positive longs in ints when they fit.
     if (positiveLongFitsInAnInt(firstDataPage)
       && positiveLongFitsInAnInt(dictionaryPageOffset)
@@ -162,26 +164,26 @@ && positiveLongFitsInAnInt(valueCount)
       && positiveLongFitsInAnInt(totalSize)
       && positiveLongFitsInAnInt(totalUncompressedSize)) {
       return new IntColumnChunkMetaData(
-          path, type, codec,
-          encodingStats, encodings,
-          statistics,
-          firstDataPage,
-          dictionaryPageOffset,
-          bloomFilterDataOffset,
-          valueCount,
-          totalSize,
-          totalUncompressedSize);
+        path, type, codec,
+        encodingStats, encodings,
+        statistics,
+        firstDataPage,
+        dictionaryPageOffset,
+        bloomFilterDataOffset,
+        valueCount,
+        totalSize,
+        totalUncompressedSize);
     } else {
       return new LongColumnChunkMetaData(
-          path, type, codec,
-          encodingStats, encodings,
-          statistics,
-          firstDataPage,
-          dictionaryPageOffset,
-          bloomFilterDataOffset,
-          valueCount,
-          totalSize,
-          totalUncompressedSize);
+        path, type, codec,
+        encodingStats, encodings,
+        statistics,
+        firstDataPage,
+        dictionaryPageOffset,
+        bloomFilterDataOffset,
+        valueCount,
+        totalSize,
+        totalUncompressedSize);
     }
   }
 
@@ -213,6 +215,9 @@ protected static boolean positiveLongFitsInAnInt(long value) {
   // we save 3 references by storing together the column properties that have few distinct values
   private final ColumnChunkProperties properties;
 
+  private IndexReference columnIndexReference;
+  private IndexReference offsetIndexReference;
+
   protected ColumnChunkMetaData(ColumnChunkProperties columnChunkProperties) {
     this(null, columnChunkProperties);
   }
@@ -229,9 +234,7 @@ public CompressionCodecName getCodec() {
   /**
    *
    * @return column identifier
-   * @deprecated will be removed in 2.0.0. Use {@link #getPrimitiveType()} instead.
    */
-  @Deprecated
   public ColumnPath getPath() {
     return properties.getPath();
   }
@@ -287,6 +290,40 @@ public PrimitiveType getPrimitiveType() {
    */
   abstract public Statistics getStatistics();
 
+  /**
+   * @return the reference to the column index, or {@code null} if none has been set
+   */
+  @Private
+  public IndexReference getColumnIndexReference() {
+    return columnIndexReference;
+  }
+
+  /**
+   * @param indexReference
+   *          the reference to the column index; stored as given, replacing any earlier one
+   */
+  @Private
+  public void setColumnIndexReference(IndexReference indexReference) {
+    this.columnIndexReference = indexReference;
+  }
+
+  /**
+   * @return the reference to the offset index, or {@code null} if none has been set
+   */
+  @Private
+  public IndexReference getOffsetIndexReference() {
+    return offsetIndexReference;
+  }
+
+  /**
+   * @param offsetIndexReference
+   *          the reference to the offset index; stored as given, replacing any earlier one
+   */
+  @Private
+  public void setOffsetIndexReference(IndexReference offsetIndexReference) {
+    this.offsetIndexReference = offsetIndexReference;
+  }
+
   /**
    * @return all the encodings used in this column
    */
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/util/BlocksCombiner.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/util/BlocksCombiner.java
new file mode 100644
index 0000000000..02dadc7f54
--- /dev/null
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/util/BlocksCombiner.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.hadoop.util;
+
+import org.apache.parquet.hadoop.ParquetFileReader;
+import org.apache.parquet.hadoop.metadata.BlockMetaData;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import static java.util.Collections.unmodifiableList;
+
+/** Groups the blocks of several readers into unions, starting a new union when a size limit would be passed. */
+public class BlocksCombiner {
+  private static final Logger LOG = LoggerFactory.getLogger(BlocksCombiner.class);
+
+  public static List<SmallBlocksUnion> combineLargeBlocks(List<ParquetFileReader> readers, long maxBlockSize) {
+    List<SmallBlocksUnion> unions = new ArrayList<>();
+    List<SmallBlock> pending = new ArrayList<>();
+    long pendingBytes = 0;
+    long pendingRows = 0;
+    for (ParquetFileReader reader : readers) {
+      for (int index = 0; index < reader.blocksCount(); index++) {
+        BlockMetaData metaData = reader.getBlockMetaData(index);
+        if (!pending.isEmpty() && pendingBytes + metaData.getTotalByteSize() > maxBlockSize) {
+          unions.add(new SmallBlocksUnion(pending, pendingRows));
+          pending = new ArrayList<>();
+          pendingBytes = 0;
+          pendingRows = 0;
+        }
+        pending.add(new SmallBlock(reader, index));
+        pendingBytes += metaData.getTotalByteSize();
+        pendingRows += metaData.getRowCount();
+      }
+    }
+    if (!pending.isEmpty()) {
+      unions.add(new SmallBlocksUnion(pending, pendingRows));
+    }
+    return unmodifiableList(unions);
+  }
+
+  public static void closeReaders(List<ParquetFileReader> readers) {
+    for (ParquetFileReader reader : readers) {
+      try {
+        reader.close();
+      } catch (IOException e) {
+        LOG.error("Error closing reader {}", reader.getFile(), e);
+      }
+    }
+  }
+
+  public static class SmallBlocksUnion {
+    private final List<SmallBlock> blocks;
+    private final long rowCount;
+
+    public SmallBlocksUnion(List<SmallBlock> blocks, long rowCount) {
+      this.blocks = blocks;
+      this.rowCount = rowCount;
+    }
+
+    public List<SmallBlock> getBlocks() {
+      return blocks;
+    }
+
+    public long getRowCount() {
+      return rowCount;
+    }
+  }
+
+  public static class SmallBlock {
+    private final ParquetFileReader reader;
+    private final int blockIndex;
+
+    public SmallBlock(ParquetFileReader reader, int blockIndex) {
+      this.reader = reader;
+      this.blockIndex = blockIndex;
+    }
+
+    public ParquetFileReader getReader() {
+      return reader;
+    }
+
+    public int getBlockIndex() {
+      return blockIndex;
+    }
+  }
+}
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/internal/hadoop/metadata/IndexReference.java b/parquet-hadoop/src/main/java/org/apache/parquet/internal/hadoop/metadata/IndexReference.java
new file mode 100644
index 0000000000..5e02f1efec
--- /dev/null
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/internal/hadoop/metadata/IndexReference.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.internal.hadoop.metadata;
+
+/**
+ * Reference to a serialized index (either an OffsetIndex or a ColumnIndex) of a row-group: the offset at which the
+ * index starts and its length, so that the reader can read exactly the referenced data.
+ */
+public class IndexReference {
+  private final long offset;
+  private final int length;
+
+  public IndexReference(long offset, int length) {
+    this.offset = offset;
+    this.length = length;
+  }
+
+  public long getOffset() {
+    return offset;
+  }
+
+  public int getLength() {
+    return length;
+  }
+}
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/filter2/recordlevel/PhoneBookWriter.java b/parquet-hadoop/src/test/java/org/apache/parquet/filter2/recordlevel/PhoneBookWriter.java
index 7acda935c3..18ddca0d96 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/filter2/recordlevel/PhoneBookWriter.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/filter2/recordlevel/PhoneBookWriter.java
@@ -31,6 +31,7 @@
 import org.apache.parquet.filter2.compat.FilterCompat.Filter;
 import org.apache.parquet.hadoop.ParquetReader;
 import org.apache.parquet.hadoop.ParquetWriter;
+import org.apache.parquet.hadoop.example.ExampleParquetWriter;
 import org.apache.parquet.hadoop.example.GroupReadSupport;
 import org.apache.parquet.hadoop.example.GroupWriteSupport;
 import org.apache.parquet.schema.MessageType;
@@ -91,6 +92,11 @@ public int hashCode() {
       result = 31 * result + (lat != null ? lat.hashCode() : 0);
       return result;
     }
+
+    @Override
+    public String toString() {
+      return "Location [lon=" + lon + ", lat=" + lat + "]";
+    }
   }
 
   public static class PhoneNumber {
@@ -129,6 +135,11 @@ public int hashCode() {
       result = 31 * result + (kind != null ? kind.hashCode() : 0);
       return result;
     }
+
+    @Override
+    public String toString() {
+      return "PhoneNumber [number=" + number + ", kind=" + kind + "]";
+    }
   }
 
   public static class User {
@@ -183,6 +194,11 @@ public int hashCode() {
       result = 31 * result + (location != null ? location.hashCode() : 0);
       return result;
     }
+
+    @Override
+    public String toString() {
+      return "User [id=" + id + ", name=" + name + ", phoneNumbers=" + phoneNumbers + ", location=" + location + "]";
+    }
   }
 
   public static SimpleGroup groupFromUser(User user) {
@@ -216,6 +232,56 @@ public static SimpleGroup groupFromUser(User user) {
     return root;
   }
 
+  private static User userFromGroup(Group root) {
+    return new User(getLong(root, "id"), getString(root, "name"), getPhoneNumbers(getGroup(root, "phoneNumbers")),
+        getLocation(getGroup(root, "location")));
+  }
+
+  private static List<PhoneNumber> getPhoneNumbers(Group phoneNumbers) {
+    if (phoneNumbers == null) {
+      return null;
+    }
+    List<PhoneNumber> list = new ArrayList<>();
+    for (int i = 0, n = phoneNumbers.getFieldRepetitionCount("phone"); i < n; ++i) {
+      Group phone = phoneNumbers.getGroup("phone", i);
+      list.add(new PhoneNumber(getLong(phone, "number"), getString(phone, "kind")));
+    }
+    return list;
+  }
+
+  /**
+   * Builds a Location from the "lon" and "lat" fields of the group; a null group yields null.
+   */
+  private static Location getLocation(Group location) {
+    return location == null ? null : new Location(getDouble(location, "lon"), getDouble(location, "lat"));
+  }
+
+  // Tells whether the field has no value in the group (repetition count 0). A count of 1 means
+  // a value is present; any other count is rejected with an AssertionError.
+  private static boolean isNull(Group group, String field) {
+    int count = group.getFieldRepetitionCount(field);
+    if (count != 0 && count != 1) {
+      throw new AssertionError("Invalid repetitionCount " + count + " for field " + field + " in group " + group);
+    }
+    return count == 0;
+  }
+
+  private static Long getLong(Group group, String field) {
+    return isNull(group, field) ? null : group.getLong(field, 0);
+  }
+
+  private static String getString(Group group, String field) {
+    return isNull(group, field) ? null : group.getString(field, 0);
+  }
+
+  private static Double getDouble(Group group, String field) {
+    return isNull(group, field) ? null : group.getDouble(field, 0);
+  }
+
+  private static Group getGroup(Group group, String field) {
+    return isNull(group, field) ? null : group.getGroup(field, 0);
+  }
+
   public static File writeToFile(List<User> users) throws IOException {
     File f = File.createTempFile("phonebook", ".parquet");
     f.deleteOnExit();
@@ -229,25 +295,30 @@ public static File writeToFile(List<User> users) throws IOException {
   }
 
   public static void writeToFile(File f, List<User> users) throws IOException {
-    Configuration conf = new Configuration();
-    GroupWriteSupport.setSchema(schema, conf);
+    write(ExampleParquetWriter.builder(new Path(f.getAbsolutePath())), users);
+  }
 
-    ParquetWriter<Group> writer = new ParquetWriter<Group>(new Path(f.getAbsolutePath()), conf, new GroupWriteSupport());
-    for (User u : users) {
-      writer.write(groupFromUser(u));
+  public static void write(ParquetWriter.Builder<Group, ?> builder, List<User> users) throws IOException {
+    builder.config(GroupWriteSupport.PARQUET_EXAMPLE_SCHEMA, schema.toString());
+    try (ParquetWriter<Group> writer = builder.build()) {
+      for (User u : users) {
+        writer.write(groupFromUser(u));
+      }
     }
-    writer.close();
   }
 
-  public static List<Group> readFile(File f, Filter filter) throws IOException {
+  private static ParquetReader<Group> createReader(Path file, Filter filter) throws IOException {
     Configuration conf = new Configuration();
     GroupWriteSupport.setSchema(schema, conf);
 
-    ParquetReader<Group> reader =
-        ParquetReader.builder(new GroupReadSupport(), new Path(f.getAbsolutePath()))
-                     .withConf(conf)
-                     .withFilter(filter)
-                     .build();
+    return ParquetReader.builder(new GroupReadSupport(), file)
+        .withConf(conf)
+        .withFilter(filter)
+        .build();
+  }
+
+  public static List<Group> readFile(File f, Filter filter) throws IOException {
+    ParquetReader<Group> reader = createReader(new Path(f.getAbsolutePath()), filter);
 
     Group current;
     List<Group> users = new ArrayList<Group>();
@@ -261,6 +332,16 @@ public static List<Group> readFile(File f, Filter filter) throws IOException {
     return users;
   }
 
+  public static List<User> readUsers(ParquetReader.Builder<Group> builder) throws IOException {
+    List<User> users = new ArrayList<>();
+    try (ParquetReader<Group> reader = builder.set(GroupWriteSupport.PARQUET_EXAMPLE_SCHEMA, schema.toString()).build()) {
+      for (Group group = reader.read(); group != null; group = reader.read()) {
+        users.add(userFromGroup(group));
+      }
+    } // the reader is closed here, also when read() throws
+    return users;
+  }
+
   public static void main(String[] args) throws IOException {
     File f = new File(args[0]);
     writeToFile(f, TestRecordLevelFilters.makeUsers());
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
index d1a3a3c233..358a29a671 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
@@ -25,6 +25,7 @@
 import static org.apache.parquet.schema.MessageTypeParser.parseMessageType;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertSame;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
@@ -69,6 +70,11 @@
 import org.apache.parquet.hadoop.metadata.ColumnPath;
 import org.apache.parquet.hadoop.metadata.CompressionCodecName;
 import org.apache.parquet.hadoop.metadata.ParquetMetadata;
+import org.apache.parquet.internal.column.columnindex.BoundaryOrder;
+import org.apache.parquet.internal.column.columnindex.ColumnIndex;
+import org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder;
+import org.apache.parquet.internal.column.columnindex.OffsetIndex;
+import org.apache.parquet.internal.column.columnindex.OffsetIndexBuilder;
 import org.apache.parquet.io.api.Binary;
 import org.apache.parquet.schema.PrimitiveType;
 import org.apache.parquet.schema.LogicalTypeAnnotation;
@@ -201,6 +207,12 @@ public void testTimeLogicalTypes() {
       .required(PrimitiveTypeName.INT64)
       .as(timestampType(true, LogicalTypeAnnotation.TimeUnit.MICROS))
       .named("aTimestampUtcMicros")
+      .required(PrimitiveTypeName.INT64)
+      .as(timestampType(false, LogicalTypeAnnotation.TimeUnit.NANOS))
+      .named("aTimestampNonUtcNanos")
+      .required(PrimitiveTypeName.INT64)
+      .as(timestampType(true, LogicalTypeAnnotation.TimeUnit.NANOS))
+      .named("aTimestampUtcNanos")
       .required(PrimitiveTypeName.INT32)
       .as(timeType(false, LogicalTypeAnnotation.TimeUnit.MILLIS))
       .named("aTimeNonUtcMillis")
@@ -213,6 +225,12 @@ public void testTimeLogicalTypes() {
       .required(PrimitiveTypeName.INT64)
       .as(timeType(true, LogicalTypeAnnotation.TimeUnit.MICROS))
       .named("aTimeUtcMicros")
+      .required(PrimitiveTypeName.INT64)
+      .as(timeType(false, LogicalTypeAnnotation.TimeUnit.NANOS))
+      .named("aTimeNonUtcNanos")
+      .required(PrimitiveTypeName.INT64)
+      .as(timeType(true, LogicalTypeAnnotation.TimeUnit.NANOS))
+      .named("aTimeUtcNanos")
       .named("Message");
     List<SchemaElement> parquetSchema = parquetMetadataConverter.toParquetSchema(expected);
     MessageType schema = parquetMetadataConverter.fromParquetSchema(parquetSchema, null);
@@ -973,4 +991,60 @@ public void testColumnOrders() throws IOException {
     assertEquals(ColumnOrder.undefined(), columns.get(1).getPrimitiveType().columnOrder());
     assertEquals(ColumnOrder.undefined(), columns.get(2).getPrimitiveType().columnOrder());
   }
+
+  @Test
+  public void testOffsetIndexConversion() {
+    OffsetIndexBuilder builder = OffsetIndexBuilder.getBuilder();
+    builder.add(1000, 10000, 0); // page 0: offset, compressed page size, first row index
+    builder.add(22000, 12000, 100); // page 1
+    OffsetIndex offsetIndex = ParquetMetadataConverter
+        .fromParquetOffsetIndex(ParquetMetadataConverter.toParquetOffsetIndex(builder.build(100000)));
+    assertEquals(2, offsetIndex.getPageCount());
+    assertEquals(101000, offsetIndex.getOffset(0)); // 1000 shifted by the 100000 passed to build()
+    assertEquals(10000, offsetIndex.getCompressedPageSize(0));
+    assertEquals(0, offsetIndex.getFirstRowIndex(0));
+    assertEquals(122000, offsetIndex.getOffset(1)); // 22000 shifted by the same 100000
+    assertEquals(12000, offsetIndex.getCompressedPageSize(1));
+    assertEquals(100, offsetIndex.getFirstRowIndex(1));
+  }
+
+  @Test
+  public void testColumnIndexConversion() {
+    PrimitiveType type = Types.required(PrimitiveTypeName.INT64).named("test_int64");
+    ColumnIndexBuilder builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
+    Statistics<?> stats = Statistics.createStats(type);
+    stats.incrementNumNulls(16);
+    stats.updateStats(-100L);
+    stats.updateStats(100L);
+    builder.add(stats); // page 0: 16 nulls, values -100 and 100
+    stats = Statistics.createStats(type);
+    stats.incrementNumNulls(111);
+    builder.add(stats); // page 1: 111 nulls, no values
+    stats = Statistics.createStats(type);
+    stats.updateStats(200L);
+    stats.updateStats(500L);
+    builder.add(stats); // page 2: no nulls, values 200 and 500
+    org.apache.parquet.format.ColumnIndex parquetColumnIndex =
+        ParquetMetadataConverter.toParquetColumnIndex(type, builder.build());
+    ColumnIndex columnIndex = ParquetMetadataConverter.fromParquetColumnIndex(type, parquetColumnIndex);
+    assertEquals(BoundaryOrder.ASCENDING, columnIndex.getBoundaryOrder());
+    assertEquals(Arrays.asList(false, true, false), columnIndex.getNullPages());
+    assertEquals(Arrays.asList(16L, 111L, 0L), columnIndex.getNullCounts());
+    assertEquals(Arrays.asList(
+        ByteBuffer.wrap(BytesUtils.longToBytes(-100L)),
+        ByteBuffer.allocate(0),
+        ByteBuffer.wrap(BytesUtils.longToBytes(200L))), columnIndex.getMinValues());
+    assertEquals(Arrays.asList(
+        ByteBuffer.wrap(BytesUtils.longToBytes(100L)),
+        ByteBuffer.allocate(0),
+        ByteBuffer.wrap(BytesUtils.longToBytes(500L))), columnIndex.getMaxValues());
+
+    assertNull("Should handle null column index", ParquetMetadataConverter
+        .toParquetColumnIndex(Types.required(PrimitiveTypeName.INT32).named("test_int32"), null));
+    assertNull("Should ignore unsupported types", ParquetMetadataConverter
+        .toParquetColumnIndex(Types.required(PrimitiveTypeName.INT96).named("test_int96"), columnIndex));
+    assertNull("Should ignore unsupported types",
+        ParquetMetadataConverter.fromParquetColumnIndex(Types.required(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY)
+            .length(12).as(OriginalType.INTERVAL).named("test_interval"), parquetColumnIndex));
+  }
 }
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestColumnChunkPageWriteStore.java b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestColumnChunkPageWriteStore.java
index a5381f073b..9a27defe15 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestColumnChunkPageWriteStore.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestColumnChunkPageWriteStore.java
@@ -18,8 +18,13 @@
  */
 package org.apache.parquet.hadoop;
 
+import static org.junit.Assert.assertArrayEquals;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.mockito.Matchers.any;
 import static org.mockito.Matchers.eq;
+import static org.mockito.Matchers.isNull;
+import static org.mockito.Matchers.same;
 import static org.mockito.Mockito.inOrder;
 import static org.apache.parquet.column.Encoding.PLAIN;
 import static org.apache.parquet.column.Encoding.RLE;
@@ -51,13 +56,23 @@
 import org.apache.parquet.column.ColumnDescriptor;
 import org.apache.parquet.column.Encoding;
 import org.apache.parquet.column.page.DataPageV2;
+import org.apache.parquet.column.page.DictionaryPage;
 import org.apache.parquet.column.page.PageReadStore;
 import org.apache.parquet.column.page.PageReader;
 import org.apache.parquet.column.page.PageWriter;
 import org.apache.parquet.column.statistics.BinaryStatistics;
 import org.apache.parquet.column.statistics.Statistics;
+import org.apache.parquet.hadoop.ParquetFileWriter.Mode;
+import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
 import org.apache.parquet.hadoop.metadata.CompressionCodecName;
 import org.apache.parquet.hadoop.metadata.ParquetMetadata;
+import org.apache.parquet.hadoop.util.HadoopOutputFile;
+import org.apache.parquet.internal.column.columnindex.ColumnIndex;
+import org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder;
+import org.apache.parquet.internal.column.columnindex.OffsetIndex;
+import org.apache.parquet.internal.column.columnindex.OffsetIndexBuilder;
+import org.apache.parquet.io.OutputFile;
+import org.apache.parquet.io.PositionOutputStream;
 import org.apache.parquet.schema.MessageType;
 import org.apache.parquet.schema.MessageTypeParser;
 import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
@@ -66,6 +81,40 @@
 
 public class TestColumnChunkPageWriteStore {
 
+  // Delegates to a HadoopOutputFile and keeps the PositionOutputStream it hands out so tests can query it
+  private static class OutputFileForTesting implements OutputFile {
+    private final HadoopOutputFile delegate;
+    private PositionOutputStream stream; // assigned by create/createOrOverwrite; null before that
+
+    OutputFileForTesting(Path path, Configuration conf) throws IOException {
+      this.delegate = HadoopOutputFile.fromPath(path, conf);
+    }
+
+    PositionOutputStream out() {
+      return stream;
+    }
+
+    @Override
+    public PositionOutputStream create(long blockSizeHint) throws IOException {
+      return stream = delegate.create(blockSizeHint);
+    }
+
+    @Override
+    public PositionOutputStream createOrOverwrite(long blockSizeHint) throws IOException {
+      return stream = delegate.createOrOverwrite(blockSizeHint);
+    }
+
+    @Override
+    public boolean supportsBlockSize() {
+      return delegate.supportsBlockSize();
+    }
+
+    @Override
+    public long defaultBlockSize() {
+      return delegate.defaultBlockSize();
+    }
+  }
+
   private int pageSize = 1024;
   private int initialSize = 1024;
   private Configuration conf;
@@ -98,13 +147,21 @@ public void test() throws Exception {
     BytesInput data = BytesInput.fromInt(v);
     int rowCount = 5;
     int nullCount = 1;
+    statistics.incrementNumNulls(nullCount);
+    statistics.setMinMaxFromBytes(new byte[] {0, 1, 2}, new byte[] {0, 1, 2, 3});
+    long pageOffset;
+    long pageSize;
 
     {
-      ParquetFileWriter writer = new ParquetFileWriter(conf, schema, file);
+      OutputFileForTesting outputFile = new OutputFileForTesting(file, conf);
+      ParquetFileWriter writer = new ParquetFileWriter(outputFile, schema, Mode.CREATE,
+          ParquetWriter.DEFAULT_BLOCK_SIZE, ParquetWriter.MAX_PADDING_SIZE_DEFAULT);
       writer.start();
       writer.startBlock(rowCount);
+      pageOffset = outputFile.out().getPos();
       {
-        ColumnChunkPageWriteStore store = new ColumnChunkPageWriteStore(compressor(GZIP), schema , new HeapByteBufferAllocator());
+        ColumnChunkPageWriteStore store = new ColumnChunkPageWriteStore(compressor(GZIP), schema,
+            new HeapByteBufferAllocator(), Integer.MAX_VALUE);
         PageWriter pageWriter = store.getPageWriter(col);
         pageWriter.writePageV2(
             rowCount, nullCount, valueCount,
@@ -112,6 +169,7 @@ public void test() throws Exception {
             dataEncoding, data,
             statistics);
         store.flushToFileWriter(writer);
+        pageSize = outputFile.out().getPos() - pageOffset;
       }
       writer.endBlock();
       writer.end(new HashMap<String, String>());
@@ -132,6 +190,20 @@ public void test() throws Exception {
       assertEquals(dataEncoding, page.getDataEncoding());
       assertEquals(v, intValue(page.getData()));
       assertEquals(statistics.toString(), page.getStatistics().toString());
+
+      // Checking column/offset indexes for the one page
+      ColumnChunkMetaData column = footer.getBlocks().get(0).getColumns().get(0);
+      ColumnIndex columnIndex = reader.readColumnIndex(column);
+      assertArrayEquals(statistics.getMinBytes(), columnIndex.getMinValues().get(0).array());
+      assertArrayEquals(statistics.getMaxBytes(), columnIndex.getMaxValues().get(0).array());
+      assertEquals(statistics.getNumNulls(), columnIndex.getNullCounts().get(0).longValue());
+      assertFalse(columnIndex.getNullPages().get(0));
+      OffsetIndex offsetIndex = reader.readOffsetIndex(column);
+      assertEquals(1, offsetIndex.getPageCount());
+      assertEquals(pageSize, offsetIndex.getCompressedPageSize(0));
+      assertEquals(0, offsetIndex.getFirstRowIndex(0));
+      assertEquals(pageOffset, offsetIndex.getOffset(0));
+
       reader.close();
     }
   }
@@ -164,7 +236,7 @@ public void testColumnOrderV1() throws IOException {
     // TODO - look back at this, an allocator was being passed here in the ByteBuffer changes
     // see comment at this constructor
     ColumnChunkPageWriteStore store = new ColumnChunkPageWriteStore(
-        compressor(UNCOMPRESSED), schema, new HeapByteBufferAllocator());
+        compressor(UNCOMPRESSED), schema, new HeapByteBufferAllocator(), Integer.MAX_VALUE);
 
     for (ColumnDescriptor col : schema.getColumns()) {
       PageWriter pageWriter = store.getPageWriter(col);
@@ -175,8 +247,20 @@ public void testColumnOrderV1() throws IOException {
     store.flushToFileWriter(mockFileWriter);
 
     for (ColumnDescriptor col : schema.getColumns()) {
-      inOrder.verify(mockFileWriter).startColumn(
-          eq(col), eq((long) fakeCount), eq(UNCOMPRESSED));
+      inOrder.verify(mockFileWriter).writeColumnChunk(
+          eq(col),
+          eq((long) fakeCount),
+          eq(UNCOMPRESSED),
+          isNull(DictionaryPage.class),
+          any(),
+          eq(fakeData.size()),
+          eq(fakeData.size()),
+          eq(fakeStats),
+          same(ColumnIndexBuilder.getNoOpBuilder()), // Deprecated writePage -> no column index
+          same(OffsetIndexBuilder.getNoOpBuilder()), // Deprecated writePage -> no offset index
+          any(),
+          any(),
+          any());
     }
   }
 
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestColumnIndexFiltering.java b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestColumnIndexFiltering.java
new file mode 100644
index 0000000000..71155ced7b
--- /dev/null
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestColumnIndexFiltering.java
@@ -0,0 +1,442 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.hadoop;
+
+import static java.util.Collections.emptyList;
+import static org.apache.parquet.filter2.predicate.FilterApi.and;
+import static org.apache.parquet.filter2.predicate.FilterApi.binaryColumn;
+import static org.apache.parquet.filter2.predicate.FilterApi.doubleColumn;
+import static org.apache.parquet.filter2.predicate.FilterApi.eq;
+import static org.apache.parquet.filter2.predicate.FilterApi.gtEq;
+import static org.apache.parquet.filter2.predicate.FilterApi.longColumn;
+import static org.apache.parquet.filter2.predicate.FilterApi.lt;
+import static org.apache.parquet.filter2.predicate.FilterApi.ltEq;
+import static org.apache.parquet.filter2.predicate.FilterApi.not;
+import static org.apache.parquet.filter2.predicate.FilterApi.notEq;
+import static org.apache.parquet.filter2.predicate.FilterApi.or;
+import static org.apache.parquet.filter2.predicate.FilterApi.userDefined;
+import static org.apache.parquet.filter2.predicate.LogicalInverter.invert;
+import static org.apache.parquet.hadoop.ParquetFileWriter.Mode.OVERWRITE;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.nio.file.Files;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Random;
+import java.util.function.Predicate;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.parquet.column.ParquetProperties.WriterVersion;
+import org.apache.parquet.filter2.compat.FilterCompat;
+import org.apache.parquet.filter2.compat.FilterCompat.Filter;
+import org.apache.parquet.filter2.predicate.FilterPredicate;
+import org.apache.parquet.filter2.predicate.Statistics;
+import org.apache.parquet.filter2.predicate.UserDefinedPredicate;
+import org.apache.parquet.filter2.recordlevel.PhoneBookWriter;
+import org.apache.parquet.filter2.recordlevel.PhoneBookWriter.Location;
+import org.apache.parquet.filter2.recordlevel.PhoneBookWriter.PhoneNumber;
+import org.apache.parquet.filter2.recordlevel.PhoneBookWriter.User;
+import org.apache.parquet.hadoop.example.ExampleParquetWriter;
+import org.apache.parquet.hadoop.example.GroupReadSupport;
+import org.apache.parquet.io.api.Binary;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Unit tests for high level column index based filtering.
+ */
+@RunWith(Parameterized.class)
+public class TestColumnIndexFiltering {
+  private static final Logger LOGGER = LoggerFactory.getLogger(TestColumnIndexFiltering.class);
+  private static final Random RANDOM = new Random(42);
+  private static final String[] PHONE_KINDS = { null, "mobile", "home", "work" };
+  private static final List<User> DATA = Collections.unmodifiableList(generateData(10000));
+  private static final Path FILE_V1 = createTempFile();
+  private static final Path FILE_V2 = createTempFile();
+
+  @Parameters
+  public static Collection<Object[]> params() { // one parameter set per generated file
+    return Arrays.asList(new Object[][] { { FILE_V1 }, { FILE_V2 } });
+  }
+
+  private final Path file;
+
+  public TestColumnIndexFiltering(Path file) { // file: the single element of one params() entry
+    this.file = file;
+  }
+
+  private static List<User> generateData(int rowCount) { // ids are 0..rowCount-1, in list order
+    List<String> names = generateNames(rowCount);
+    List<User> result = new ArrayList<>();
+    for (int id = 0; id < rowCount; id++) {
+      result.add(new User(id, names.get(id), generatePhoneNumbers(), generateLocation(id, rowCount)));
+    }
+    return result;
+  }
+
+  // Returns the names in descending order with rowCount / 100 nulls inserted at random positions
+  private static List<String> generateNames(int rowCount) {
+    // Adding fixed values for filtering
+    List<String> names = new ArrayList<>(Arrays.asList(
+        "anderson", "anderson", "miller", "miller", "miller", "thomas", "thomas", "williams"));
+
+    int nullCount = rowCount / 100;
+    int maxLength = 8;
+    String alphabet = "aabcdeefghiijklmnoopqrstuuvwxyz";
+
+    // Random names of 0..maxLength-1 characters fill up the rest of the list. The bound is inclusive, so
+    // (when it starts non-negative) the loop yields one name more than rowCount requires; this is kept as
+    // is because a different number of draws would shift every later value taken from the seeded RANDOM.
+    int remaining = rowCount - names.size() - nullCount;
+    while (remaining >= 0) {
+      int length = RANDOM.nextInt(maxLength);
+      char[] chars = new char[length];
+      for (int pos = 0; pos < length; ++pos) {
+        chars[pos] = alphabet.charAt(RANDOM.nextInt(alphabet.length()));
+      }
+      names.add(new String(chars));
+      --remaining;
+    }
+
+    // Descending order; the non-null names are sorted before the nulls are mixed in
+    names.sort(Comparator.reverseOrder());
+
+    // Adding nulls to random places
+    for (int added = 0; added < nullCount; ++added) {
+      names.add(RANDOM.nextInt(names.size()), null);
+    }
+
+    return names;
+  }
+
+  private static List<PhoneNumber> generatePhoneNumbers() {
+    int count = RANDOM.nextInt(5) - 1; // -1..3; a negative count stands for a missing list
+    if (count < 0) {
+      return null;
+    }
+    List<PhoneNumber> numbers = new ArrayList<>(count);
+    while (numbers.size() < count) {
+      long number = 100000 + Math.abs(RANDOM.nextLong() % 900000); // 6 digits numbers
+      String kind = PHONE_KINDS[RANDOM.nextInt(PHONE_KINDS.length)];
+      numbers.add(new PhoneNumber(number, kind));
+    }
+    return numbers;
+  }
+
+  private static Location generateLocation(int id, int rowCount) {
+    if (RANDOM.nextDouble() < 0.01) {
+      return null; // no location at all for this user
+    }
+    boolean negativeLat = id < rowCount / 2;
+    boolean negativeLon = id < rowCount / 4 || id >= 3 * rowCount / 4;
+    double lat = RANDOM.nextDouble() * 90.0 - (negativeLat ? 90.0 : 0.0);
+    double lon = RANDOM.nextDouble() * 90.0 - (negativeLon ? 90.0 : 0.0);
+    return new Location(RANDOM.nextDouble() < 0.01 ? null : lat, RANDOM.nextDouble() < 0.01 ? null : lon);
+  }
+
+  private static Path createTempFile() { // creates the file eagerly and returns its absolute path as a Path
+    try {
+      return new Path(Files.createTempFile("test-ci_", ".parquet").toAbsolutePath().toString());
+    } catch (IOException e) {
+      throw new AssertionError("Unable to create temporary file", e);
+    }
+  }
+
+  private List<User> readUsers(FilterPredicate filter, boolean useOtherFiltering) throws IOException {
+    return readUsers(filter, useOtherFiltering, true); // column index filtering enabled
+  }
+
+  private List<User> readUsers(FilterPredicate filter, boolean useOtherFiltering, boolean useColumnIndexFilter)
+      throws IOException {
+    return readUsers(FilterCompat.get(filter), useOtherFiltering, useColumnIndexFilter);
+  }
+
+  private List<User> readUsers(Filter filter, boolean useOtherFiltering) throws IOException {
+    return readUsers(filter, useOtherFiltering, true); // column index filtering enabled
+  }
+
+  private List<User> readUsers(Filter filter, boolean useOtherFiltering, boolean useColumnIndexFilter)
+      throws IOException {
+    return PhoneBookWriter.readUsers(ParquetReader.builder(new GroupReadSupport(), file)
+        .withFilter(filter)
+        .useDictionaryFilter(useOtherFiltering) // dictionary, stats and record filters are toggled together
+        .useStatsFilter(useOtherFiltering)
+        .useRecordFilter(useOtherFiltering)
+        .useColumnIndexFilter(useColumnIndexFilter));
+  }
+
+  // Asserts that expected is a subsequence of actual (assumes that both are in the same order)
+  private static void assertContains(Stream<User> expected, List<User> actual) {
+    Iterator<User> expIt = expected.iterator();
+    if (!expIt.hasNext()) {
+      return;
+    }
+    User exp = expIt.next();
+    for (User act : actual) {
+      if (act.equals(exp)) {
+        if (!expIt.hasNext()) {
+          return; // the last expected element was matched as well
+        }
+        exp = expIt.next();
+      }
+    }
+    throw new AssertionError("Not all expected elements are in the actual list. E.g.: " + exp);
+  }
+
+  private void assertCorrectFiltering(Predicate<User> expectedFilter, FilterPredicate actualFilter)
+      throws IOException {
+    List<User> expected = DATA.stream().filter(expectedFilter).collect(Collectors.toList());
+    // Check with only column index based filtering
+    List<User> result = readUsers(actualFilter, false);
+    assertTrue("Column-index filtering should drop some pages", result.size() < DATA.size());
+    LOGGER.info("{}/{} records read; filtering ratio: {}%", result.size(), DATA.size(),
+        100 * result.size() / DATA.size());
+    // Asserts that all the required records are in the result
+    assertContains(expected.stream(), result);
+    // Asserts that all the retrieved records are in the file (validating non-matching records)
+    assertContains(result.stream(), DATA);
+
+    // Check with all the filtering to ensure the result contains exactly the required values
+    result = readUsers(actualFilter, true);
+    assertEquals(expected, result);
+  }
+
+  @BeforeClass
+  public static void createFile() throws IOException {
+    int pageSize = DATA.size() / 10;     // Ensure that several pages will be created
+    int rowGroupSize = pageSize * 6 * 5; // Ensure that there are more row-groups created
+
+    // The same data is written twice with identical settings; only the writer version differs
+    Path[] files = { FILE_V1, FILE_V2 };
+    WriterVersion[] versions = { WriterVersion.PARQUET_1_0, WriterVersion.PARQUET_2_0 };
+    for (int i = 0; i < files.length; ++i) {
+      PhoneBookWriter.write(ExampleParquetWriter.builder(files[i])
+          .withWriteMode(OVERWRITE)
+          .withRowGroupSize(rowGroupSize)
+          .withPageSize(pageSize)
+          .withWriterVersion(versions[i]),
+          DATA);
+    }
+  }
+
+  @AfterClass
+  public static void deleteFile() throws IOException { // removes both files written by createFile()
+    FILE_V1.getFileSystem(new Configuration()).delete(FILE_V1, false);
+    FILE_V2.getFileSystem(new Configuration()).delete(FILE_V2, false);
+  }
+
+  @Test
+  public void testSimpleFiltering() throws IOException {
+    assertCorrectFiltering(
+        user -> user.getId() == 1234,
+        eq(longColumn("id"), 1234L));
+    assertCorrectFiltering(
+        user -> "miller".equals(user.getName()),
+        eq(binaryColumn("name"), Binary.fromString("miller")));
+    assertCorrectFiltering(
+        user -> user.getName() == null,
+        eq(binaryColumn("name"), null));
+  }
+
+  @Test
+  public void testNoFiltering() throws IOException {
+    // Column index filtering with no-op filter
+    assertEquals(DATA, readUsers(FilterCompat.NOOP, false));
+    assertEquals(DATA, readUsers(FilterCompat.NOOP, true));
+
+    // Column index filtering turned off
+    assertEquals(DATA.stream().filter(u -> u.getId() == 1234).collect(Collectors.toList()),
+        readUsers(eq(longColumn("id"), 1234L), true, false));
+    assertEquals(DATA.stream().filter(u -> "miller".equals(u.getName())).collect(Collectors.toList()),
+        readUsers(eq(binaryColumn("name"), Binary.fromString("miller")), true, false));
+    assertEquals(DATA.stream().filter(u -> u.getName() == null).collect(Collectors.toList()),
+        readUsers(eq(binaryColumn("name"), null), true, false));
+
+    // Every filtering mechanism turned off
+    assertEquals(DATA, readUsers(eq(longColumn("id"), 1234L), false, false));
+    assertEquals(DATA, readUsers(eq(binaryColumn("name"), Binary.fromString("miller")), false, false));
+    assertEquals(DATA, readUsers(eq(binaryColumn("name"), null), false, false));
+  }
+
+  @Test
+  public void testComplexFiltering() throws IOException {
+    assertCorrectFiltering(
+        user -> {
+          Location loc = user.getLocation();
+          boolean complete = loc != null && loc.getLat() != null && loc.getLon() != null;
+          return complete
+              && 37 <= loc.getLat() && loc.getLat() <= 70 && -21 <= loc.getLon() && loc.getLon() <= 35;
+        },
+        and(and(gtEq(doubleColumn("location.lat"), 37.0), ltEq(doubleColumn("location.lat"), 70.0)),
+            and(gtEq(doubleColumn("location.lon"), -21.0), ltEq(doubleColumn("location.lon"), 35.0))));
+    assertCorrectFiltering(
+        user -> {
+          Location loc = user.getLocation();
+          return loc == null || (loc.getLat() == null && loc.getLon() == null);
+        },
+        and(eq(doubleColumn("location.lat"), null), eq(doubleColumn("location.lon"), null)));
+    assertCorrectFiltering(
+        user -> {
+          String name = user.getName();
+          return name != null && name.compareTo("thomas") < 0 && user.getId() <= 3 * DATA.size() / 4;
+        },
+        and(lt(binaryColumn("name"), Binary.fromString("thomas")), ltEq(longColumn("id"), 3L * DATA.size() / 4)));
+  }
+
+  public static class NameStartsWithVowel extends UserDefinedPredicate<Binary> {
+    // Range bounds: each lower case vowel and the letter that follows it
+    private static final Binary A = Binary.fromString("a");
+    private static final Binary B = Binary.fromString("b");
+    private static final Binary E = Binary.fromString("e");
+    private static final Binary F = Binary.fromString("f");
+    private static final Binary I = Binary.fromString("i");
+    private static final Binary J = Binary.fromString("j");
+    private static final Binary O = Binary.fromString("o");
+    private static final Binary P = Binary.fromString("p");
+    private static final Binary U = Binary.fromString("u");
+    private static final Binary V = Binary.fromString("v");
+
+    // Only the lower case vowels a, e, i, o, u count;
+    // null and empty strings never match
+    private static boolean isStartingWithVowel(String str) {
+      return str != null && !str.isEmpty() && "aeiou".indexOf(str.charAt(0)) >= 0;
+    }
+
+    // True if min is not before lower and max is before upper, i.e. [min, max] lies within [lower, upper)
+    private static boolean within(Comparator<Binary> cmp, Binary min, Binary max, Binary lower, Binary upper) {
+      return cmp.compare(min, lower) >= 0 && cmp.compare(max, upper) < 0;
+    }
+
+    @Override
+    public boolean keep(Binary value) {
+      return value != null && isStartingWithVowel(value.toStringUsingUTF8());
+    }
+
+    @Override
+    public boolean canDrop(Statistics<Binary> statistics) {
+      Comparator<Binary> cmp = statistics.getComparator();
+      Binary min = statistics.getMin();
+      Binary max = statistics.getMax();
+      // Droppable if everything sorts before "a", at or after "v",
+      // or falls into one of the gaps between two vowel ranges
+      return cmp.compare(max, A) < 0
+          || within(cmp, min, max, B, E)
+          || within(cmp, min, max, F, I)
+          || within(cmp, min, max, J, O)
+          || within(cmp, min, max, P, U)
+          || cmp.compare(min, V) >= 0;
+    }
+
+    @Override
+    public boolean inverseCanDrop(Statistics<Binary> statistics) {
+      Comparator<Binary> cmp = statistics.getComparator();
+      Binary min = statistics.getMin();
+      Binary max = statistics.getMax();
+      // Droppable for the inverted predicate if [min, max] lies
+      // completely within the range of a single vowel
+      return within(cmp, min, max, A, B)
+          || within(cmp, min, max, E, F)
+          || within(cmp, min, max, I, J)
+          || within(cmp, min, max, O, P)
+          || within(cmp, min, max, U, V);
+    }
+  }
+
+  public static class IsDivisibleBy extends UserDefinedPredicate<Long> implements Serializable {
+    private final long divisor;
+
+    IsDivisibleBy(long divisor) {
+      this.divisor = divisor;
+    }
+
+    @Override
+    public boolean keep(Long value) {
+      return value != null && value % divisor == 0;
+    }
+
+    @Override
+    public boolean canDrop(Statistics<Long> statistics) {
+      long min = statistics.getMin();
+      long max = statistics.getMax();
+      // floorDiv, not '/': truncating division gives [-1, 1] a single quotient and would drop the 0 in it
+      return min % divisor != 0 && max % divisor != 0 && Math.floorDiv(min, divisor) == Math.floorDiv(max, divisor);
+    }
+
+    @Override
+    public boolean inverseCanDrop(Statistics<Long> statistics) {
+      long min = statistics.getMin();
+      return min == statistics.getMax() && min % divisor == 0;
+    }
+  }
+
+  @Test
+  public void testUDF() throws IOException {
+    assertCorrectFiltering(
+        user -> NameStartsWithVowel.isStartingWithVowel(user.getName()) || user.getId() % 234 == 0,
+        or(userDefined(binaryColumn("name"), NameStartsWithVowel.class),
+            userDefined(longColumn("id"), new IsDivisibleBy(234))));
+    assertCorrectFiltering(
+        user -> !NameStartsWithVowel.isStartingWithVowel(user.getName()) && user.getId() % 234 != 0,
+        not(or(userDefined(binaryColumn("name"), NameStartsWithVowel.class),
+            userDefined(longColumn("id"), new IsDivisibleBy(234)))));
+  }
+
+  @Test
+  public void testFilteringWithMissingColumns() throws IOException {
+    // Missing column filter is always true
+    assertEquals(DATA, readUsers(notEq(binaryColumn("not-existing-binary"), Binary.EMPTY), true));
+    assertCorrectFiltering(
+        user -> user.getId() == 1234,
+        and(eq(longColumn("id"), 1234L),
+            eq(longColumn("not-existing-long"), null)));
+    assertCorrectFiltering(
+        user -> "miller".equals(user.getName()),
+        and(eq(binaryColumn("name"), Binary.fromString("miller")),
+            invert(userDefined(binaryColumn("not-existing-binary"), NameStartsWithVowel.class))));
+
+    // Missing column filter is always false
+    assertEquals(emptyList(), readUsers(lt(longColumn("not-existing-long"), 0L), true));
+    assertCorrectFiltering(
+        user -> "miller".equals(user.getName()),
+        or(eq(binaryColumn("name"), Binary.fromString("miller")),
+            gtEq(binaryColumn("not-existing-binary"), Binary.EMPTY)));
+    assertCorrectFiltering(
+        user -> user.getId() == 1234,
+        or(eq(longColumn("id"), 1234L),
+            userDefined(longColumn("not-existing-long"), new IsDivisibleBy(1))));
+  }
+}
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileWriter.java b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileWriter.java
index 535394b370..e4a1d350cc 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileWriter.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileWriter.java
@@ -24,8 +24,12 @@
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.parquet.ParquetReadOptions;
 import org.apache.parquet.Version;
 import org.apache.parquet.bytes.BytesUtils;
+import org.apache.parquet.column.values.bloomfilter.BlockSplitBloomFilter;
+import org.apache.parquet.column.values.bloomfilter.BloomFilter;
+import org.apache.parquet.column.values.bloomfilter.BloomFilterReader;
 import org.apache.parquet.hadoop.ParquetOutputFormat.JobSummaryLevel;
 import org.junit.Assume;
 import org.junit.Rule;
@@ -39,10 +43,13 @@
 import org.apache.parquet.column.page.PageReader;
 import org.apache.parquet.column.statistics.BinaryStatistics;
 import org.apache.parquet.column.statistics.LongStatistics;
-import org.apache.parquet.column.values.bloomfilter.*;
 import org.apache.parquet.format.Statistics;
 import org.apache.parquet.hadoop.metadata.*;
+import org.apache.parquet.hadoop.util.HadoopInputFile;
 import org.apache.parquet.hadoop.util.HiddenFileFilter;
+import org.apache.parquet.internal.column.columnindex.BoundaryOrder;
+import org.apache.parquet.internal.column.columnindex.ColumnIndex;
+import org.apache.parquet.internal.column.columnindex.OffsetIndex;
 import org.apache.parquet.io.api.Binary;
 import org.apache.parquet.schema.MessageType;
 import org.apache.parquet.schema.MessageTypeParser;
@@ -52,6 +59,8 @@
 
 import java.io.File;
 import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
 import java.util.*;
 
 import static org.apache.parquet.CorruptStatistics.shouldIgnoreStatistics;
@@ -59,6 +68,7 @@
 import static org.junit.Assert.*;
 import static org.apache.parquet.column.Encoding.BIT_PACKED;
 import static org.apache.parquet.column.Encoding.PLAIN;
+import static org.apache.parquet.format.converter.ParquetMetadataConverter.MAX_STATS_SIZE;
 import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
 import static org.apache.parquet.schema.Type.Repetition.*;
 import static org.apache.parquet.hadoop.TestUtils.enforceEmptyDir;
@@ -133,44 +143,6 @@ public void testWriteMode() throws Exception {
     testFile.delete();
   }
 
-  @Test
-  public void testBloomWriteRead() throws Exception {
-    MessageType schema = MessageTypeParser.parseMessageType("message test { required binary foo; }");
-    File testFile = temp.newFile();
-    testFile.delete();
-
-    Path path = new Path(testFile.toURI());
-    Configuration configuration = new Configuration();
-    configuration.set("parquet.bloomFilter.filter.column.names", "foo");
-    String colPath[] = {"foo"};
-    ColumnDescriptor col = schema.getColumnDescription(colPath);
-
-    BinaryStatistics stats1 = new BinaryStatistics();
-
-    ParquetFileWriter w = new ParquetFileWriter(configuration, schema, path);
-    w.start();
-    w.startBlock(3);
-    w.startColumn(col, 5, CODEC);
-    w.writeDataPage(2, 4, BytesInput.from(BYTES1),stats1, BIT_PACKED, BIT_PACKED, PLAIN);
-    w.writeDataPage(3, 4, BytesInput.from(BYTES1),stats1, BIT_PACKED, BIT_PACKED, PLAIN);
-    BloomFilter bloomData = new BlockSplitBloomFilter(0);
-    bloomData.insert(bloomData.hash(Binary.fromString("hello")));
-    bloomData.insert(bloomData.hash(Binary.fromString("world")));
-    long blStarts = w.getPos();
-    w.writeBloomFilter(bloomData);
-    w.endColumn();
-    w.endBlock();
-    w.end(new HashMap<String, String>());
-    ParquetMetadata readFooter = ParquetFileReader.readFooter(configuration, path);
-    assertEquals("bloomFilter offset", blStarts, readFooter.getBlocks().get(0).getColumns().get(0).getBloomFilterOffset());
-    ParquetFileReader r = new ParquetFileReader(configuration, readFooter.getFileMetaData(), path,
-      Arrays.asList(readFooter.getBlocks().get(0)), Arrays.asList(schema.getColumnDescription(colPath)));
-    BloomFilterReader bloomFilterReader =  r.getBloomFilterDataReader(readFooter.getBlocks().get(0));
-    BloomFilter bloomDataRead = bloomFilterReader.readBloomFilter(col);
-    assertTrue(bloomDataRead.find(bloomData.hash(Binary.fromString("hello"))));
-    assertTrue(bloomDataRead.find(bloomData.hash(Binary.fromString("world"))));
-  }
-
   @Test
   public void testWriteRead() throws Exception {
     File testFile = temp.newFile();
@@ -250,6 +222,42 @@ public void testWriteRead() throws Exception {
     PrintFooter.main(new String[] {path.toString()});
   }
 
+  @Test
+  public void testBloomWriteRead() throws Exception {
+    MessageType schema = MessageTypeParser.parseMessageType("message test { required binary foo; }");
+    File testFile = temp.newFile();
+    testFile.delete();
+    Path path = new Path(testFile.toURI());
+    Configuration configuration = new Configuration();
+    configuration.set("parquet.bloomFilter.filter.column.names", "foo");
+    ColumnDescriptor col = schema.getColumnDescription(new String[] {"foo"});
+    BinaryStatistics stats1 = new BinaryStatistics();
+    ParquetFileWriter w = new ParquetFileWriter(configuration, schema, path);
+    w.start();
+    w.startBlock(3);
+    w.startColumn(col, 5, CODEC);
+    w.writeDataPage(2, 4, BytesInput.from(BYTES1), stats1, BIT_PACKED, BIT_PACKED, PLAIN);
+    w.writeDataPage(3, 4, BytesInput.from(BYTES1), stats1, BIT_PACKED, BIT_PACKED, PLAIN);
+    BloomFilter bloomData = new BlockSplitBloomFilter(0);
+    bloomData.insert(bloomData.hash(Binary.fromString("hello")));
+    bloomData.insert(bloomData.hash(Binary.fromString("world")));
+    long blStarts = w.getPos(); // the filter is written right here, so this is its expected offset
+    w.writeBloomFilter(bloomData);
+    w.endColumn();
+    w.endBlock();
+    w.end(new HashMap<String, String>());
+    ParquetMetadata readFooter = ParquetFileReader.readFooter(configuration, path);
+    BlockMetaData block = readFooter.getBlocks().get(0);
+    assertEquals("bloomFilter offset", blStarts, block.getColumns().get(0).getBloomFilterOffset());
+    try (ParquetFileReader r = new ParquetFileReader(configuration, readFooter.getFileMetaData(), path,
+        Arrays.asList(block), Arrays.asList(col))) { // closed even if an assertion below fails
+      BloomFilterReader bloomFilterReader = r.getBloomFilterDataReader(block);
+      BloomFilter bloomDataRead = bloomFilterReader.readBloomFilter(col);
+      assertTrue(bloomDataRead.find(bloomData.hash(Binary.fromString("hello"))));
+      assertTrue(bloomDataRead.find(bloomData.hash(Binary.fromString("world"))));
+    }
+  }
+
   @Test
   public void testAlignmentWithPadding() throws Exception {
     File testFile = temp.newFile();
@@ -805,4 +813,142 @@ public void testWriteMetadataFileWithRelativeOutputPath() throws IOException {
     ParquetFileWriter.writeMetadataFile(conf, relativeRoot, footers, JobSummaryLevel.ALL);
   }
 
+  @Test
+  public void testColumnIndexWriteRead() throws Exception {
+    File testFile = temp.newFile();
+    testFile.delete();
+    // Round-trip check: write pages with known statistics, then read the column and offset indexes back.
+    Path path = new Path(testFile.toURI());
+    Configuration configuration = new Configuration();
+    // Three row groups are written; writer positions are recorded only for the second one, verified below.
+    ParquetFileWriter w = new ParquetFileWriter(configuration, SCHEMA, path);
+    w.start();
+    w.startBlock(4);
+    w.startColumn(C1, 7, CODEC);
+    w.writeDataPage(7, 4, BytesInput.from(BYTES3), EMPTY_STATS, BIT_PACKED, BIT_PACKED, PLAIN);
+    w.endColumn();
+    w.startColumn(C2, 8, CODEC);
+    w.writeDataPage(8, 4, BytesInput.from(BYTES4), EMPTY_STATS, BIT_PACKED, BIT_PACKED, PLAIN);
+    w.endColumn();
+    w.endBlock();
+    w.startBlock(4);
+    w.startColumn(C1, 5, CODEC);
+    long c1p1Starts = w.getPos();
+    w.writeDataPage(2, 4, BytesInput.from(BYTES1), statsC1(null, Binary.fromString("aaa")), 1, BIT_PACKED, BIT_PACKED,
+        PLAIN);
+    long c1p2Starts = w.getPos();
+    w.writeDataPage(3, 4, BytesInput.from(BYTES1), statsC1(Binary.fromString("bbb"), Binary.fromString("ccc")), 3,
+        BIT_PACKED, BIT_PACKED, PLAIN);
+    w.endColumn();
+    long c1Ends = w.getPos();
+    w.startColumn(C2, 6, CODEC);
+    long c2p1Starts = w.getPos();
+    w.writeDataPage(2, 4, BytesInput.from(BYTES2), statsC2(117l, 100l), 1, BIT_PACKED, BIT_PACKED, PLAIN);
+    long c2p2Starts = w.getPos();
+    w.writeDataPage(3, 4, BytesInput.from(BYTES2), statsC2(null, null, null), 2, BIT_PACKED, BIT_PACKED, PLAIN);
+    long c2p3Starts = w.getPos();
+    w.writeDataPage(1, 4, BytesInput.from(BYTES2), statsC2(0l), 1, BIT_PACKED, BIT_PACKED, PLAIN);
+    w.endColumn();
+    long c2Ends = w.getPos();
+    w.endBlock();
+    w.startBlock(4);
+    w.startColumn(C1, 7, CODEC);
+    w.writeDataPage(7, 4, BytesInput.from(BYTES3),
+        // Creating huge stats so the column index will reach the limit and won't be written
+        statsC1(
+            Binary.fromConstantByteArray(new byte[(int) MAX_STATS_SIZE]),
+            Binary.fromConstantByteArray(new byte[1])),
+        4, BIT_PACKED, BIT_PACKED, PLAIN);
+    w.endColumn();
+    w.startColumn(C2, 8, CODEC);
+    w.writeDataPage(8, 4, BytesInput.from(BYTES4), EMPTY_STATS, BIT_PACKED, BIT_PACKED, PLAIN);
+    w.endColumn();
+    w.endBlock();
+    w.end(new HashMap<String, String>());
+    // Read the file back and compare the footer, column indexes and offset indexes with what was written.
+    try (ParquetFileReader reader = new ParquetFileReader(HadoopInputFile.fromPath(path, configuration),
+        ParquetReadOptions.builder().build())) {
+      ParquetMetadata footer = reader.getFooter();
+      assertEquals(3, footer.getBlocks().size());
+      BlockMetaData blockMeta = footer.getBlocks().get(1);
+      assertEquals(2, blockMeta.getColumns().size());
+      // Column index of C1 in the second row group: two pages, the first one carrying a single null.
+      ColumnIndex columnIndex = reader.readColumnIndex(blockMeta.getColumns().get(0));
+      assertEquals(BoundaryOrder.ASCENDING, columnIndex.getBoundaryOrder());
+      assertTrue(Arrays.asList(1l, 0l).equals(columnIndex.getNullCounts()));
+      assertTrue(Arrays.asList(false, false).equals(columnIndex.getNullPages()));
+      List<ByteBuffer> minValues = columnIndex.getMinValues();
+      assertEquals(2, minValues.size());
+      List<ByteBuffer> maxValues = columnIndex.getMaxValues();
+      assertEquals(2, maxValues.size());
+      assertEquals("aaa", new String(minValues.get(0).array(), StandardCharsets.UTF_8));
+      assertEquals("aaa", new String(maxValues.get(0).array(), StandardCharsets.UTF_8));
+      assertEquals("bbb", new String(minValues.get(1).array(), StandardCharsets.UTF_8));
+      assertEquals("ccc", new String(maxValues.get(1).array(), StandardCharsets.UTF_8));
+      // Column index of C2 in the second row group: three pages, the middle one written with nulls only.
+      columnIndex = reader.readColumnIndex(blockMeta.getColumns().get(1));
+      assertEquals(BoundaryOrder.DESCENDING, columnIndex.getBoundaryOrder());
+      assertTrue(Arrays.asList(0l, 3l, 0l).equals(columnIndex.getNullCounts()));
+      assertTrue(Arrays.asList(false, true, false).equals(columnIndex.getNullPages()));
+      minValues = columnIndex.getMinValues();
+      assertEquals(3, minValues.size());
+      maxValues = columnIndex.getMaxValues();
+      assertEquals(3, maxValues.size());
+      assertEquals(100, BytesUtils.bytesToLong(minValues.get(0).array()));
+      assertEquals(117, BytesUtils.bytesToLong(maxValues.get(0).array()));
+      assertEquals(0, minValues.get(1).array().length);
+      assertEquals(0, maxValues.get(1).array().length);
+      assertEquals(0, BytesUtils.bytesToLong(minValues.get(2).array()));
+      assertEquals(0, BytesUtils.bytesToLong(maxValues.get(2).array()));
+      // Offset index of C1: offsets and sizes are derived from the writer positions recorded while writing.
+      OffsetIndex offsetIndex = reader.readOffsetIndex(blockMeta.getColumns().get(0));
+      assertEquals(2, offsetIndex.getPageCount());
+      assertEquals(c1p1Starts, offsetIndex.getOffset(0));
+      assertEquals(c1p2Starts, offsetIndex.getOffset(1));
+      assertEquals(c1p2Starts - c1p1Starts, offsetIndex.getCompressedPageSize(0));
+      assertEquals(c1Ends - c1p2Starts, offsetIndex.getCompressedPageSize(1));
+      assertEquals(0, offsetIndex.getFirstRowIndex(0));
+      assertEquals(1, offsetIndex.getFirstRowIndex(1));
+      // Offset index of C2, checked the same way against its three recorded page positions.
+      offsetIndex = reader.readOffsetIndex(blockMeta.getColumns().get(1));
+      assertEquals(3, offsetIndex.getPageCount());
+      assertEquals(c2p1Starts, offsetIndex.getOffset(0));
+      assertEquals(c2p2Starts, offsetIndex.getOffset(1));
+      assertEquals(c2p3Starts, offsetIndex.getOffset(2));
+      assertEquals(c2p2Starts - c2p1Starts, offsetIndex.getCompressedPageSize(0));
+      assertEquals(c2p3Starts - c2p2Starts, offsetIndex.getCompressedPageSize(1));
+      assertEquals(c2Ends - c2p3Starts, offsetIndex.getCompressedPageSize(2));
+      assertEquals(0, offsetIndex.getFirstRowIndex(0));
+      assertEquals(1, offsetIndex.getFirstRowIndex(1));
+      assertEquals(3, offsetIndex.getFirstRowIndex(2));
+      // The third row group was written with oversized statistics, so no column index is expected for it.
+      assertNull(reader.readColumnIndex(footer.getBlocks().get(2).getColumns().get(0)));
+    }
+  }
+
+  private org.apache.parquet.column.statistics.Statistics<?> statsC1(Binary... values) { // statistics for C1
+    org.apache.parquet.column.statistics.Statistics<?> c1Stats = org.apache.parquet.column.statistics.Statistics
+        .createStats(C1.getPrimitiveType());
+    for (Binary candidate : values) {
+      if (candidate != null) {
+        c1Stats.updateStats(candidate);
+      } else {
+        c1Stats.incrementNumNulls(); // a null argument is recorded as one null, not as a value
+      }
+    }
+    return c1Stats;
+  }
+
+  private org.apache.parquet.column.statistics.Statistics<?> statsC2(Long... values) { // statistics for C2
+    org.apache.parquet.column.statistics.Statistics<?> c2Stats = org.apache.parquet.column.statistics.Statistics
+        .createStats(C2.getPrimitiveType());
+    for (Long candidate : values) {
+      if (candidate != null) {
+        c2Stats.updateStats(candidate);
+      } else {
+        c2Stats.incrementNumNulls(); // a null argument is recorded as one null, not as a value
+      }
+    }
+    return c2Stats;
+  }
 }
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriterMergeBlocks.java b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriterMergeBlocks.java
new file mode 100644
index 0000000000..a972238cbe
--- /dev/null
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriterMergeBlocks.java
@@ -0,0 +1,280 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.hadoop;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.parquet.Preconditions;
+import org.apache.parquet.column.statistics.Statistics;
+import org.apache.parquet.example.data.Group;
+import org.apache.parquet.example.data.simple.SimpleGroupFactory;
+import org.apache.parquet.hadoop.example.ExampleParquetWriter;
+import org.apache.parquet.hadoop.example.GroupReadSupport;
+import org.apache.parquet.hadoop.metadata.BlockMetaData;
+import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
+import org.apache.parquet.hadoop.metadata.CompressionCodecName;
+import org.apache.parquet.hadoop.metadata.FileMetaData;
+import org.apache.parquet.hadoop.metadata.ParquetMetadata;
+import org.apache.parquet.hadoop.util.HadoopInputFile;
+import org.apache.parquet.io.InputFile;
+import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.Types;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.UUID;
+import java.util.stream.Collectors;
+
+import static java.util.Arrays.asList;
+import static org.apache.parquet.format.converter.ParquetMetadataConverter.NO_FILTER;
+import static org.apache.parquet.hadoop.ParquetWriter.DEFAULT_PAGE_SIZE;
+import static org.apache.parquet.schema.OriginalType.UTF8;
+import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
+import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;
+
+public class TestParquetWriterMergeBlocks {
+
+  @Rule
+  public TemporaryFolder temp = new TemporaryFolder();
+
+  public static final int FILE_SIZE = 10000;
+  public static final Configuration CONF = new Configuration();
+  public static final Map<String, String> EMPTY_METADATA =
+    new HashMap<String, String>();
+  public static final MessageType FILE_SCHEMA = Types.buildMessage()
+    .required(INT32).named("id")
+    .required(BINARY).as(UTF8).named("string")
+    .named("AppendTest");
+  public static final SimpleGroupFactory GROUP_FACTORY =
+    new SimpleGroupFactory(FILE_SCHEMA);
+
+  public Path file1;
+  public List<Group> file1content = new ArrayList<Group>();
+  public Path file2;
+  public List<Group> file2content = new ArrayList<Group>();
+
+  /**
+   * Writes FILE_SIZE records into each of two new source files and remembers them as the expected content.
+   */
+  @Before
+  public void createSourceData() throws IOException {
+    this.file1 = newTemp();
+    this.file2 = newTemp();
+
+    try (ParquetWriter<Group> writer1 = ExampleParquetWriter.builder(file1)
+           .withType(FILE_SCHEMA)
+           .build();
+         ParquetWriter<Group> writer2 = ExampleParquetWriter.builder(file2)
+           .withType(FILE_SCHEMA)
+           .build()) {
+      for (int i = 0; i < FILE_SIZE; i += 1) {
+        Group group1 = GROUP_FACTORY.newGroup();
+        group1.add("id", i);
+        group1.add("string", UUID.randomUUID().toString());
+        writer1.write(group1);
+        file1content.add(group1);
+
+        Group group2 = GROUP_FACTORY.newGroup();
+        group2.add("id", FILE_SIZE+i);
+        group2.add("string", UUID.randomUUID().toString());
+        writer2.write(group2);
+        file2content.add(group2);
+      }
+    }
+  }
+
+  /** Merges both source files with SNAPPY and checks the combined file yields every record in input order. */
+  @Test
+  public void testBasicBehavior() throws IOException {
+    Path combinedFile = newTemp();
+    ParquetFileWriter writer = new ParquetFileWriter(CONF, FILE_SCHEMA, combinedFile);
+
+    // Merge schema and extraMeta
+    List<Path> inputFiles = asList(file1, file2);
+    FileMetaData mergedMeta = ParquetFileWriter.mergeMetadataFiles(inputFiles, CONF).getFileMetaData();
+    List<InputFile> inputFileList = toInputFiles(inputFiles);
+    CodecFactory.BytesCompressor compressor = new CodecFactory(CONF, DEFAULT_PAGE_SIZE).getCompressor(CompressionCodecName.SNAPPY);
+    writer.merge(inputFileList, compressor, mergedMeta.getCreatedBy(), 128 * 1024 * 1024);
+
+    LinkedList<Group> expected = new LinkedList<>();
+    expected.addAll(file1content);
+    expected.addAll(file2content);
+
+    // try-with-resources so the reader is closed even when one of the assertions below fails
+    try (ParquetReader<Group> reader = ParquetReader
+      .builder(new GroupReadSupport(), combinedFile)
+      .build()) {
+      Group next;
+      while ((next = reader.read()) != null) {
+        Group expectedNext = expected.removeFirst();
+        // check each value; equals is not supported for simple records
+        Assert.assertEquals("Each id should match",
+          expectedNext.getInteger("id", 0), next.getInteger("id", 0));
+        Assert.assertEquals("Each string should match",
+          expectedNext.getString("string", 0), next.getString("string", 0));
+      }
+    }
+
+    Assert.assertEquals("All records should be present", 0, expected.size());
+  }
+
+  /** Wraps every path as a HadoopInputFile under CONF; any failure resurfaces as a RuntimeException. */
+  private List<InputFile> toInputFiles(List<Path> inputFiles) {
+    return inputFiles.stream().map(sourcePath -> {
+      try {
+        return HadoopInputFile.fromPath(sourcePath, CONF);
+      } catch (Exception e) {
+        throw new RuntimeException(e);
+      }
+    }).collect(Collectors.toList());
+  }
+
+  /**
+   * Merges both source files with GZIP and checks the footer: a single row group holding every input row,
+   * with column chunks equivalent to those of the first input file's first row group.
+   */
+  @Test
+  public void testMergedMetadata() throws IOException {
+    Path mergedPath = newTemp();
+    ParquetFileWriter mergeWriter = new ParquetFileWriter(CONF, FILE_SCHEMA, mergedPath);
+
+    // Merge schema and extraMeta
+    List<Path> sources = asList(file1, file2);
+    FileMetaData mergedMeta = ParquetFileWriter.mergeMetadataFiles(sources, CONF).getFileMetaData();
+    CompressionCodecName codecName = CompressionCodecName.GZIP;
+    CodecFactory.BytesCompressor compressor = new CodecFactory(CONF, DEFAULT_PAGE_SIZE).getCompressor(codecName);
+
+    mergeWriter.merge(toInputFiles(sources), compressor, mergedMeta.getCreatedBy(), 128 * 1024 * 1024);
+
+    ParquetMetadata combinedFooter = ParquetFileReader.readFooter(CONF, mergedPath, NO_FILTER);
+    ParquetMetadata f1Footer = ParquetFileReader.readFooter(CONF, file1, NO_FILTER);
+    ParquetMetadata f2Footer = ParquetFileReader.readFooter(CONF, file2, NO_FILTER);
+
+    List<BlockMetaData> inputRowGroups = new ArrayList<>(f1Footer.getBlocks());
+    inputRowGroups.addAll(f2Footer.getBlocks());
+    long totalRowCount = 0;
+    for (BlockMetaData inputRowGroup : inputRowGroups) {
+      totalRowCount += inputRowGroup.getRowCount();
+    }
+
+    Assert.assertEquals("Combined should have a single row group", 1, combinedFooter.getBlocks().size());
+
+    BlockMetaData rowGroup = combinedFooter.getBlocks().get(0);
+    Assert.assertEquals("Row count should match", totalRowCount, rowGroup.getRowCount());
+    assertColumnsEquivalent(f1Footer.getBlocks().get(0).getColumns(), rowGroup.getColumns(), codecName);
+  }
+
+  /**
+   * Checks that {@code actual} has as many column chunks as {@code expected}, that each chunk starts where
+   * the previous one ends, and delegates the per-chunk checks to assertColumnMetadataEquivalent. The null and
+   * value counts handed to that method are read from {@code actual} itself, not from {@code expected}.
+   */
+  public void assertColumnsEquivalent(List<ColumnChunkMetaData> expected,
+                                      List<ColumnChunkMetaData> actual,
+                                      CompressionCodecName codecName) {
+    Assert.assertEquals("Should have the expected columns", expected.size(), actual.size());
+    for (int i = 0; i < actual.size(); i += 1) {
+      ColumnChunkMetaData current = actual.get(i);
+      Statistics<?> statistics = current.getStatistics();
+      if (i != 0) {
+        ColumnChunkMetaData previous = actual.get(i - 1);
+        long expectedStart = previous.getStartingPos() + previous.getTotalSize();
+        Assert.assertEquals("Should start after the previous column", expectedStart, current.getStartingPos());
+      }
+
+      assertColumnMetadataEquivalent(
+        expected.get(i), current, codecName, statistics.getNumNulls(), current.getValueCount());
+    }
+  }
+
+  /**
+   * Asserts that a merged column chunk matches the chunk it is compared against.
+   *
+   * Path, primitive type and encodings are compared with {@code expected}; the codec, null count and value
+   * count are compared with the values passed in by the caller.
+   */
+  public void assertColumnMetadataEquivalent(ColumnChunkMetaData expected,
+                                             ColumnChunkMetaData actual,
+                                             CompressionCodecName codecName,
+                                             long numNulls,
+                                             long valueCount) {
+    // the path must be read from actual; comparing expected with itself would always pass
+    Assert.assertEquals("Should be the expected column", expected.getPath(), actual.getPath());
+    Assert.assertEquals("Primitive type should not change", expected.getType(), actual.getType());
+    Assert.assertEquals("Compression codec should not change", codecName, actual.getCodec());
+    Assert.assertEquals("Data encodings should not change", expected.getEncodings(), actual.getEncodings());
+    Assert.assertEquals("Statistics should not change", numNulls, actual.getStatistics().getNumNulls());
+    Assert.assertEquals("Number of values should not change", valueCount, actual.getValueCount());
+  }
+
+  /** Merges the sources under a schema that keeps only "string" and checks that no other column survives. */
+  @Test
+  public void testAllowDroppingColumns() throws IOException {
+    MessageType droppedColumnSchema = Types.buildMessage()
+      .required(BINARY).as(UTF8).named("string")
+      .named("AppendTest");
+
+    Path droppedColumnFile = newTemp();
+    List<Path> inputFiles = asList(file1, file2);
+    ParquetFileWriter writer = new ParquetFileWriter(CONF, droppedColumnSchema, droppedColumnFile);
+    List<InputFile> inputFileList = toInputFiles(inputFiles);
+    CompressionCodecName codecName = CompressionCodecName.GZIP;
+    CodecFactory.BytesCompressor compressor = new CodecFactory(CONF, DEFAULT_PAGE_SIZE).getCompressor(codecName);
+    writer.merge(inputFileList, compressor, "", 128*1024*1024);
+
+    LinkedList<Group> expected = new LinkedList<Group>();
+    expected.addAll(file1content);
+    expected.addAll(file2content);
+
+    ParquetMetadata footer = ParquetFileReader.readFooter(CONF, droppedColumnFile, NO_FILTER);
+    for (BlockMetaData rowGroup : footer.getBlocks()) {
+      Assert.assertEquals("Should have only the string column",
+        1, rowGroup.getColumns().size());
+    }
+
+    // try-with-resources so the reader is closed even when one of the assertions below fails
+    try (ParquetReader<Group> reader = ParquetReader
+      .builder(new GroupReadSupport(), droppedColumnFile)
+      .build()) {
+      Group next;
+      while ((next = reader.read()) != null) {
+        Group expectedNext = expected.removeFirst();
+        Assert.assertEquals("Each string should match",
+          expectedNext.getString("string", 0), next.getString("string", 0));
+      }
+    }
+
+    Assert.assertEquals("All records should be present", 0, expected.size());
+  }
+
+  private Path newTemp() throws IOException {
+    File placeholder = temp.newFile(); // created under the TemporaryFolder rule, then removed again below
+    Preconditions.checkArgument(placeholder.delete(), "Could not remove temp file");
+    return new Path(placeholder.toString()); // only the name is handed out; the file itself is gone
+  }
+}
diff --git a/parquet-hive/parquet-hive-storage-handler/src/main/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java b/parquet-hive/parquet-hive-storage-handler/src/main/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java
index 5d3ab488b2..6d229a696f 100644
--- a/parquet-hive/parquet-hive-storage-handler/src/main/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java
+++ b/parquet-hive/parquet-hive-storage-handler/src/main/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java
@@ -1,4 +1,4 @@
-/* 
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -30,12 +30,15 @@
 
 import org.apache.parquet.schema.ConversionPatterns;
 import org.apache.parquet.schema.GroupType;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
 import org.apache.parquet.schema.MessageType;
-import org.apache.parquet.schema.OriginalType;
 import org.apache.parquet.schema.PrimitiveType;
 import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
 import org.apache.parquet.schema.Type;
 import org.apache.parquet.schema.Type.Repetition;
+import org.apache.parquet.schema.Types;
+
+import static org.apache.parquet.schema.LogicalTypeAnnotation.listType;
 
 public class HiveSchemaConverter {
 
@@ -105,7 +108,7 @@ private static Type convertType(final String name, final TypeInfo typeInfo, fina
   // 1 anonymous element "array_element"
   private static GroupType convertArrayType(final String name, final ListTypeInfo typeInfo) {
     final TypeInfo subType = typeInfo.getListElementTypeInfo();
-    return listWrapper(name, OriginalType.LIST, new GroupType(Repetition.REPEATED,
+    return listWrapper(name, listType(), new GroupType(Repetition.REPEATED,
         ParquetHiveSerDe.ARRAY.toString(), convertType("array_element", subType)));
   }
 
@@ -127,8 +130,8 @@ private static GroupType convertMapType(final String name, final MapTypeInfo typ
     return ConversionPatterns.mapType(Repetition.OPTIONAL, name, keyType, valueType);
   }
 
-  private static GroupType listWrapper(final String name, final OriginalType originalType,
+  private static GroupType listWrapper(final String name, final LogicalTypeAnnotation logicalTypeAnnotation,
       final GroupType groupType) {
-    return new GroupType(Repetition.OPTIONAL, name, originalType, groupType);
+    return Types.optionalGroup().addField(groupType).as(logicalTypeAnnotation).named(name);
   }
 }
diff --git a/parquet-pig/pom.xml b/parquet-pig/pom.xml
index 3b7e5703fe..0d3f202c27 100644
--- a/parquet-pig/pom.xml
+++ b/parquet-pig/pom.xml
@@ -48,8 +48,8 @@
     </dependency>
     <dependency>
       <groupId>org.apache.parquet</groupId>
-      <artifactId>parquet-format</artifactId>
-      <version>${parquet.format.version}</version>
+      <artifactId>parquet-format-structures</artifactId>
+      <version>${project.version}</version>
     </dependency>
     <dependency>
       <groupId>org.apache.pig</groupId>
diff --git a/parquet-pig/src/main/java/org/apache/parquet/pig/PigSchemaConverter.java b/parquet-pig/src/main/java/org/apache/parquet/pig/PigSchemaConverter.java
index 24f7ee8c9a..19356616ae 100644
--- a/parquet-pig/src/main/java/org/apache/parquet/pig/PigSchemaConverter.java
+++ b/parquet-pig/src/main/java/org/apache/parquet/pig/PigSchemaConverter.java
@@ -23,7 +23,10 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
+import java.util.Optional;
 
+import org.apache.parquet.schema.LogicalTypeAnnotation;
+import org.apache.parquet.schema.Types;
 import org.apache.pig.LoadPushDown.RequiredField;
 import org.apache.pig.LoadPushDown.RequiredFieldList;
 import org.apache.pig.data.DataType;
@@ -38,7 +41,6 @@
 import org.apache.parquet.schema.ConversionPatterns;
 import org.apache.parquet.schema.GroupType;
 import org.apache.parquet.schema.MessageType;
-import org.apache.parquet.schema.OriginalType;
 import org.apache.parquet.schema.PrimitiveType;
 import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
 import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeNameConverter;
@@ -47,6 +49,9 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import static java.util.Optional.of;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.stringType;
+
 
 /**
  * Converts a Pig Schema into a Parquet schema
@@ -205,7 +210,7 @@ private FieldSchema getSimpleFieldSchema(final String fieldName, Type parquetTyp
       throws FrontendException {
     final PrimitiveTypeName parquetPrimitiveTypeName =
         parquetType.asPrimitiveType().getPrimitiveTypeName();
-    final OriginalType originalType = parquetType.getOriginalType();
+    final LogicalTypeAnnotation logicalTypeAnnotation = parquetType.getLogicalTypeAnnotation();
     return parquetPrimitiveTypeName.convert(
         new PrimitiveTypeNameConverter<Schema.FieldSchema, FrontendException>() {
       @Override
@@ -242,7 +247,7 @@ public FieldSchema convertINT96(PrimitiveTypeName primitiveTypeName)
       @Override
       public FieldSchema convertFIXED_LEN_BYTE_ARRAY(
         PrimitiveTypeName primitiveTypeName) throws FrontendException {
-        if (originalType == OriginalType.DECIMAL) {
+        if (logicalTypeAnnotation instanceof LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) {
           return new FieldSchema(fieldName, null, DataType.BIGDECIMAL);
         } else {
           return new FieldSchema(fieldName, null, DataType.BYTEARRAY);
@@ -258,7 +263,7 @@ public FieldSchema convertBOOLEAN(PrimitiveTypeName primitiveTypeName)
       @Override
       public FieldSchema convertBINARY(PrimitiveTypeName primitiveTypeName)
           throws FrontendException {
-        if (originalType != null && originalType == OriginalType.UTF8) {
+        if (logicalTypeAnnotation instanceof LogicalTypeAnnotation.StringLogicalTypeAnnotation) {
           return new FieldSchema(fieldName, null, DataType.CHARARRAY);
         } else {
           return new FieldSchema(fieldName, null, DataType.BYTEARRAY);
@@ -267,47 +272,71 @@ public FieldSchema convertBINARY(PrimitiveTypeName primitiveTypeName)
     });
   }
 
+  /*
+   * RuntimeException used to work around throwing the checked FrontendException from logical type visitors:
+   * the visitor wraps the FrontendException in an inner catch block, and it is rethrown outside of the visitor.
+   */
+  private static final class FrontendExceptionWrapper extends RuntimeException {
+    final FrontendException frontendException;
+    FrontendExceptionWrapper(FrontendException frontendException) {
+      super(frontendException); // keep the original as the cause so a leaked wrapper still has a useful trace
+      this.frontendException = frontendException;
+    }
+  }
+
   private FieldSchema getComplexFieldSchema(String fieldName, Type parquetType)
       throws FrontendException {
     GroupType parquetGroupType = parquetType.asGroupType();
-    OriginalType originalType = parquetGroupType.getOriginalType();
-    if (originalType !=  null) {
-      switch(originalType) {
-      case MAP:
-        // verify that its a map
-        if (parquetGroupType.getFieldCount() != 1 || parquetGroupType.getType(0).isPrimitive()) {
-          throw new SchemaConversionException("Invalid map type " + parquetGroupType);
-        }
-        GroupType mapKeyValType = parquetGroupType.getType(0).asGroupType();
-        if (!mapKeyValType.isRepetition(Repetition.REPEATED) ||
-            (mapKeyValType.getOriginalType() != null && !mapKeyValType.getOriginalType().equals(OriginalType.MAP_KEY_VALUE)) ||
-            mapKeyValType.getFieldCount()!=2) {
-          throw new SchemaConversionException("Invalid map type " + parquetGroupType);
-        }
-        // if value is not primitive wrap it in a tuple
-        Type valueType = mapKeyValType.getType(1);
-        Schema s = convertField(valueType);
-        s.getField(0).alias = null;
-        return new FieldSchema(fieldName, s, DataType.MAP);
-      case LIST:
-        Type type = parquetGroupType.getType(0);
-        if (parquetGroupType.getFieldCount()!= 1 || type.isPrimitive()) {
-          // an array is effectively a bag
-          Schema primitiveSchema = new Schema(getSimpleFieldSchema(parquetGroupType.getFieldName(0), type));
-          Schema tupleSchema = new Schema(new FieldSchema(ARRAY_VALUE_NAME, primitiveSchema, DataType.TUPLE));
-          return new FieldSchema(fieldName, tupleSchema, DataType.BAG);
-        }
-        GroupType tupleType = parquetGroupType.getType(0).asGroupType();
-        if (!tupleType.isRepetition(Repetition.REPEATED)) {
-          throw new SchemaConversionException("Invalid list type " + parquetGroupType);
-        }
-        Schema tupleSchema = new Schema(new FieldSchema(tupleType.getName(), convertFields(tupleType.getFields()), DataType.TUPLE));
-        return new FieldSchema(fieldName, tupleSchema, DataType.BAG);
-      case MAP_KEY_VALUE:
-      case ENUM:
-      case UTF8:
-      default:
-        throw new SchemaConversionException("Unexpected original type for " + parquetType + ": " + originalType);
+    LogicalTypeAnnotation logicalTypeAnnotation = parquetGroupType.getLogicalTypeAnnotation();
+    if (logicalTypeAnnotation !=  null) {
+      try {
+        return logicalTypeAnnotation.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<FieldSchema>() {
+          @Override
+          public Optional<FieldSchema> visit(LogicalTypeAnnotation.MapLogicalTypeAnnotation mapLogicalType) {
+            try {
+              // verify that its a map
+              if (parquetGroupType.getFieldCount() != 1 || parquetGroupType.getType(0).isPrimitive()) {
+                throw new SchemaConversionException("Invalid map type " + parquetGroupType);
+              }
+              GroupType mapKeyValType = parquetGroupType.getType(0).asGroupType();
+              if (!mapKeyValType.isRepetition(Repetition.REPEATED) ||
+                (mapKeyValType.getLogicalTypeAnnotation() != null && !mapKeyValType.getLogicalTypeAnnotation().equals(LogicalTypeAnnotation.MapKeyValueTypeAnnotation.getInstance())) ||
+                mapKeyValType.getFieldCount() != 2) {
+                throw new SchemaConversionException("Invalid map type " + parquetGroupType);
+              }
+              // if value is not primitive wrap it in a tuple
+              Type valueType = mapKeyValType.getType(1);
+              Schema s = convertField(valueType);
+              s.getField(0).alias = null;
+              return of(new FieldSchema(fieldName, s, DataType.MAP));
+            } catch (FrontendException e) {
+              throw new FrontendExceptionWrapper(e);
+            }
+          }
+
+          @Override
+          public Optional<FieldSchema> visit(LogicalTypeAnnotation.ListLogicalTypeAnnotation listLogicalType) {
+            try {
+              Type type = parquetGroupType.getType(0);
+              if (parquetGroupType.getFieldCount() != 1 || type.isPrimitive()) {
+                // an array is effectively a bag
+                Schema primitiveSchema = new Schema(getSimpleFieldSchema(parquetGroupType.getFieldName(0), type));
+                Schema tupleSchema = new Schema(new FieldSchema(ARRAY_VALUE_NAME, primitiveSchema, DataType.TUPLE));
+                return of(new FieldSchema(fieldName, tupleSchema, DataType.BAG));
+              }
+              GroupType tupleType = parquetGroupType.getType(0).asGroupType();
+              if (!tupleType.isRepetition(Repetition.REPEATED)) {
+                throw new SchemaConversionException("Invalid list type " + parquetGroupType);
+              }
+              Schema tupleSchema = new Schema(new FieldSchema(tupleType.getName(), convertFields(tupleType.getFields()), DataType.TUPLE));
+              return of(new FieldSchema(fieldName, tupleSchema, DataType.BAG));
+            } catch (FrontendException e) {
+              throw new FrontendExceptionWrapper(e);
+            }
+          }
+        }).orElseThrow(() -> new SchemaConversionException("Unexpected original type for " + parquetType + ": " + logicalTypeAnnotation));
+      } catch (FrontendExceptionWrapper e) {
+        throw e.frontendException;
       }
     } else {
       // if original type is not set, we assume it to be tuple
@@ -359,7 +388,7 @@ private Type convertWithName(FieldSchema fieldSchema, String name) {
       case DataType.BOOLEAN:
         return primitive(name, PrimitiveTypeName.BOOLEAN);
       case DataType.CHARARRAY:
-        return primitive(name, PrimitiveTypeName.BINARY, OriginalType.UTF8);
+        return primitive(name, PrimitiveTypeName.BINARY, stringType());
       case DataType.INTEGER:
         return primitive(name, PrimitiveTypeName.INT32);
       case DataType.LONG:
@@ -403,12 +432,12 @@ private String name(String fieldAlias, String defaultName) {
     return fieldAlias == null ? defaultName : fieldAlias;
   }
 
-  private Type primitive(String name, PrimitiveTypeName primitive, OriginalType originalType) {
-    return new PrimitiveType(Repetition.OPTIONAL, primitive, name, originalType);
+  private Type primitive(String name, PrimitiveTypeName primitive, LogicalTypeAnnotation logicalTypeAnnotation) {
+    return Types.primitive(primitive, Repetition.OPTIONAL).as(logicalTypeAnnotation).named(name);
   }
 
   private PrimitiveType primitive(String name, PrimitiveTypeName primitive) {
-    return new PrimitiveType(Repetition.OPTIONAL, primitive, name, null);
+    return Types.primitive(primitive, Repetition.OPTIONAL).named(name);
   }
 
   /**
@@ -511,7 +540,8 @@ private Type filterBag(GroupType bagType, FieldSchema bagFieldSchema) throws Fro
     }
     Type nested = bagType.getType(0);
     FieldSchema innerField = bagFieldSchema.schema.getField(0);
-    if (nested.isPrimitive() || nested.getOriginalType() == OriginalType.MAP || nested.getOriginalType() == OriginalType.LIST) {
+    if (nested.isPrimitive() || nested.getLogicalTypeAnnotation() instanceof LogicalTypeAnnotation.MapLogicalTypeAnnotation
+      || nested.getLogicalTypeAnnotation() instanceof LogicalTypeAnnotation.ListLogicalTypeAnnotation) {
       // Bags always contain tuples => we skip the extra tuple that was inserted in that case.
       innerField = innerField.schema.getField(0);
     }
diff --git a/parquet-pig/src/main/java/org/apache/parquet/pig/convert/TupleConverter.java b/parquet-pig/src/main/java/org/apache/parquet/pig/convert/TupleConverter.java
index 18ea9e451e..48bb7539aa 100644
--- a/parquet-pig/src/main/java/org/apache/parquet/pig/convert/TupleConverter.java
+++ b/parquet-pig/src/main/java/org/apache/parquet/pig/convert/TupleConverter.java
@@ -1,4 +1,4 @@
-/* 
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -23,6 +23,7 @@
 import java.util.List;
 import java.math.BigDecimal;
 
+import org.apache.parquet.schema.LogicalTypeAnnotation;
 import org.apache.pig.backend.executionengine.ExecException;
 import org.apache.pig.data.DataByteArray;
 import org.apache.pig.data.DataType;
@@ -40,11 +41,8 @@
 import org.apache.parquet.io.api.GroupConverter;
 import org.apache.parquet.io.api.PrimitiveConverter;
 import org.apache.parquet.pig.TupleConversionException;
-import org.apache.parquet.pig.convert.DecimalUtils;
 import org.apache.parquet.schema.GroupType;
-import org.apache.parquet.schema.OriginalType;
 import org.apache.parquet.schema.PrimitiveType;
-import org.apache.parquet.schema.DecimalMetadata;
 import org.apache.parquet.schema.Type;
 import org.apache.parquet.schema.Type.Repetition;
 
@@ -74,7 +72,7 @@ public TupleConverter(GroupType parquetSchema, Schema pigSchema, boolean elephan
         FieldSchema field = pigSchema.getField(i);
         if(parquetSchema.containsField(field.alias) || columnIndexAccess) {
           Type type = getType(columnIndexAccess, field.alias, i);
-          
+
           if(type != null) {
             final int index = i;
             converters[c++] = newConverter(field, type, new ParentValueContainer() {
@@ -85,7 +83,7 @@ void add(Object value) {
             }, elephantBirdCompatible, columnIndexAccess);
           }
         }
-        
+
       }
     } catch (FrontendException e) {
       throw new ParquetDecodingException("can not initialize pig converter from:\n" + parquetSchema + "\n" + pigSchema, e);
@@ -100,10 +98,10 @@ private Type getType(boolean columnIndexAccess, String alias, int index) {
     } else {
       return parquetSchema.getType(parquetSchema.getFieldIndex(alias));
     }
-    
+
     return null;
   }
-  
+
   static Converter newConverter(FieldSchema pigField, Type type, final ParentValueContainer parent, boolean elephantBirdCompatible, boolean columnIndexAccess) {
     try {
       switch (pigField.type) {
@@ -122,7 +120,7 @@ public void end() {
       case DataType.CHARARRAY:
           //If the orignal type isn't a string, we don't want to use the dictionary because
           //a custom implementation will be needed for each type.  Just default to no dictionary.
-        return new FieldStringConverter(parent, type.getOriginalType() == OriginalType.UTF8);
+        return new FieldStringConverter(parent, type.getLogicalTypeAnnotation() instanceof LogicalTypeAnnotation.StringLogicalTypeAnnotation);
       case DataType.BYTEARRAY:
         return new FieldByteArrayConverter(parent);
       case DataType.INTEGER:
@@ -277,8 +275,6 @@ public void addDouble(double value) {
     public void addBoolean(boolean value) {
       parent.add(Boolean.toString(value));
     }
-    
-    
   }
 
   /**
@@ -403,7 +399,7 @@ final public void addLong(long value) {
 
     @Override
     public void addInt(int value) {
-      parent.add((long)value); 
+      parent.add((long)value);
     }
 
     @Override
@@ -425,7 +421,7 @@ public void addBoolean(boolean value) {
     public void addBinary(Binary value) {
       parent.add(Long.parseLong(value.toStringUsingUTF8()));
     }
-    
+
   }
 
   /**
@@ -511,8 +507,6 @@ public void addDouble(double value) {
     public void addBinary(Binary value) {
       parent.add(Boolean.parseBoolean(value.toStringUsingUTF8()));
     }
-
-    
   }
 
   /**
@@ -554,7 +548,8 @@ static class BagConverter extends GroupConverter {
 
       ParentValueContainer childsParent;
       FieldSchema pigField;
-      if (nestedType.isPrimitive() || nestedType.getOriginalType() == OriginalType.MAP || nestedType.getOriginalType() == OriginalType.LIST) {
+      if (nestedType.isPrimitive() || nestedType.getLogicalTypeAnnotation() instanceof LogicalTypeAnnotation.MapLogicalTypeAnnotation
+        || nestedType.getLogicalTypeAnnotation() instanceof LogicalTypeAnnotation.ListLogicalTypeAnnotation) {
         // Pig bags always contain tuples
         // In that case we need to wrap the value in an extra tuple
         childsParent = new ParentValueContainer() {
diff --git a/parquet-protobuf/pom.xml b/parquet-protobuf/pom.xml
index b6f4627b16..329046db78 100644
--- a/parquet-protobuf/pom.xml
+++ b/parquet-protobuf/pom.xml
@@ -86,6 +86,17 @@
     </dependency>
   </dependencies>
 
+  <dependencyManagement>
+    <dependencies>
+      <!-- com.twitter.elephantbird brings in an older version of libthrift, so we force the use of our own version -->
+      <dependency>
+        <groupId>org.apache.thrift</groupId>
+        <artifactId>libthrift</artifactId>
+        <version>${format.thrift.version}</version>
+      </dependency>
+    </dependencies>
+  </dependencyManagement>
+
   <developers>
     <developer>
       <id>lukasnalezenec</id>
diff --git a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoMessageConverter.java b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoMessageConverter.java
index 979d78ea71..92d8b624d9 100644
--- a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoMessageConverter.java
+++ b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoMessageConverter.java
@@ -1,4 +1,4 @@
-/* 
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -31,15 +31,17 @@
 import org.apache.parquet.io.api.PrimitiveConverter;
 import org.apache.parquet.schema.GroupType;
 import org.apache.parquet.schema.IncompatibleSchemaModificationException;
-import org.apache.parquet.schema.OriginalType;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
 import org.apache.parquet.schema.Type;
 
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Optional;
 import java.util.Set;
 
 import static com.google.protobuf.Descriptors.FieldDescriptor.JavaType;
+import static java.util.Optional.of;
 
 /**
  * Converts Protocol Buffer message (both top level and inner) to parquet.
@@ -128,13 +130,22 @@ public void add(Object value) {
       };
     }
 
-    if (OriginalType.LIST == parquetType.getOriginalType()) {
-      return new ListConverter(parentBuilder, fieldDescriptor, parquetType);
+    LogicalTypeAnnotation logicalTypeAnnotation = parquetType.getLogicalTypeAnnotation();
+    if (logicalTypeAnnotation == null) {
+      return newScalarConverter(parent, parentBuilder, fieldDescriptor, parquetType);
     }
-    if (OriginalType.MAP == parquetType.getOriginalType()) {
-      return new MapConverter(parentBuilder, fieldDescriptor, parquetType);
-    }
-    return newScalarConverter(parent, parentBuilder, fieldDescriptor, parquetType);
+    // The scalar fallback is created lazily, i.e. only when the annotation is neither LIST nor MAP
+    return logicalTypeAnnotation.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<Converter>() {
+      @Override
+      public Optional<Converter> visit(LogicalTypeAnnotation.ListLogicalTypeAnnotation listLogicalType) {
+        return of(new ListConverter(parentBuilder, fieldDescriptor, parquetType));
+      }
+
+      @Override
+      public Optional<Converter> visit(LogicalTypeAnnotation.MapLogicalTypeAnnotation mapLogicalType) {
+        return of(new MapConverter(parentBuilder, fieldDescriptor, parquetType));
+      }
+    }).orElseGet(() -> newScalarConverter(parent, parentBuilder, fieldDescriptor, parquetType));
   }
 
   private Converter newScalarConverter(ParentValueContainer pvc, Message.Builder parentBuilder, Descriptors.FieldDescriptor fieldDescriptor, Type parquetType) {
@@ -376,9 +387,9 @@ final class ListConverter extends GroupConverter {
     private final Converter converter;
 
     public ListConverter(Message.Builder parentBuilder, Descriptors.FieldDescriptor fieldDescriptor, Type parquetType) {
-      OriginalType originalType = parquetType.getOriginalType();
-      if (originalType != OriginalType.LIST || parquetType.isPrimitive()) {
-        throw new ParquetDecodingException("Expected LIST wrapper. Found: " + originalType + " instead.");
+      LogicalTypeAnnotation logicalTypeAnnotation = parquetType.getLogicalTypeAnnotation();
+      if (!(logicalTypeAnnotation instanceof LogicalTypeAnnotation.ListLogicalTypeAnnotation) || parquetType.isPrimitive()) {
+        throw new ParquetDecodingException("Expected LIST wrapper. Found: " + logicalTypeAnnotation + " instead.");
       }
 
       GroupType rootWrapperType = parquetType.asGroupType();
@@ -435,9 +446,9 @@ final class MapConverter extends GroupConverter {
     private final Converter converter;
 
     public MapConverter(Message.Builder parentBuilder, Descriptors.FieldDescriptor fieldDescriptor, Type parquetType) {
-      OriginalType originalType = parquetType.getOriginalType();
-      if (originalType != OriginalType.MAP) {
-        throw new ParquetDecodingException("Expected MAP wrapper. Found: " + originalType + " instead.");
+      LogicalTypeAnnotation logicalTypeAnnotation = parquetType.getLogicalTypeAnnotation();
+      if (!(logicalTypeAnnotation instanceof LogicalTypeAnnotation.MapLogicalTypeAnnotation)) {
+        throw new ParquetDecodingException("Expected MAP wrapper. Found: " + logicalTypeAnnotation + " instead.");
       }
 
       Type parquetSchema;
diff --git a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoSchemaConverter.java b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoSchemaConverter.java
index 0e1aa20100..db5be1409f 100644
--- a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoSchemaConverter.java
+++ b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoSchemaConverter.java
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -23,8 +23,8 @@
 import com.google.protobuf.Descriptors.FieldDescriptor.JavaType;
 import com.google.protobuf.Message;
 import com.twitter.elephantbird.util.Protobufs;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
 import org.apache.parquet.schema.MessageType;
-import org.apache.parquet.schema.OriginalType;
 import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
 import org.apache.parquet.schema.Type;
 import org.apache.parquet.schema.Types;
@@ -35,8 +35,10 @@
 
 import java.util.List;
 
-import static org.apache.parquet.schema.OriginalType.ENUM;
-import static org.apache.parquet.schema.OriginalType.UTF8;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.enumType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.listType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.mapType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.stringType;
 import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.*;
 
 /**
@@ -101,20 +103,19 @@ private <T> Builder<? extends Builder<?, GroupBuilder<T>>, GroupBuilder<T>> addF
     ParquetType parquetType = getParquetType(descriptor);
     if (descriptor.isRepeated() && parquetSpecsCompliant) {
       // the old schema style did not include the LIST wrapper around repeated fields
-      return addRepeatedPrimitive(descriptor, parquetType.primitiveType, parquetType.originalType, builder);
+      return addRepeatedPrimitive(parquetType.primitiveType, parquetType.logicalTypeAnnotation, builder);
     }
 
-    return builder.primitive(parquetType.primitiveType, getRepetition(descriptor)).as(parquetType.originalType);
+    return builder.primitive(parquetType.primitiveType, getRepetition(descriptor)).as(parquetType.logicalTypeAnnotation);
   }
 
-  private <T> Builder<? extends Builder<?, GroupBuilder<T>>, GroupBuilder<T>> addRepeatedPrimitive(FieldDescriptor descriptor,
-                                                                                                   PrimitiveTypeName primitiveType,
-                                                                                                   OriginalType originalType,
+  private <T> Builder<? extends Builder<?, GroupBuilder<T>>, GroupBuilder<T>> addRepeatedPrimitive(PrimitiveTypeName primitiveType,
+                                                                                                   LogicalTypeAnnotation logicalTypeAnnotation,
                                                                                                    final GroupBuilder<T> builder) {
     return builder
-        .group(Type.Repetition.OPTIONAL).as(OriginalType.LIST)
+        .group(Type.Repetition.OPTIONAL).as(listType())
           .group(Type.Repetition.REPEATED)
-            .primitive(primitiveType, Type.Repetition.REQUIRED).as(originalType)
+            .primitive(primitiveType, Type.Repetition.REQUIRED).as(logicalTypeAnnotation)
           .named("element")
         .named("list");
   }
@@ -122,7 +123,7 @@ private <T> Builder<? extends Builder<?, GroupBuilder<T>>, GroupBuilder<T>> addR
   private <T> GroupBuilder<GroupBuilder<T>> addRepeatedMessage(FieldDescriptor descriptor, GroupBuilder<T> builder) {
     GroupBuilder<GroupBuilder<GroupBuilder<GroupBuilder<T>>>> result =
       builder
-        .group(Type.Repetition.OPTIONAL).as(OriginalType.LIST)
+        .group(Type.Repetition.OPTIONAL).as(listType())
         .group(Type.Repetition.REPEATED)
         .group(Type.Repetition.OPTIONAL);
 
@@ -156,9 +157,9 @@ private <T> GroupBuilder<GroupBuilder<T>> addMapField(FieldDescriptor descriptor
     ParquetType mapKeyParquetType = getParquetType(fields.get(0));
 
     GroupBuilder<GroupBuilder<GroupBuilder<T>>> group = builder
-      .group(Type.Repetition.OPTIONAL).as(OriginalType.MAP) // only optional maps are allowed in Proto3
+      .group(Type.Repetition.OPTIONAL).as(mapType()) // only optional maps are allowed in Proto3
       .group(Type.Repetition.REPEATED) // key_value wrapper
-      .primitive(mapKeyParquetType.primitiveType, Type.Repetition.REQUIRED).as(mapKeyParquetType.originalType).named("key");
+      .primitive(mapKeyParquetType.primitiveType, Type.Repetition.REQUIRED).as(mapKeyParquetType.logicalTypeAnnotation).named("key");
 
     return addField(fields.get(1), group).named("value")
       .named("key_value");
@@ -173,8 +174,8 @@ private ParquetType getParquetType(FieldDescriptor fieldDescriptor) {
       case DOUBLE: return ParquetType.of(DOUBLE);
       case BOOLEAN: return ParquetType.of(BOOLEAN);
       case FLOAT: return ParquetType.of(FLOAT);
-      case STRING: return ParquetType.of(BINARY, UTF8);
-      case ENUM: return ParquetType.of(BINARY, ENUM);
+      case STRING: return ParquetType.of(BINARY, stringType());
+      case ENUM: return ParquetType.of(BINARY, enumType());
       case BYTE_STRING: return ParquetType.of(BINARY);
       default:
         throw new UnsupportedOperationException("Cannot convert Protocol Buffer: unknown type " + javaType);
@@ -183,15 +184,15 @@ private ParquetType getParquetType(FieldDescriptor fieldDescriptor) {
 
   private static class ParquetType {
     PrimitiveTypeName primitiveType;
-    OriginalType originalType;
+    LogicalTypeAnnotation logicalTypeAnnotation;
 
-    private ParquetType(PrimitiveTypeName primitiveType, OriginalType originalType) {
+    private ParquetType(PrimitiveTypeName primitiveType, LogicalTypeAnnotation logicalTypeAnnotation) {
       this.primitiveType = primitiveType;
-      this.originalType = originalType;
+      this.logicalTypeAnnotation = logicalTypeAnnotation;
     }
 
-    public static ParquetType of(PrimitiveTypeName primitiveType, OriginalType originalType) {
-      return new ParquetType(primitiveType, originalType);
+    public static ParquetType of(PrimitiveTypeName primitiveType, LogicalTypeAnnotation logicalTypeAnnotation) {
+      return new ParquetType(primitiveType, logicalTypeAnnotation);
     }
 
     public static ParquetType of(PrimitiveTypeName primitiveType) {
diff --git a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java
index 59c236f312..7436b04c6e 100644
--- a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java
+++ b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java
@@ -1,4 +1,4 @@
-/* 
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -38,6 +38,9 @@
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Optional;
+
+import static java.util.Optional.ofNullable;
 
 /**
  * Implementation of {@link WriteSupport} for writing Protocol Buffers.
@@ -216,15 +219,21 @@ private FieldWriter createMessageWriter(FieldDescriptor fieldDescriptor, Type ty
     }
 
     private GroupType getGroupType(Type type) {
-      if (type.getOriginalType() == OriginalType.LIST) {
-        return type.asGroupType().getType("list").asGroupType().getType("element").asGroupType();
-      }
-
-      if (type.getOriginalType() == OriginalType.MAP) {
-        return type.asGroupType().getType("key_value").asGroupType().getType("value").asGroupType();
+      LogicalTypeAnnotation logicalTypeAnnotation = type.getLogicalTypeAnnotation();
+      if (logicalTypeAnnotation == null) {
+        return type.asGroupType();
       }
+      return logicalTypeAnnotation.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<GroupType>() {
+        @Override
+        public Optional<GroupType> visit(LogicalTypeAnnotation.ListLogicalTypeAnnotation listLogicalType) {
+          return ofNullable(type.asGroupType().getType("list").asGroupType().getType("element").asGroupType());
+        }
 
-      return type.asGroupType();
+        @Override
+        public Optional<GroupType> visit(LogicalTypeAnnotation.MapLogicalTypeAnnotation mapLogicalType) {
+          return ofNullable(type.asGroupType().getType("key_value").asGroupType().getType("value").asGroupType());
+        }
+      }).orElse(type.asGroupType());
     }
 
     private MapWriter createMapWriter(FieldDescriptor fieldDescriptor, Type type) {
diff --git a/parquet-thrift/pom.xml b/parquet-thrift/pom.xml
index 51a6b9b17f..4340430b0a 100644
--- a/parquet-thrift/pom.xml
+++ b/parquet-thrift/pom.xml
@@ -144,6 +144,17 @@
 
   </dependencies>
 
+  <dependencyManagement>
+    <dependencies>
+      <!-- com.twitter.elephantbird brings in an older version of libthrift, so we force the use of our own version -->
+      <dependency>
+        <groupId>org.apache.thrift</groupId>
+        <artifactId>libthrift</artifactId>
+        <version>${thrift.version}</version>
+      </dependency>
+    </dependencies>
+  </dependencyManagement>
+
   <build>
     <plugins>
       <plugin>
diff --git a/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConvertVisitor.java b/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConvertVisitor.java
index 1185382e01..7bfcdb1adf 100644
--- a/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConvertVisitor.java
+++ b/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConvertVisitor.java
@@ -1,4 +1,4 @@
-/* 
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -23,8 +23,8 @@
 
 import org.apache.parquet.ShouldNeverHappenException;
 import org.apache.parquet.schema.GroupType;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
 import org.apache.parquet.schema.MessageType;
-import org.apache.parquet.schema.OriginalType;
 import org.apache.parquet.schema.PrimitiveType;
 import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
 import org.apache.parquet.schema.Type;
@@ -55,8 +55,8 @@
 import static org.apache.parquet.Preconditions.checkNotNull;
 import static org.apache.parquet.schema.ConversionPatterns.listType;
 import static org.apache.parquet.schema.ConversionPatterns.mapType;
-import static org.apache.parquet.schema.OriginalType.ENUM;
-import static org.apache.parquet.schema.OriginalType.UTF8;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.enumType;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.stringType;
 import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
 import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BOOLEAN;
 import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.DOUBLE;
@@ -278,7 +278,7 @@ private ConvertedField visitPrimitiveType(PrimitiveTypeName type, State state) {
     return visitPrimitiveType(type, null, state);
   }
 
-  private ConvertedField visitPrimitiveType(PrimitiveTypeName type, OriginalType orig, State state) {
+  private ConvertedField visitPrimitiveType(PrimitiveTypeName type, LogicalTypeAnnotation orig, State state) {
     PrimitiveBuilder<PrimitiveType> b = primitive(type, state.repetition);
 
     if (orig != null) {
@@ -294,7 +294,7 @@ private ConvertedField visitPrimitiveType(PrimitiveTypeName type, OriginalType o
 
   @Override
   public ConvertedField visit(EnumType enumType, State state) {
-    return visitPrimitiveType(BINARY, ENUM, state);
+    return visitPrimitiveType(BINARY, enumType(), state);
   }
 
   @Override
@@ -329,7 +329,7 @@ public ConvertedField visit(I64Type i64Type, State state) {
 
   @Override
   public ConvertedField visit(StringType stringType, State state) {
-    return stringType.isBinary() ? visitPrimitiveType(BINARY, state) : visitPrimitiveType(BINARY, UTF8, state);
+    return stringType.isBinary() ? visitPrimitiveType(BINARY, state) : visitPrimitiveType(BINARY, stringType(), state);
   }
 
   private static boolean isUnion(StructOrUnionType s) {
diff --git a/parquet-tools/pom.xml b/parquet-tools/pom.xml
index 566f8f1c31..32ee4d8eda 100644
--- a/parquet-tools/pom.xml
+++ b/parquet-tools/pom.xml
@@ -48,8 +48,8 @@
   <dependencies>
     <dependency>
       <groupId>org.apache.parquet</groupId>
-      <artifactId>parquet-format</artifactId>
-      <version>${parquet.format.version}</version>
+      <artifactId>parquet-format-structures</artifactId>
+      <version>${project.version}</version>
     </dependency>
     <dependency>
       <groupId>org.apache.parquet</groupId>
diff --git a/parquet-tools/src/main/java/org/apache/parquet/tools/command/ColumnIndexCommand.java b/parquet-tools/src/main/java/org/apache/parquet/tools/command/ColumnIndexCommand.java
new file mode 100644
index 0000000000..cbbd8a1faa
--- /dev/null
+++ b/parquet-tools/src/main/java/org/apache/parquet/tools/command/ColumnIndexCommand.java
@@ -0,0 +1,182 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.tools.command;
+
+import java.io.PrintWriter;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.Options;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.parquet.hadoop.ParquetFileReader;
+import org.apache.parquet.hadoop.metadata.BlockMetaData;
+import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
+import org.apache.parquet.hadoop.util.HadoopInputFile;
+import org.apache.parquet.internal.column.columnindex.ColumnIndex;
+import org.apache.parquet.internal.column.columnindex.OffsetIndex;
+import org.apache.parquet.io.InputFile;
+import org.apache.parquet.tools.Main;
+
+/**
+ * parquet-tools command to print column and offset indexes.
+ */
+public class ColumnIndexCommand extends ArgsOnlyCommand {
+  public static final String[] USAGE = new String[] {
+      "<input>",
+      "where <input> is the parquet file to print the column and offset indexes for"
+  };
+
+  /** Options of this command: -c and -r take a comma-separated value, -i and -o are flags. */
+  public static final Options OPTIONS;
+  static {
+    OPTIONS = new Options();
+    OPTIONS.addOption(Option.builder("c")
+        .longOpt("column")
+        .desc("Shows the column/offset indexes for the given column only; "
+            + "multiple columns shall be separated by commas")
+        .hasArg()
+        .build());
+    OPTIONS.addOption(Option.builder("r")
+        .longOpt("row-group")
+        .desc("Shows the column/offset indexes for the given row-groups only; "
+            + "multiple row-groups shall be separated by commas; "
+            + "row-groups are referenced by their indexes from 0")
+        .hasArg()
+        .build());
+    // -i and -o take no argument; execute() shows both index kinds when neither is given
+    OPTIONS.addOption(Option.builder("i")
+        .longOpt("column-index")
+        .desc("Shows the column indexes; active by default unless -o is used")
+        .hasArg(false)
+        .build());
+    OPTIONS.addOption(Option.builder("o")
+        .longOpt("offset-index")
+        .desc("Shows the offset indexes; active by default unless -i is used")
+        .hasArg(false)
+        .build());
+  }
+
+  public ColumnIndexCommand() {
+    super(1, 1);
+  }
+
+  @Override
+  public String[] getUsageDescription() {
+    return USAGE;
+  }
+
+  @Override
+  public String getCommandDescription() {
+    return "Prints the column and offset indexes of a Parquet file.";
+  }
+
+  @Override
+  public Options getOptions() {
+    return OPTIONS;
+  }
+
+  /**
+   * Prints the column and/or offset indexes of the selected row groups and columns of the input file.
+   *
+   * @param options the parsed command line; its first positional argument is used as the input path
+   * @throws Exception if {@code super.execute} or reading the input file fails
+   */
+  @Override
+  public void execute(CommandLine options) throws Exception {
+    super.execute(options);
+
+    String[] args = options.getArgs();
+    InputFile in = HadoopInputFile.fromPath(new Path(args[0]), new Configuration());
+    PrintWriter out = new PrintWriter(Main.out, true);
+    String rowGroupValue = options.getOptionValue("r");
+    Set<String> indexes = new HashSet<>();
+    if (rowGroupValue != null) {
+      indexes.addAll(Arrays.asList(rowGroupValue.split("\\s*,\\s*")));
+    }
+    boolean showColumnIndex = options.hasOption("i");
+    boolean showOffsetIndex = options.hasOption("o");
+    if (!showColumnIndex && !showOffsetIndex) {
+      showColumnIndex = true;
+      showOffsetIndex = true;
+    }
+
+    try (ParquetFileReader reader = ParquetFileReader.open(in)) {
+      boolean firstBlock = true;
+      int rowGroupIndex = 0;
+      for (BlockMetaData block : reader.getFooter().getBlocks()) {
+        if (!indexes.isEmpty() && !indexes.contains(Integer.toString(rowGroupIndex))) {
+          ++rowGroupIndex;
+          continue;
+        }
+        // Separate consecutive printed row groups with a blank line (none before the first one)
+        if (!firstBlock) {
+          out.println();
+        }
+        firstBlock = false;
+        out.format("row group %d:%n", rowGroupIndex);
+        for (ColumnChunkMetaData column : getColumns(block, options)) {
+          String path = column.getPath().toDotString();
+          if (showColumnIndex) {
+            out.format("column index for column %s:%n", path);
+            ColumnIndex columnIndex = reader.readColumnIndex(column);
+            // a null index is reported as "NONE"
+            out.println(columnIndex == null ? "NONE" : columnIndex);
+          }
+          if (showOffsetIndex) {
+            out.format("offset index for column %s:%n", path);
+            OffsetIndex offsetIndex = reader.readOffsetIndex(column);
+            out.println(offsetIndex == null ? "NONE" : offsetIndex);
+          }
+        }
+        ++rowGroupIndex;
+      }
+    }
+  }
+
+  /** Returns the block's column chunks, limited to and ordered by the -c paths when that option is set. */
+  private static List<ColumnChunkMetaData> getColumns(BlockMetaData block, CommandLine options) {
+    List<ColumnChunkMetaData> allColumns = block.getColumns();
+    String requestedPaths = options.getOptionValue("c");
+    if (requestedPaths == null) {
+      return allColumns;
+    }
+    // Index the chunks by dotted path; requested paths with no matching chunk are silently dropped
+    Map<String, ColumnChunkMetaData> byPath = new HashMap<>();
+    for (ColumnChunkMetaData chunk : allColumns) {
+      byPath.put(chunk.getPath().toDotString(), chunk);
+    }
+    List<ColumnChunkMetaData> selected = new ArrayList<>();
+    for (String requested : requestedPaths.split("\\s*,\\s*")) {
+      ColumnChunkMetaData match = byPath.get(requested);
+      if (match != null) {
+        selected.add(match);
+      }
+    }
+    return selected;
+  }
+
+}
diff --git a/parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java b/parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java
index 26b5562ff5..27043b9480 100644
--- a/parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java
+++ b/parquet-tools/src/main/java/org/apache/parquet/tools/command/DumpCommand.java
@@ -58,7 +58,6 @@
 import org.apache.parquet.io.api.PrimitiveConverter;
 import org.apache.parquet.schema.MessageType;
 import org.apache.parquet.schema.PrimitiveStringifier;
-import org.apache.parquet.tools.util.MetadataUtils;
 import org.apache.parquet.tools.util.PrettyPrintWriter;
 import org.apache.parquet.tools.util.PrettyPrintWriter.WhiteSpaceHandler;
 
diff --git a/parquet-tools/src/main/java/org/apache/parquet/tools/command/MergeCommand.java b/parquet-tools/src/main/java/org/apache/parquet/tools/command/MergeCommand.java
index fe64587560..6d5b31380f 100644
--- a/parquet-tools/src/main/java/org/apache/parquet/tools/command/MergeCommand.java
+++ b/parquet-tools/src/main/java/org/apache/parquet/tools/command/MergeCommand.java
@@ -19,20 +19,29 @@
 package org.apache.parquet.tools.command;
 
 import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.Options;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.parquet.hadoop.CodecFactory;
+import org.apache.parquet.hadoop.metadata.CompressionCodecName;
 import org.apache.parquet.hadoop.util.HadoopInputFile;
 import org.apache.parquet.hadoop.util.HiddenFileFilter;
 import org.apache.parquet.hadoop.ParquetFileWriter;
 import org.apache.parquet.hadoop.metadata.FileMetaData;
+import org.apache.parquet.io.InputFile;
 import org.apache.parquet.tools.Main;
 
 import java.io.IOException;
 import java.io.PrintWriter;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.stream.Collectors;
+
+import static org.apache.parquet.hadoop.ParquetWriter.DEFAULT_BLOCK_SIZE;
+import static org.apache.parquet.hadoop.ParquetWriter.DEFAULT_PAGE_SIZE;
 
 public class MergeCommand extends ArgsOnlyCommand {
   public static final String[] USAGE = new String[] {
@@ -49,12 +58,43 @@ public class MergeCommand extends ArgsOnlyCommand {
 
   private Configuration conf;
 
+  private static final Options OPTIONS;
+  static {
+    OPTIONS = new Options();
+
+    Option block = Option.builder("b")
+      .longOpt("block")
+      .desc("Merge adjacent blocks into one up to upper bound size limit default to 128 MB")
+      .build();
+
+    Option limit = Option.builder("l")
+      .longOpt("limit")
+      .desc("Upper bound for merged block size in megabytes. Default: 128 MB")
+      .hasArg()
+      .build();
+
+    Option codec = Option.builder("c")
+      .longOpt("codec")
+      .desc("Compression codec name. Default: SNAPPY. Valid values: UNCOMPRESSED, SNAPPY, GZIP, LZO, BROTLI, LZ4, ZSTD")
+      .hasArg()
+      .build();
+
+    OPTIONS.addOption(limit);
+    OPTIONS.addOption(block);
+    OPTIONS.addOption(codec);
+  }
+
   public MergeCommand() {
     super(2, MAX_FILE_NUM + 1);
 
     conf = new Configuration();
   }
 
+  @Override
+  public Options getOptions() {
+    return OPTIONS;
+  }
+
   @Override
   public String[] getUsageDescription() {
     return USAGE;
@@ -63,18 +103,49 @@ public String[] getUsageDescription() {
   @Override
   public String getCommandDescription() {
     return "Merges multiple Parquet files into one. " +
-      "The command doesn't merge row groups, just places one after the other. " +
+      "Without -b option the command doesn't merge row groups, just places one after the other. " +
       "When used to merge many small files, the resulting file will still contain small row groups, " +
-      "which usually leads to bad query performance.";
+      "which usually leads to bad query performance. " +
+      "To have adjacent small blocks merged together use -b option. " +
+      "Blocks will be grouped into larger one until the upper bound is reached. " +
+      "Default block upper bound 128 MB and default compression SNAPPY can be customized using -l and -c options";
   }
 
+  /**
+   * Merges the input files (all arguments but the last) into the output file (the last argument).
+   * With -b the files are passed to mergeBlocks together with the block size limit (-l, in megabytes)
+   * and a compressor for the codec (-c); without -b they are passed to mergeFiles.
+   *
+   * @throws IllegalArgumentException if the -l limit is not positive or its size in bytes does not fit in an int
+   */
   @Override
   public void execute(CommandLine options) throws Exception {
+    boolean mergeBlocks = options.hasOption('b');
+    int maxBlockSize = DEFAULT_BLOCK_SIZE;
+    if (options.hasOption('l')) {
+      int limitInMegabytes = Integer.parseInt(options.getOptionValue('l'));
+      int maxLimitInMegabytes = Integer.MAX_VALUE / (1024 * 1024);
+      // limit * 1024 * 1024 silently wraps around for values above 2047, so reject them up front
+      if (limitInMegabytes <= 0 || limitInMegabytes > maxLimitInMegabytes) {
+        throw new IllegalArgumentException(
+          "Block size limit must be between 1 and " + maxLimitInMegabytes + " MB but was " + limitInMegabytes);
+      }
+      maxBlockSize = limitInMegabytes * 1024 * 1024;
+    }
+    CompressionCodecName compressionCodec = options.hasOption('c') ? CompressionCodecName.valueOf(options.getOptionValue('c')) : CompressionCodecName.SNAPPY;
     // Prepare arguments
     List<String> args = options.getArgList();
     List<Path> inputFiles = getInputFiles(args.subList(0, args.size() - 1));
     Path outputFile = new Path(args.get(args.size() - 1));
+    if (mergeBlocks) {
+      CodecFactory.BytesCompressor compressor = new CodecFactory(conf, DEFAULT_PAGE_SIZE).getCompressor(compressionCodec);
+      mergeBlocks(maxBlockSize, compressor, inputFiles, outputFile);
+    } else {
+      mergeFiles(inputFiles, outputFile);
+    }
+  }
 
+  private void mergeFiles(List<Path> inputFiles, Path outputFile) throws IOException {
     // Merge schema and extraMeta
     FileMetaData mergedMeta = mergedMetadata(inputFiles);
     PrintWriter out = new PrintWriter(Main.out, true);
@@ -103,6 +157,23 @@ public void execute(CommandLine options) throws Exception {
     writer.end(mergedMeta.getKeyValueMetaData());
   }
 
+  /**
+   * Opens each input path as a HadoopInputFile and passes them to ParquetFileWriter.merge with the
+   * compressor and maxBlockSize; an IOException raised while opening an input propagates unwrapped.
+   */
+  private void mergeBlocks(int maxBlockSize, CodecFactory.BytesCompressor compressor, List<Path> inputFiles, Path outputFile) throws IOException {
+    // Merge schema and extraMeta
+    FileMetaData mergedMeta = mergedMetadata(inputFiles);
+
+    // Merge data
+    ParquetFileWriter writer = new ParquetFileWriter(conf, mergedMeta.getSchema(), outputFile, ParquetFileWriter.Mode.CREATE);
+    List<InputFile> inputFileList = new ArrayList<>(inputFiles.size());
+    for (Path input : inputFiles) {
+      inputFileList.add(HadoopInputFile.fromPath(input, conf));
+    }
+    writer.merge(inputFileList, compressor, mergedMeta.getCreatedBy(), maxBlockSize);
+  }
+
   private FileMetaData mergedMetadata(List<Path> inputFiles) throws IOException {
     return ParquetFileWriter.mergeMetadataFiles(inputFiles, conf).getFileMetaData();
   }
diff --git a/parquet-tools/src/main/java/org/apache/parquet/tools/command/MetadataUtils.java b/parquet-tools/src/main/java/org/apache/parquet/tools/command/MetadataUtils.java
new file mode 100644
index 0000000000..0bade37002
--- /dev/null
+++ b/parquet-tools/src/main/java/org/apache/parquet/tools/command/MetadataUtils.java
@@ -0,0 +1,212 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.tools.command;
+
+import com.google.common.base.Joiner;
+import com.google.common.base.Strings;
+import org.apache.parquet.column.ColumnDescriptor;
+import org.apache.parquet.column.statistics.Statistics;
+import org.apache.parquet.hadoop.metadata.BlockMetaData;
+import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
+import org.apache.parquet.hadoop.metadata.FileMetaData;
+import org.apache.parquet.hadoop.metadata.ParquetMetadata;
+import org.apache.parquet.schema.GroupType;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
+import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.OriginalType;
+import org.apache.parquet.schema.PrimitiveType;
+import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
+import org.apache.parquet.schema.Type;
+import org.apache.parquet.schema.Type.Repetition;
+import org.apache.parquet.tools.util.PrettyPrintWriter;
+
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+class MetadataUtils {
+  // Prints the file-level metadata, then every row group preceded by an empty line and numbered from 1.
+  static void showDetails(PrettyPrintWriter out, ParquetMetadata meta, boolean showOriginalTypes) {
+    showDetails(out, meta.getFileMetaData(), showOriginalTypes);
+    long rowGroupNumber = 0;
+    for (BlockMetaData rowGroup : meta.getBlocks()) {
+      out.println();
+      showDetails(out, rowGroup, ++rowGroupNumber);
+    }
+  }
+
+  // Prints the creator, each key/value metadata entry as an "extra:" line, and then the file schema.
+  static void showDetails(PrettyPrintWriter out, FileMetaData meta, boolean showOriginalTypes) {
+    out.format("creator: %s%n", meta.getCreatedBy());
+    Map<String,String> keyValues = meta.getKeyValueMetaData();
+    if (keyValues != null) {
+      for (Map.Entry<String,String> kv : keyValues.entrySet()) {
+        out.print("extra: ");
+        out.incrementTabLevel();
+        out.format("%s = %s%n", kv.getKey(), kv.getValue());
+        out.decrementTabLevel();
+      }
+    }
+
+    out.println();
+    out.format("file schema: %s%n", meta.getSchema().getName());
+    out.rule('-');
+    showDetails(out, meta.getSchema(), showOriginalTypes);
+  }
+
+  // Prints the "row group" summary line (RC = row count, TS = total byte size, OFFSET = starting
+  // position; the number is omitted when num is null) and then the details of its column chunks.
+  private static void showDetails(PrettyPrintWriter out, BlockMetaData meta, Long num) {
+    String label = (num == null) ? "" : " " + num;
+    out.format("row group%s: RC:%d TS:%d OFFSET:%d%n", label,
+        meta.getRowCount(), meta.getTotalByteSize(), meta.getStartingPos());
+    out.rule('-');
+    showDetails(out, meta.getColumns());
+  }
+
+  // Groups the column chunks into a tree of insertion-ordered maps keyed by path segment
+  // (inner segments map to nested maps, the last segment maps to the chunk) and prints the tree.
+  static void showDetails(PrettyPrintWriter out, List<ColumnChunkMetaData> ccmeta) {
+    Map<String,Object> root = new LinkedHashMap<String,Object>();
+    for (ColumnChunkMetaData chunk : ccmeta) {
+      String[] segments = chunk.getPath().toArray();
+      int last = segments.length - 1;
+      Map<String,Object> node = root;
+      for (int level = 0; level < last; ++level) {
+        Object child = node.get(segments[level]);
+        if (child == null) {
+          child = new LinkedHashMap<String,Object>();
+          node.put(segments[level], child);
+        }
+        node = (Map<String,Object>)child;
+      }
+      node.put(segments[last], chunk);
+    }
+    showColumnChunkDetails(out, root, 0);
+  }
+
+  // Walks the tree built from the column paths: nested maps are printed as "name: " headers and
+  // recursed into one level deeper, leaves are printed as one-line column chunk summaries.
+  private static void showColumnChunkDetails(PrettyPrintWriter out, Map<String,Object> current, int depth) {
+    for (Map.Entry<String,Object> entry : current.entrySet()) {
+      String label = Strings.repeat(".", depth) + entry.getKey() + ": ";
+      if (entry.getValue() instanceof Map) {
+        out.println(label);
+        showColumnChunkDetails(out, (Map<String,Object>)entry.getValue(), depth + 1);
+      } else {
+        out.print(label);
+        showDetails(out, (ColumnChunkMetaData)entry.getValue(), false);
+      }
+    }
+  }
+
+  // Prints one column chunk on a single line: type, codec, DO (dictionary page offset), FPO (first
+  // data page offset), SZ (total/uncompressed size and uncompressed-to-total ratio), VC (value
+  // count), ENC (encodings, when any) and ST (statistics); prefixed by the dot path when name is set.
+  private static void showDetails(PrettyPrintWriter out, ColumnChunkMetaData meta, boolean name) {
+    long tsize = meta.getTotalSize();
+    long usize = meta.getTotalUncompressedSize();
+    String encodings = Joiner.on(',').skipNulls().join(meta.getEncodings());
+
+    if (name) {
+      out.format("%s: ", Joiner.on('.').skipNulls().join(meta.getPath()));
+    }
+
+    out.format(" %s", meta.getType());
+    out.format(" %s", meta.getCodec());
+    out.format(" DO:%d", meta.getDictionaryPageOffset());
+    out.format(" FPO:%d", meta.getFirstDataPageOffset());
+    out.format(" SZ:%d/%d/%.2f", tsize, usize, usize / (double)tsize);
+    out.format(" VC:%d", meta.getValueCount());
+    if (!encodings.isEmpty()) {
+      out.format(" ENC:%s", encodings);
+    }
+    Statistics<?> stats = meta.getStatistics();
+    if (stats == null) {
+      out.format(" ST:[none]");
+    } else {
+      out.format(" ST:[%s]", stats);
+    }
+    out.println();
+  }
+
+  // Prints every top-level field of the schema at depth 0, sharing one column path accumulator.
+  static void showDetails(PrettyPrintWriter out, MessageType type, boolean showOriginalTypes) {
+    List<String> pathSoFar = new ArrayList<String>();
+    // the schema itself is passed along as the container argument of the per-field overloads
+    type.getFields().forEach(field -> showDetails(out, field, 0, type, pathSoFar, showOriginalTypes));
+  }
+
+  // Prints "<dots><name>: <repetition> F:<field count>" for the group, then its fields one level
+  // deeper; the group name is pushed onto cpath only while its children are printed.
+  private static void showDetails(PrettyPrintWriter out, GroupType type, int depth, MessageType container, List<String> cpath, boolean showOriginalTypes) {
+    String label = Strings.repeat(".", depth) + type.getName();
+    out.format("%s: %s F:%d%n", label, type.getRepetition(), type.getFieldCount());
+
+    cpath.add(type.getName());
+    for (Type child : type.getFields()) {
+      showDetails(out, child, depth + 1, container, cpath, showOriginalTypes);
+    }
+    cpath.remove(cpath.size() - 1);
+  }
+
+  // Prints "<dots><name>: <repetition> <primitive type>", then either " O:<original type>" (when
+  // showOriginalTypes is set) or " L:<logical type annotation>", and, when a container schema is
+  // given, the max repetition/definition levels of the column as " R:<n> D:<n>".
+  private static void showDetails(PrettyPrintWriter out, PrimitiveType type, int depth, MessageType container, List<String> cpath, boolean showOriginalTypes) {
+    String label = Strings.repeat(".", depth) + type.getName();
+    out.format("%s: %s %s", label, type.getRepetition(), type.getPrimitiveTypeName());
+
+    if (showOriginalTypes) {
+      OriginalType otype = null;
+      try {
+        otype = type.getOriginalType();
+      } catch (Exception e) {
+        // deliberately ignored: a failing lookup is treated as "no original type"
+      }
+      if (otype != null) {
+        out.format(" O:%s", otype);
+      }
+    } else {
+      LogicalTypeAnnotation ltype = type.getLogicalTypeAnnotation();
+      if (ltype != null) {
+        out.format(" L:%s", ltype);
+      }
+    }
+    if (container != null) {
+      cpath.add(type.getName());
+      String[] fullPath = cpath.toArray(new String[cpath.size()]);
+      cpath.remove(cpath.size() - 1);
+      ColumnDescriptor desc = container.getColumnDescription(fullPath);
+      out.format(" R:%d D:%d", desc.getMaxRepetitionLevel(), desc.getMaxDefinitionLevel());
+    }
+    out.println();
+  }
+
+  // Dispatches on the runtime kind of the field: groups go to the GroupType overload, primitives
+  // to the PrimitiveType overload; any other Type implementation prints nothing.
+  private static void showDetails(PrettyPrintWriter out, Type type, int depth, MessageType container, List<String> cpath, boolean showOriginalTypes) {
+    if (type instanceof GroupType) {
+      showDetails(out, type.asGroupType(), depth, container, cpath, showOriginalTypes);
+    } else if (type instanceof PrimitiveType) {
+      showDetails(out, type.asPrimitiveType(), depth, container, cpath, showOriginalTypes);
+    }
+  }
+}
diff --git a/parquet-tools/src/main/java/org/apache/parquet/tools/command/Registry.java b/parquet-tools/src/main/java/org/apache/parquet/tools/command/Registry.java
index 6df84be37a..399efb7316 100644
--- a/parquet-tools/src/main/java/org/apache/parquet/tools/command/Registry.java
+++ b/parquet-tools/src/main/java/org/apache/parquet/tools/command/Registry.java
@@ -34,6 +34,7 @@ public final class Registry {
     registry.put("merge", MergeCommand.class);
     registry.put("rowcount", RowCountCommand.class);
     registry.put("size", SizeCommand.class);
+    registry.put("column-index", ColumnIndexCommand.class);
   }
 
   public static Map<String,Command> allCommands() {
diff --git a/parquet-tools/src/main/java/org/apache/parquet/tools/command/ShowMetaCommand.java b/parquet-tools/src/main/java/org/apache/parquet/tools/command/ShowMetaCommand.java
index 8d35551525..b07fa7a693 100644
--- a/parquet-tools/src/main/java/org/apache/parquet/tools/command/ShowMetaCommand.java
+++ b/parquet-tools/src/main/java/org/apache/parquet/tools/command/ShowMetaCommand.java
@@ -1,4 +1,4 @@
-/* 
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -19,13 +19,15 @@
 package org.apache.parquet.tools.command;
 
 import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.cli.Options;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
 
 import org.apache.parquet.hadoop.Footer;
 import org.apache.parquet.hadoop.ParquetFileReader;
-import org.apache.parquet.tools.util.MetadataUtils;
 import org.apache.parquet.tools.util.PrettyPrintWriter;
 import org.apache.parquet.tools.util.PrettyPrintWriter.WhiteSpaceHandler;
 
@@ -37,6 +39,15 @@ public class ShowMetaCommand extends ArgsOnlyCommand {
     "where <input> is the parquet file to print to stdout"
   };
 
+  public static final Options OPTIONS;
+  static {
+    OPTIONS = new Options();
+    Option originalType = OptionBuilder.withLongOpt("originalType")
+      .withDescription("Print logical types in OriginalType representation.")
+      .create('o');
+    OPTIONS.addOption(originalType);
+  }
+
   public ShowMetaCommand() {
     super(1, 1);
   }
@@ -51,13 +62,19 @@ public String getCommandDescription() {
     return "Prints the metadata of Parquet file(s)";
   }
 
+  @Override
+  public Options getOptions() {
+    return OPTIONS;
+  }
+
   @Override
   public void execute(CommandLine options) throws Exception {
     super.execute(options);
 
     String[] args = options.getArgs();
     String input = args[0];
-    
+    boolean showOriginalTypes = options.hasOption('o');
+
     Configuration conf = new Configuration();
     Path inputPath = new Path(input);
     FileStatus inputFileStatus = inputPath.getFileSystem(conf).getFileStatus(inputPath);
@@ -71,7 +88,7 @@ public void execute(CommandLine options) throws Exception {
 
     for(Footer f: footers) {
       out.format("file: %s%n" , f.getFile());
-      MetadataUtils.showDetails(out, f.getParquetMetadata());
+      MetadataUtils.showDetails(out, f.getParquetMetadata(), showOriginalTypes);
       out.flushColumns();
     }
   }
diff --git a/parquet-tools/src/main/java/org/apache/parquet/tools/command/ShowSchemaCommand.java b/parquet-tools/src/main/java/org/apache/parquet/tools/command/ShowSchemaCommand.java
index d83e5649e4..6f83857b3b 100644
--- a/parquet-tools/src/main/java/org/apache/parquet/tools/command/ShowSchemaCommand.java
+++ b/parquet-tools/src/main/java/org/apache/parquet/tools/command/ShowSchemaCommand.java
@@ -1,4 +1,4 @@
-/* 
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -32,7 +32,6 @@
 import org.apache.parquet.hadoop.util.HiddenFileFilter;
 import org.apache.parquet.schema.MessageType;
 import org.apache.parquet.tools.Main;
-import org.apache.parquet.tools.util.MetadataUtils;
 import org.apache.parquet.tools.util.PrettyPrintWriter;
 
 import static org.apache.parquet.format.converter.ParquetMetadataConverter.NO_FILTER;
@@ -49,7 +48,11 @@ public class ShowSchemaCommand extends ArgsOnlyCommand {
     Option help = OptionBuilder.withLongOpt("detailed")
                                .withDescription("Show detailed information about the schema.")
                                .create('d');
+    Option originalType = OptionBuilder.withLongOpt("originalType")
+      .withDescription("Print logical types in OriginalType representation.")
+      .create('o');
     OPTIONS.addOption(help);
+    OPTIONS.addOption(originalType);
   }
 
   public ShowSchemaCommand() {
@@ -98,8 +101,9 @@ public void execute(CommandLine options) throws Exception {
 
     Main.out.println(schema);
     if (options.hasOption('d')) {
+      boolean showOriginalTypes = options.hasOption('o');
       PrettyPrintWriter out = PrettyPrintWriter.stdoutPrettyPrinter().build();
-      MetadataUtils.showDetails(out, metaData);
+      MetadataUtils.showDetails(out, metaData, showOriginalTypes);
     }
   }
 }
diff --git a/parquet-tools/src/main/java/org/apache/parquet/tools/read/SimpleRecordConverter.java b/parquet-tools/src/main/java/org/apache/parquet/tools/read/SimpleRecordConverter.java
index a119a347e7..7a1c81d6f8 100644
--- a/parquet-tools/src/main/java/org/apache/parquet/tools/read/SimpleRecordConverter.java
+++ b/parquet-tools/src/main/java/org/apache/parquet/tools/read/SimpleRecordConverter.java
@@ -1,4 +1,4 @@
-/* 
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -20,15 +20,18 @@
 
 import java.math.BigDecimal;
 import java.math.BigInteger;
+import java.util.Optional;
 
 import org.apache.parquet.io.api.Binary;
 import org.apache.parquet.io.api.Converter;
 import org.apache.parquet.io.api.GroupConverter;
 import org.apache.parquet.io.api.PrimitiveConverter;
 import org.apache.parquet.schema.GroupType;
-import org.apache.parquet.schema.OriginalType;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
 import org.apache.parquet.schema.Type;
 
+import static java.util.Optional.of;
+
 public class SimpleRecordConverter extends GroupConverter {
   private final Converter converters[];
   private final String name;
@@ -51,31 +54,47 @@ public SimpleRecordConverter(GroupType schema, String name, SimpleRecordConverte
   }
 
+  /**
+   * Creates the converter for a field. Primitive fields annotated as string or decimal get a
+   * StringConverter or DecimalConverter, every other primitive (annotated or not) gets a
+   * SimplePrimitiveConverter. Group fields annotated as map or list get the matching
+   * map/list record converter, every other group gets a plain SimpleRecordConverter.
+   */
   private Converter createConverter(Type field) {
-    OriginalType otype = field.getOriginalType();
+    LogicalTypeAnnotation ltype = field.getLogicalTypeAnnotation();
 
     if (field.isPrimitive()) {
-      if (otype != null) {
-        switch (otype) {
-          case MAP: break;
-          case LIST: break;
-          case UTF8: return new StringConverter(field.getName());
-          case MAP_KEY_VALUE: break;
-          case ENUM: break;
-          case DECIMAL:
-            int scale = field.asPrimitiveType().getDecimalMetadata().getScale();
-            return new DecimalConverter(field.getName(), scale);
-        }
+      if (ltype != null) {
+        return ltype.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<Converter>() {
+          @Override
+          public Optional<Converter> visit(LogicalTypeAnnotation.StringLogicalTypeAnnotation stringLogicalType) {
+            return of(new StringConverter(field.getName()));
+          }
+
+          @Override
+          public Optional<Converter> visit(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) {
+            int scale = decimalLogicalType.getScale();
+            return of(new DecimalConverter(field.getName(), scale));
+          }
+        }).orElseGet(() -> new SimplePrimitiveConverter(field.getName()));
       }
 
+      // A primitive without a logical type must return here; falling through would call asGroupType() on it
       return new SimplePrimitiveConverter(field.getName());
     }
 
     GroupType groupType = field.asGroupType();
-    if (otype != null) {
-      switch (otype) {
-        case MAP: return new SimpleMapRecordConverter(groupType, field.getName(), this);
-        case LIST: return new SimpleListRecordConverter(groupType, field.getName(), this);
-      }
+    if (ltype != null) {
+      return ltype.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<Converter>() {
+        @Override
+        public Optional<Converter> visit(LogicalTypeAnnotation.MapLogicalTypeAnnotation mapLogicalType) {
+          return of(new SimpleMapRecordConverter(groupType, field.getName(), SimpleRecordConverter.this));
+        }
+
+        @Override
+        public Optional<Converter> visit(LogicalTypeAnnotation.ListLogicalTypeAnnotation listLogicalType) {
+          return of(new SimpleListRecordConverter(groupType, field.getName(), SimpleRecordConverter.this));
+        }
+      }).orElseGet(() -> new SimpleRecordConverter(groupType, field.getName(), this));
     }
     return new SimpleRecordConverter(groupType, field.getName(), this);
   }
@@ -162,6 +172,16 @@ public DecimalConverter(String name, int scale) {
     public void addBinary(Binary value) {
       record.add(name, new BigDecimal(new BigInteger(value.getBytes()), scale));
     }
+
+    @Override
+    public void addInt(int value) {
+      record.add(name, BigDecimal.valueOf(value).movePointLeft(scale));
+    }
+
+    @Override
+    public void addLong(long value) {
+      record.add(name, BigDecimal.valueOf(value).movePointLeft(scale));
+    }
   }
 }
 
diff --git a/parquet-tools/src/main/java/org/apache/parquet/tools/util/MetadataUtils.java b/parquet-tools/src/main/java/org/apache/parquet/tools/util/MetadataUtils.java
index 870b8c18a0..206028a303 100644
--- a/parquet-tools/src/main/java/org/apache/parquet/tools/util/MetadataUtils.java
+++ b/parquet-tools/src/main/java/org/apache/parquet/tools/util/MetadataUtils.java
@@ -1,4 +1,4 @@
-/* 
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -40,6 +40,7 @@
 import org.apache.parquet.schema.Type;
 import org.apache.parquet.schema.Type.Repetition;
 
+@Deprecated
 public class MetadataUtils {
   public static final double BAD_COMPRESSION_RATIO_CUTOFF = 0.97;
   public static final double GOOD_COMPRESSION_RATIO_CUTOFF = 1.2;
@@ -163,7 +164,7 @@ public static void showDetails(PrettyPrintWriter out, ColumnDescriptor desc) {
     int defl = desc.getMaxDefinitionLevel();
     int repl = desc.getMaxRepetitionLevel();
 
-    out.format("column desc: %s T:%s R:%d D:%d%n", path, type, repl, defl); 
+    out.format("column desc: %s T:%s R:%d D:%d%n", path, type, repl, defl);
   }
 
   public static void showDetails(PrettyPrintWriter out, MessageType type) {
diff --git a/pom.xml b/pom.xml
index 6e6902847b..a6ba552caa 100644
--- a/pom.xml
+++ b/pom.xml
@@ -84,6 +84,7 @@
     <parquet.format.version>2.7.0-SNAPSHOT</parquet.format.version>
     <previous.version>1.7.0</previous.version>
     <thrift.executable>thrift</thrift.executable>
+    <format.thrift.executable>thrift</format.thrift.executable>
     <scala.version>2.10.6</scala.version>
     <!-- scala.binary.version is used for projects that fetch dependencies that are in scala -->
     <scala.binary.version>2.10</scala.binary.version>
@@ -92,6 +93,7 @@
     <pig.classifier>h2</pig.classifier>
     <thrift-maven-plugin.version>0.10.0</thrift-maven-plugin.version>
     <thrift.version>0.9.3</thrift.version>
+    <format.thrift.version>0.9.3</format.thrift.version>
     <fastutil.version>7.0.13</fastutil.version>
     <semver.api.version>0.9.33</semver.api.version>
     <slf4j.version>1.7.22</slf4j.version>
@@ -117,6 +119,7 @@
     <module>parquet-column</module>
     <module>parquet-common</module>
     <module>parquet-encoding</module>
+    <module>parquet-format-structures</module>
     <module>parquet-generator</module>
     <module>parquet-hadoop</module>
     <module>parquet-jackson</module>
@@ -175,6 +178,11 @@
             </reports>
           </reportSet>
         </reportSets>
+        <configuration>
+          <sourceFileExcludes>
+            <sourceFileExclude>**/generated-sources/**/*.java</sourceFileExclude>
+          </sourceFileExcludes>
+        </configuration>
       </plugin>
       <plugin>
         <groupId>org.codehaus.mojo</groupId>
@@ -213,13 +221,13 @@
             </execution>
           </executions>
         </plugin>
-        
+
         <plugin>
           <groupId>org.apache.maven.plugins</groupId>
           <artifactId>maven-resources-plugin</artifactId>
           <version>2.7</version>
         </plugin>
-              
+
         <plugin>
           <artifactId>maven-enforcer-plugin</artifactId>
           <version>1.3.1</version>
@@ -373,7 +381,7 @@
           </execution>
         </executions-->
       </plugin>
-      
+
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-resources-plugin</artifactId>
@@ -388,8 +396,8 @@
             <skip>true</skip>
           </configuration>
       </plugin>
-      
-            
+
+
       <plugin>
         <!-- Override source and target from the ASF parent -->
         <groupId>org.apache.maven.plugins</groupId>

From b8a0f5c2b6008b7bfeef3b78dd9b4a3a73eb9913 Mon Sep 17 00:00:00 2001
From: Junjie Chen <jimmyjchen@tencent.com>
Date: Sun, 21 Oct 2018 22:52:44 +0800
Subject: [PATCH 4/9] Fix conflicts after rebase and merge

---
 .../parquet/column/impl/ColumnWriterV2.java   |  2 +
 .../TestBlockSplitBloomFilter.java            |  2 +-
 .../parquet/hadoop/ParquetFileWriter.java     | 40 -------------------
 3 files changed, 3 insertions(+), 41 deletions(-)

diff --git a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterV2.java b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterV2.java
index f60c9b2da9..8e9e6f7fa2 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterV2.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterV2.java
@@ -60,6 +60,8 @@ public BytesInput getBytes() {
     super(path, pageWriter, props);
   }
 
+  private static final ValuesWriter NULL_WRITER = new DevNullValuesWriter();
+
   ColumnWriterV2(ColumnDescriptor path, PageWriter pageWriter, BloomFilterWriter bloomFilterWriter,
                  ParquetProperties props) {
     super(path, pageWriter, bloomFilterWriter, props);
diff --git a/parquet-column/src/test/java/org/apache/parquet/column/values/bloomfilter/TestBlockSplitBloomFilter.java b/parquet-column/src/test/java/org/apache/parquet/column/values/bloomfilter/TestBlockSplitBloomFilter.java
index 0f85195706..a76109f127 100644
--- a/parquet-column/src/test/java/org/apache/parquet/column/values/bloomfilter/TestBlockSplitBloomFilter.java
+++ b/parquet-column/src/test/java/org/apache/parquet/column/values/bloomfilter/TestBlockSplitBloomFilter.java
@@ -112,7 +112,7 @@ public void testFPP() throws IOException {
         exist ++;
       }
     }
-    
+
     // The exist should be probably less than 1000 according FPP 0.01.
     assertTrue(exist < totalCount * FPP);
   }
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java
index 6556c6ada0..0a13e543e4 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileWriter.java
@@ -490,46 +490,6 @@ public void writeDataPage(
     innerWriteDataPage(valueCount, uncompressedPageSize, bytes, statistics, rlEncoding, dlEncoding, valuesEncoding);
   }
 
-  /**
-   * Writes a single page
-   * @param valueCount count of values
-   * @param uncompressedPageSize the size of the data once uncompressed
-   * @param bytes the compressed data for the page without header
-   * @param statistics the statistics of the page
-   * @param rowCount the number of rows in the page
-   * @param rlEncoding encoding of the repetition level
-   * @param dlEncoding encoding of the definition level
-   * @param valuesEncoding encoding of values
-   * @throws IOException if any I/O error occurs during writing the file
-   */
-  @Deprecated
-  public void writeDataPage(
-      int valueCount, int uncompressedPageSize,
-      BytesInput bytes,
-      Statistics statistics,
-      long rowCount,
-      Encoding rlEncoding,
-      Encoding dlEncoding,
-      Encoding valuesEncoding) throws IOException {
-    long beforeHeader = out.getPos();
-    innerWriteDataPage(valueCount, uncompressedPageSize, bytes, statistics, rlEncoding, dlEncoding, valuesEncoding);
-
-    offsetIndexBuilder.add((int) (out.getPos() - beforeHeader), rowCount);
-  }
-
-  private void innerWriteDataPage(
-      int valueCount, int uncompressedPageSize,
-      BytesInput bytes,
-      Statistics statistics,
-      Encoding rlEncoding,
-      Encoding dlEncoding,
-      Encoding valuesEncoding) throws IOException {
-    // We are unable to build indexes without rowCount so skip them for this column
-    offsetIndexBuilder = OffsetIndexBuilder.getNoOpBuilder();
-    columnIndexBuilder = ColumnIndexBuilder.getNoOpBuilder();
-    innerWriteDataPage(valueCount, uncompressedPageSize, bytes, statistics, rlEncoding, dlEncoding, valuesEncoding);
-  }
-
   /**
    * Writes a single page
    * @param valueCount count of values

From 1b646a9261accadb6a5ab78307dccb3aface8aa2 Mon Sep 17 00:00:00 2001
From: Junjie Chen <jimmyjchen@tencent.com>
Date: Wed, 31 Oct 2018 01:30:54 +0800
Subject: [PATCH 5/9] address comments

---
 .../parquet/column/ParquetProperties.java     |  2 +
 .../column/impl/ColumnWriteStoreBase.java     |  1 +
 .../parquet/column/impl/ColumnWriterBase.java | 12 ++--
 .../bloomfilter/BlockSplitBloomFilter.java    | 70 +++++++++----------
 .../values/bloomfilter/BloomFilter.java       | 32 ++++-----
 .../TestBlockSplitBloomFilter.java            | 14 ++--
 .../parquet/hadoop/ParquetOutputFormat.java   | 28 +++++++-
 .../parquet/hadoop/TestParquetFileWriter.java |  8 +--
 8 files changed, 94 insertions(+), 73 deletions(-)

diff --git a/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java b/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java
index 525af61021..1690b68e80 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java
@@ -90,6 +90,8 @@ public static WriterVersion fromString(String name) {
   private final ValuesWriterFactory valuesWriterFactory;
   private final int columnIndexTruncateLength;
   private final boolean enableBloomFilter;
+
+  // The key-value pair represents the column name and its expected distinct number of values in a row group.
   private final HashMap<String, Long> bloomFilterExpectedDistinctNumbers;
 
   private ParquetProperties(WriterVersion writerVersion, int pageSize, int dictPageSize, boolean enableDict, int minRowCountForPageSizeCheck,
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreBase.java b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreBase.java
index dc4946e4ff..a0658640e8 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreBase.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreBase.java
@@ -107,6 +107,7 @@ public ColumnWriter getColumnWriter(ColumnDescriptor path) {
     };
   }
 
+  // The Bloom filter is written to a specified bitset instead of pages. So it needs a separated write store abstract.
   ColumnWriteStoreBase(
     MessageType schema,
     PageWriteStore pageWriteStore,
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterBase.java b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterBase.java
index af8e90ecc2..84a25e3757 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterBase.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterBase.java
@@ -81,7 +81,7 @@ abstract class ColumnWriterBase implements ColumnWriter {
   ) {
     this(path, pageWriter, props);
 
-    // Current not support nested column.
+    // Bloom filters don't support nested columns yet; see PARQUET-1453.
     if (path.getPath().length != 1 || bloomFilterWriter == null) {
       return;
     }
@@ -154,31 +154,31 @@ public long getBufferedSizeInMemory() {
 
   private void updateBloomFilter(int value) {
     if (bloomFilter != null) {
-      bloomFilter.insert(bloomFilter.hash(value));
+      bloomFilter.insertHash(bloomFilter.hash(value));
     }
   }
 
   private void updateBloomFilter(long value) {
     if (bloomFilter != null) {
-      bloomFilter.insert(bloomFilter.hash(value));
+      bloomFilter.insertHash(bloomFilter.hash(value));
     }
   }
 
   private void updateBloomFilter(double value) {
     if (bloomFilter != null) {
-      bloomFilter.insert(bloomFilter.hash(value));
+      bloomFilter.insertHash(bloomFilter.hash(value));
     }
   }
 
   private void updateBloomFilter(float value) {
     if (bloomFilter != null) {
-      bloomFilter.insert(bloomFilter.hash(value));
+      bloomFilter.insertHash(bloomFilter.hash(value));
     }
   }
 
   private void updateBloomFilter(Binary value) {
     if (bloomFilter != null) {
-      bloomFilter.insert(bloomFilter.hash(value));
+      bloomFilter.insertHash(bloomFilter.hash(value));
     }
   }
 
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BlockSplitBloomFilter.java b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BlockSplitBloomFilter.java
index f5ceadc428..18d1876aaf 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BlockSplitBloomFilter.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BlockSplitBloomFilter.java
@@ -36,24 +36,25 @@
  * each tiny Bloom filter. Each tiny Bloom filter is 32 bytes to take advantage of 32-byte SIMD
  * instruction.
  */
-public class BlockSplitBloomFilter extends BloomFilter {
+public class BlockSplitBloomFilter implements BloomFilter {
   // Bytes in a tiny Bloom filter block.
-  private static final int BYTES_PER_FILTER_BLOCK = 32;
+  private static final int BYTES_PER_BLOCK = 32;
 
-  // Default seed for hash function, it comes from System.nanoTime().
+  // Default seed for the hash function. It comes from System.nanoTime().
   private static final int DEFAULT_SEED = 1361930890;
 
-  // Minimum Bloom filter size, set to size of a tiny Bloom filter block
-  public static final int MINIMUM_BLOOM_FILTER_BYTES = 32;
+  // Minimum Bloom filter size, set to the size of a tiny Bloom filter block
+  public static final int MINIMUM_BYTES = 32;
 
-  // Maximum Bloom filter size, it sets to default HDFS block size for upper boundary check
+  // Maximum Bloom filter size, set to the default HDFS block size for upper boundary check
   // This should be re-consider when implementing write side logic.
-  public static final int MAXIMUM_BLOOM_FILTER_BYTES = 128 * 1024 * 1024;
+  public static final int MAXIMUM_BYTES = 128 * 1024 * 1024;
 
   // The number of bits to set in a tiny Bloom filter
   private static final int BITS_SET_PER_BLOCK = 8;
 
-  // The header of Bloom filter, it includes number of bytes, algorithm and hash enumeration.
+  // The metadata in the header of a serialized Bloom filter is three four-byte values: the number of bytes,
+  // the filter algorithm, and the hash algorithm.
   public static final int HEADER_SIZE = 12;
 
   // The default false positive probability value
@@ -62,9 +63,6 @@ public class BlockSplitBloomFilter extends BloomFilter {
   // Hash strategy used in this Bloom filter.
   public final HashStrategy hashStrategy;
 
-  // Algorithm used in this Bloom filter.
-  public final Algorithm algorithm;
-
   // The underlying byte array for Bloom filter bitset.
   private byte[] bitset;
 
@@ -74,18 +72,18 @@ public class BlockSplitBloomFilter extends BloomFilter {
   // Hash function use to compute hash for column value.
   private HashFunction hashFunction;
 
-  // The block-based algorithm needs 8 odd SALT values to calculate eight index
-  // of bit to set, one bit in 32-bit word.
+  // The block-based algorithm needs 8 odd SALT values to calculate eight indexes
+  // of bits to set, one per 32-bit word.
   private static final int SALT[] = {0x47b6137b, 0x44974d91, 0x8824ad5b, 0xa2b7289d,
     0x705495c7, 0x2df1424b, 0x9efc4947, 0x5c6bfb31};
+
   /**
    * Constructor of Bloom filter.
    *
    * @param numBytes The number of bytes for Bloom filter bitset. The range of num_bytes should be within
-   *                 [MINIMUM_BLOOM_FILTER_BYTES, MAXIMUM_BLOOM_FILTER_BYTES], it will be rounded up/down
-   *                 to lower/upper bound if num_bytes is out of range and also will rounded up to a power
-   *                 of 2. It uses murmur3_x64_128 as its default hash function and block-based algorithm
-   *                 as default algorithm.
+   *                 [MINIMUM_BYTES, MAXIMUM_BYTES], it will be rounded up/down
+   *                 to lower/upper bound if num_bytes is out of range. It will also be rounded up to a power
+   *                 of 2. It uses murmur3_x64_128 as its default hash function.
    */
   public BlockSplitBloomFilter(int numBytes) {
     this(numBytes, HashStrategy.MURMUR3_X64_128, Algorithm.BLOCK);
@@ -107,9 +105,8 @@ private BlockSplitBloomFilter(int numBytes, HashStrategy hashStrategy, Algorithm
         hashFunction = Hashing.murmur3_128(DEFAULT_SEED);
         break;
       default:
-        throw new RuntimeException("Not supported hash strategy");
+        throw new RuntimeException("Unsupported hash strategy");
     }
-    this.algorithm = algorithm;
   }
 
   /**
@@ -146,28 +143,27 @@ private BlockSplitBloomFilter(byte[] bitset, HashStrategy hashStrategy, Algorith
       default:
         throw new RuntimeException("Not supported hash strategy");
     }
-    this.algorithm = algorithm;
   }
 
   /**
    * Create a new bitset for Bloom filter.
    *
    * @param numBytes The number of bytes for Bloom filter bitset. The range of num_bytes should be within
-   *                 [MINIMUM_BLOOM_FILTER_BYTES, MAXIMUM_BLOOM_FILTER_BYTES], it will be rounded up/down
+   *                 [MINIMUM_BYTES, MAXIMUM_BYTES], it will be rounded up/down
    *                 to lower/upper bound if num_bytes is out of range and also will rounded up to a power
    *                 of 2. It uses murmur3_x64_128 as its default hash function and block-based algorithm
    *                 as default algorithm.
    */
   private void initBitset(int numBytes) {
-    if (numBytes < MINIMUM_BLOOM_FILTER_BYTES) {
-      numBytes = MINIMUM_BLOOM_FILTER_BYTES;
+    if (numBytes < MINIMUM_BYTES) {
+      numBytes = MINIMUM_BYTES;
     }
     // Get next power of 2 if it is not power of 2.
     if ((numBytes & (numBytes - 1)) != 0) {
       numBytes = Integer.highestOneBit(numBytes) << 1;
     }
-    if (numBytes > MAXIMUM_BLOOM_FILTER_BYTES || numBytes < 0) {
-      numBytes = MAXIMUM_BLOOM_FILTER_BYTES;
+    if (numBytes > MAXIMUM_BYTES || numBytes < 0) {
+      numBytes = MAXIMUM_BYTES;
     }
     this.bitset = new byte[numBytes];
     this.intBuffer = ByteBuffer.wrap(bitset).order(ByteOrder.LITTLE_ENDIAN).asIntBuffer();
@@ -178,9 +174,9 @@ public void writeTo(OutputStream out) throws IOException {
     // Write number of bytes of bitset.
     out.write(BytesUtils.intToBytes(bitset.length));
     // Write hash strategy
-    out.write(BytesUtils.intToBytes(this.hashStrategy.ordinal()));
+    out.write(BytesUtils.intToBytes(hashStrategy.ordinal()));
     // Write algorithm
-    out.write(BytesUtils.intToBytes(this.algorithm.ordinal()));
+    out.write(BytesUtils.intToBytes(Algorithm.BLOCK.ordinal()));
     // Write bitset
     out.write(bitset);
   }
@@ -202,28 +198,28 @@ private int[] setMask(int key) {
   }
 
   @Override
-  public void insert(long hash) {
-    int bucketIndex = (int)(hash >> 32) & (bitset.length / BYTES_PER_FILTER_BLOCK - 1);
+  public void insertHash(long hash) {
+    int bucketIndex = (int)(hash >> 32) & (bitset.length / BYTES_PER_BLOCK - 1);
     int key = (int)hash;
 
     // Calculate mask for bucket.
     int mask[] = setMask(key);
     for (int i = 0; i < BITS_SET_PER_BLOCK; i++) {
-      int value = intBuffer.get(bucketIndex * (BYTES_PER_FILTER_BLOCK / 4) + i);
+      int value = intBuffer.get(bucketIndex * (BYTES_PER_BLOCK / 4) + i);
       value |= mask[i];
-      intBuffer.put(bucketIndex * (BYTES_PER_FILTER_BLOCK / 4) + i, value);
+      intBuffer.put(bucketIndex * (BYTES_PER_BLOCK / 4) + i, value);
     }
   }
 
   @Override
-  public boolean find(long hash) {
-    int bucketIndex = (int)(hash >> 32) & (bitset.length / BYTES_PER_FILTER_BLOCK - 1);
+  public boolean findHash(long hash) {
+    int bucketIndex = (int)(hash >> 32) & (bitset.length / BYTES_PER_BLOCK - 1);
     int key = (int)hash;
 
     // Calculate mask for the tiny Bloom filter.
     int mask[] = setMask(key);
     for (int i = 0; i < BITS_SET_PER_BLOCK; i++) {
-      if (0 == (intBuffer.get(bucketIndex * (BYTES_PER_FILTER_BLOCK / 4) + i) & mask[i])) {
+      if (0 == (intBuffer.get(bucketIndex * (BYTES_PER_BLOCK / 4) + i) & mask[i])) {
         return false;
       }
     }
@@ -242,7 +238,7 @@ public static int optimalNumOfBits(long n, double p) {
     Preconditions.checkArgument((p > 0.0 && p < 1.0),
       "FPP should be less than 1.0 and great than 0.0");
     final double m = -8 * n / Math.log(1 - Math.pow(p, 1.0 / 8));
-    final double MAX = MAXIMUM_BLOOM_FILTER_BYTES << 3;
+    final double MAX = MAXIMUM_BYTES << 3;
     int numBits = (int)m;
 
     // Handle overflow.
@@ -253,8 +249,8 @@ public static int optimalNumOfBits(long n, double p) {
     if ((numBits & (numBits - 1)) != 0) {
       numBits = Integer.highestOneBit(numBits) << 1;
     }
-    if (numBits < (MINIMUM_BLOOM_FILTER_BYTES << 3)) {
-      numBits = MINIMUM_BLOOM_FILTER_BYTES << 3;
+    if (numBits < (MINIMUM_BYTES << 3)) {
+      numBits = MINIMUM_BYTES << 3;
     }
 
     return numBits;
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilter.java b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilter.java
index 4199497fd9..d02fa52398 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilter.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilter.java
@@ -27,24 +27,24 @@
  * in a set. The Bloom filter usually consists of a bit set that represents a elements set,
  * a hash strategy and a Bloom filter algorithm.
  */
-public abstract class BloomFilter {
+public interface BloomFilter {
   // Bloom filter Hash strategy.
-  public enum HashStrategy {
-    MURMUR3_X64_128,
+  enum HashStrategy {
+    MURMUR3_X64_128
   }
 
   // Bloom filter algorithm.
-  public enum Algorithm {
-    BLOCK,
+  enum Algorithm {
+    BLOCK
   }
 
   /**
-   * Write the Bloom filter to an output stream. It writes the Bloom filter header includes the
-   * bitset's length in size of byte, the hash strategy, the algorithm, and the bitset.
+   * Write the Bloom filter to an output stream. It writes the Bloom filter header including the
+   * bitset's length in bytes, the hash strategy, the algorithm, and the bitset.
    *
    * @param out the output stream to write
    */
-  public abstract void writeTo(OutputStream out) throws IOException;
+  void writeTo(OutputStream out) throws IOException;
 
   /**
    * Insert an element to the Bloom filter, the element content is represented by
@@ -52,7 +52,7 @@ public enum Algorithm {
    *
    * @param hash the hash result of element.
    */
-  public abstract void insert(long hash);
+  void insertHash(long hash);
 
   /**
    * Determine whether an element is in set or not.
@@ -60,7 +60,7 @@ public enum Algorithm {
    * @param hash the hash value of element plain encoding result.
    * @return false if element is must not in set, true if element probably in set.
    */
-  public abstract boolean find(long hash);
+  boolean findHash(long hash);
 
   /**
    * Compute hash for int value by using its plain encoding result.
@@ -68,7 +68,7 @@ public enum Algorithm {
    * @param value the value to hash
    * @return hash result
    */
-  public abstract long hash(int value);
+  long hash(int value);
 
   /**
    * Compute hash for long value by using its plain encoding result.
@@ -76,7 +76,7 @@ public enum Algorithm {
    * @param value the value to hash
    * @return hash result
    */
-  public abstract long hash(long value) ;
+  long hash(long value) ;
 
   /**
    * Compute hash for double value by using its plain encoding result.
@@ -84,7 +84,7 @@ public enum Algorithm {
    * @param value the value to hash
    * @return hash result
    */
-  public abstract long hash(double value);
+  long hash(double value);
 
   /**
    * Compute hash for float value by using its plain encoding result.
@@ -92,7 +92,7 @@ public enum Algorithm {
    * @param value the value to hash
    * @return hash result
    */
-  public abstract long hash(float value);
+  long hash(float value);
 
   /**
    * Compute hash for Binary value by using its plain encoding result.
@@ -100,12 +100,12 @@ public enum Algorithm {
    * @param value the value to hash
    * @return hash result
    */
-  public abstract long hash(Binary value);
+  long hash(Binary value);
 
   /**
    * Get the number of bytes for bitset in this Bloom filter.
    *
    * @return The number of bytes for bitset in this Bloom filter.
    */
-  public abstract long getBitsetSize();
+  long getBitsetSize();
 }
diff --git a/parquet-column/src/test/java/org/apache/parquet/column/values/bloomfilter/TestBlockSplitBloomFilter.java b/parquet-column/src/test/java/org/apache/parquet/column/values/bloomfilter/TestBlockSplitBloomFilter.java
index a76109f127..8dbb0ba193 100644
--- a/parquet-column/src/test/java/org/apache/parquet/column/values/bloomfilter/TestBlockSplitBloomFilter.java
+++ b/parquet-column/src/test/java/org/apache/parquet/column/values/bloomfilter/TestBlockSplitBloomFilter.java
@@ -39,9 +39,9 @@ public class TestBlockSplitBloomFilter {
   @Test
   public void testConstructor () throws IOException {
     BloomFilter bloomFilter1 = new BlockSplitBloomFilter(0);
-    assertEquals(bloomFilter1.getBitsetSize(), BlockSplitBloomFilter.MINIMUM_BLOOM_FILTER_BYTES);
-    BloomFilter bloomFilter2 = new BlockSplitBloomFilter(256 * 1024 * 1024);
-    assertEquals(bloomFilter2.getBitsetSize(), BlockSplitBloomFilter.MAXIMUM_BLOOM_FILTER_BYTES);
+    assertEquals(bloomFilter1.getBitsetSize(), BlockSplitBloomFilter.MINIMUM_BYTES);
+    BloomFilter bloomFilter2 = new BlockSplitBloomFilter(BlockSplitBloomFilter.MAXIMUM_BYTES + 1);
+    assertEquals(bloomFilter2.getBitsetSize(), BlockSplitBloomFilter.MAXIMUM_BYTES);
     BloomFilter bloomFilter3 = new BlockSplitBloomFilter(1000);
     assertEquals(bloomFilter3.getBitsetSize(), 1024);
   }
@@ -59,7 +59,7 @@ public void testBasic () throws IOException {
     BloomFilter bloomFilter = new BlockSplitBloomFilter(1024);
 
     for(int i = 0; i < testStrings.length; i++) {
-      bloomFilter.insert(bloomFilter.hash(Binary.fromString(testStrings[i])));
+      bloomFilter.insertHash(bloomFilter.hash(Binary.fromString(testStrings[i])));
     }
 
     File testFile = temp.newFile();
@@ -85,7 +85,7 @@ public void testBasic () throws IOException {
     fileInputStream.read(bitset);
     bloomFilter = new BlockSplitBloomFilter(bitset);
     for(int i = 0; i < testStrings.length; i++) {
-      assertTrue(bloomFilter.find(bloomFilter.hash(Binary.fromString(testStrings[i]))));
+      assertTrue(bloomFilter.findHash(bloomFilter.hash(Binary.fromString(testStrings[i]))));
     }
   }
 
@@ -101,14 +101,14 @@ public void testFPP() throws IOException {
     for(int i = 0; i < totalCount; i++) {
       String str = randomStr.get(10);
       strings.add(str);
-      bloomFilter.insert(bloomFilter.hash(Binary.fromString(str)));
+      bloomFilter.insertHash(bloomFilter.hash(Binary.fromString(str)));
     }
 
     // The exist counts the number of times FindHash returns true.
     int exist = 0;
     for (int i = 0; i < totalCount; i++) {
       String str = randomStr.get(8);
-      if (bloomFilter.find(bloomFilter.hash(Binary.fromString(str)))) {
+      if (bloomFilter.findHash(bloomFilter.hash(Binary.fromString(str)))) {
         exist ++;
       }
     }
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetOutputFormat.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetOutputFormat.java
index 0789bf50d4..355c46b749 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetOutputFormat.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetOutputFormat.java
@@ -1,4 +1,4 @@
-/* 
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -144,6 +144,9 @@ public static enum JobSummaryLevel {
   public static final String MAX_ROW_COUNT_FOR_PAGE_SIZE_CHECK = "parquet.page.size.row.check.max";
   public static final String ESTIMATE_PAGE_SIZE_CHECK = "parquet.page.size.check.estimate";
   public static final String COLUMN_INDEX_TRUNCATE_LENGTH = "parquet.columnindex.truncate.length";
+  public static final String BLOOM_FILTER_COLUMN_NAMES = "parquet.bloom.filter.column.names";
+  public static final String BLOOM_FILTER_EXPECTED_NDV = "parquet.bloom.filter.expected.ndv";
+  public static final String ENABLE_BLOOM_FILTER = "parquet.enable.bloom.filter";
 
   public static JobSummaryLevel getJobSummaryLevel(Configuration conf) {
     String level = conf.get(JOB_SUMMARY_LEVEL);
@@ -209,6 +212,19 @@ public static boolean getEnableDictionary(JobContext jobContext) {
     return getEnableDictionary(getConfiguration(jobContext));
   }
 
+  public static String getBloomFilterColumnNames(Configuration conf) {
+    return conf.get(BLOOM_FILTER_COLUMN_NAMES);
+  }
+
+  public static String getBloomFilterExpectedNDV(Configuration configuration) {
+    return configuration.get(BLOOM_FILTER_EXPECTED_NDV);
+  }
+
+  public static boolean getEnableBloomFilter(Configuration configuration) {
+    return configuration.getBoolean(ENABLE_BLOOM_FILTER,
+      ParquetProperties.DEFAULT_BLOOM_FILTER_ENABLED);
+  }
+
   public static int getBlockSize(JobContext jobContext) {
     return getBlockSize(getConfiguration(jobContext));
   }
@@ -375,6 +391,8 @@ public RecordWriter<Void, T> getRecordWriter(Configuration conf, Path file, Comp
         .withPageSize(getPageSize(conf))
         .withDictionaryPageSize(getDictionaryPageSize(conf))
         .withDictionaryEncoding(getEnableDictionary(conf))
+        .withBloomFilterEnabled(getEnableBloomFilter(conf))
+        .withBloomFilterInfo(getBloomFilterColumnNames(conf), getBloomFilterExpectedNDV(conf))
         .withWriterVersion(getWriterVersion(conf))
         .estimateRowCountForPageSizeCheck(getEstimatePageSizeCheck(conf))
         .withMinRowCountForPageSizeCheck(getMinRowCountForPageSizeCheck(conf))
@@ -398,6 +416,10 @@ public RecordWriter<Void, T> getRecordWriter(Configuration conf, Path file, Comp
       LOG.info("Min row count for page size check is: {}", props.getMinRowCountForPageSizeCheck());
       LOG.info("Max row count for page size check is: {}", props.getMaxRowCountForPageSizeCheck());
       LOG.info("Truncate length for column indexes is: {}", props.getColumnIndexTruncateLength());
+      LOG.info("Bloom Filter is {}", props.isBloomFilterEnabled()? "on": "off");
+      LOG.info("Bloom filter enabled column names are: {}", props.getBloomFilterExpectedDistinctNumbers().keySet());
+      LOG.info("Bloom filter enabled column expected number of distinct values are: {}",
+        props.getBloomFilterExpectedDistinctNumbers().values());
     }
 
     WriteContext init = writeSupport.init(conf);
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileWriter.java b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileWriter.java
index e4a1d350cc..0cfb001d49 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileWriter.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetFileWriter.java
@@ -240,8 +240,8 @@ public void testBloomWriteRead() throws Exception {
     w.writeDataPage(2, 4, BytesInput.from(BYTES1),stats1, BIT_PACKED, BIT_PACKED, PLAIN);
     w.writeDataPage(3, 4, BytesInput.from(BYTES1),stats1, BIT_PACKED, BIT_PACKED, PLAIN);
     BloomFilter bloomData = new BlockSplitBloomFilter(0);
-    bloomData.insert(bloomData.hash(Binary.fromString("hello")));
-    bloomData.insert(bloomData.hash(Binary.fromString("world")));
+    bloomData.insertHash(bloomData.hash(Binary.fromString("hello")));
+    bloomData.insertHash(bloomData.hash(Binary.fromString("world")));
     long blStarts = w.getPos();
     w.writeBloomFilter(bloomData);
     w.endColumn();
@@ -254,8 +254,8 @@ public void testBloomWriteRead() throws Exception {
       Arrays.asList(readFooter.getBlocks().get(0)), Arrays.asList(schema.getColumnDescription(colPath)));
     BloomFilterReader bloomFilterReader =  r.getBloomFilterDataReader(readFooter.getBlocks().get(0));
     BloomFilter bloomDataRead = bloomFilterReader.readBloomFilter(col);
-    assertTrue(bloomDataRead.find(bloomData.hash(Binary.fromString("hello"))));
-    assertTrue(bloomDataRead.find(bloomData.hash(Binary.fromString("world"))));
+    assertTrue(bloomDataRead.findHash(bloomData.hash(Binary.fromString("hello"))));
+    assertTrue(bloomDataRead.findHash(bloomData.hash(Binary.fromString("world"))));
   }
 
   @Test

From f03d875322716c2dca34187affc3d2b068df8055 Mon Sep 17 00:00:00 2001
From: Junjie Chen <jimmyjchen@tencent.com>
Date: Thu, 1 Nov 2018 00:02:56 +0800
Subject: [PATCH 6/9] address comments and fix enum issue

---
 .../parquet/column/ParquetProperties.java     | 21 ++++++----------
 .../column/impl/ColumnWriteStoreBase.java     |  4 +--
 .../parquet/column/impl/ColumnWriterBase.java |  6 ++---
 .../bloomfilter/BlockSplitBloomFilter.java    | 20 +++++++--------
 .../values/bloomfilter/BloomFilter.java       | 12 +++++++--
 .../parquet/hadoop/ParquetOutputFormat.java   | 25 +++++++++++++------
 6 files changed, 48 insertions(+), 40 deletions(-)

diff --git a/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java b/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java
index 1690b68e80..65cd4c0afc 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java
@@ -211,7 +211,7 @@ public boolean isBloomFilterEnabled() {
     return enableBloomFilter;
   }
 
-  public HashMap<String, Long> getBloomFilterExpectedDistinctNumbers() {
+  public HashMap<String, Long> getBloomFilterColumnExpectedNDVs() {
     return bloomFilterExpectedDistinctNumbers;
   }
 
@@ -235,7 +235,7 @@ public static class Builder {
     private ValuesWriterFactory valuesWriterFactory = DEFAULT_VALUES_WRITER_FACTORY;
     private int columnIndexTruncateLength = DEFAULT_COLUMN_INDEX_TRUNCATE_LENGTH;
     private boolean enableBloomFilter = DEFAULT_BLOOM_FILTER_ENABLED;
-    private HashMap<String, Long> bloomFilterExpectedDistinctNumbers = new HashMap<>();
+    private HashMap<String, Long> bloomFilterColumnExpectedNDVs = new HashMap<>();
 
     private Builder() {
     }
@@ -249,7 +249,7 @@ private Builder(ParquetProperties toCopy) {
       this.estimateNextSizeCheck = toCopy.estimateNextSizeCheck;
       this.allocator = toCopy.allocator;
       this.enableBloomFilter = toCopy.enableBloomFilter;
-      this.bloomFilterExpectedDistinctNumbers = toCopy.bloomFilterExpectedDistinctNumbers;
+      this.bloomFilterColumnExpectedNDVs = toCopy.bloomFilterExpectedDistinctNumbers;
     }
 
     /**
@@ -351,18 +351,11 @@ public Builder withBloomFilterEnabled(boolean enableBloomFilter) {
     /**
      * Set Bloom filter info for columns.
      *
-     * @param bloomFilterColumnNames the columns to be enabled for Bloom filter
-     * @param bloomFilterDistinctNumbers the expected distinct number of values corresponding to columns
+     * @param columnExpectedNDVs the columns expected number of distinct values in a row group
      * @return this builder for method chaining
      */
-    public Builder withBloomFilterInfo(String bloomFilterColumnNames, String bloomFilterDistinctNumbers) {
-      String[] columnNames = bloomFilterColumnNames.split(",");
-      String[] expectedDistinctNumber = bloomFilterDistinctNumbers.split(",");
-      Preconditions.checkArgument(columnNames.length == expectedDistinctNumber.length,
-        "Column names are not matched to sizes");
-      for (int i = 0; i < columnNames.length; i++) {
-        this.bloomFilterExpectedDistinctNumbers.put(columnNames[i], Long.getLong(expectedDistinctNumber[i]));
-      }
+    public Builder withBloomFilterInfo(HashMap<String, Long> columnExpectedNDVs) {
+      this.bloomFilterColumnExpectedNDVs = columnExpectedNDVs;
       return this;
     }
 
@@ -371,7 +364,7 @@ public ParquetProperties build() {
         new ParquetProperties(writerVersion, pageSize, dictPageSize,
           enableDict, minRowCountForPageSizeCheck, maxRowCountForPageSizeCheck,
           estimateNextSizeCheck, allocator, valuesWriterFactory, columnIndexTruncateLength,
-          enableBloomFilter, bloomFilterExpectedDistinctNumbers);
+          enableBloomFilter, bloomFilterColumnExpectedNDVs);
       // we pass a constructed but uninitialized factory to ParquetProperties above as currently
       // creation of ValuesWriters is invoked from within ParquetProperties. In the future
       // we'd like to decouple that and won't need to pass an object to properties and then pass the
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreBase.java b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreBase.java
index a0658640e8..744c24de78 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreBase.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreBase.java
@@ -107,7 +107,7 @@ public ColumnWriter getColumnWriter(ColumnDescriptor path) {
     };
   }
 
-  // The Bloom filter is written to a specified bitset instead of pages. So it needs a separated write store abstract.
+  // The Bloom filter is written to a specified bitset instead of pages, so it needs a separate write store abstract.
   ColumnWriteStoreBase(
     MessageType schema,
     PageWriteStore pageWriteStore,
@@ -118,7 +118,7 @@ public ColumnWriter getColumnWriter(ColumnDescriptor path) {
     Map<ColumnDescriptor, ColumnWriterBase> mcolumns = new TreeMap<>();
     for (ColumnDescriptor path : schema.getColumns()) {
       PageWriter pageWriter = pageWriteStore.getPageWriter(path);
-      if (props.isBloomFilterEnabled() && props.getBloomFilterExpectedDistinctNumbers() != null) {
+      if (props.isBloomFilterEnabled() && props.getBloomFilterColumnExpectedNDVs() != null) {
         BloomFilterWriter bloomFilterWriter = bloomFilterWriteStore.getBloomFilterWriter(path);
         mcolumns.put(path, createColumnWriter(path, pageWriter, bloomFilterWriter, props));
       } else {
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterBase.java b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterBase.java
index 84a25e3757..c03b04fc5e 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterBase.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriterBase.java
@@ -87,10 +87,10 @@ abstract class ColumnWriterBase implements ColumnWriter {
     }
 
     this.bloomFilterWriter = bloomFilterWriter;
-    HashMap<String, Long> bloomFilterExpectValues = props.getBloomFilterExpectedDistinctNumbers();
+    HashMap<String, Long> bloomFilterColumnExpectedNDVs = props.getBloomFilterColumnExpectedNDVs();
     String column = path.getPath()[0];
-    if (bloomFilterExpectValues.keySet().contains(column)) {
-      int optimalNumOfBits = BlockSplitBloomFilter.optimalNumOfBits(bloomFilterExpectValues.get(column).intValue(),
+    if (bloomFilterColumnExpectedNDVs.keySet().contains(column)) {
+      int optimalNumOfBits = BlockSplitBloomFilter.optimalNumOfBits(bloomFilterColumnExpectedNDVs.get(column).intValue(),
         BlockSplitBloomFilter.DEFAULT_FPP);
       this.bloomFilter = new BlockSplitBloomFilter(optimalNumOfBits/8);
     }
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BlockSplitBloomFilter.java b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BlockSplitBloomFilter.java
index 18d1876aaf..b6378976c3 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BlockSplitBloomFilter.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BlockSplitBloomFilter.java
@@ -86,18 +86,17 @@ public class BlockSplitBloomFilter implements BloomFilter {
    *                 of 2. It uses murmur3_x64_128 as its default hash function.
    */
   public BlockSplitBloomFilter(int numBytes) {
-    this(numBytes, HashStrategy.MURMUR3_X64_128, Algorithm.BLOCK);
+    this(numBytes, HashStrategy.MURMUR3_X64_128);
   }
 
   /**
-   * Constructor of Bloom filter. It uses murmur3_x64_128 as its default hash
-   * function and block-based algorithm as its default algorithm.
+   * Constructor of block-based Bloom filter. It uses murmur3_x64_128 as its default hash
+   * function.
    *
    * @param numBytes The number of bytes for Bloom filter bitset
    * @param hashStrategy The hash strategy of Bloom filter.
-   * @param algorithm The algorithm of Bloom filter.
    */
-  private BlockSplitBloomFilter(int numBytes, HashStrategy hashStrategy, Algorithm algorithm) {
+  private BlockSplitBloomFilter(int numBytes, HashStrategy hashStrategy) {
     initBitset(numBytes);
     switch (hashStrategy) {
       case MURMUR3_X64_128:
@@ -112,12 +111,12 @@ private BlockSplitBloomFilter(int numBytes, HashStrategy hashStrategy, Algorithm
   /**
    * Construct the Bloom filter with given bitset, it is used when reconstructing
    * Bloom filter from parquet file. It use murmur3_x64_128 as its default hash
-   * function and block-based algorithm as default algorithm.
+   * function.
    *
    * @param bitset The given bitset to construct Bloom filter.
    */
   public BlockSplitBloomFilter(byte[] bitset) {
-    this(bitset, HashStrategy.MURMUR3_X64_128, Algorithm.BLOCK);
+    this(bitset, HashStrategy.MURMUR3_X64_128);
   }
 
   /**
@@ -126,9 +125,8 @@ public BlockSplitBloomFilter(byte[] bitset) {
    *
    * @param bitset The given bitset to construct Bloom filter.
    * @param hashStrategy The hash strategy Bloom filter apply.
-   * @param algorithm The algorithm of Bloom filter.
    */
-  private BlockSplitBloomFilter(byte[] bitset, HashStrategy hashStrategy, Algorithm algorithm) {
+  private BlockSplitBloomFilter(byte[] bitset, HashStrategy hashStrategy) {
     if (bitset == null) {
       throw new RuntimeException("Given bitset is null");
     }
@@ -174,9 +172,9 @@ public void writeTo(OutputStream out) throws IOException {
     // Write number of bytes of bitset.
     out.write(BytesUtils.intToBytes(bitset.length));
     // Write hash strategy
-    out.write(BytesUtils.intToBytes(hashStrategy.ordinal()));
+    out.write(BytesUtils.intToBytes(hashStrategy.value));
     // Write algorithm
-    out.write(BytesUtils.intToBytes(Algorithm.BLOCK.ordinal()));
+    out.write(BytesUtils.intToBytes(Algorithm.BLOCK.value));
     // Write bitset
     out.write(bitset);
   }
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilter.java b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilter.java
index d02fa52398..3ec192e3e0 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilter.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BloomFilter.java
@@ -30,12 +30,20 @@
 public interface BloomFilter {
   // Bloom filter Hash strategy.
   enum HashStrategy {
-    MURMUR3_X64_128
+    MURMUR3_X64_128(0);
+    HashStrategy(int value) {
+      this.value = value;
+    }
+    final int value; // identifier written to the Bloom filter header; must never change after construction
   }
 
   // Bloom filter algorithm.
   enum Algorithm {
-    BLOCK
+    BLOCK(0);
+    Algorithm(int value) {
+      this.value = value;
+    }
+    final int value; // identifier written to the Bloom filter header; must never change after construction
   }
 
   /**
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetOutputFormat.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetOutputFormat.java
index 355c46b749..d716201d47 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetOutputFormat.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetOutputFormat.java
@@ -23,6 +23,7 @@
 import static org.apache.parquet.hadoop.util.ContextUtil.getConfiguration;
 
 import java.io.IOException;
+import java.util.HashMap;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
@@ -212,12 +213,20 @@ public static boolean getEnableDictionary(JobContext jobContext) {
     return getEnableDictionary(getConfiguration(jobContext));
   }
 
-  public static String getBloomFilterColumnNames(Configuration conf) {
-    return conf.get(BLOOM_FILTER_COLUMN_NAMES);
-  }
+  public static HashMap<String, Long> getBloomFilterColumnExpectedNDVs(Configuration conf) {
+    HashMap<String, Long> kv = new HashMap<>();
+    String[] columnNames = conf.get(BLOOM_FILTER_COLUMN_NAMES).split(",");
+    String[] expectedNDVs = conf.get(BLOOM_FILTER_EXPECTED_NDV).split(",");
+
+    if (columnNames.length == expectedNDVs.length) {
+      for (int i = 0; i < columnNames.length; i++) {
+        kv.put(columnNames[i], Long.parseLong(expectedNDVs[i].trim())); // Long.getLong would look up a system property
+      }
+    } else {
+      LOG.warn("Bloom filter column names are not match expected NDVs");
+    }
 
-  public static String getBloomFilterExpectedNDV(Configuration configuration) {
-    return configuration.get(BLOOM_FILTER_EXPECTED_NDV);
+    return kv;
   }
 
   public static boolean getEnableBloomFilter(Configuration configuration) {
@@ -392,7 +401,7 @@ public RecordWriter<Void, T> getRecordWriter(Configuration conf, Path file, Comp
         .withDictionaryPageSize(getDictionaryPageSize(conf))
         .withDictionaryEncoding(getEnableDictionary(conf))
         .withBloomFilterEnabled(getEnableBloomFilter(conf))
-        .withBloomFilterInfo(getBloomFilterColumnNames(conf), getBloomFilterExpectedNDV(conf))
+        .withBloomFilterInfo(getBloomFilterColumnExpectedNDVs(conf))
         .withWriterVersion(getWriterVersion(conf))
         .estimateRowCountForPageSizeCheck(getEstimatePageSizeCheck(conf))
         .withMinRowCountForPageSizeCheck(getMinRowCountForPageSizeCheck(conf))
@@ -417,9 +426,9 @@ public RecordWriter<Void, T> getRecordWriter(Configuration conf, Path file, Comp
       LOG.info("Max row count for page size check is: {}", props.getMaxRowCountForPageSizeCheck());
       LOG.info("Truncate length for column indexes is: {}", props.getColumnIndexTruncateLength());
       LOG.info("Bloom Filter is {}", props.isBloomFilterEnabled()? "on": "off");
-      LOG.info("Bloom filter enabled column names are: {}", props.getBloomFilterExpectedDistinctNumbers().keySet());
+      LOG.info("Bloom filter enabled column names are: {}", props.getBloomFilterColumnExpectedNDVs().keySet());
       LOG.info("Bloom filter enabled column expected number of distinct values are: {}",
-        props.getBloomFilterExpectedDistinctNumbers().values());
+        props.getBloomFilterColumnExpectedNDVs().values());
     }
 
     WriteContext init = writeSupport.init(conf);

From 5e4647fddfacb50e278757f13f5496f60e24549b Mon Sep 17 00:00:00 2001
From: "Chen, Junjie" <cjjnjust@gmail.com>
Date: Mon, 24 Dec 2018 16:16:06 +0800
Subject: [PATCH 7/9] Fix build issue caused by merge

---
 .../org/apache/parquet/column/ParquetProperties.java  |  9 +++------
 .../apache/parquet/hadoop/ParquetOutputFormat.java    | 11 +++++++++--
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java b/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java
index 078d74eec2..4df5b71260 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java
@@ -98,10 +98,8 @@ public static WriterVersion fromString(String name) {
 
   private ParquetProperties(WriterVersion writerVersion, int pageSize, int dictPageSize, boolean enableDict, int minRowCountForPageSizeCheck,
                             int maxRowCountForPageSizeCheck, boolean estimateNextSizeCheck, ByteBufferAllocator allocator,
-                            ValuesWriterFactory writerFactory, int columnIndexMinMaxTruncateLength, boolean enableBloomFilter,
-                            HashMap<String, Long> bloomFilterExpectedDistinctNumber) {
-
-                            ValuesWriterFactory writerFactory, int columnIndexMinMaxTruncateLength, int pageRowCountLimit) {
+                            ValuesWriterFactory writerFactory, int columnIndexMinMaxTruncateLength, int pageRowCountLimit,
+                            boolean enableBloomFilter, HashMap<String, Long> bloomFilterExpectedDistinctNumber) {
     this.pageSizeThreshold = pageSize;
     this.initialSlabSize = CapacityByteArrayOutputStream
       .initialSlabSizeHeuristic(MIN_SLAB_SIZE, pageSizeThreshold, 10);
@@ -381,8 +379,7 @@ public ParquetProperties build() {
       ParquetProperties properties =
         new ParquetProperties(writerVersion, pageSize, dictPageSize,
           enableDict, minRowCountForPageSizeCheck, maxRowCountForPageSizeCheck,
-          estimateNextSizeCheck, allocator, valuesWriterFactory, columnIndexTruncateLength, pageRowCountLimit);
-          estimateNextSizeCheck, allocator, valuesWriterFactory, columnIndexTruncateLength,
+          estimateNextSizeCheck, allocator, valuesWriterFactory, columnIndexTruncateLength, pageRowCountLimit,
           enableBloomFilter, bloomFilterColumnExpectedNDVs);
       // we pass a constructed but uninitialized factory to ParquetProperties above as currently
       // creation of ValuesWriters is invoked from within ParquetProperties. In the future
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetOutputFormat.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetOutputFormat.java
index a9aa97856b..33c3715378 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetOutputFormat.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetOutputFormat.java
@@ -216,8 +216,15 @@ public static boolean getEnableDictionary(JobContext jobContext) {
 
   public static HashMap<String, Long> getBloomFilterColumnExpectedNDVs(Configuration conf) {
     HashMap<String, Long> kv = new HashMap<>();
-    String[] columnNames = conf.get(BLOOM_FILTER_COLUMN_NAMES).split(",");
-    String[] expectedNDVs = conf.get(BLOOM_FILTER_EXPECTED_NDV).split(",");
+    String columnNamesConf = conf.get(BLOOM_FILTER_COLUMN_NAMES);
+    String expectedNDVsConf = conf.get(BLOOM_FILTER_EXPECTED_NDV);
+
+    if (columnNamesConf == null || expectedNDVsConf == null) {
+      return kv;
+    }
+
+    String[] columnNames = columnNamesConf.split(",");
+    String[] expectedNDVs = expectedNDVsConf.split(",");
 
     if (columnNames.length == expectedNDVs.length) {
       for (int i = 0; i < columnNames.length; i++) {

From 894040d47a08aedb042f5c2fd125116826856313 Mon Sep 17 00:00:00 2001
From: Junjie Chen <jimmyjchen@tencent.com>
Date: Tue, 8 Jan 2019 07:26:12 +0800
Subject: [PATCH 8/9] test build

---
 .travis.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.travis.yml b/.travis.yml
index e4e623f03b..17d7ee7dbb 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -29,6 +29,10 @@ before_install:
   - sudo make install
   - cd ..
   - date
+  - git clone https://github.com/apache/parquet-format.git
+  - cd parquet-format
+  - mvn install -DskipTests
+  - cd ..
 
 env:
   - HADOOP_PROFILE=default TEST_CODECS=uncompressed,brotli

From fb0ab5c4d2212cd036906ca1a451ede37d6eb36a Mon Sep 17 00:00:00 2001
From: Junjie Chen <jimmyjchen@tencent.com>
Date: Thu, 10 Jan 2019 23:31:57 +0800
Subject: [PATCH 9/9] update check for Bloom filter reader

---
 .../apache/parquet/hadoop/ParquetFileReader.java   | 14 +++++++++++++-
 .../hadoop/metadata/ColumnChunkMetaData.java       |  4 ++--
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java
index 6f03dd555e..7fe0e410ee 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetFileReader.java
@@ -1062,16 +1062,28 @@ public BloomFilterDataReader getBloomFilterDataReader(BlockMetaData block) {
    */
   public BloomFilter readBloomFilter(ColumnChunkMetaData meta) throws IOException {
     long bloomFilterOffset = meta.getBloomFilterOffset();
-    if (bloomFilterOffset == Long.MAX_VALUE) return null;
     f.seek(bloomFilterOffset);
+
     // Read Bloom filter data header.
     byte[] bytes = new byte[BlockSplitBloomFilter.HEADER_SIZE];
     f.read(bytes);
     ByteBuffer bloomHeader = ByteBuffer.wrap(bytes);
     IntBuffer headerBuffer = bloomHeader.order(ByteOrder.LITTLE_ENDIAN).asIntBuffer();
     int numBytes = headerBuffer.get();
+    if (numBytes <= 0 || numBytes > BlockSplitBloomFilter.MAXIMUM_BYTES) {
+      return null;
+    }
+
     BloomFilter.HashStrategy hash = BloomFilter.HashStrategy.values()[headerBuffer.get()];
+    if (hash != BlockSplitBloomFilter.HashStrategy.MURMUR3_X64_128) {
+      return null;
+    }
+
     BloomFilter.Algorithm algorithm = BloomFilter.Algorithm.values()[headerBuffer.get()];
+    if (algorithm != BlockSplitBloomFilter.Algorithm.BLOCK) {
+      return null;
+    }
+
     byte[] bitset = new byte[numBytes];
     f.readFully(bitset);
     return new BlockSplitBloomFilter(bitset);
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java
index c55225c176..3156132534 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/metadata/ColumnChunkMetaData.java
@@ -126,7 +126,7 @@ && positiveLongFitsInAnInt(totalUncompressedSize)) {
           statistics,
           firstDataPage,
           dictionaryPageOffset,
-          Long.MAX_VALUE,
+          0,
           valueCount,
           totalSize,
           totalUncompressedSize);
@@ -137,7 +137,7 @@ && positiveLongFitsInAnInt(totalUncompressedSize)) {
           statistics,
           firstDataPage,
           dictionaryPageOffset,
-          Long.MAX_VALUE,
+          0,
           valueCount,
           totalSize,
           totalUncompressedSize);