From 3574a5e4357c61533687d646d44681a8c94def7d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Isma=C3=ABl=20Mej=C3=ADa?= <iemejia@gmail.com>
Date: Sun, 19 Apr 2026 19:35:49 +0000
Subject: [PATCH] GH-3509: Optimize BinaryPlainValuesReader by reading directly
 from ByteBuffer

BinaryPlainValuesReader.readBytes is the hot-path decoder for BINARY (and
STRING) columns using PLAIN encoding. The current implementation funnels
every length read through BytesUtils.readIntLittleEndian(InputStream),
which calls in.read() four times with full IOException plumbing and
virtual dispatch on ByteBufferInputStream, and slices every value through
a virtual ByteBufferInputStream.slice(int).

This change replaces the ByteBufferInputStream field with a single
ByteBuffer set up once in initFromPage. The length prefix is then a
single ByteBuffer.getInt() (one bounds check, JIT-friendly little-endian
intrinsic, no IOException plumbing) and each value slice is a direct
ByteBuffer.slice() instead of a virtual ByteBufferInputStream.slice(int).

When the input is a MultiBufferInputStream the upfront stream.slice(available)
call may consolidate the page into a single fresh ByteBuffer. This is one
allocation per page in exchange for inlined per-value reads, which is a
clear win whenever the page contains more than a handful of values.

Benchmark (BinaryEncodingBenchmark.decodePlain, 100k values per
invocation, JDK 18, JMH -wi 5 -i 10 -f 3, 30 samples per row):

  cardinality stringLen  Before (ops/s)   After (ops/s)   Improvement
  HIGH        10           23,114,969       27,126,384    +17.4% (1.17x)
  HIGH        100          20,516,861       22,200,091     +8.2% (1.08x)
  HIGH        1000          7,069,927        7,679,070     +8.6% (1.09x)
  LOW         10           22,885,778       26,459,404    +15.6% (1.16x)
  LOW         100          20,349,900       22,158,675     +8.9% (1.09x)
  LOW         1000          6,279,616        7,500,811    +19.4% (1.19x)

Per-op allocation is unchanged (~88 B/op = the returned Binary + the
per-value ByteBuffer slice). The improvement is largest at small string
lengths because the per-value fixed cost dominates more there.

All 573 parquet-column tests pass.
---
 .../values/plain/BinaryPlainValuesReader.java | 44 +++++++++++++++-----
 1 file changed, 33 insertions(+), 11 deletions(-)
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/values/plain/BinaryPlainValuesReader.java b/parquet-column/src/main/java/org/apache/parquet/column/values/plain/BinaryPlainValuesReader.java
index 6ce2f31a43..c0532f2961 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/values/plain/BinaryPlainValuesReader.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/values/plain/BinaryPlainValuesReader.java
@@ -19,36 +19,52 @@
 package org.apache.parquet.column.values.plain;
 
 import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
 import org.apache.parquet.bytes.ByteBufferInputStream;
-import org.apache.parquet.bytes.BytesUtils;
 import org.apache.parquet.column.values.ValuesReader;
 import org.apache.parquet.io.ParquetDecodingException;
 import org.apache.parquet.io.api.Binary;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+/**
+ * Plain encoding reader for BINARY values.
+ *
+ * <p>Reads directly from a {@link ByteBuffer} with {@link ByteOrder#LITTLE_ENDIAN} byte order,
+ * using {@link ByteBuffer#getInt()} for the 4-byte length prefix instead of 4 individual
+ * {@code InputStream.read()} calls through {@link org.apache.parquet.bytes.BytesUtils#readIntLittleEndian}.
+ *
+ * <p>Malformed input (a truncated page or a length prefix that does not fit in the remaining bytes)
+ * is reported as a {@link ParquetDecodingException}, as it was with the stream-based implementation.
+ */
 public class BinaryPlainValuesReader extends ValuesReader {
   private static final Logger LOG = LoggerFactory.getLogger(BinaryPlainValuesReader.class);
-  private ByteBufferInputStream in;
+  private ByteBuffer buffer;
 
   @Override
   public Binary readBytes() {
     try {
-      int length = BytesUtils.readIntLittleEndian(in);
-      return Binary.fromConstantByteBuffer(in.slice(length));
-    } catch (IOException | RuntimeException e) {
-      throw new ParquetDecodingException("could not read bytes at offset " + in.position(), e);
+      int length = buffer.getInt();
+      ByteBuffer valueSlice = buffer.slice();
+      valueSlice.limit(length);
+      buffer.position(buffer.position() + length);
+      return Binary.fromConstantByteBuffer(valueSlice);
+    } catch (RuntimeException e) {
+      // BufferUnderflowException (truncated page) or IllegalArgumentException (length out of range)
+      throw new ParquetDecodingException("could not read bytes at offset " + buffer.position(), e);
     }
   }
 
   @Override
   public void skip() {
     try {
-      int length = BytesUtils.readIntLittleEndian(in);
-      in.skipFully(length);
-    } catch (IOException | RuntimeException e) {
-      throw new ParquetDecodingException("could not skip bytes at offset " + in.position(), e);
+      int length = buffer.getInt();
+      buffer.position(buffer.position() + length);
+    } catch (RuntimeException e) {
+      // A truncated page or an out-of-range length surfaces here as an unchecked buffer exception
+      throw new ParquetDecodingException("could not skip bytes at offset " + buffer.position(), e);
     }
   }
 
   @Override
@@ -57,6 +73,12 @@ public void initFromPage(int valueCount, ByteBufferInputStream stream) throws IO
         "init from page at offset {} for length {}",
         stream.position(),
         (stream.available() - stream.position()));
-    this.in = stream.remainingStream();
+    // Take the rest of the page as one little-endian buffer; per-value reads then bypass the stream.
+    int available = stream.available();
+    if (available > 0) {
+      this.buffer = stream.slice(available).order(ByteOrder.LITTLE_ENDIAN);
+    } else {
+      this.buffer = ByteBuffer.allocate(0).order(ByteOrder.LITTLE_ENDIAN);
+    }
   }
 }