From 42dd898db2b0b43dbaa98517d7bf222f0c76ff9b Mon Sep 17 00:00:00 2001 From: Jasper Potts <1466205+jasperpotts@users.noreply.github.com> Date: Mon, 27 Apr 2026 14:00:20 -0700 Subject: [PATCH 1/5] Updated varint benchmarks Signed-off-by: Jasper Potts <1466205+jasperpotts@users.noreply.github.com> --- .../pbj/integration/jmh/VarIntBench.java | 258 ----------------- .../jmh/varint/VarIntReaderBench.java | 263 +++++++++--------- .../jmh/varint/VarIntWriterBench.java | 42 ++- .../writers/GoogleCodedOutputStream.java | 1 + .../jmh/varint/writers/PbjMemoryData.java | 27 ++ .../writers/PbjWritableStreamingData.java | 7 +- .../writers/RichardStartinByteArray.java | 2 + .../SmartNoDataDependencyByteArray.java | 81 ++++++ .../writers/SteinbornBlendedByteArray.java | 113 ++++++++ .../writers/SteinbornBulkByteArray.java | 130 +++++++++ 10 files changed, 536 insertions(+), 388 deletions(-) delete mode 100644 pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/VarIntBench.java create mode 100644 pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/PbjMemoryData.java create mode 100644 pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/SmartNoDataDependencyByteArray.java create mode 100644 pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/SteinbornBlendedByteArray.java create mode 100644 pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/SteinbornBulkByteArray.java diff --git a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/VarIntBench.java b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/VarIntBench.java deleted file mode 100644 index 1fab7834a..000000000 --- a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/VarIntBench.java +++ /dev/null @@ -1,258 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -package com.hedera.pbj.integration.jmh; - -import com.google.protobuf.CodedInputStream; -import 
com.google.protobuf.CodedOutputStream; -import com.hedera.pbj.integration.NonSynchronizedByteArrayInputStream; -import com.hedera.pbj.runtime.MalformedProtobufException; -import com.hedera.pbj.runtime.io.UnsafeUtils; -import com.hedera.pbj.runtime.io.buffer.BufferedData; -import com.hedera.pbj.runtime.io.buffer.Bytes; -import com.hedera.pbj.runtime.io.stream.ReadableStreamingData; -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.nio.*; -import java.util.Random; -import java.util.concurrent.TimeUnit; -import org.openjdk.jmh.annotations.*; -import org.openjdk.jmh.infra.Blackhole; - -@SuppressWarnings("unused") -@State(Scope.Benchmark) -@Fork(1) -@Warmup(iterations = 4, time = 2) -@Measurement(iterations = 5, time = 2) -@OutputTimeUnit(TimeUnit.NANOSECONDS) -@BenchmarkMode(Mode.AverageTime) -public class VarIntBench { - - ByteBuffer buffer = ByteBuffer.allocate(256 * 1024); - final ByteBuffer bufferDirect = ByteBuffer.allocateDirect(256 * 1024); - final BufferedData dataBuffer = BufferedData.wrap(buffer); - final BufferedData dataBufferDirect = BufferedData.wrap(bufferDirect); - - Bytes bytes = Bytes.EMPTY; - - InputStream bais = null; - ReadableStreamingData rsd = null; - - InputStream baisNonSync = null; - ReadableStreamingData rsdNonSync = null; - - private final int[] offsets = new int[1201]; - - public VarIntBench() { - try { - CodedOutputStream cout = CodedOutputStream.newInstance(buffer); - Random random = new Random(9387498731984L); - int pos = 0; - offsets[pos++] = 0; - for (int i = 0; i < 600; i++) { - cout.writeUInt64NoTag(random.nextLong(0, 128)); - offsets[pos++] = cout.getTotalBytesWritten(); - } - for (int i = 0; i < 150; i++) { - cout.writeUInt64NoTag(random.nextLong(128, 256)); - offsets[pos++] = cout.getTotalBytesWritten(); - } - for (int i = 0; i < 150; i++) { - cout.writeUInt64NoTag(random.nextLong(256, Integer.MAX_VALUE)); - offsets[pos++] = cout.getTotalBytesWritten(); - } - for (int i = 
0; i < 150; i++) { - cout.writeUInt64NoTag(random.nextLong(Integer.MIN_VALUE, Integer.MAX_VALUE)); - offsets[pos++] = cout.getTotalBytesWritten(); - } - for (int i = 0; i < 150; i++) { - cout.writeUInt64NoTag(random.nextLong(0, Long.MAX_VALUE)); - offsets[pos++] = cout.getTotalBytesWritten(); - } - cout.flush(); - // copy to direct buffer - buffer.flip(); - bufferDirect.put(buffer); - byte[] bts = new byte[buffer.limit()]; - for (int i = 0; i < buffer.limit(); i++) { - bts[i] = buffer.get(i); - } - bytes = Bytes.wrap(bts); - bais = new ByteArrayInputStream(bts.clone()); - rsd = new ReadableStreamingData(bais); - baisNonSync = new NonSynchronizedByteArrayInputStream(bts.clone()); - rsdNonSync = new ReadableStreamingData(baisNonSync); - } catch (IOException e) { - e.printStackTrace(); - } - } - - @Benchmark - @OperationsPerInvocation(1200) - public void dataBufferRead(Blackhole blackhole) throws IOException { - dataBuffer.reset(); - for (int i = 0; i < 1200; i++) { - blackhole.consume(dataBuffer.readVarLong(false)); - } - } - - @Benchmark - @OperationsPerInvocation(1200) - public void dataBufferGet(Blackhole blackhole) throws IOException { - dataBuffer.reset(); - int offset = 0; - for (int i = 0; i < 1200; i++) { - blackhole.consume(dataBuffer.getVarLong(offsets[offset++], false)); - } - } - - @Benchmark - @OperationsPerInvocation(1200) - public void dataBufferDirectRead(Blackhole blackhole) throws IOException { - dataBufferDirect.reset(); - for (int i = 0; i < 1200; i++) { - blackhole.consume(dataBufferDirect.readVarLong(false)); - } - } - - @Benchmark - @OperationsPerInvocation(1200) - public void dataBytesGet(Blackhole blackhole) throws IOException { - int offset = 0; - for (int i = 0; i < 1200; i++) { - blackhole.consume(bytes.getVarLong(offsets[offset++], false)); - } - } - - @Benchmark - @OperationsPerInvocation(1200) - public void dataSyncInputStreamRead(Blackhole blackhole) throws IOException { - bais.reset(); - for (int i = 0; i < 1200; i++) { - 
blackhole.consume(rsd.readVarLong(false)); - } - } - - @Benchmark - @OperationsPerInvocation(1200) - public void dataNonSyncInputStreamRead(Blackhole blackhole) throws IOException { - baisNonSync.reset(); - for (int i = 0; i < 1200; i++) { - blackhole.consume(rsdNonSync.readVarLong(false)); - } - } - - @Benchmark - @OperationsPerInvocation(1200) - public void richardGet(Blackhole blackhole) throws MalformedProtobufException { - int offset = 0; - buffer.clear(); - for (int i = 0; i < 1200; i++) { - blackhole.consume(getVarLongRichard(offsets[offset++], buffer)); - } - } - - @Benchmark - @OperationsPerInvocation(1200) - public void googleRead(Blackhole blackhole) throws IOException { - buffer.clear(); - final CodedInputStream codedInputStream = CodedInputStream.newInstance(buffer); - for (int i = 0; i < 1200; i++) { - blackhole.consume(codedInputStream.readRawVarint64()); - } - } - - @Benchmark - @OperationsPerInvocation(1200) - public void googleDirecRead(Blackhole blackhole) throws IOException { - bufferDirect.clear(); - final CodedInputStream codedInputStream = CodedInputStream.newInstance(bufferDirect); - for (int i = 0; i < 1200; i++) { - blackhole.consume(codedInputStream.readRawVarint64()); - } - } - - @Benchmark - @OperationsPerInvocation(1200) - public void googleSlowPathRead(Blackhole blackhole) throws MalformedProtobufException { - buffer.clear(); - for (int i = 0; i < 1200; i++) { - blackhole.consume(readRawVarint64SlowPath(buffer)); - } - } - - @Benchmark - @OperationsPerInvocation(1200) - public void googleSlowPathDirectRead(Blackhole blackhole) throws MalformedProtobufException { - bufferDirect.clear(); - for (int i = 0; i < 1200; i++) { - blackhole.consume(readRawVarint64SlowPath(bufferDirect)); - } - } - - private static long readRawVarint64SlowPath(ByteBuffer buf) throws MalformedProtobufException { - long result = 0; - for (int shift = 0; shift < 64; shift += 7) { - final byte b = buf.get(); - result |= (long) (b & 0x7F) << shift; - if ((b & 0x80) 
== 0) { - return result; - } - } - throw new MalformedProtobufException("Malformed varInt"); - } - - private static final int VARINT_CONTINUATION_MASK = 0b1000_0000; - private static final int VARINT_DATA_MASK = 0b0111_1111; - private static final int NUM_BITS_PER_VARINT_BYTE = 7; - - public static long getVarLongRichard(int offset, ByteBuffer buf) throws MalformedProtobufException { - // Protobuf encodes smaller integers with fewer bytes than larger integers. It takes a full byte - // to encode 7 bits of information. So, if all 64 bits of a long are in use (for example, if the - // leading bit is 1, or even all bits are 1) then it will take 10 bytes to transmit what would - // have otherwise been 8 bytes of data! - // - // Thus, at most, reading a varint should involve reading 10 bytes of data. - // - // The leading bit of each byte is a continuation bit. If set, another byte will follow. - // If we read 10 bytes in sequence with a continuation bit set, then we have a malformed - // byte stream. - // The bytes come least to most significant 7 bits. So the first byte we read represents - // the lowest 7 bytes, then the next byte is the next highest 7 bytes, etc. - - // The final value. 
- long value = 0; - // The amount to shift the bits we read by before AND with the value - int shift = -NUM_BITS_PER_VARINT_BYTE; - - // This method works with heap byte buffers only - final byte[] arr = buf.array(); - final int arrOffset = buf.arrayOffset() + offset; - - int i = 0; - for (; i < 10; i++) { - // Use UnsafeUtil instead of arr[arrOffset + i] to avoid array range checks - byte b = UnsafeUtils.getArrayByteNoChecks(arr, arrOffset + i); - value |= (long) (b & 0x7F) << (shift += NUM_BITS_PER_VARINT_BYTE); - - if (b >= 0) { - return value; - } - } - // If we read 10 in a row all with the leading continuation bit set, then throw a malformed - // protobuf exception - throw new MalformedProtobufException("Malformed var int"); - } - - public static void main(String[] args) throws Exception { - final Blackhole blackhole = new Blackhole( - "Today's password is swordfish. I understand instantiating Blackholes directly is dangerous."); - final VarIntBench bench = new VarIntBench(); - bench.dataBufferRead(blackhole); - bench.dataBufferGet(blackhole); - bench.dataBufferDirectRead(blackhole); - bench.dataBytesGet(blackhole); - bench.dataSyncInputStreamRead(blackhole); - bench.dataNonSyncInputStreamRead(blackhole); - bench.googleRead(blackhole); - } -} diff --git a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/VarIntReaderBench.java b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/VarIntReaderBench.java index 592ebdc58..0a40dbc8d 100644 --- a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/VarIntReaderBench.java +++ b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/VarIntReaderBench.java @@ -8,11 +8,13 @@ import com.hedera.pbj.runtime.io.UnsafeUtils; import com.hedera.pbj.runtime.io.buffer.BufferedData; import com.hedera.pbj.runtime.io.buffer.Bytes; +import com.hedera.pbj.runtime.io.buffer.MemoryData; import com.hedera.pbj.runtime.io.stream.ReadableStreamingData; 
import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; -import java.nio.*; +import java.nio.ByteBuffer; +import java.util.Arrays; import java.util.Random; import java.util.concurrent.TimeUnit; import org.openjdk.jmh.annotations.*; @@ -29,173 +31,211 @@ @OutputTimeUnit(TimeUnit.NANOSECONDS) @BenchmarkMode(Mode.AverageTime) public class VarIntReaderBench { - private static final int NUM_OF_VALUES = 1201; + private static final int NUM_OF_VALUES = 1200; - ByteBuffer buffer = ByteBuffer.allocate(256 * 1024); - final ByteBuffer bufferDirect = ByteBuffer.allocateDirect(256 * 1024); - final BufferedData dataBuffer = BufferedData.wrap(buffer); - final BufferedData dataBufferDirect = BufferedData.wrap(bufferDirect); + /** + * Number of bytes each varint occupies (1, 2, 4, or 8). All values are generated + * in the range [{@code 1L << ((numOfBytes-1)*7)}, {@code (1L << (numOfBytes*7)) - 1}]. + */ + @Param({"1", "2", "3", "4", "8"}) + public int numOfBytes; - Bytes bytes = Bytes.EMPTY; + // PBJ BufferedData benchmarks use their own backing buffers so that google benchmarks + // calling buffer.clear() do not clobber limit for PBJ reads. + private BufferedData dataBuffer; + private BufferedData dataBufferDirect; + private MemoryData dataMemory; - InputStream bais = null; - ReadableStreamingData rsd = null; + // Google / Richard benchmarks share heap and direct ByteBuffers. + // Capacity equals dataLength so buffer.clear() correctly resets to position=0, limit=dataLength. + private ByteBuffer googleBuffer; + private ByteBuffer googleBufferDirect; - InputStream baisNonSync = null; - ReadableStreamingData rsdNonSync = null; + private Bytes bytes; + private InputStream bais; + private ReadableStreamingData rsd; + private InputStream baisNonSync; + private ReadableStreamingData rsdNonSync; private final int[] offsets = new int[NUM_OF_VALUES]; - /** - * Number of bytes to read at a time (1, 2, 4, or 8). 
So create inputs with 1 byte siz,e, 2 byte size, 4 byte size, - * and 8 byte size. - */ - @Param({"1", "2", "4", "8"}) - public int numOfBytes; - public VarIntReaderBench() { - try { - CodedOutputStream cout = CodedOutputStream.newInstance(buffer); - Random random = new Random(9387498731984L); - int pos = 0; - offsets[pos++] = 0; - for (int i = 0; i < 600; i++) { - cout.writeUInt64NoTag(random.nextLong(0, 128)); - offsets[pos++] = cout.getTotalBytesWritten(); - } - for (int i = 0; i < 150; i++) { - cout.writeUInt64NoTag(random.nextLong(128, 256)); - offsets[pos++] = cout.getTotalBytesWritten(); - } - for (int i = 0; i < 150; i++) { - cout.writeUInt64NoTag(random.nextLong(256, Integer.MAX_VALUE)); - offsets[pos++] = cout.getTotalBytesWritten(); - } - for (int i = 0; i < 150; i++) { - cout.writeUInt64NoTag(random.nextLong(Integer.MIN_VALUE, Integer.MAX_VALUE)); - offsets[pos++] = cout.getTotalBytesWritten(); - } - for (int i = 0; i < 150; i++) { - cout.writeUInt64NoTag(random.nextLong(0, Long.MAX_VALUE)); - offsets[pos++] = cout.getTotalBytesWritten(); - } - cout.flush(); - // copy to direct buffer - buffer.flip(); - bufferDirect.put(buffer); - byte[] bts = new byte[buffer.limit()]; - for (int i = 0; i < buffer.limit(); i++) { - bts[i] = buffer.get(i); - } - bytes = Bytes.wrap(bts); - bais = new ByteArrayInputStream(bts.clone()); - rsd = new ReadableStreamingData(bais); - baisNonSync = new NonSynchronizedByteArrayInputStream(bts.clone()); - rsdNonSync = new ReadableStreamingData(baisNonSync); - } catch (IOException e) { - e.printStackTrace(); + @Setup(Level.Trial) + public void setup() throws IOException { + final long minValue = numOfBytes == 1 ? 
0L : 1L << ((numOfBytes - 1) * 7); + final long maxValue = (1L << (numOfBytes * 7)) - 1; + final int maxBufSize = (numOfBytes + 1) * NUM_OF_VALUES; + + byte[] scratch = new byte[maxBufSize]; + CodedOutputStream cout = CodedOutputStream.newInstance(scratch); + Random random = new Random(9387498731984L); + for (int i = 0; i < NUM_OF_VALUES; i++) { + offsets[i] = cout.getTotalBytesWritten(); + cout.writeUInt64NoTag(random.nextLong(minValue, maxValue)); } + cout.flush(); + byte[] data = Arrays.copyOf(scratch, cout.getTotalBytesWritten()); + + // Exact-sized copy for Google/Richard heap buffer — clear() sets limit back to dataLength + googleBuffer = ByteBuffer.wrap(data.clone()); + + // Direct buffer for Google direct benchmarks + googleBufferDirect = ByteBuffer.allocateDirect(data.length); + googleBufferDirect.put(data); + googleBufferDirect.flip(); + + // PBJ heap BufferedData — separate backing array, limit stays correct after resetPosition() + dataBuffer = BufferedData.wrap(ByteBuffer.wrap(data.clone())); + + // PBJ direct BufferedData + ByteBuffer directBuf = ByteBuffer.allocateDirect(data.length); + directBuf.put(data); + directBuf.flip(); + dataBufferDirect = BufferedData.wrap(directBuf); + + // PBJ MemoryData — wrap exact byte array, limit = data.length + dataMemory = MemoryData.wrap(data.clone()); + + // Bytes (immutable) + bytes = Bytes.wrap(data.clone()); + + // Synchronized stream + bais = new ByteArrayInputStream(data.clone()); + rsd = new ReadableStreamingData(bais); + + // Non-synchronized stream + baisNonSync = new NonSynchronizedByteArrayInputStream(data.clone()); + rsdNonSync = new ReadableStreamingData(baisNonSync); } @Benchmark - @OperationsPerInvocation(1200) - public void dataBufferRead(Blackhole blackhole) throws IOException { - dataBuffer.reset(); - for (int i = 0; i < 1200; i++) { + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + @OperationsPerInvocation(NUM_OF_VALUES) + public void dataBufferRead(Blackhole blackhole) { + 
dataBuffer.resetPosition(); + for (int i = 0; i < NUM_OF_VALUES; i++) { blackhole.consume(dataBuffer.readVarLong(false)); } } @Benchmark - @OperationsPerInvocation(1200) - public void dataBufferGet(Blackhole blackhole) throws IOException { - dataBuffer.reset(); + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + @OperationsPerInvocation(NUM_OF_VALUES) + public void dataBufferGet(Blackhole blackhole) { int offset = 0; - for (int i = 0; i < 1200; i++) { + for (int i = 0; i < NUM_OF_VALUES; i++) { blackhole.consume(dataBuffer.getVarLong(offsets[offset++], false)); } } @Benchmark - @OperationsPerInvocation(1200) - public void dataBufferDirectRead(Blackhole blackhole) throws IOException { - dataBufferDirect.reset(); - for (int i = 0; i < 1200; i++) { + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + @OperationsPerInvocation(NUM_OF_VALUES) + public void dataBufferDirectRead(Blackhole blackhole) { + dataBufferDirect.resetPosition(); + for (int i = 0; i < NUM_OF_VALUES; i++) { blackhole.consume(dataBufferDirect.readVarLong(false)); } } @Benchmark - @OperationsPerInvocation(1200) - public void dataBytesGet(Blackhole blackhole) throws IOException { + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + @OperationsPerInvocation(NUM_OF_VALUES) + public void dataMemoryRead(Blackhole blackhole) { + dataMemory.resetPosition(); + for (int i = 0; i < NUM_OF_VALUES; i++) { + blackhole.consume(dataMemory.readVarLong(false)); + } + } + + @Benchmark + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + @OperationsPerInvocation(NUM_OF_VALUES) + public void dataMemoryGet(Blackhole blackhole) { int offset = 0; - for (int i = 0; i < 1200; i++) { + for (int i = 0; i < NUM_OF_VALUES; i++) { + blackhole.consume(dataMemory.getVarLong(offsets[offset++], false)); + } + } + + @Benchmark + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + @OperationsPerInvocation(NUM_OF_VALUES) + public void dataBytesGet(Blackhole blackhole) { + int offset = 0; + for (int i = 0; i < NUM_OF_VALUES; i++) 
{ blackhole.consume(bytes.getVarLong(offsets[offset++], false)); } } @Benchmark - @OperationsPerInvocation(1200) + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + @OperationsPerInvocation(NUM_OF_VALUES) public void dataSyncInputStreamRead(Blackhole blackhole) throws IOException { bais.reset(); - for (int i = 0; i < 1200; i++) { + for (int i = 0; i < NUM_OF_VALUES; i++) { blackhole.consume(rsd.readVarLong(false)); } } @Benchmark - @OperationsPerInvocation(1200) + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + @OperationsPerInvocation(NUM_OF_VALUES) public void dataNonSyncInputStreamRead(Blackhole blackhole) throws IOException { baisNonSync.reset(); - for (int i = 0; i < 1200; i++) { + for (int i = 0; i < NUM_OF_VALUES; i++) { blackhole.consume(rsdNonSync.readVarLong(false)); } } @Benchmark - @OperationsPerInvocation(1200) + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + @OperationsPerInvocation(NUM_OF_VALUES) public void richardGet(Blackhole blackhole) throws MalformedProtobufException { int offset = 0; - buffer.clear(); - for (int i = 0; i < 1200; i++) { - blackhole.consume(getVarLongRichard(offsets[offset++], buffer)); + for (int i = 0; i < NUM_OF_VALUES; i++) { + blackhole.consume(getVarLongRichard(offsets[offset++], googleBuffer)); } } @Benchmark - @OperationsPerInvocation(1200) + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + @OperationsPerInvocation(NUM_OF_VALUES) public void googleRead(Blackhole blackhole) throws IOException { - buffer.clear(); - final CodedInputStream codedInputStream = CodedInputStream.newInstance(buffer); - for (int i = 0; i < 1200; i++) { + googleBuffer.clear(); + final CodedInputStream codedInputStream = CodedInputStream.newInstance(googleBuffer); + for (int i = 0; i < NUM_OF_VALUES; i++) { blackhole.consume(codedInputStream.readRawVarint64()); } } @Benchmark - @OperationsPerInvocation(1200) - public void googleDirecRead(Blackhole blackhole) throws IOException { - bufferDirect.clear(); - final CodedInputStream 
codedInputStream = CodedInputStream.newInstance(bufferDirect); - for (int i = 0; i < 1200; i++) { + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + @OperationsPerInvocation(NUM_OF_VALUES) + public void googleDirectRead(Blackhole blackhole) throws IOException { + googleBufferDirect.clear(); + final CodedInputStream codedInputStream = CodedInputStream.newInstance(googleBufferDirect); + for (int i = 0; i < NUM_OF_VALUES; i++) { blackhole.consume(codedInputStream.readRawVarint64()); } } @Benchmark - @OperationsPerInvocation(1200) + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + @OperationsPerInvocation(NUM_OF_VALUES) public void googleSlowPathRead(Blackhole blackhole) throws MalformedProtobufException { - buffer.clear(); - for (int i = 0; i < 1200; i++) { - blackhole.consume(readRawVarint64SlowPath(buffer)); + googleBuffer.clear(); + for (int i = 0; i < NUM_OF_VALUES; i++) { + blackhole.consume(readRawVarint64SlowPath(googleBuffer)); } } @Benchmark - @OperationsPerInvocation(1200) + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + @OperationsPerInvocation(NUM_OF_VALUES) public void googleSlowPathDirectRead(Blackhole blackhole) throws MalformedProtobufException { - bufferDirect.clear(); - for (int i = 0; i < 1200; i++) { - blackhole.consume(readRawVarint64SlowPath(bufferDirect)); + googleBufferDirect.clear(); + for (int i = 0; i < NUM_OF_VALUES; i++) { + blackhole.consume(readRawVarint64SlowPath(googleBufferDirect)); } } @@ -211,45 +251,20 @@ private static long readRawVarint64SlowPath(ByteBuffer buf) throws MalformedProt throw new MalformedProtobufException("Malformed varInt"); } - private static final int VARINT_CONTINUATION_MASK = 0b1000_0000; - private static final int VARINT_DATA_MASK = 0b0111_1111; private static final int NUM_BITS_PER_VARINT_BYTE = 7; public static long getVarLongRichard(int offset, ByteBuffer buf) throws MalformedProtobufException { - // Protobuf encodes smaller integers with fewer bytes than larger integers. 
It takes a full byte - // to encode 7 bits of information. So, if all 64 bits of a long are in use (for example, if the - // leading bit is 1, or even all bits are 1) then it will take 10 bytes to transmit what would - // have otherwise been 8 bytes of data! - // - // Thus, at most, reading a varint should involve reading 10 bytes of data. - // - // The leading bit of each byte is a continuation bit. If set, another byte will follow. - // If we read 10 bytes in sequence with a continuation bit set, then we have a malformed - // byte stream. - // The bytes come least to most significant 7 bits. So the first byte we read represents - // the lowest 7 bytes, then the next byte is the next highest 7 bytes, etc. - - // The final value. long value = 0; - // The amount to shift the bits we read by before AND with the value int shift = -NUM_BITS_PER_VARINT_BYTE; - - // This method works with heap byte buffers only final byte[] arr = buf.array(); final int arrOffset = buf.arrayOffset() + offset; - - int i = 0; - for (; i < 10; i++) { - // Use UnsafeUtil instead of arr[arrOffset + i] to avoid array range checks + for (int i = 0; i < 10; i++) { byte b = UnsafeUtils.getArrayByteNoChecks(arr, arrOffset + i); value |= (long) (b & 0x7F) << (shift += NUM_BITS_PER_VARINT_BYTE); - if (b >= 0) { return value; } } - // If we read 10 in a row all with the leading continuation bit set, then throw a malformed - // protobuf exception throw new MalformedProtobufException("Malformed var int"); } diff --git a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/VarIntWriterBench.java b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/VarIntWriterBench.java index fac11c3ce..c91d4a7c1 100644 --- a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/VarIntWriterBench.java +++ b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/VarIntWriterBench.java @@ -7,8 +7,12 @@ import 
com.hedera.pbj.integration.jmh.varint.writers.KafkaByteBuffer; import com.hedera.pbj.integration.jmh.varint.writers.PbjBufferedData; import com.hedera.pbj.integration.jmh.varint.writers.PbjBufferedDataDirect; +import com.hedera.pbj.integration.jmh.varint.writers.PbjMemoryData; import com.hedera.pbj.integration.jmh.varint.writers.PbjWritableStreamingData; import com.hedera.pbj.integration.jmh.varint.writers.RichardStartinByteArray; +import com.hedera.pbj.integration.jmh.varint.writers.SmartNoDataDependencyByteArray; +import com.hedera.pbj.integration.jmh.varint.writers.SteinbornBlendedByteArray; +import com.hedera.pbj.integration.jmh.varint.writers.SteinbornBulkByteArray; import java.io.IOException; import java.util.Random; import java.util.concurrent.TimeUnit; @@ -42,8 +46,8 @@ public class VarIntWriterBench { * Number of bytes to read at a time (1, 2, 4, or 8). So create inputs with 1 byte siz,e, 2 byte size, 4 byte size, * and 8 byte size. */ - // @Param({"1", "2", "3", "4", "8"}) - @Param({"4"}) + @Param({"1", "2", "3", "4", "8"}) +// @Param({"4"}) public int numOfBytes; private long[] numbers; @@ -119,7 +123,39 @@ public void richardStartinByteArray(RichardStartinByteArray state) throws IOExce @Benchmark @CompilerControl(CompilerControl.Mode.DONT_INLINE) @OperationsPerInvocation(NUM_OF_VALUES) - public void kafkaByteBuffer(KafkaByteBuffer state) throws IOException { + public void kafkaByteBuffer(KafkaByteBuffer state) { + for (int i = 0; i < NUM_OF_VALUES; i++) state.writeVarint(numbers[i]); + state.endLoop(); + } + + @Benchmark + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + @OperationsPerInvocation(NUM_OF_VALUES) + public void pbjMemoryData(PbjMemoryData state) { + for (int i = 0; i < NUM_OF_VALUES; i++) state.writeVarint(numbers[i]); + state.endLoop(); + } + + @Benchmark + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + @OperationsPerInvocation(NUM_OF_VALUES) + public void smartNoDataDependencyByteArray(SmartNoDataDependencyByteArray state) { + 
for (int i = 0; i < NUM_OF_VALUES; i++) state.writeVarint(numbers[i]); + state.endLoop(); + } + + @Benchmark + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + @OperationsPerInvocation(NUM_OF_VALUES) + public void steinbornBlendedByteArray(SteinbornBlendedByteArray state) { + for (int i = 0; i < NUM_OF_VALUES; i++) state.writeVarint(numbers[i]); + state.endLoop(); + } + + @Benchmark + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + @OperationsPerInvocation(NUM_OF_VALUES) + public void steinbornBulkByteArray(SteinbornBulkByteArray state) { for (int i = 0; i < NUM_OF_VALUES; i++) state.writeVarint(numbers[i]); state.endLoop(); } diff --git a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/GoogleCodedOutputStream.java b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/GoogleCodedOutputStream.java index 561a6ff4d..8125b5e00 100644 --- a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/GoogleCodedOutputStream.java +++ b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/GoogleCodedOutputStream.java @@ -27,5 +27,6 @@ public void writeVarint(long value) throws IOException { public void endLoop() { byteArrayOutputStream.reset(); + output = CodedOutputStream.newInstance(byteArrayOutputStream); } } diff --git a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/PbjMemoryData.java b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/PbjMemoryData.java new file mode 100644 index 000000000..21d5271e1 --- /dev/null +++ b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/PbjMemoryData.java @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.pbj.integration.jmh.varint.writers; + +import com.hedera.pbj.integration.jmh.varint.VarIntWriterBench; +import com.hedera.pbj.runtime.io.buffer.MemoryData; +import 
org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; + +@State(Scope.Benchmark) +public class PbjMemoryData { + private MemoryData output; + + @Setup(Level.Trial) + public void setup() { + output = MemoryData.allocate(10 * VarIntWriterBench.NUM_OF_VALUES); + } + + public void writeVarint(long value) { + output.writeVarLong(value, false); + } + + public void endLoop() { + output.reset(); + } +} diff --git a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/PbjWritableStreamingData.java b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/PbjWritableStreamingData.java index aefca0777..2dffdce0b 100644 --- a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/PbjWritableStreamingData.java +++ b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/PbjWritableStreamingData.java @@ -1,9 +1,9 @@ // SPDX-License-Identifier: Apache-2.0 package com.hedera.pbj.integration.jmh.varint.writers; +import com.hedera.pbj.integration.NonSynchronizedByteArrayOutputStream; import com.hedera.pbj.integration.jmh.varint.VarIntWriterBench; import com.hedera.pbj.runtime.io.stream.WritableStreamingData; -import java.io.ByteArrayOutputStream; import java.io.IOException; import org.openjdk.jmh.annotations.Level; import org.openjdk.jmh.annotations.Scope; @@ -12,12 +12,12 @@ @State(Scope.Benchmark) public class PbjWritableStreamingData { - private ByteArrayOutputStream byteArrayOutputStream; + private NonSynchronizedByteArrayOutputStream byteArrayOutputStream; private WritableStreamingData output; @Setup(Level.Trial) public void setup() { - byteArrayOutputStream = new ByteArrayOutputStream(8 * VarIntWriterBench.NUM_OF_VALUES); + byteArrayOutputStream = new NonSynchronizedByteArrayOutputStream(8 * VarIntWriterBench.NUM_OF_VALUES); output = new 
WritableStreamingData(byteArrayOutputStream); } @@ -27,5 +27,6 @@ public void writeVarint(long value) throws IOException { public void endLoop() { byteArrayOutputStream.reset(); + output = new WritableStreamingData(byteArrayOutputStream); } } diff --git a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/RichardStartinByteArray.java b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/RichardStartinByteArray.java index 90423f9bf..7f4a867bd 100644 --- a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/RichardStartinByteArray.java +++ b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/RichardStartinByteArray.java @@ -12,6 +12,7 @@ * A varint writer based on the code from Richard Startin's post * Precompute varint lengths, 64 bit values */ +@SuppressWarnings("MismatchedReadAndWriteOfArray") @State(Scope.Benchmark) public class RichardStartinByteArray { private static final int[] VAR_INT_LENGTHS = new int[65]; @@ -62,6 +63,7 @@ public void writeVarint(long value) throws IOException { case 0: buffer[position] = (byte) (value | 0x80); } + position += length + 1; } public void endLoop() { diff --git a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/SmartNoDataDependencyByteArray.java b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/SmartNoDataDependencyByteArray.java new file mode 100644 index 000000000..a595d3320 --- /dev/null +++ b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/SmartNoDataDependencyByteArray.java @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.pbj.integration.jmh.varint.writers; + +import com.hedera.pbj.integration.jmh.varint.VarIntWriterBench; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import 
org.openjdk.jmh.annotations.State; + +/** + * Varint writer adapted from Andrew Steinborn's + * SmartNoDataDependencyUnrolledVarIntWriter for 64-bit + * longs. + * + *
All branch conditions test the original unshifted value, eliminating sequential + * data dependencies between condition evaluations and enabling CPU instruction-level parallelism. + */ +@SuppressWarnings("MismatchedReadAndWriteOfArray") +@State(Scope.Benchmark) +public class SmartNoDataDependencyByteArray { + private byte[] buffer; + private int position = 0; + + @Setup(Level.Trial) + public void setup() { + buffer = new byte[10 * VarIntWriterBench.NUM_OF_VALUES]; + } + + public void writeVarint(long value) { + if ((value & ~0x7FL) == 0) { + buffer[position++] = (byte) value; + } else { + buffer[position++] = (byte) ((value & 0x7F) | 0x80); + if ((value & ~0x3FFFL) == 0) { + buffer[position++] = (byte) (value >>> 7); + } else { + buffer[position++] = (byte) ((value >>> 7) & 0x7F | 0x80); + if ((value & ~0x1FFFFFL) == 0) { + buffer[position++] = (byte) (value >>> 14); + } else { + buffer[position++] = (byte) ((value >>> 14) & 0x7F | 0x80); + if ((value & ~0xFFFFFFFL) == 0) { + buffer[position++] = (byte) (value >>> 21); + } else { + buffer[position++] = (byte) ((value >>> 21) & 0x7F | 0x80); + if ((value & ~0x7FFFFFFFFL) == 0) { + buffer[position++] = (byte) (value >>> 28); + } else { + buffer[position++] = (byte) ((value >>> 28) & 0x7F | 0x80); + if ((value & ~0x3FFFFFFFFFFL) == 0) { + buffer[position++] = (byte) (value >>> 35); + } else { + buffer[position++] = (byte) ((value >>> 35) & 0x7F | 0x80); + if ((value & ~0x1FFFFFFFFFFFFL) == 0) { + buffer[position++] = (byte) (value >>> 42); + } else { + buffer[position++] = (byte) ((value >>> 42) & 0x7F | 0x80); + if ((value & ~0xFFFFFFFFFFFFFFL) == 0) { + buffer[position++] = (byte) (value >>> 49); + } else { + buffer[position++] = (byte) ((value >>> 49) & 0x7F | 0x80); + if ((value & ~0x7FFFFFFFFFFFFFFFL) == 0) { + buffer[position++] = (byte) (value >>> 56); + } else { + buffer[position++] = (byte) ((value >>> 56) & 0x7F | 0x80); + buffer[position++] = (byte) (value >>> 63); + } + } + } + } + } + } + } + } + } + } + 
+ public void endLoop() { + position = 0; + } +} diff --git a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/SteinbornBlendedByteArray.java b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/SteinbornBlendedByteArray.java new file mode 100644 index 000000000..f5e591b09 --- /dev/null +++ b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/SteinbornBlendedByteArray.java @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.pbj.integration.jmh.varint.writers; + +import com.hedera.pbj.integration.jmh.varint.VarIntWriterBench; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; + +/** + * Varint writer inspired by the "Blended" approach from Andrew Steinborn's varint writing showdown + * (link) adapted for 64-bit + * longs. + * + *
Uses per-size-case branches with direct sequential byte array writes. Avoids ByteBuffer or + * stream overhead; each branch writes only the exact bytes needed for that varint size. + */ +@SuppressWarnings("MismatchedReadAndWriteOfArray") +@State(Scope.Benchmark) +public class SteinbornBlendedByteArray { + private byte[] buffer; + private int position = 0; + + @Setup(Level.Trial) + public void setup() { + buffer = new byte[10 * VarIntWriterBench.NUM_OF_VALUES]; + } + + public void writeVarint(long value) { + if (value < (1L << 7)) { + buffer[position++] = (byte) value; + } else if (value < (1L << 14)) { + buffer[position] = (byte) ((value & 0x7F) | 0x80); + buffer[position + 1] = (byte) (value >>> 7); + position += 2; + } else if (value < (1L << 21)) { + buffer[position] = (byte) ((value & 0x7F) | 0x80); + buffer[position + 1] = (byte) (((value >>> 7) & 0x7F) | 0x80); + buffer[position + 2] = (byte) (value >>> 14); + position += 3; + } else if (value < (1L << 28)) { + buffer[position] = (byte) ((value & 0x7F) | 0x80); + buffer[position + 1] = (byte) (((value >>> 7) & 0x7F) | 0x80); + buffer[position + 2] = (byte) (((value >>> 14) & 0x7F) | 0x80); + buffer[position + 3] = (byte) (value >>> 21); + position += 4; + } else if (value < (1L << 35)) { + buffer[position] = (byte) ((value & 0x7F) | 0x80); + buffer[position + 1] = (byte) (((value >>> 7) & 0x7F) | 0x80); + buffer[position + 2] = (byte) (((value >>> 14) & 0x7F) | 0x80); + buffer[position + 3] = (byte) (((value >>> 21) & 0x7F) | 0x80); + buffer[position + 4] = (byte) (value >>> 28); + position += 5; + } else if (value < (1L << 42)) { + buffer[position] = (byte) ((value & 0x7F) | 0x80); + buffer[position + 1] = (byte) (((value >>> 7) & 0x7F) | 0x80); + buffer[position + 2] = (byte) (((value >>> 14) & 0x7F) | 0x80); + buffer[position + 3] = (byte) (((value >>> 21) & 0x7F) | 0x80); + buffer[position + 4] = (byte) (((value >>> 28) & 0x7F) | 0x80); + buffer[position + 5] = (byte) (value >>> 35); + position += 6; + } 
else if (value < (1L << 49)) { + buffer[position] = (byte) ((value & 0x7F) | 0x80); + buffer[position + 1] = (byte) (((value >>> 7) & 0x7F) | 0x80); + buffer[position + 2] = (byte) (((value >>> 14) & 0x7F) | 0x80); + buffer[position + 3] = (byte) (((value >>> 21) & 0x7F) | 0x80); + buffer[position + 4] = (byte) (((value >>> 28) & 0x7F) | 0x80); + buffer[position + 5] = (byte) (((value >>> 35) & 0x7F) | 0x80); + buffer[position + 6] = (byte) (value >>> 42); + position += 7; + } else if (value < (1L << 56)) { + buffer[position] = (byte) ((value & 0x7F) | 0x80); + buffer[position + 1] = (byte) (((value >>> 7) & 0x7F) | 0x80); + buffer[position + 2] = (byte) (((value >>> 14) & 0x7F) | 0x80); + buffer[position + 3] = (byte) (((value >>> 21) & 0x7F) | 0x80); + buffer[position + 4] = (byte) (((value >>> 28) & 0x7F) | 0x80); + buffer[position + 5] = (byte) (((value >>> 35) & 0x7F) | 0x80); + buffer[position + 6] = (byte) (((value >>> 42) & 0x7F) | 0x80); + buffer[position + 7] = (byte) (value >>> 49); + position += 8; + } else //noinspection ConstantValue + if (value >= 0) { + // 9 bytes: value in [2^56, Long.MAX_VALUE] + buffer[position] = (byte) ((value & 0x7F) | 0x80); + buffer[position + 1] = (byte) (((value >>> 7) & 0x7F) | 0x80); + buffer[position + 2] = (byte) (((value >>> 14) & 0x7F) | 0x80); + buffer[position + 3] = (byte) (((value >>> 21) & 0x7F) | 0x80); + buffer[position + 4] = (byte) (((value >>> 28) & 0x7F) | 0x80); + buffer[position + 5] = (byte) (((value >>> 35) & 0x7F) | 0x80); + buffer[position + 6] = (byte) (((value >>> 42) & 0x7F) | 0x80); + buffer[position + 7] = (byte) (((value >>> 49) & 0x7F) | 0x80); + buffer[position + 8] = (byte) (value >>> 56); + position += 9; + } else { + // 10 bytes: negative values (signed int64 treated as uint64 by protobuf) + buffer[position] = (byte) ((value & 0x7F) | 0x80); + buffer[position + 1] = (byte) (((value >>> 7) & 0x7F) | 0x80); + buffer[position + 2] = (byte) (((value >>> 14) & 0x7F) | 0x80); + buffer[position + 
3] = (byte) (((value >>> 21) & 0x7F) | 0x80); + buffer[position + 4] = (byte) (((value >>> 28) & 0x7F) | 0x80); + buffer[position + 5] = (byte) (((value >>> 35) & 0x7F) | 0x80); + buffer[position + 6] = (byte) (((value >>> 42) & 0x7F) | 0x80); + buffer[position + 7] = (byte) (((value >>> 49) & 0x7F) | 0x80); + buffer[position + 8] = (byte) (((value >>> 56) & 0x7F) | 0x80); + buffer[position + 9] = (byte) (value >>> 63); + position += 10; + } + } + + public void endLoop() { + position = 0; + } +} diff --git a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/SteinbornBulkByteArray.java b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/SteinbornBulkByteArray.java new file mode 100644 index 000000000..dc366f0f6 --- /dev/null +++ b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/varint/writers/SteinbornBulkByteArray.java @@ -0,0 +1,130 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.pbj.integration.jmh.varint.writers; + +import com.hedera.pbj.integration.jmh.varint.VarIntWriterBench; +import java.lang.invoke.MethodHandles; +import java.lang.invoke.VarHandle; +import java.nio.ByteOrder; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; + +/** + * Varint writer based on the "Lucky 5" / bulk-write technique from Andrew Steinborn's showdown + * (link) adapted for 64-bit longs. + * + *
Packs multiple encoded varint bytes into a short, int, or long value and writes them with + * a single bulk VarHandle set, reducing the number of individual byte write operations. + * For example, a 4-byte varint is packed into one {@code int} and written in a single operation + * instead of four separate byte writes. + */ +@State(Scope.Benchmark) +public class SteinbornBulkByteArray { + private static final VarHandle SHORT_BE = + MethodHandles.byteArrayViewVarHandle(short[].class, ByteOrder.BIG_ENDIAN); + private static final VarHandle INT_BE = + MethodHandles.byteArrayViewVarHandle(int[].class, ByteOrder.BIG_ENDIAN); + private static final VarHandle LONG_BE = + MethodHandles.byteArrayViewVarHandle(long[].class, ByteOrder.BIG_ENDIAN); + + private byte[] buffer; + private int position = 0; + + @Setup(Level.Trial) + public void setup() { + buffer = new byte[10 * VarIntWriterBench.NUM_OF_VALUES]; + } + + public void writeVarint(long value) { + if (value < (1L << 7)) { + buffer[position++] = (byte) value; + } else if (value < (1L << 14)) { + SHORT_BE.set(buffer, position, + (short) (((value & 0x7F) | 0x80) << 8 | (value >>> 7))); + position += 2; + } else if (value < (1L << 21)) { + SHORT_BE.set(buffer, position, + (short) (((value & 0x7F) | 0x80) << 8 | (((value >>> 7) & 0x7F) | 0x80))); + buffer[position + 2] = (byte) (value >>> 14); + position += 3; + } else if (value < (1L << 28)) { + INT_BE.set(buffer, position, + (int) (((value & 0x7F) | 0x80) << 24 + | (((value >>> 7) & 0x7F) | 0x80) << 16 + | (((value >>> 14) & 0x7F) | 0x80) << 8 + | (value >>> 21))); + position += 4; + } else if (value < (1L << 35)) { + INT_BE.set(buffer, position, + (int) (((value & 0x7F) | 0x80) << 24 + | (((value >>> 7) & 0x7F) | 0x80) << 16 + | (((value >>> 14) & 0x7F) | 0x80) << 8 + | (((value >>> 21) & 0x7F) | 0x80))); + buffer[position + 4] = (byte) (value >>> 28); + position += 5; + } else if (value < (1L << 42)) { + INT_BE.set(buffer, position, + (int) (((value & 0x7F) | 0x80) << 24 
+ | (((value >>> 7) & 0x7F) | 0x80) << 16 + | (((value >>> 14) & 0x7F) | 0x80) << 8 + | (((value >>> 21) & 0x7F) | 0x80))); + SHORT_BE.set(buffer, position + 4, + (short) ((((value >>> 28) & 0x7F) | 0x80) << 8 | (value >>> 35))); + position += 6; + } else if (value < (1L << 49)) { + INT_BE.set(buffer, position, + (int) (((value & 0x7F) | 0x80) << 24 + | (((value >>> 7) & 0x7F) | 0x80) << 16 + | (((value >>> 14) & 0x7F) | 0x80) << 8 + | (((value >>> 21) & 0x7F) | 0x80))); + SHORT_BE.set(buffer, position + 4, + (short) ((((value >>> 28) & 0x7F) | 0x80) << 8 | (((value >>> 35) & 0x7F) | 0x80))); + buffer[position + 6] = (byte) (value >>> 42); + position += 7; + } else if (value < (1L << 56)) { + LONG_BE.set(buffer, position, + ((value & 0x7FL) | 0x80L) << 56 + | (((value >>> 7) & 0x7FL) | 0x80L) << 48 + | (((value >>> 14) & 0x7FL) | 0x80L) << 40 + | (((value >>> 21) & 0x7FL) | 0x80L) << 32 + | (((value >>> 28) & 0x7FL) | 0x80L) << 24 + | (((value >>> 35) & 0x7FL) | 0x80L) << 16 + | (((value >>> 42) & 0x7FL) | 0x80L) << 8 + | (value >>> 49)); + position += 8; + } else //noinspection ConstantValue + if (value >= 0) { + // 9 bytes: value in [2^56, Long.MAX_VALUE] + LONG_BE.set(buffer, position, + ((value & 0x7FL) | 0x80L) << 56 + | (((value >>> 7) & 0x7FL) | 0x80L) << 48 + | (((value >>> 14) & 0x7FL) | 0x80L) << 40 + | (((value >>> 21) & 0x7FL) | 0x80L) << 32 + | (((value >>> 28) & 0x7FL) | 0x80L) << 24 + | (((value >>> 35) & 0x7FL) | 0x80L) << 16 + | (((value >>> 42) & 0x7FL) | 0x80L) << 8 + | (((value >>> 49) & 0x7FL) | 0x80L)); + buffer[position + 8] = (byte) (value >>> 56); + position += 9; + } else { + // 10 bytes: negative long (protobuf treats int64 as uint64) + LONG_BE.set(buffer, position, + ((value & 0x7FL) | 0x80L) << 56 + | (((value >>> 7) & 0x7FL) | 0x80L) << 48 + | (((value >>> 14) & 0x7FL) | 0x80L) << 40 + | (((value >>> 21) & 0x7FL) | 0x80L) << 32 + | (((value >>> 28) & 0x7FL) | 0x80L) << 24 + | (((value >>> 35) & 0x7FL) | 0x80L) << 16 + | (((value >>> 
42) & 0x7FL) | 0x80L) << 8 + | (((value >>> 49) & 0x7FL) | 0x80L)); + SHORT_BE.set(buffer, position + 8, + (short) ((((value >>> 56) & 0x7F) | 0x80) << 8 | (value >>> 63))); + position += 10; + } + } + + public void endLoop() { + position = 0; + } +} From d4f26989c3a6f51f8cd580b7368366809455a831 Mon Sep 17 00:00:00 2001 From: Jasper Potts <1466205+jasperpotts@users.noreply.github.com> Date: Mon, 27 Apr 2026 14:17:11 -0700 Subject: [PATCH 2/5] Cleaned up and updated ProtobufObjectBench Signed-off-by: Jasper Potts <1466205+jasperpotts@users.noreply.github.com> --- .../integration/jmh/ProtobufObjectBench.java | 380 ++++++++++++++---- 1 file changed, 294 insertions(+), 86 deletions(-) diff --git a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/ProtobufObjectBench.java b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/ProtobufObjectBench.java index e0cf43b57..29ab888b9 100644 --- a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/ProtobufObjectBench.java +++ b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/ProtobufObjectBench.java @@ -12,6 +12,8 @@ import com.hedera.pbj.runtime.Codec; import com.hedera.pbj.runtime.ParseException; import com.hedera.pbj.runtime.io.buffer.BufferedData; +import com.hedera.pbj.runtime.io.buffer.Bytes; +import com.hedera.pbj.runtime.io.buffer.MemoryData; import com.hedera.pbj.runtime.io.stream.ReadableStreamingData; import com.hedera.pbj.runtime.io.stream.WritableStreamingData; import com.hedera.pbj.test.proto.pbj.Everything; @@ -22,6 +24,7 @@ import java.util.concurrent.TimeUnit; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.CompilerControl; import org.openjdk.jmh.annotations.Fork; import org.openjdk.jmh.annotations.Measurement; import org.openjdk.jmh.annotations.Mode; @@ -33,6 +36,28 @@ import org.openjdk.jmh.annotations.Warmup; import org.openjdk.jmh.infra.Blackhole; +/** + * 
Benchmark matrix for protobuf object serialization and deserialization across all PBJ and + * Google Protobuf IO types. + * + *
Method names encode the full backing-store identity: + *
+ * {parse|write}{Pbj|ProtoC}{WrapperType[BackingDetail]}
+ *
+ *
+ * PBJ backing-store coverage: + *
+ * BufferedDataHeap — BufferedData wrapping a heap ByteBuffer (ByteArrayBufferedData) + * BufferedDataDirect — BufferedData wrapping a direct ByteBuffer (DirectBufferedData) + * MemoryDataHeap — MemoryData backed by MemorySegment.ofArray(byte[]) (heap) + * MemoryDataNative — MemoryData backed by MemorySegment.ofBuffer(directBB) (native) + * MemoryDataOffHeap — MemoryData backed by Arena.ofAuto().allocate() (native) + * Bytes — immutable Bytes wrapping a byte[] (parse only) + * InputStream — ReadableStreamingData over a NonSynchronizedByteArrayInputStream + * OutputStream — WritableStreamingData over a NonSynchronizedByteArrayOutputStream + * ByteArray — raw byte[] via Codec.write(T, byte[], int) (write only) + *+ */ @SuppressWarnings("unused") @Fork(1) @Warmup(iterations = 3, time = 2) @@ -40,7 +65,7 @@ @OutputTimeUnit(TimeUnit.NANOSECONDS) @BenchmarkMode(Mode.AverageTime) public abstract class ProtobufObjectBench
{ - /** we repeat all operations 1000 times so that measured times are nig enough */ + /** Repeat each operation this many times so per-op times are large enough to measure. */ private static final int OPERATION_COUNT = 1000; @State(Scope.Benchmark) @@ -49,25 +74,61 @@ public static class BenchmarkState
{
private ProtobufParseFunction benchmarkState, Blackhole blackhole) throws ParseException {
+ public void parsePbjBufferedDataHeap(BenchmarkState s, Blackhole bh) throws ParseException {
for (int i = 0; i < OPERATION_COUNT; i++) {
- benchmarkState.protobufDataBuffer.resetPosition();
- blackhole.consume(benchmarkState.pbjCodec.parse(benchmarkState.protobufDataBuffer));
+ s.protobufBufferedDataHeap.resetPosition();
+ bh.consume(s.pbjCodec.parse(s.protobufBufferedDataHeap));
}
}
+ /** Parse: PBJ · BufferedData (direct) — DirectBufferedData, ByteBuffer.allocateDirect() */
@Benchmark
+ @CompilerControl(CompilerControl.Mode.DONT_INLINE)
@OperationsPerInvocation(OPERATION_COUNT)
- public void parsePbjByteBuffer(BenchmarkState benchmarkState, Blackhole blackhole) throws ParseException {
+ public void parsePbjBufferedDataDirect(BenchmarkState s, Blackhole bh) throws ParseException {
for (int i = 0; i < OPERATION_COUNT; i++) {
- benchmarkState.protobufDataBuffer.resetPosition();
- blackhole.consume(benchmarkState.pbjCodec.parse(benchmarkState.protobufDataBuffer));
+ s.protobufBufferedDataDirect.resetPosition();
+ bh.consume(s.pbjCodec.parse(s.protobufBufferedDataDirect));
}
}
+ /** Parse: PBJ · MemoryData (heap) — MemorySegment.ofArray(byte[]) */
@Benchmark
+ @CompilerControl(CompilerControl.Mode.DONT_INLINE)
@OperationsPerInvocation(OPERATION_COUNT)
- public void parsePbjByteBufferDirect(BenchmarkState benchmarkState, Blackhole blackhole)
- throws ParseException {
+ public void parsePbjMemoryDataHeap(BenchmarkState s, Blackhole bh) throws ParseException {
for (int i = 0; i < OPERATION_COUNT; i++) {
- benchmarkState.protobufDataBufferDirect.resetPosition();
- blackhole.consume(benchmarkState.pbjCodec.parse(benchmarkState.protobufDataBufferDirect));
+ s.protobufMemoryDataHeap.resetPosition();
+ bh.consume(s.pbjCodec.parse(s.protobufMemoryDataHeap));
}
}
+ /** Parse: PBJ · MemoryData (native) — MemorySegment.ofBuffer(directByteBuffer) */
@Benchmark
+ @CompilerControl(CompilerControl.Mode.DONT_INLINE)
@OperationsPerInvocation(OPERATION_COUNT)
- public void parsePbjInputStream(BenchmarkState benchmarkState, Blackhole blackhole) throws ParseException {
+ public void parsePbjMemoryDataNative(BenchmarkState s, Blackhole bh) throws ParseException {
for (int i = 0; i < OPERATION_COUNT; i++) {
- benchmarkState.bin.resetPosition();
- blackhole.consume(benchmarkState.pbjCodec.parse(new ReadableStreamingData(benchmarkState.bin)));
+ s.protobufMemoryDataNative.resetPosition();
+ bh.consume(s.pbjCodec.parse(s.protobufMemoryDataNative));
}
}
+ /** Parse: PBJ · MemoryData (off-heap) — Arena.ofAuto().allocate() native segment */
@Benchmark
+ @CompilerControl(CompilerControl.Mode.DONT_INLINE)
@OperationsPerInvocation(OPERATION_COUNT)
- public void parseProtoCByteArray(BenchmarkState benchmarkState, Blackhole blackhole) throws IOException {
+ public void parsePbjMemoryDataOffHeap(BenchmarkState s, Blackhole bh) throws ParseException {
for (int i = 0; i < OPERATION_COUNT; i++) {
- blackhole.consume(benchmarkState.googleByteArrayParseMethod.parse(benchmarkState.protobuf));
+ s.protobufMemoryDataOffHeap.resetPosition();
+ bh.consume(s.pbjCodec.parse(s.protobufMemoryDataOffHeap));
}
}
+ /** Parse: PBJ · Bytes (immutable) — backed by byte[], no reset needed */
@Benchmark
+ @CompilerControl(CompilerControl.Mode.DONT_INLINE)
@OperationsPerInvocation(OPERATION_COUNT)
- public void parseProtoCByteBufferDirect(BenchmarkState benchmarkState, Blackhole blackhole)
- throws IOException {
+ public void parsePbjBytes(BenchmarkState s, Blackhole bh) throws ParseException {
for (int i = 0; i < OPERATION_COUNT; i++) {
- benchmarkState.protobufByteBufferDirect.position(0);
- blackhole.consume(
- benchmarkState.googleByteBufferParseMethod.parse(benchmarkState.protobufByteBufferDirect));
+ bh.consume(s.pbjCodec.parse(s.protobufBytes));
}
}
+ /** Parse: PBJ · ReadableStreamingData wrapping NonSynchronizedByteArrayInputStream */
@Benchmark
+ @CompilerControl(CompilerControl.Mode.DONT_INLINE)
@OperationsPerInvocation(OPERATION_COUNT)
- public void parseProtoCByteBuffer(BenchmarkState benchmarkState, Blackhole blackhole) throws IOException {
+ public void parsePbjInputStream(BenchmarkState s, Blackhole bh) throws ParseException {
for (int i = 0; i < OPERATION_COUNT; i++) {
- blackhole.consume(benchmarkState.googleByteBufferParseMethod.parse(benchmarkState.protobufByteBuffer));
+ s.bin.resetPosition();
+ bh.consume(s.pbjCodec.parse(new ReadableStreamingData(s.bin)));
}
}
+ // ════════════════════════════════════════════════════════════════════════
+ // Google Protobuf parse benchmarks
+ // ════════════════════════════════════════════════════════════════════════
+
+ /** Parse: Google · byte[] */
@Benchmark
+ @CompilerControl(CompilerControl.Mode.DONT_INLINE)
@OperationsPerInvocation(OPERATION_COUNT)
- public void parseProtoCInputStream(BenchmarkState benchmarkState, Blackhole blackhole) throws IOException {
+ public void parseProtoCByteArray(BenchmarkState s, Blackhole bh) throws IOException {
for (int i = 0; i < OPERATION_COUNT; i++) {
- benchmarkState.bin.resetPosition();
- blackhole.consume(benchmarkState.googleInputStreamParseMethod.parse(benchmarkState.bin));
+ bh.consume(s.googleByteArrayParseMethod.parse(s.protobuf));
}
}
- /** Same as writePbjByteBuffer because DataBuffer.wrap(byte[]) uses ByteBuffer today, added this because makes result plotting easier */
+ /** Parse: Google · ByteBuffer (heap) */
@Benchmark
+ @CompilerControl(CompilerControl.Mode.DONT_INLINE)
@OperationsPerInvocation(OPERATION_COUNT)
- public void writePbjByteArray(BenchmarkState benchmarkState, Blackhole blackhole) throws IOException {
+ public void parseProtoCByteBufferHeap(BenchmarkState s, Blackhole bh) throws IOException {
for (int i = 0; i < OPERATION_COUNT; i++) {
- benchmarkState.pbjCodec.write(benchmarkState.pbjModelObject, benchmarkState.outArray, 0);
- blackhole.consume(benchmarkState.outArray);
+ s.protobufByteBufferHeap.position(0);
+ bh.consume(s.googleByteBufferParseMethod.parse(s.protobufByteBufferHeap));
}
}
+ /** Parse: Google · ByteBuffer (direct) */
@Benchmark
+ @CompilerControl(CompilerControl.Mode.DONT_INLINE)
@OperationsPerInvocation(OPERATION_COUNT)
- public void writePbjByteBuffer(BenchmarkState benchmarkState, Blackhole blackhole) throws IOException {
+ public void parseProtoCByteBufferDirect(BenchmarkState s, Blackhole bh) throws IOException {
for (int i = 0; i < OPERATION_COUNT; i++) {
- benchmarkState.outDataBuffer.reset();
- benchmarkState.pbjCodec.write(benchmarkState.pbjModelObject, benchmarkState.outDataBuffer);
- blackhole.consume(benchmarkState.outDataBuffer);
+ s.protobufByteBufferDirect.position(0);
+ bh.consume(s.googleByteBufferParseMethod.parse(s.protobufByteBufferDirect));
}
}
+ /** Parse: Google · InputStream */
@Benchmark
+ @CompilerControl(CompilerControl.Mode.DONT_INLINE)
@OperationsPerInvocation(OPERATION_COUNT)
- public void writePbjByteDirect(BenchmarkState benchmarkState, Blackhole blackhole) throws IOException {
+ public void parseProtoCInputStream(BenchmarkState s, Blackhole bh) throws IOException {
for (int i = 0; i < OPERATION_COUNT; i++) {
- benchmarkState.outDataBufferDirect.reset();
- benchmarkState.pbjCodec.write(benchmarkState.pbjModelObject, benchmarkState.outDataBufferDirect);
- blackhole.consume(benchmarkState.outDataBufferDirect);
+ s.bin.resetPosition();
+ bh.consume(s.googleInputStreamParseMethod.parse(s.bin));
}
}
+ // ════════════════════════════════════════════════════════════════════════
+ // PBJ write benchmarks
+ // ════════════════════════════════════════════════════════════════════════
+
+ /** Write: PBJ · raw byte[] via Codec.write(T, byte[], int) */
@Benchmark
+ @CompilerControl(CompilerControl.Mode.DONT_INLINE)
@OperationsPerInvocation(OPERATION_COUNT)
- public void writePbjOutputStream(BenchmarkState benchmarkState, Blackhole blackhole) throws IOException {
+ public void writePbjByteArray(BenchmarkState s, Blackhole bh) {
for (int i = 0; i < OPERATION_COUNT; i++) {
- benchmarkState.bout.reset();
- benchmarkState.pbjCodec.write(
- benchmarkState.pbjModelObject, new WritableStreamingData(benchmarkState.bout));
- blackhole.consume(benchmarkState.bout.toByteArray());
+ s.pbjCodec.write(s.pbjModelObject, s.outByteArray, 0);
+ bh.consume(s.outByteArray);
}
}
+ /** Write: PBJ · BufferedData (heap) — ByteArrayBufferedData */
@Benchmark
+ @CompilerControl(CompilerControl.Mode.DONT_INLINE)
@OperationsPerInvocation(OPERATION_COUNT)
- public void writeProtoCByteArray(BenchmarkState benchmarkState, Blackhole blackhole) {
+ public void writePbjBufferedDataHeap(BenchmarkState s, Blackhole bh) throws IOException {
for (int i = 0; i < OPERATION_COUNT; i++) {
- blackhole.consume(benchmarkState.googleModelObject.toByteArray());
+ s.outBufferedDataHeap.reset();
+ s.pbjCodec.write(s.pbjModelObject, s.outBufferedDataHeap);
+ bh.consume(s.outBufferedDataHeap);
}
}
+ /** Write: PBJ · BufferedData (direct) — DirectBufferedData */
@Benchmark
+ @CompilerControl(CompilerControl.Mode.DONT_INLINE)
@OperationsPerInvocation(OPERATION_COUNT)
- public void writeProtoCByteBuffer(BenchmarkState benchmarkState, Blackhole blackhole) throws IOException {
+ public void writePbjBufferedDataDirect(BenchmarkState s, Blackhole bh) throws IOException {
for (int i = 0; i < OPERATION_COUNT; i++) {
- CodedOutputStream cout = CodedOutputStream.newInstance(benchmarkState.bbout);
- benchmarkState.googleModelObject.writeTo(cout);
- blackhole.consume(benchmarkState.bbout);
+ s.outBufferedDataDirect.reset();
+ s.pbjCodec.write(s.pbjModelObject, s.outBufferedDataDirect);
+ bh.consume(s.outBufferedDataDirect);
}
}
+ /** Write: PBJ · MemoryData (heap) — MemorySegment.ofArray(byte[]) */
@Benchmark
+ @CompilerControl(CompilerControl.Mode.DONT_INLINE)
@OperationsPerInvocation(OPERATION_COUNT)
- public void writeProtoCByteBufferDirect(BenchmarkState benchmarkState, Blackhole blackhole)
- throws IOException {
+ public void writePbjMemoryDataHeap(BenchmarkState s, Blackhole bh) throws IOException {
for (int i = 0; i < OPERATION_COUNT; i++) {
- CodedOutputStream cout = CodedOutputStream.newInstance(benchmarkState.bboutDirect);
- benchmarkState.googleModelObject.writeTo(cout);
- blackhole.consume(benchmarkState.bbout);
+ s.outMemoryDataHeap.reset();
+ s.pbjCodec.write(s.pbjModelObject, s.outMemoryDataHeap);
+ bh.consume(s.outMemoryDataHeap);
}
}
+ /** Write: PBJ · MemoryData (native) — MemorySegment.ofBuffer(directByteBuffer) */
@Benchmark
+ @CompilerControl(CompilerControl.Mode.DONT_INLINE)
@OperationsPerInvocation(OPERATION_COUNT)
- public void writeProtoCOutputStream(BenchmarkState benchmarkState, Blackhole blackhole) throws IOException {
+ public void writePbjMemoryDataNative(BenchmarkState s, Blackhole bh) throws IOException {
for (int i = 0; i < OPERATION_COUNT; i++) {
- benchmarkState.bout.reset();
- benchmarkState.googleModelObject.writeTo(benchmarkState.bout);
- blackhole.consume(benchmarkState.bout.toByteArray());
+ s.outMemoryDataNative.reset();
+ s.pbjCodec.write(s.pbjModelObject, s.outMemoryDataNative);
+ bh.consume(s.outMemoryDataNative);
+ }
+ }
+
+ /** Write: PBJ · MemoryData (off-heap) — Arena.ofAuto().allocate() native segment */
+ @Benchmark
+ @CompilerControl(CompilerControl.Mode.DONT_INLINE)
+ @OperationsPerInvocation(OPERATION_COUNT)
+ public void writePbjMemoryDataOffHeap(BenchmarkState s, Blackhole bh) throws IOException {
+ for (int i = 0; i < OPERATION_COUNT; i++) {
+ s.outMemoryDataOffHeap.reset();
+ s.pbjCodec.write(s.pbjModelObject, s.outMemoryDataOffHeap);
+ bh.consume(s.outMemoryDataOffHeap);
+ }
+ }
+
+ /** Write: PBJ · WritableStreamingData wrapping NonSynchronizedByteArrayOutputStream */
+ @Benchmark
+ @CompilerControl(CompilerControl.Mode.DONT_INLINE)
+ @OperationsPerInvocation(OPERATION_COUNT)
+ public void writePbjOutputStream(BenchmarkState s, Blackhole bh) throws IOException {
+ for (int i = 0; i < OPERATION_COUNT; i++) {
+ s.bout.reset();
+ s.pbjCodec.write(s.pbjModelObject, new WritableStreamingData(s.bout));
+ bh.consume(s.bout.toByteArray());
+ }
+ }
+
+ // ════════════════════════════════════════════════════════════════════════
+ // Google Protobuf write benchmarks
+ // ════════════════════════════════════════════════════════════════════════
+
+ /** Write: Google · byte[] via toByteArray() */
+ @Benchmark
+ @CompilerControl(CompilerControl.Mode.DONT_INLINE)
+ @OperationsPerInvocation(OPERATION_COUNT)
+ public void writeProtoCByteArray(BenchmarkState s, Blackhole bh) {
+ for (int i = 0; i < OPERATION_COUNT; i++) {
+ bh.consume(s.googleModelObject.toByteArray());
+ }
+ }
+
+ /** Write: Google · ByteBuffer (heap) via CodedOutputStream */
+ @Benchmark
+ @CompilerControl(CompilerControl.Mode.DONT_INLINE)
+ @OperationsPerInvocation(OPERATION_COUNT)
+ public void writeProtoCByteBufferHeap(BenchmarkState s, Blackhole bh) throws IOException {
+ for (int i = 0; i < OPERATION_COUNT; i++) {
+ s.bboutHeap.clear();
+ CodedOutputStream cout = CodedOutputStream.newInstance(s.bboutHeap);
+ s.googleModelObject.writeTo(cout);
+ bh.consume(s.bboutHeap);
+ }
+ }
+
+ /** Write: Google · ByteBuffer (direct) via CodedOutputStream */
+ @Benchmark
+ @CompilerControl(CompilerControl.Mode.DONT_INLINE)
+ @OperationsPerInvocation(OPERATION_COUNT)
+ public void writeProtoCByteBufferDirect(BenchmarkState s, Blackhole bh) throws IOException {
+ for (int i = 0; i < OPERATION_COUNT; i++) {
+ s.bboutDirect.clear();
+ CodedOutputStream cout = CodedOutputStream.newInstance(s.bboutDirect);
+ s.googleModelObject.writeTo(cout);
+ bh.consume(s.bboutDirect);
+ }
+ }
+
+ /** Write: Google · OutputStream */
+ @Benchmark
+ @CompilerControl(CompilerControl.Mode.DONT_INLINE)
+ @OperationsPerInvocation(OPERATION_COUNT)
+ public void writeProtoCOutputStream(BenchmarkState s, Blackhole bh) throws IOException {
+ for (int i = 0; i < OPERATION_COUNT; i++) {
+ s.bout.reset();
+ s.googleModelObject.writeTo(s.bout);
+ bh.consume(s.bout.toByteArray());
}
}
@@ -273,6 +477,10 @@ public interface ProtobufParseFunction PBJ backing-store coverage:
+ * {
+ /** Repeat each operation this many times so per-op times are large enough to measure. */
+ private static final int OPERATION_COUNT = 1000;
+
+ @SuppressWarnings("DuplicatedCode")
+ @State(Scope.Benchmark)
+ public static class BenchmarkState {
+ private Codec pbjCodec;
+ private ProtobufParseFunction pbjCodec, ProtobufParseFunction s, Blackhole bh) throws ParseException {
+ for (int i = 0; i < OPERATION_COUNT; i++) {
+ bh.consume(s.pbjCodec.parse(s.protobufBytes));
+ }
+ }
+
+ // ════════════════════════════════════════════════════════════════════════
+ // Google Protobuf parse benchmarks
+ // ════════════════════════════════════════════════════════════════════════
+
+ /** Parse: Google · byte[] */
+ @Benchmark
+ @CompilerControl(CompilerControl.Mode.DONT_INLINE)
+ @OperationsPerInvocation(OPERATION_COUNT)
+ public void parseProtoC(BenchmarkState s, Blackhole bh) throws IOException {
+ for (int i = 0; i < OPERATION_COUNT; i++) {
+ bh.consume(s.googleByteArrayParseMethod.parse(s.protobuf));
+ }
+ }
+
+ // ════════════════════════════════════════════════════════════════════════
+ // Write benchmarks (PBJ and Google Protobuf)
+ // ════════════════════════════════════════════════════════════════════════
+
+ /** Write: PBJ · immutable Bytes via Codec.toBytes(T) */
+ @Benchmark
+ @CompilerControl(CompilerControl.Mode.DONT_INLINE)
+ @OperationsPerInvocation(OPERATION_COUNT)
+ public void toBytesPbj(BenchmarkState s, Blackhole bh) {
+ for (int i = 0; i < OPERATION_COUNT; i++) {
+ bh.consume(s.pbjCodec.toBytes(s.pbjModelObject));
+ }
+ }
+
+ /** Write: Google · byte[] via toByteArray() */
+ @Benchmark
+ @CompilerControl(CompilerControl.Mode.DONT_INLINE)
+ @OperationsPerInvocation(OPERATION_COUNT)
+ public void toBytesProtoC(BenchmarkState s, Blackhole bh) {
+ for (int i = 0; i < OPERATION_COUNT; i++) {
+ bh.consume(s.googleModelObject.toByteArray());
+ }
+ }
+
+ /** Custom interface for method references as java.util.Function does not throw IOException */
+ public interface ProtobufParseFunction
+ * protobuf — Original byte array (parse only)
+ * Bytes — immutable Bytes wrapping a byte[] (parse only)
+ *
+ */
+@SuppressWarnings("unused")
+@Fork(1)
+@Warmup(iterations = 3, time = 2)
+@Measurement(iterations = 7, time = 2)
+@OutputTimeUnit(TimeUnit.NANOSECONDS)
+@BenchmarkMode(Mode.AverageTime)
+public abstract class ProtobufObjectArrayBench