From b66aa2ca7700be62bf6ecba626ed35a96a9b5aa0 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Tue, 9 Jun 2026 16:00:24 -0700 Subject: [PATCH 1/2] API, Core: Reuse VariantUtil methods through ByteBuffers. --- .../org/apache/iceberg/util/ByteBuffers.java | 63 +++++++++++++++- .../iceberg/variants/SerializedArray.java | 7 +- .../iceberg/variants/SerializedMetadata.java | 13 ++-- .../iceberg/variants/SerializedObject.java | 9 +-- .../iceberg/variants/SerializedPrimitive.java | 25 +++---- .../iceberg/variants/SerializedValue.java | 3 +- .../apache/iceberg/variants/VariantUtil.java | 72 ------------------- .../apache/iceberg/variants/VariantValue.java | 3 +- .../apache/iceberg/util/TestByteBuffers.java | 46 ++++++++++++ .../iceberg/variants/TestVariantUtil.java | 44 ------------ .../iceberg/variants/VariantTestUtil.java | 40 ++++------- .../iceberg/variants/PrimitiveWrapper.java | 11 +-- .../iceberg/variants/ShreddedObject.java | 21 +++--- .../apache/iceberg/variants/ValueArray.java | 9 +-- .../org/apache/iceberg/variants/Variants.java | 13 ++-- 15 files changed, 188 insertions(+), 191 deletions(-) create mode 100644 api/src/test/java/org/apache/iceberg/util/TestByteBuffers.java delete mode 100644 api/src/test/java/org/apache/iceberg/variants/TestVariantUtil.java diff --git a/api/src/main/java/org/apache/iceberg/util/ByteBuffers.java b/api/src/main/java/org/apache/iceberg/util/ByteBuffers.java index a679bd7ad0ac..ac8bcc2432ca 100644 --- a/api/src/main/java/org/apache/iceberg/util/ByteBuffers.java +++ b/api/src/main/java/org/apache/iceberg/util/ByteBuffers.java @@ -24,6 +24,8 @@ public class ByteBuffers { + private ByteBuffers() {} + public static byte[] toByteArray(ByteBuffer buffer) { if (buffer == null) { return null; @@ -73,5 +75,64 @@ public static ByteBuffer copy(ByteBuffer buffer) { return ByteBuffer.wrap(copyArray); } - private ByteBuffers() {} + public static void writeByte(ByteBuffer buffer, int value, int offset) { + buffer.put(buffer.position() + offset, (byte) (value & 0xFF)); + } + + public static void writeLittleEndianUnsigned(ByteBuffer buffer, int value, int offset, int size) { + int base = buffer.position() + offset; + switch (size) { + case 4: + buffer.putInt(base, value); + return; + case 3: + buffer.putShort(base, (short) (value & 0xFFFF)); + buffer.put(base + 2, (byte) ((value >> 16) & 0xFF)); + return; + case 2: + buffer.putShort(base, (short) (value & 0xFFFF)); + return; + case 1: + buffer.put(base, (byte) (value & 0xFF)); + return; + } + + throw new IllegalArgumentException("Invalid size: " + size); + } + + public static byte readLittleEndianInt8(ByteBuffer buffer, int offset) { + return buffer.get(buffer.position() + offset); + } + + public static short readLittleEndianInt16(ByteBuffer buffer, int offset) { + return buffer.getShort(buffer.position() + offset); + } + + public static int readByte(ByteBuffer buffer, int offset) { + return buffer.get(buffer.position() + offset) & 0xFF; + } + + public static int readLittleEndianUnsigned(ByteBuffer buffer, int offset, int size) { + int base = buffer.position() + offset; + switch (size) { + case 4: + return buffer.getInt(base); + case 3: + return (((int) buffer.getShort(base)) & 0xFFFF) | ((buffer.get(base + 2) & 0xFF) << 16); + case 2: + return ((int) buffer.getShort(base)) & 0xFFFF; + case 1: + return buffer.get(base) & 0xFF; + } + + throw new IllegalArgumentException("Invalid size: " + size); + } + + public static int readLittleEndianInt32(ByteBuffer buffer, int offset) { + return buffer.getInt(buffer.position() + offset); + } + + public static long readLittleEndianInt64(ByteBuffer buffer, int offset) { + return buffer.getLong(buffer.position() + offset); + } } diff --git a/api/src/main/java/org/apache/iceberg/variants/SerializedArray.java b/api/src/main/java/org/apache/iceberg/variants/SerializedArray.java index b390d96f31ed..d215e018449e 100644 --- a/api/src/main/java/org/apache/iceberg/variants/SerializedArray.java +++ b/api/src/main/java/org/apache/iceberg/variants/SerializedArray.java @@ -22,6 +22,7 @@ import java.nio.ByteOrder; import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.util.ByteBuffers; class SerializedArray implements VariantArray, SerializedValue { private static final int HEADER_SIZE = 1; @@ -55,7 +56,7 @@ private SerializedArray(VariantMetadata metadata, ByteBuffer value, int header) this.value = value; this.offsetSize = 1 + ((header & OFFSET_SIZE_MASK) >> OFFSET_SIZE_SHIFT); int numElementsSize = ((header & IS_LARGE) == IS_LARGE) ? 4 : 1; - int numElements = VariantUtil.readLittleEndianUnsigned(value, HEADER_SIZE, numElementsSize); + int numElements = ByteBuffers.readLittleEndianUnsigned(value, HEADER_SIZE, numElementsSize); this.offsetListOffset = HEADER_SIZE + numElementsSize; this.dataOffset = offsetListOffset + ((1 + numElements) * offsetSize); this.array = new VariantValue[numElements]; @@ -70,10 +71,10 @@ public int numElements() { public VariantValue get(int index) { if (null == array[index]) { int offset = - VariantUtil.readLittleEndianUnsigned( + ByteBuffers.readLittleEndianUnsigned( value, offsetListOffset + (offsetSize * index), offsetSize); int next = - VariantUtil.readLittleEndianUnsigned( + ByteBuffers.readLittleEndianUnsigned( value, offsetListOffset + (offsetSize * (1 + index)), offsetSize); array[index] = VariantValue.from(metadata, VariantUtil.slice(value, dataOffset + offset, next - offset)); diff --git a/api/src/main/java/org/apache/iceberg/variants/SerializedMetadata.java b/api/src/main/java/org/apache/iceberg/variants/SerializedMetadata.java index 9113a8c1c969..9eff21536de6 100644 --- a/api/src/main/java/org/apache/iceberg/variants/SerializedMetadata.java +++ b/api/src/main/java/org/apache/iceberg/variants/SerializedMetadata.java @@ -22,6 +22,7 @@ import java.nio.ByteOrder; import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.util.ByteBuffers; class SerializedMetadata implements VariantMetadata, Serialized { private static final int HEADER_SIZE = 1; @@ -42,7 +43,7 @@ static SerializedMetadata from(byte[] bytes) { static SerializedMetadata from(ByteBuffer metadata) { Preconditions.checkArgument( metadata.order() == ByteOrder.LITTLE_ENDIAN, "Unsupported byte order: big endian"); - int header = VariantUtil.readByte(metadata, 0); + int header = ByteBuffers.readByte(metadata, 0); int version = header & VERSION_MASK; Preconditions.checkArgument(SUPPORTED_VERSION == version, "Unsupported version: %s", version); return new SerializedMetadata(metadata, header); @@ -58,13 +59,13 @@ static SerializedMetadata from(ByteBuffer metadata) { private SerializedMetadata(ByteBuffer metadata, int header) { this.isSorted = (header & SORTED_STRINGS) == SORTED_STRINGS; this.offsetSize = 1 + ((header & OFFSET_SIZE_MASK) >> OFFSET_SIZE_SHIFT); - int dictSize = VariantUtil.readLittleEndianUnsigned(metadata, HEADER_SIZE, offsetSize); + int dictSize = ByteBuffers.readLittleEndianUnsigned(metadata, HEADER_SIZE, offsetSize); this.dict = new String[dictSize]; this.offsetListOffset = HEADER_SIZE + offsetSize; this.dataOffset = offsetListOffset + ((1 + dictSize) * offsetSize); int endOffset = dataOffset - + VariantUtil.readLittleEndianUnsigned( + + ByteBuffers.readLittleEndianUnsigned( metadata, offsetListOffset + (offsetSize * dictSize), offsetSize); if (endOffset < metadata.limit()) { this.metadata = VariantUtil.slice(metadata, 0, endOffset); @@ -106,10 +107,10 @@ public int id(String name) { public String get(int index) { if (null == dict[index]) { int offset = - VariantUtil.readLittleEndianUnsigned( + ByteBuffers.readLittleEndianUnsigned( metadata, offsetListOffset + (offsetSize * index), offsetSize); int next = - VariantUtil.readLittleEndianUnsigned( + ByteBuffers.readLittleEndianUnsigned( metadata, offsetListOffset + (offsetSize * (1 + index)), offsetSize); dict[index] = VariantUtil.readString(metadata, dataOffset + offset, next - offset); } @@ -129,7 +130,7 @@ public int sizeInBytes() { @Override public int writeTo(ByteBuffer buffer, int offset) { ByteBuffer value = buffer(); - VariantUtil.writeBufferAbsolute(buffer, offset, value); + buffer.put(offset, value, value.position(), value.remaining()); return value.remaining(); } diff --git a/api/src/main/java/org/apache/iceberg/variants/SerializedObject.java b/api/src/main/java/org/apache/iceberg/variants/SerializedObject.java index d74565891a8a..bd9510c9a9b2 100644 --- a/api/src/main/java/org/apache/iceberg/variants/SerializedObject.java +++ b/api/src/main/java/org/apache/iceberg/variants/SerializedObject.java @@ -27,6 +27,7 @@ import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.util.ByteBuffers; class SerializedObject implements VariantObject, SerializedValue { private static final int HEADER_SIZE = 1; @@ -67,7 +68,7 @@ private SerializedObject(VariantMetadata metadata, ByteBuffer value, int header) this.offsetSize = 1 + ((header & OFFSET_SIZE_MASK) >> OFFSET_SIZE_SHIFT); this.fieldIdSize = 1 + ((header & FIELD_ID_SIZE_MASK) >> FIELD_ID_SIZE_SHIFT); int numElementsSize = ((header & IS_LARGE) == IS_LARGE) ? 4 : 1; - int numElements = VariantUtil.readLittleEndianUnsigned(value, HEADER_SIZE, numElementsSize); + int numElements = ByteBuffers.readLittleEndianUnsigned(value, HEADER_SIZE, numElementsSize); this.fieldIdListOffset = HEADER_SIZE + numElementsSize; this.fieldIds = new Integer[numElements]; this.offsetListOffset = fieldIdListOffset + (numElements * fieldIdSize); @@ -86,14 +87,14 @@ private void initOffsetsAndLengths(int numElements) { Map offsetToLength = Maps.newHashMap(); for (int index = 0; index < numElements; index += 1) { offsets[index] = - VariantUtil.readLittleEndianUnsigned( + ByteBuffers.readLittleEndianUnsigned( value, offsetListOffset + (index * offsetSize), offsetSize); offsetToLength.put(offsets[index], 0); } int dataLength = - VariantUtil.readLittleEndianUnsigned( + ByteBuffers.readLittleEndianUnsigned( value, offsetListOffset + (numElements * offsetSize), offsetSize); offsetToLength.put(dataLength, 0); @@ -163,7 +164,7 @@ public String next() { private int id(int index) { if (null == fieldIds[index]) { fieldIds[index] = - VariantUtil.readLittleEndianUnsigned( + ByteBuffers.readLittleEndianUnsigned( value, fieldIdListOffset + (index * fieldIdSize), fieldIdSize); } diff --git a/api/src/main/java/org/apache/iceberg/variants/SerializedPrimitive.java b/api/src/main/java/org/apache/iceberg/variants/SerializedPrimitive.java index 840a210ac0b7..8ea35312737d 100644 --- a/api/src/main/java/org/apache/iceberg/variants/SerializedPrimitive.java +++ b/api/src/main/java/org/apache/iceberg/variants/SerializedPrimitive.java @@ -23,6 +23,7 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.util.ByteBuffers; import org.apache.iceberg.util.UUIDUtil; class SerializedPrimitive implements VariantPrimitive, SerializedValue { @@ -61,52 +62,52 @@ private Object read() { case BOOLEAN_FALSE: return false; case INT8: - return VariantUtil.readLittleEndianInt8(value, PRIMITIVE_OFFSET); + return ByteBuffers.readLittleEndianInt8(value, PRIMITIVE_OFFSET); case INT16: - return VariantUtil.readLittleEndianInt16(value, PRIMITIVE_OFFSET); + return ByteBuffers.readLittleEndianInt16(value, PRIMITIVE_OFFSET); case INT32: case DATE: - return VariantUtil.readLittleEndianInt32(value, PRIMITIVE_OFFSET); + return ByteBuffers.readLittleEndianInt32(value, PRIMITIVE_OFFSET); case INT64: case TIMESTAMPTZ: case TIMESTAMPNTZ: case TIME: case TIMESTAMPTZ_NANOS: case TIMESTAMPNTZ_NANOS: - return VariantUtil.readLittleEndianInt64(value, PRIMITIVE_OFFSET); + return ByteBuffers.readLittleEndianInt64(value, PRIMITIVE_OFFSET); case FLOAT: return VariantUtil.readFloat(value, PRIMITIVE_OFFSET); case DOUBLE: return VariantUtil.readDouble(value, PRIMITIVE_OFFSET); case DECIMAL4: { - int scale = VariantUtil.readByte(value, PRIMITIVE_OFFSET); - int unscaled = VariantUtil.readLittleEndianInt32(value, PRIMITIVE_OFFSET + 1); + int scale = ByteBuffers.readByte(value, PRIMITIVE_OFFSET); + int unscaled = ByteBuffers.readLittleEndianInt32(value, PRIMITIVE_OFFSET + 1); return new BigDecimal(BigInteger.valueOf(unscaled), scale); } case DECIMAL8: { - int scale = VariantUtil.readByte(value, PRIMITIVE_OFFSET); - long unscaled = VariantUtil.readLittleEndianInt64(value, PRIMITIVE_OFFSET + 1); + int scale = ByteBuffers.readByte(value, PRIMITIVE_OFFSET); + long unscaled = ByteBuffers.readLittleEndianInt64(value, PRIMITIVE_OFFSET + 1); return new BigDecimal(BigInteger.valueOf(unscaled), scale); } case DECIMAL16: { - int scale = VariantUtil.readByte(value, PRIMITIVE_OFFSET); + int scale = ByteBuffers.readByte(value, PRIMITIVE_OFFSET); byte[] unscaled = new byte[16]; for (int i = 0; i < 16; i += 1) { - unscaled[i] = (byte) VariantUtil.readByte(value, PRIMITIVE_OFFSET + 16 - i); + unscaled[i] = (byte) ByteBuffers.readByte(value, PRIMITIVE_OFFSET + 16 - i); } return new BigDecimal(new BigInteger(unscaled), scale); } case BINARY: { - int size = VariantUtil.readLittleEndianInt32(value, PRIMITIVE_OFFSET); + int size = ByteBuffers.readLittleEndianInt32(value, PRIMITIVE_OFFSET); return VariantUtil.slice(value, PRIMITIVE_OFFSET + 4, size); } case STRING: { - int size = VariantUtil.readLittleEndianInt32(value, PRIMITIVE_OFFSET); + int size = ByteBuffers.readLittleEndianInt32(value, PRIMITIVE_OFFSET); return VariantUtil.readString(value, PRIMITIVE_OFFSET + 4, size); } case UUID: diff --git a/api/src/main/java/org/apache/iceberg/variants/SerializedValue.java b/api/src/main/java/org/apache/iceberg/variants/SerializedValue.java index 19b9e880f5a6..b339c9d94f4a 100644 --- a/api/src/main/java/org/apache/iceberg/variants/SerializedValue.java +++ b/api/src/main/java/org/apache/iceberg/variants/SerializedValue.java @@ -19,6 +19,7 @@ package org.apache.iceberg.variants; import java.nio.ByteBuffer; +import org.apache.iceberg.util.ByteBuffers; interface SerializedValue extends VariantValue, Serialized { @Override @@ -29,7 +30,7 @@ default int sizeInBytes() { @Override default int writeTo(ByteBuffer buffer, int offset) { ByteBuffer value = buffer(); - VariantUtil.writeBufferAbsolute(buffer, offset, value); + buffer.put(offset, value, value.position(), value.remaining()); return value.remaining(); } } diff --git a/api/src/main/java/org/apache/iceberg/variants/VariantUtil.java b/api/src/main/java/org/apache/iceberg/variants/VariantUtil.java index d4335df8b567..14c3b2226531 100644 --- a/api/src/main/java/org/apache/iceberg/variants/VariantUtil.java +++ b/api/src/main/java/org/apache/iceberg/variants/VariantUtil.java @@ -33,78 +33,6 @@ class VariantUtil { private VariantUtil() {} - /** A hacky absolute put for ByteBuffer */ - static int writeBufferAbsolute(ByteBuffer buffer, int offset, ByteBuffer toCopy) { - int originalPosition = buffer.position(); - buffer.position(offset); - ByteBuffer copy = toCopy.duplicate(); - buffer.put(copy); // duplicate so toCopy is not modified - buffer.position(originalPosition); - Preconditions.checkArgument(copy.remaining() <= 0, "Not fully written"); - return toCopy.remaining(); - } - - static void writeByte(ByteBuffer buffer, int value, int offset) { - buffer.put(buffer.position() + offset, (byte) (value & 0xFF)); - } - - static void writeLittleEndianUnsigned(ByteBuffer buffer, int value, int offset, int size) { - int base = buffer.position() + offset; - switch (size) { - case 4: - buffer.putInt(base, value); - return; - case 3: - buffer.putShort(base, (short) (value & 0xFFFF)); - buffer.put(base + 2, (byte) ((value >> 16) & 0xFF)); - return; - case 2: - buffer.putShort(base, (short) (value & 0xFFFF)); - return; - case 1: - buffer.put(base, (byte) (value & 0xFF)); - return; - } - - throw new IllegalArgumentException("Invalid size: " + size); - } - - static byte readLittleEndianInt8(ByteBuffer buffer, int offset) { - return buffer.get(buffer.position() + offset); - } - - static short readLittleEndianInt16(ByteBuffer buffer, int offset) { - return buffer.getShort(buffer.position() + offset); - } - - static int readByte(ByteBuffer buffer, int offset) { - return buffer.get(buffer.position() + offset) & 0xFF; - } - - static int readLittleEndianUnsigned(ByteBuffer buffer, int offset, int size) { - int base = buffer.position() + offset; - switch (size) { - case 4: - return buffer.getInt(base); - case 3: - return (((int) buffer.getShort(base)) & 0xFFFF) | ((buffer.get(base + 2) & 0xFF) << 16); - case 2: - return ((int) buffer.getShort(base)) & 0xFFFF; - case 1: - return buffer.get(base) & 0xFF; - } - - throw new IllegalArgumentException("Invalid size: " + size); - } - - static int readLittleEndianInt32(ByteBuffer buffer, int offset) { - return buffer.getInt(buffer.position() + offset); - } - - static long readLittleEndianInt64(ByteBuffer buffer, int offset) { - return buffer.getLong(buffer.position() + offset); - } - static float readFloat(ByteBuffer buffer, int offset) { return buffer.getFloat(buffer.position() + offset); } diff --git a/api/src/main/java/org/apache/iceberg/variants/VariantValue.java b/api/src/main/java/org/apache/iceberg/variants/VariantValue.java index 1cda7b2d3ca2..4a477d360ca8 100644 --- a/api/src/main/java/org/apache/iceberg/variants/VariantValue.java +++ b/api/src/main/java/org/apache/iceberg/variants/VariantValue.java @@ -19,6 +19,7 @@ package org.apache.iceberg.variants; import java.nio.ByteBuffer; +import org.apache.iceberg.util.ByteBuffers; /** A variant value. */ public interface VariantValue { @@ -61,7 +62,7 @@ default VariantArray asArray() { } static VariantValue from(VariantMetadata metadata, ByteBuffer value) { - int header = VariantUtil.readByte(value, 0); + int header = ByteBuffers.readByte(value, 0); BasicType basicType = VariantUtil.basicType(header); switch (basicType) { case PRIMITIVE: diff --git a/api/src/test/java/org/apache/iceberg/util/TestByteBuffers.java b/api/src/test/java/org/apache/iceberg/util/TestByteBuffers.java new file mode 100644 index 000000000000..b6bc5e6bcdda --- /dev/null +++ b/api/src/test/java/org/apache/iceberg/util/TestByteBuffers.java @@ -0,0 +1,46 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, + * * software distributed under the License is distributed on an + * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * * KIND, either express or implied. See the License for the + * * specific language governing permissions and limitations + * * under the License. + * + */ +package org.apache.iceberg.util; + +import static org.assertj.core.api.Assertions.assertThat; + +import java.nio.ByteBuffer; +import org.junit.jupiter.api.Test; + +public class TestByteBuffers { + @Test + public void testReadByteUnsigned() { + ByteBuffer buffer = ByteBuffer.wrap(new byte[] {(byte) 0xFF}); + assertThat(ByteBuffers.readByte(buffer, 0)).isEqualTo(255); + } + + @Test + public void testRead2ByteUnsigned() { + ByteBuffer buffer = ByteBuffer.wrap(new byte[] {(byte) 0xFF, (byte) 0xFF}); + assertThat(ByteBuffers.readLittleEndianUnsigned(buffer, 0, 2)).isEqualTo(65535); + } + + @Test + public void testRead3ByteUnsigned() { + ByteBuffer buffer = ByteBuffer.wrap(new byte[] {(byte) 0xFF, (byte) 0xFF, (byte) 0xFF}); + assertThat(ByteBuffers.readLittleEndianUnsigned(buffer, 0, 3)).isEqualTo(16777215); + } +} diff --git a/api/src/test/java/org/apache/iceberg/variants/TestVariantUtil.java b/api/src/test/java/org/apache/iceberg/variants/TestVariantUtil.java deleted file mode 100644 index 9d9536fbf0d6..000000000000 --- a/api/src/test/java/org/apache/iceberg/variants/TestVariantUtil.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.iceberg.variants; - -import static org.assertj.core.api.Assertions.assertThat; - -import java.nio.ByteBuffer; -import org.junit.jupiter.api.Test; - -public class TestVariantUtil { - @Test - public void testReadByteUnsigned() { - ByteBuffer buffer = ByteBuffer.wrap(new byte[] {(byte) 0xFF}); - assertThat(VariantUtil.readByte(buffer, 0)).isEqualTo(255); - } - - @Test - public void testRead2ByteUnsigned() { - ByteBuffer buffer = ByteBuffer.wrap(new byte[] {(byte) 0xFF, (byte) 0xFF}); - assertThat(VariantUtil.readLittleEndianUnsigned(buffer, 0, 2)).isEqualTo(65535); - } - - @Test - public void testRead3ByteUnsigned() { - ByteBuffer buffer = ByteBuffer.wrap(new byte[] {(byte) 0xFF, (byte) 0xFF, (byte) 0xFF}); - assertThat(VariantUtil.readLittleEndianUnsigned(buffer, 0, 3)).isEqualTo(16777215); - } -} diff --git a/api/src/test/java/org/apache/iceberg/variants/VariantTestUtil.java b/api/src/test/java/org/apache/iceberg/variants/VariantTestUtil.java index e69adaeed9fe..7aeddf53cab5 100644 --- a/api/src/test/java/org/apache/iceberg/variants/VariantTestUtil.java +++ b/api/src/test/java/org/apache/iceberg/variants/VariantTestUtil.java @@ -30,6 +30,7 @@ import java.util.stream.Stream; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.util.ByteBuffers; public class VariantTestUtil { @@ -107,24 +108,13 @@ private static byte metadataHeader(boolean isSorted, int offsetSize) { return (byte) (((offsetSize - 1) << 6) | (isSorted ? 0b10000 : 0) | 0b0001); } - /** A hacky absolute put for ByteBuffer */ - private static int writeBufferAbsolute(ByteBuffer buffer, int offset, ByteBuffer toCopy) { - int originalPosition = buffer.position(); - buffer.position(offset); - ByteBuffer copy = toCopy.duplicate(); - buffer.put(copy); // duplicate so toCopy is not modified - buffer.position(originalPosition); - Preconditions.checkArgument(copy.remaining() <= 0, "Not fully written"); - return toCopy.remaining(); - } - /** Creates a random string primitive of the given length for forcing large offset sizes */ public static VariantPrimitive createString(String string) { byte[] utf8 = string.getBytes(StandardCharsets.UTF_8); ByteBuffer buffer = ByteBuffer.allocate(5 + utf8.length).order(ByteOrder.LITTLE_ENDIAN); buffer.put(0, primitiveHeader(16)); buffer.putInt(1, utf8.length); - writeBufferAbsolute(buffer, 5, ByteBuffer.wrap(utf8)); + buffer.put(5, ByteBuffer.wrap(utf8), 0, utf8.length); return SerializedPrimitive.from(buffer, buffer.get(0)); } @@ -136,7 +126,7 @@ static SerializedShortString createShortString(String string) { byte[] utf8 = string.getBytes(StandardCharsets.UTF_8); ByteBuffer buffer = ByteBuffer.allocate(1 + utf8.length).order(ByteOrder.LITTLE_ENDIAN); buffer.put(0, VariantUtil.shortStringHeader(utf8.length)); - writeBufferAbsolute(buffer, 1, ByteBuffer.wrap(utf8)); + buffer.put(1, ByteBuffer.wrap(utf8), 0, utf8.length); return SerializedShortString.from(buffer, buffer.get(0)); } @@ -154,8 +144,8 @@ public static ByteBuffer variantBuffer(Map data) { ByteBuffer value = VariantTestUtil.createObject(meta, data); ByteBuffer buffer = ByteBuffer.allocate(meta.remaining() + value.remaining()).order(ByteOrder.LITTLE_ENDIAN); - writeBufferAbsolute(buffer, 0, meta); - writeBufferAbsolute(buffer, meta.remaining(), value); + buffer.put(0, meta, meta.position(), meta.remaining()); + buffer.put(meta.remaining(), value, value.position(), value.remaining()); return buffer; } @@ -193,23 +183,23 @@ public static ByteBuffer createMetadata(Collection fieldNames, boolean s ByteBuffer buffer = ByteBuffer.allocate(totalSize).order(ByteOrder.LITTLE_ENDIAN); buffer.put(0, header); - VariantUtil.writeLittleEndianUnsigned(buffer, numElements, 1, offsetSize); + ByteBuffers.writeLittleEndianUnsigned(buffer, numElements, 1, offsetSize); // write offsets and strings int nextOffset = 0; int index = 0; for (ByteBuffer nameBuffer : nameBuffers) { // write the offset and the string - VariantUtil.writeLittleEndianUnsigned( + ByteBuffers.writeLittleEndianUnsigned( buffer, nextOffset, offsetListOffset + (index * offsetSize), offsetSize); - int nameSize = writeBufferAbsolute(buffer, dataOffset + nextOffset, nameBuffer); + buffer.put(dataOffset + nextOffset, nameBuffer, nameBuffer.position(), nameBuffer.remaining()); // update the offset and index - nextOffset += nameSize; + nextOffset += nameBuffer.remaining(); index += 1; } // write the final size of the data section - VariantUtil.writeLittleEndianUnsigned( + ByteBuffers.writeLittleEndianUnsigned( buffer, nextOffset, offsetListOffset + (index * offsetSize), offsetSize); return buffer; @@ -255,9 +245,9 @@ public static ByteBuffer createObject(VariantMetadata metadata, Map sortedFieldNames = data.keySet().stream().sorted().collect(Collectors.toList()); for (String fieldName : sortedFieldNames) { int id = metadata.id(fieldName); - VariantUtil.writeLittleEndianUnsigned( + ByteBuffers.writeLittleEndianUnsigned( buffer, id, fieldIdListOffset + (index * fieldIdSize), fieldIdSize); - VariantUtil.writeLittleEndianUnsigned( + ByteBuffers.writeLittleEndianUnsigned( buffer, nextOffset, offsetListOffset + (index * offsetSize), offsetSize); int valueSize = data.get(fieldName).writeTo(buffer, dataOffset + nextOffset); @@ -267,7 +257,7 @@ public static ByteBuffer createObject(VariantMetadata metadata, Map implements VariantPrimitive { @@ -208,7 +209,7 @@ public int writeTo(ByteBuffer outBuffer, int offset) { ByteBuffer binary = (ByteBuffer) value; outBuffer.put(offset, BINARY_HEADER); outBuffer.putInt(offset + 1, binary.remaining()); - VariantUtil.writeBufferAbsolute(outBuffer, offset + 5, binary); + outBuffer.put(offset + 5, binary, binary.position(), binary.remaining()); return 5 + binary.remaining(); case STRING: if (null == buffer) { @@ -216,12 +217,12 @@ public int writeTo(ByteBuffer outBuffer, int offset) { } if (buffer.remaining() <= MAX_SHORT_STRING_LENGTH) { outBuffer.put(offset, VariantUtil.shortStringHeader(buffer.remaining())); - VariantUtil.writeBufferAbsolute(outBuffer, offset + 1, buffer); + outBuffer.put(offset + 1, buffer, buffer.position(), buffer.remaining()); return 1 + buffer.remaining(); } else { outBuffer.put(offset, STRING_HEADER); outBuffer.putInt(offset + 1, buffer.remaining()); - VariantUtil.writeBufferAbsolute(outBuffer, offset + 5, buffer); + outBuffer.put(offset + 5, buffer, buffer.position(), buffer.remaining()); return 5 + buffer.remaining(); } case TIME: @@ -238,8 +239,8 @@ public int writeTo(ByteBuffer outBuffer, int offset) { return 9; case UUID: outBuffer.put(offset, UUID_HEADER); - VariantUtil.writeBufferAbsolute( - outBuffer, offset + 1, UUIDUtil.convertToByteBuffer((UUID) value)); + ByteBuffer uuidBuffer = UUIDUtil.convertToByteBuffer((UUID) value); + outBuffer.put(offset + 1, uuidBuffer, uuidBuffer.position(), uuidBuffer.remaining()); return 17; } diff --git a/core/src/main/java/org/apache/iceberg/variants/ShreddedObject.java b/core/src/main/java/org/apache/iceberg/variants/ShreddedObject.java index 471097e855c6..bd1fbad07547 100644 --- a/core/src/main/java/org/apache/iceberg/variants/ShreddedObject.java +++ b/core/src/main/java/org/apache/iceberg/variants/ShreddedObject.java @@ -28,6 +28,7 @@ import org.apache.iceberg.relocated.com.google.common.collect.Iterables; import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.relocated.com.google.common.collect.Sets; +import org.apache.iceberg.util.ByteBuffers; import org.apache.iceberg.util.SortedMerge; /** @@ -216,8 +217,8 @@ private int writeTo(ByteBuffer buffer, int offset) { int dataOffset = offsetListOffset + ((1 + numElements) * offsetSize); byte header = VariantUtil.objectHeader(isLarge, fieldIdSize, offsetSize); - VariantUtil.writeByte(buffer, header, offset); - VariantUtil.writeLittleEndianUnsigned(buffer, numElements, offset + 1, isLarge ? 4 : 1); + ByteBuffers.writeByte(buffer, header, offset); + ByteBuffers.writeLittleEndianUnsigned(buffer, numElements, offset + 1, isLarge ? 4 : 1); // neither iterable is closeable, so it is okay to use Iterable Iterable fields = @@ -231,10 +232,10 @@ private int writeTo(ByteBuffer buffer, int offset) { // write the field ID from the metadata dictionary int id = metadata.id(field); Preconditions.checkState(id >= 0, "Invalid metadata, missing: %s", field); - VariantUtil.writeLittleEndianUnsigned( + ByteBuffers.writeLittleEndianUnsigned( buffer, id, fieldIdListOffset + (index * fieldIdSize), fieldIdSize); // write the data offset - VariantUtil.writeLittleEndianUnsigned( + ByteBuffers.writeLittleEndianUnsigned( buffer, nextValueOffset, offsetListOffset + (index * offsetSize), offsetSize); // copy or serialize the value into the data section @@ -243,9 +244,13 @@ private int writeTo(ByteBuffer buffer, int offset) { if (shreddedValue != null) { valueSize = shreddedValue.writeTo(buffer, dataOffset + nextValueOffset); } else { - valueSize = - VariantUtil.writeBufferAbsolute( - buffer, dataOffset + nextValueOffset, unshreddedFields.get(field)); + ByteBuffer unshreddedValue = unshreddedFields.get(field); + buffer.put( + dataOffset + nextValueOffset, + unshreddedValue, + unshreddedValue.position(), + unshreddedValue.remaining()); + valueSize = unshreddedValue.remaining(); } // update tracking @@ -254,7 +259,7 @@ private int writeTo(ByteBuffer buffer, int offset) { } // write the final size of the data section - VariantUtil.writeLittleEndianUnsigned( + ByteBuffers.writeLittleEndianUnsigned( buffer, nextValueOffset, offsetListOffset + (index * offsetSize), offsetSize); // return the total size diff --git a/core/src/main/java/org/apache/iceberg/variants/ValueArray.java b/core/src/main/java/org/apache/iceberg/variants/ValueArray.java index 2c6b29a51f63..c51cb2db74c3 100644 --- a/core/src/main/java/org/apache/iceberg/variants/ValueArray.java +++ b/core/src/main/java/org/apache/iceberg/variants/ValueArray.java @@ -23,6 +23,7 @@ import java.util.List; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.util.ByteBuffers; public class ValueArray implements VariantArray { private SerializationState serializationState = null; @@ -101,15 +102,15 @@ private int writeTo(ByteBuffer buffer, int offset) { int dataOffset = offsetListOffset + ((1 + numElements) * offsetSize); byte header = VariantUtil.arrayHeader(isLarge, offsetSize); - VariantUtil.writeByte(buffer, header, offset); - VariantUtil.writeLittleEndianUnsigned(buffer, numElements, offset + 1, isLarge ? 4 : 1); + ByteBuffers.writeByte(buffer, header, offset); + ByteBuffers.writeLittleEndianUnsigned(buffer, numElements, offset + 1, isLarge ? 4 : 1); // Insert element offsets int nextValueOffset = 0; int index = 0; for (VariantValue element : elements) { // write the data offset - VariantUtil.writeLittleEndianUnsigned( + ByteBuffers.writeLittleEndianUnsigned( buffer, nextValueOffset, offsetListOffset + (index * offsetSize), offsetSize); // write the data @@ -120,7 +121,7 @@ private int writeTo(ByteBuffer buffer, int offset) { } // write the final size of the data section - VariantUtil.writeLittleEndianUnsigned( + ByteBuffers.writeLittleEndianUnsigned( buffer, nextValueOffset, offsetListOffset + (index * offsetSize), offsetSize); // return the total size diff --git a/core/src/main/java/org/apache/iceberg/variants/Variants.java b/core/src/main/java/org/apache/iceberg/variants/Variants.java index ab538dae44b5..95da533bf353 100644 --- a/core/src/main/java/org/apache/iceberg/variants/Variants.java +++ b/core/src/main/java/org/apache/iceberg/variants/Variants.java @@ -25,6 +25,7 @@ import java.util.Arrays; import java.util.Collection; import java.util.UUID; +import org.apache.iceberg.util.ByteBuffers; import org.apache.iceberg.util.DateTimeUtil; public class Variants { @@ -73,23 +74,25 @@ public static VariantMetadata metadata(Collection fieldNames) { ByteBuffer buffer = ByteBuffer.allocate(totalSize).order(ByteOrder.LITTLE_ENDIAN); buffer.put(0, header); - VariantUtil.writeLittleEndianUnsigned(buffer, numElements, 1, offsetSize); + ByteBuffers.writeLittleEndianUnsigned(buffer, numElements, 1, offsetSize); // write offsets and strings int nextOffset = 0; int index = 0; for (ByteBuffer nameBuffer : nameBuffers) { // write the offset and the string - VariantUtil.writeLittleEndianUnsigned( + ByteBuffers.writeLittleEndianUnsigned( buffer, nextOffset, offsetListOffset + (index * offsetSize), offsetSize); - int nameSize = VariantUtil.writeBufferAbsolute(buffer, dataOffset + nextOffset, nameBuffer); + buffer.put( + dataOffset + nextOffset, nameBuffer, nameBuffer.position(), nameBuffer.remaining()); + int nameSize = nameBuffer.remaining(); // update the offset and index nextOffset += nameSize; index += 1; } // write the final size of the data section - VariantUtil.writeLittleEndianUnsigned( + ByteBuffers.writeLittleEndianUnsigned( buffer, nextOffset, offsetListOffset + (index * offsetSize), offsetSize); return SerializedMetadata.from(buffer); @@ -118,7 +121,7 @@ public static ShreddedObject object(VariantObject object) { } public static boolean isNull(ByteBuffer valueBuffer) { - return VariantUtil.readByte(valueBuffer, 0) == 0; + return ByteBuffers.readByte(valueBuffer, 0) == 0; } public static ValueArray array() { From 48f25ce8d5a58c10bd4401ce1585671525e8332b Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Thu, 11 Jun 2026 11:46:37 -0700 Subject: [PATCH 2/2] Apply spotless. --- .../iceberg/variants/SerializedValue.java | 1 - .../apache/iceberg/variants/VariantUtil.java | 1 - .../apache/iceberg/util/TestByteBuffers.java | 30 +++++++++---------- .../iceberg/variants/VariantTestUtil.java | 3 +- .../iceberg/variants/PrimitiveWrapper.java | 1 - 5 files changed, 16 insertions(+), 20 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/variants/SerializedValue.java b/api/src/main/java/org/apache/iceberg/variants/SerializedValue.java index b339c9d94f4a..f1b6e101f885 100644 --- a/api/src/main/java/org/apache/iceberg/variants/SerializedValue.java +++ b/api/src/main/java/org/apache/iceberg/variants/SerializedValue.java @@ -19,7 +19,6 @@ package org.apache.iceberg.variants; import java.nio.ByteBuffer; -import org.apache.iceberg.util.ByteBuffers; interface SerializedValue extends VariantValue, Serialized { @Override diff --git a/api/src/main/java/org/apache/iceberg/variants/VariantUtil.java b/api/src/main/java/org/apache/iceberg/variants/VariantUtil.java index 14c3b2226531..62c83b51a58d 100644 --- a/api/src/main/java/org/apache/iceberg/variants/VariantUtil.java +++ b/api/src/main/java/org/apache/iceberg/variants/VariantUtil.java @@ -22,7 +22,6 @@ import java.nio.ByteOrder; import java.nio.charset.StandardCharsets; import java.util.function.Function; -import org.apache.iceberg.relocated.com.google.common.base.Preconditions; class VariantUtil { private static final int BASIC_TYPE_MASK = 0b11; diff --git a/api/src/test/java/org/apache/iceberg/util/TestByteBuffers.java b/api/src/test/java/org/apache/iceberg/util/TestByteBuffers.java index b6bc5e6bcdda..23739d78a5ec 100644 --- a/api/src/test/java/org/apache/iceberg/util/TestByteBuffers.java +++ b/api/src/test/java/org/apache/iceberg/util/TestByteBuffers.java @@ -1,22 +1,20 @@ /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * * Licensed to the Apache Software Foundation (ASF) under one - * * or more contributor license agreements. See the NOTICE file - * * distributed with this work for additional information - * * regarding copyright ownership. The ASF licenses this file - * * to you under the Apache License, Version 2.0 (the - * * "License"); you may not use this file except in compliance - * * with the License. You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, - * * software distributed under the License is distributed on an - * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * * KIND, either express or implied. See the License for the - * * specific language governing permissions and limitations - * * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.iceberg.util; diff --git a/api/src/test/java/org/apache/iceberg/variants/VariantTestUtil.java b/api/src/test/java/org/apache/iceberg/variants/VariantTestUtil.java index 7aeddf53cab5..8a1cd515dff6 100644 --- a/api/src/test/java/org/apache/iceberg/variants/VariantTestUtil.java +++ b/api/src/test/java/org/apache/iceberg/variants/VariantTestUtil.java @@ -192,7 +192,8 @@ public static ByteBuffer createMetadata(Collection fieldNames, boolean s // write the offset and the string ByteBuffers.writeLittleEndianUnsigned( buffer, nextOffset, offsetListOffset + (index * offsetSize), offsetSize); - buffer.put(dataOffset + nextOffset, nameBuffer, nameBuffer.position(), nameBuffer.remaining()); + buffer.put( + dataOffset + nextOffset, nameBuffer, nameBuffer.position(), nameBuffer.remaining()); // update the offset and index nextOffset += nameBuffer.remaining(); index += 1; diff --git a/core/src/main/java/org/apache/iceberg/variants/PrimitiveWrapper.java b/core/src/main/java/org/apache/iceberg/variants/PrimitiveWrapper.java index efce1bbc8207..6fd211156b00 100644 --- a/core/src/main/java/org/apache/iceberg/variants/PrimitiveWrapper.java +++ b/core/src/main/java/org/apache/iceberg/variants/PrimitiveWrapper.java @@ -24,7 +24,6 @@ import java.nio.charset.StandardCharsets; import java.util.UUID; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; -import org.apache.iceberg.util.ByteBuffers; import org.apache.iceberg.util.UUIDUtil; class PrimitiveWrapper implements VariantPrimitive {