From f9c86c33832c9168cc821658e201bbdb3b1bee22 Mon Sep 17 00:00:00 2001 From: Gabor Kaszab Date: Mon, 8 Jun 2026 12:17:11 +0200 Subject: [PATCH] Core: Introduce builder for TrackedFile --- .../apache/iceberg/TrackedFileBuilder.java | 345 ++++++++ .../org/apache/iceberg/TrackedFileStruct.java | 24 +- .../iceberg/TestTrackedFileBuilder.java | 776 ++++++++++++++++++ .../apache/iceberg/TestTrackedFileStruct.java | 57 +- 4 files changed, 1168 insertions(+), 34 deletions(-) create mode 100644 core/src/main/java/org/apache/iceberg/TrackedFileBuilder.java create mode 100644 core/src/test/java/org/apache/iceberg/TestTrackedFileBuilder.java diff --git a/core/src/main/java/org/apache/iceberg/TrackedFileBuilder.java b/core/src/main/java/org/apache/iceberg/TrackedFileBuilder.java new file mode 100644 index 000000000000..b6455c004b91 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/TrackedFileBuilder.java @@ -0,0 +1,345 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg; + +import java.nio.ByteBuffer; +import java.util.List; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; + +class TrackedFileBuilder { + private final long snapshotId; + private final FileContent contentType; + + // Required fields + // TODO gaborkaszab: Include writer_format_version once merged + private String location = null; + private FileFormat fileFormat = null; + private Long recordCount = null; + private Long fileSizeInBytes = null; + private PartitionData partitionData = null; + + // optional fields + private Integer specId = null; + private ContentStats contentStats = null; + private Integer sortOrderId = null; + private DeletionVector deletionVector = null; + private ManifestInfo manifestInfo = null; + private ByteBuffer keyMetadata = null; + private List splitOffsets = null; + private List equalityIds = null; + + // tracking-related fields + private Tracking sourceTracking = null; + private boolean dvUpdated = false; + private ByteBuffer deletedPositions = null; + private ByteBuffer replacedPositions = null; + + /** + * Creates a builder for a newly added data file entry. + * + * @param newSnapshotId the snapshot ID in which the new tracked file will be committed + */ + static TrackedFileBuilder data(long newSnapshotId) { + return new TrackedFileBuilder(FileContent.DATA, newSnapshotId); + } + + /** + * Creates a builder for a newly added equality delete file entry. + * + * @param newSnapshotId the snapshot ID in which the new tracked file will be committed + */ + static TrackedFileBuilder equalityDelete(long newSnapshotId) { + return new TrackedFileBuilder(FileContent.EQUALITY_DELETES, newSnapshotId); + } + + /** + * Creates a builder for a newly added data manifest entry. 
+ * + * @param newSnapshotId the snapshot ID in which the new tracked file will be committed + */ + static TrackedFileBuilder dataManifest(long newSnapshotId) { + return new TrackedFileBuilder(FileContent.DATA_MANIFEST, newSnapshotId); + } + + /** + * Creates a builder for a newly added delete manifest entry. + * + * @param newSnapshotId the snapshot ID in which the new tracked file will be committed + */ + static TrackedFileBuilder deleteManifest(long newSnapshotId) { + return new TrackedFileBuilder(FileContent.DELETE_MANIFEST, newSnapshotId); + } + + /** + * Creates a builder for a tracked file derived from {@code newSource}. + * + * @param newSource source tracked file to copy fields from + * @param newSnapshotId the snapshot ID in which the new tracked file will be committed + */ + static TrackedFileBuilder from(TrackedFile newSource, long newSnapshotId) { + Preconditions.checkArgument(newSource != null, "Invalid source: null"); + return new TrackedFileBuilder(newSource, newSnapshotId); + } + + /** + * Returns a DELETED tracked file derived from {@code source}. + * + * @param source source tracked file + * @param newSnapshotId the snapshot ID in which the new tracked file will be committed + */ + static TrackedFile deleted(TrackedFile source, long newSnapshotId) { + Preconditions.checkArgument(source != null, "Invalid source: null"); + return terminal(source, TrackingBuilder.deleted(source.tracking(), newSnapshotId)); + } + + /** + * Returns a REPLACED tracked file derived from {@code source}. + * + *
<p>
Manifest entries cannot transition to REPLACED. + * + * @param source source tracked file + * @param newSnapshotId the snapshot ID in which the new tracked file will be committed + */ + static TrackedFile replaced(TrackedFile source, long newSnapshotId) { + Preconditions.checkArgument(source != null, "Invalid source: null"); + Preconditions.checkArgument( + source.contentType() != FileContent.DATA_MANIFEST + && source.contentType() != FileContent.DELETE_MANIFEST, + "Manifest entries cannot transition to REPLACED, but entry type is: %s", + source.contentType()); + return terminal(source, TrackingBuilder.replaced(source.tracking(), newSnapshotId)); + } + + private static TrackedFile terminal(TrackedFile source, Tracking tracking) { + return new TrackedFileStruct( + tracking, + source.contentType(), + source.location(), + source.fileFormat(), + (PartitionData) source.partition(), + source.recordCount(), + source.fileSizeInBytes(), + source.specId(), + source.contentStats(), + source.sortOrderId(), + source.deletionVector(), + source.manifestInfo(), + source.keyMetadata(), + source.splitOffsets(), + source.equalityIds()); + } + + private TrackedFileBuilder(FileContent contentType, long snapshotId) { + this.contentType = contentType; + this.snapshotId = snapshotId; + } + + private TrackedFileBuilder(TrackedFile source, long snapshotId) { + this.contentType = source.contentType(); + this.snapshotId = snapshotId; + this.location = source.location(); + this.fileFormat = source.fileFormat(); + this.recordCount = source.recordCount(); + this.fileSizeInBytes = source.fileSizeInBytes(); + this.partitionData = (PartitionData) source.partition(); + this.specId = source.specId(); + this.contentStats = source.contentStats(); + this.sortOrderId = source.sortOrderId(); + this.deletionVector = source.deletionVector(); + this.manifestInfo = source.manifestInfo(); + this.keyMetadata = source.keyMetadata(); + this.splitOffsets = source.splitOffsets(); + this.equalityIds = 
source.equalityIds(); + this.sourceTracking = source.tracking(); + } + + TrackedFileBuilder location(String newLocation) { + Preconditions.checkArgument(newLocation != null, "Invalid location: null"); + this.location = newLocation; + return this; + } + + TrackedFileBuilder fileFormat(FileFormat newFileFormat) { + Preconditions.checkArgument(newFileFormat != null, "Invalid file format: null"); + this.fileFormat = newFileFormat; + return this; + } + + TrackedFileBuilder recordCount(long newRecordCount) { + Preconditions.checkArgument( + newRecordCount >= 0, "Invalid record count: %s (must be >= 0)", newRecordCount); + this.recordCount = newRecordCount; + return this; + } + + TrackedFileBuilder fileSizeInBytes(long newFileSizeInBytes) { + Preconditions.checkArgument( + newFileSizeInBytes >= 0, + "Invalid file size in bytes: %s (must be >= 0)", + newFileSizeInBytes); + this.fileSizeInBytes = newFileSizeInBytes; + return this; + } + + TrackedFileBuilder specId(int newSpecId) { + Preconditions.checkArgument(newSpecId >= 0, "Invalid spec ID: %s (must be >= 0)", newSpecId); + this.specId = newSpecId; + return this; + } + + TrackedFileBuilder partition(PartitionData newPartitionData) { + Preconditions.checkArgument(newPartitionData != null, "Invalid partition: null"); + this.partitionData = newPartitionData; + return this; + } + + TrackedFileBuilder contentStats(ContentStats newContentStats) { + Preconditions.checkArgument(newContentStats != null, "Invalid content stats: null"); + this.contentStats = newContentStats; + return this; + } + + TrackedFileBuilder sortOrderId(int newSortOrderId) { + Preconditions.checkArgument( + contentType == FileContent.DATA, + "Sort order ID can only be added to DATA entries, but entry type is: %s", + contentType); + Preconditions.checkArgument( + newSortOrderId >= 0, "Invalid sort order ID: %s (must be >= 0)", newSortOrderId); + this.sortOrderId = newSortOrderId; + return this; + } + + TrackedFileBuilder deletionVector(DeletionVector 
newDeletionVector) { + Preconditions.checkArgument(newDeletionVector != null, "Invalid deletion vector: null"); + Preconditions.checkArgument( + contentType == FileContent.DATA, + "Deletion vector can only be added to DATA entries, but entry type is: %s", + contentType); + this.deletionVector = newDeletionVector; + this.dvUpdated = true; + return this; + } + + TrackedFileBuilder manifestInfo(ManifestInfo newManifestInfo) { + Preconditions.checkArgument(newManifestInfo != null, "Invalid manifest info: null"); + Preconditions.checkArgument( + contentType == FileContent.DATA_MANIFEST || contentType == FileContent.DELETE_MANIFEST, + "Manifest info can only be added to manifests, but entry type is: %s", + contentType); + this.manifestInfo = newManifestInfo; + return this; + } + + TrackedFileBuilder keyMetadata(ByteBuffer newKeyMetadata) { + Preconditions.checkArgument(newKeyMetadata != null, "Invalid key metadata: null"); + this.keyMetadata = newKeyMetadata; + return this; + } + + TrackedFileBuilder splitOffsets(List newSplitOffsets) { + Preconditions.checkArgument(newSplitOffsets != null, "Invalid split offsets: null"); + Preconditions.checkArgument( + contentType == FileContent.DATA, + "Split offsets can only be added to DATA entries, but entry type is: %s", + contentType); + this.splitOffsets = newSplitOffsets; + return this; + } + + TrackedFileBuilder equalityIds(List newEqualityIds) { + Preconditions.checkArgument(newEqualityIds != null, "Invalid equality IDs: null"); + Preconditions.checkArgument( + contentType == FileContent.EQUALITY_DELETES, + "Equality IDs can only be added to EQUALITY_DELETES entries, but entry type is: %s", + contentType); + this.equalityIds = newEqualityIds; + return this; + } + + TrackedFileBuilder deletedPositions(ByteBuffer newDeletedPositions) { + Preconditions.checkArgument(newDeletedPositions != null, "Invalid deleted positions: null"); + Preconditions.checkArgument( + contentType == FileContent.DATA_MANIFEST || contentType == 
FileContent.DELETE_MANIFEST, + "Deleted positions can only be added to manifests, but entry type is: %s", + contentType); + this.deletedPositions = newDeletedPositions; + return this; + } + + TrackedFileBuilder replacedPositions(ByteBuffer newReplacedPositions) { + Preconditions.checkArgument(newReplacedPositions != null, "Invalid replaced positions: null"); + Preconditions.checkArgument( + contentType == FileContent.DATA_MANIFEST || contentType == FileContent.DELETE_MANIFEST, + "Replaced positions can only be added to manifests, but entry type is: %s", + contentType); + this.replacedPositions = newReplacedPositions; + return this; + } + + TrackedFile build() { + Preconditions.checkArgument(location != null, "Missing required field: location"); + Preconditions.checkArgument(fileFormat != null, "Missing required field: file format"); + Preconditions.checkArgument(recordCount != null, "Missing required field: record count"); + Preconditions.checkArgument( + fileSizeInBytes != null, "Missing required field: file size in bytes"); + Preconditions.checkArgument(partitionData != null, "Missing required field: partition data"); + Preconditions.checkArgument( + (contentType != FileContent.DATA_MANIFEST && contentType != FileContent.DELETE_MANIFEST) + || manifestInfo != null, + "Missing required field: manifest info"); + Preconditions.checkArgument( + contentType != FileContent.EQUALITY_DELETES || equalityIds != null, + "Missing required field: equality IDs"); + + TrackingBuilder trackingBuilder = + sourceTracking == null + ? 
TrackingBuilder.added(snapshotId) + : TrackingBuilder.from(sourceTracking, snapshotId); + + if (dvUpdated) { + trackingBuilder.dvUpdated(); + } + + if (deletedPositions != null) { + trackingBuilder.deletedPositions(deletedPositions); + } + + if (replacedPositions != null) { + trackingBuilder.replacedPositions(replacedPositions); + } + + return new TrackedFileStruct( + trackingBuilder.build(), + contentType, + location, + fileFormat, + partitionData, + recordCount, + fileSizeInBytes, + specId, + contentStats, + sortOrderId, + deletionVector, + manifestInfo, + keyMetadata, + splitOffsets, + equalityIds); + } +} diff --git a/core/src/main/java/org/apache/iceberg/TrackedFileStruct.java b/core/src/main/java/org/apache/iceberg/TrackedFileStruct.java index 4830f69d6bf1..613152d66808 100644 --- a/core/src/main/java/org/apache/iceberg/TrackedFileStruct.java +++ b/core/src/main/java/org/apache/iceberg/TrackedFileStruct.java @@ -70,13 +70,13 @@ public PartitionData copy() { private FileContent contentType = null; private String location = null; private FileFormat fileFormat = null; + private Tracking tracking = null; private long recordCount = -1L; private long fileSizeInBytes = -1L; - private Integer specId = null; private PartitionData partitionData = EMPTY_PARTITION_DATA; // optional fields - private Tracking tracking = null; + private Integer specId = null; private ContentStats contentStats = null; private Integer sortOrderId = null; private DeletionVector deletionVector = null; @@ -100,7 +100,6 @@ public PartitionData copy() { super(BASE_TYPE.fields().size()); } - /** Constructor that accepts required fields. 
*/ TrackedFileStruct( Tracking tracking, FileContent contentType, @@ -108,7 +107,15 @@ public PartitionData copy() { FileFormat fileFormat, PartitionData partition, long recordCount, - long fileSizeInBytes) { + long fileSizeInBytes, + Integer specId, + ContentStats contentStats, + Integer sortOrderId, + DeletionVector deletionVector, + ManifestInfo manifestInfo, + ByteBuffer keyMetadata, + List splitOffsets, + List equalityIds) { super(BASE_TYPE.fields().size()); this.tracking = tracking; this.contentType = contentType; @@ -119,6 +126,15 @@ public PartitionData copy() { if (partition != null) { this.partitionData = partition; } + + this.specId = specId; + this.contentStats = contentStats; + this.sortOrderId = sortOrderId; + this.deletionVector = deletionVector; + this.manifestInfo = manifestInfo; + this.keyMetadata = keyMetadata != null ? ByteBuffers.toByteArray(keyMetadata) : null; + this.splitOffsets = splitOffsets != null ? ArrayUtil.toLongArray(splitOffsets) : null; + this.equalityIds = equalityIds != null ? ArrayUtil.toIntArray(equalityIds) : null; } /** Copy constructor. */ diff --git a/core/src/test/java/org/apache/iceberg/TestTrackedFileBuilder.java b/core/src/test/java/org/apache/iceberg/TestTrackedFileBuilder.java new file mode 100644 index 000000000000..73685c9f312a --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/TestTrackedFileBuilder.java @@ -0,0 +1,776 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg; + +import static org.apache.iceberg.types.Types.NestedField.optional; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import java.nio.ByteBuffer; +import java.util.stream.Stream; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.types.Types; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +public class TestTrackedFileBuilder { + private static final Schema TABLE_SCHEMA = + new Schema( + optional(1, "id", Types.IntegerType.get()), optional(2, "data", Types.StringType.get())); + private static final Types.StructType PARTITION_TYPE = + PartitionSpec.builderFor(TABLE_SCHEMA).identity("id").build().partitionType(); + private static final PartitionData PARTITION_DATA = new PartitionData(PARTITION_TYPE); + private static final ManifestInfo MANIFEST_INFO = + ManifestInfoStruct.builder() + .addedFilesCount(10) + .existingFilesCount(20) + .deletedFilesCount(3) + .replacedFilesCount(2) + .addedRowsCount(1000L) + .existingRowsCount(2000L) + .deletedRowsCount(300L) + .replacedRowsCount(200L) + .minSequenceNumber(5L) + .build(); + private static final DeletionVector DELETION_VECTOR = + DeletionVectorStruct.builder() + .location("s3://bucket/data/dv.puffin") + .offset(0L) + .sizeInBytes(128L) + .cardinality(10L) + .build(); + private static final 
ContentStats CONTENT_STATS = + BaseContentStats.builder() + .withTableSchema(TABLE_SCHEMA) + .withFieldStats( + BaseFieldStats.builder() + .fieldId(1) + .type(Types.IntegerType.get()) + .valueCount(2000L) + .nullValueCount(0L) + .lowerBound(1) + .upperBound(1000) + .build()) + .withFieldStats( + BaseFieldStats.builder() + .fieldId(2) + .type(Types.StringType.get()) + .valueCount(2000L) + .nullValueCount(5L) + .lowerBound("a") + .upperBound("z") + .build()) + .build(); + private static final ByteBuffer KEY_METADATA = ByteBuffer.wrap(new byte[] {1, 2, 3}); + private static final ImmutableList SPLIT_OFFSETS = ImmutableList.of(0L, 4096L, 8192L); + private static final ByteBuffer DELETED_POSITIONS = ByteBuffer.wrap(new byte[] {10, 11, 12}); + private static final ByteBuffer REPLACED_POSITIONS = ByteBuffer.wrap(new byte[] {20, 21, 22}); + + private static Stream buildersForAllContentTypes() { + return Stream.of( + TrackedFileBuilder.data(50L), + TrackedFileBuilder.equalityDelete(50L), + TrackedFileBuilder.dataManifest(50L), + TrackedFileBuilder.deleteManifest(50L)); + } + + @ParameterizedTest + @MethodSource("buildersForAllContentTypes") + public void missingRequiredFields(TrackedFileBuilder builder) { + assertThatThrownBy(builder::build) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Missing required field: location"); + + assertThatThrownBy(() -> builder.location("s3://bucket/data/file").build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Missing required field: file format"); + + assertThatThrownBy(() -> builder.fileFormat(FileFormat.PARQUET).build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Missing required field: record count"); + + assertThatThrownBy(() -> builder.recordCount(2000L).build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Missing required field: file size in bytes"); + + assertThatThrownBy(() -> builder.fileSizeInBytes(12345L).build()) + 
.isInstanceOf(IllegalArgumentException.class) + .hasMessage("Missing required field: partition data"); + } + + @Test + public void missingFieldsForManifests() { + assertThatThrownBy( + () -> + TrackedFileBuilder.dataManifest(50L) + .location("s3://bucket/data/data_manifest.avro") + .fileFormat(FileFormat.AVRO) + .recordCount(420L) + .fileSizeInBytes(556L) + .partition(PARTITION_DATA) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Missing required field: manifest info"); + + assertThatThrownBy( + () -> + TrackedFileBuilder.deleteManifest(50L) + .location("s3://bucket/data/delete_manifest.avro") + .fileFormat(FileFormat.AVRO) + .recordCount(100L) + .fileSizeInBytes(543L) + .partition(PARTITION_DATA) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Missing required field: manifest info"); + } + + @Test + public void missingEqualityIdsForEqualityDeletes() { + assertThatThrownBy( + () -> + TrackedFileBuilder.equalityDelete(50L) + .location("s3://bucket/data/eq_delete.parquet") + .fileFormat(FileFormat.PARQUET) + .recordCount(2000L) + .fileSizeInBytes(12345L) + .partition(PARTITION_DATA) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Missing required field: equality IDs"); + } + + private static Stream nonEqualityDeleteBuilders() { + return Stream.of( + Arguments.of(TrackedFileBuilder.data(10L), "DATA"), + Arguments.of(TrackedFileBuilder.dataManifest(10L), "DATA_MANIFEST"), + Arguments.of(TrackedFileBuilder.deleteManifest(10L), "DELETE_MANIFEST"), + Arguments.of(TrackedFileBuilder.from(sourceData(12L), 20L), "DATA"), + Arguments.of(TrackedFileBuilder.from(sourceDataManifest(21L), 25L), "DATA_MANIFEST"), + Arguments.of(TrackedFileBuilder.from(sourceDeleteManifest(12L), 20L), "DELETE_MANIFEST")); + } + + @ParameterizedTest + @MethodSource("nonEqualityDeleteBuilders") + public void invalidEqualityIdsForContentType(TrackedFileBuilder builder, String contentType) { + assertThatThrownBy(() -> 
builder.equalityIds(ImmutableList.of(1))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + "Equality IDs can only be added to EQUALITY_DELETES entries, but entry type is: " + + contentType); + } + + private static Stream nonDataBuilders() { + return Stream.of( + Arguments.of(TrackedFileBuilder.equalityDelete(10L), "EQUALITY_DELETES"), + Arguments.of(TrackedFileBuilder.dataManifest(10L), "DATA_MANIFEST"), + Arguments.of(TrackedFileBuilder.deleteManifest(10L), "DELETE_MANIFEST"), + Arguments.of(TrackedFileBuilder.from(sourceEqualityDelete(12L), 20L), "EQUALITY_DELETES"), + Arguments.of(TrackedFileBuilder.from(sourceDataManifest(21L), 25L), "DATA_MANIFEST"), + Arguments.of(TrackedFileBuilder.from(sourceDeleteManifest(12L), 20L), "DELETE_MANIFEST")); + } + + @ParameterizedTest + @MethodSource("nonDataBuilders") + public void invalidDeletionVectorForContentType(TrackedFileBuilder builder, String contentType) { + assertThatThrownBy(() -> builder.deletionVector(DELETION_VECTOR)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + "Deletion vector can only be added to DATA entries, but entry type is: " + contentType); + } + + @ParameterizedTest + @MethodSource("nonDataBuilders") + public void invalidSortOrderIdForContentType(TrackedFileBuilder builder, String contentType) { + assertThatThrownBy(() -> builder.sortOrderId(1)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + "Sort order ID can only be added to DATA entries, but entry type is: " + contentType); + } + + @ParameterizedTest + @MethodSource("nonDataBuilders") + public void invalidSplitOffsetsForContentType(TrackedFileBuilder builder, String contentType) { + assertThatThrownBy(() -> builder.splitOffsets(SPLIT_OFFSETS)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + "Split offsets can only be added to DATA entries, but entry type is: " + contentType); + } + + private static Stream nonManifestBuilders() { + return Stream.of( + 
Arguments.of(TrackedFileBuilder.data(10L), "DATA"), + Arguments.of(TrackedFileBuilder.equalityDelete(10L), "EQUALITY_DELETES"), + Arguments.of(TrackedFileBuilder.from(sourceData(12L), 20L), "DATA"), + Arguments.of(TrackedFileBuilder.from(sourceEqualityDelete(12L), 20L), "EQUALITY_DELETES")); + } + + @ParameterizedTest + @MethodSource("nonManifestBuilders") + public void invalidManifestInfoForContentType(TrackedFileBuilder builder, String contentType) { + assertThatThrownBy(() -> builder.manifestInfo(MANIFEST_INFO)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + "Manifest info can only be added to manifests, but entry type is: " + contentType); + } + + @ParameterizedTest + @MethodSource("nonManifestBuilders") + public void invalidDeletedPositionsForContentType( + TrackedFileBuilder builder, String contentType) { + assertThatThrownBy(() -> builder.deletedPositions(DELETED_POSITIONS)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + "Deleted positions can only be added to manifests, but entry type is: " + contentType); + } + + @ParameterizedTest + @MethodSource("nonManifestBuilders") + public void invalidReplacedPositionsForContentType( + TrackedFileBuilder builder, String contentType) { + assertThatThrownBy(() -> builder.replacedPositions(REPLACED_POSITIONS)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + "Replaced positions can only be added to manifests, but entry type is: " + contentType); + } + + @Test + public void invalidNullInputs() { + assertThatThrownBy(() -> TrackedFileBuilder.data(30L).location(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid location: null"); + + assertThatThrownBy(() -> TrackedFileBuilder.data(30L).fileFormat(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid file format: null"); + + assertThatThrownBy(() -> TrackedFileBuilder.data(30L).partition(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid 
partition: null"); + + assertThatThrownBy(() -> TrackedFileBuilder.data(30L).contentStats(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid content stats: null"); + + assertThatThrownBy(() -> TrackedFileBuilder.data(30L).deletionVector(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid deletion vector: null"); + + assertThatThrownBy(() -> TrackedFileBuilder.dataManifest(30L).manifestInfo(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid manifest info: null"); + + assertThatThrownBy(() -> TrackedFileBuilder.data(30L).keyMetadata(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid key metadata: null"); + + assertThatThrownBy(() -> TrackedFileBuilder.data(30L).splitOffsets(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid split offsets: null"); + + assertThatThrownBy(() -> TrackedFileBuilder.equalityDelete(30L).equalityIds(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid equality IDs: null"); + + assertThatThrownBy(() -> TrackedFileBuilder.from(null, 20L)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid source: null"); + + assertThatThrownBy( + () -> + TrackedFileBuilder.from( + entryWithInheritedSeqNums(sourceDataManifest(10L), 15L), 20L) + .deletedPositions(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid deleted positions: null"); + + assertThatThrownBy( + () -> + TrackedFileBuilder.from( + entryWithInheritedSeqNums(sourceDeleteManifest(100L), 150L), 200L) + .replacedPositions(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid replaced positions: null"); + + assertThatThrownBy(() -> TrackedFileBuilder.deleted(null, 20L)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid source: null"); + + assertThatThrownBy(() -> TrackedFileBuilder.replaced(null, 20L)) + .isInstanceOf(IllegalArgumentException.class) + 
.hasMessage("Invalid source: null"); + } + + @Test + public void invalidNegativeInputs() { + assertThatThrownBy(() -> TrackedFileBuilder.dataManifest(40L).recordCount(-1)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid record count: -1 (must be >= 0)"); + + assertThatThrownBy(() -> TrackedFileBuilder.dataManifest(40L).fileSizeInBytes(-1)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid file size in bytes: -1 (must be >= 0)"); + + assertThatThrownBy(() -> TrackedFileBuilder.dataManifest(40L).specId(-1)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid spec ID: -1 (must be >= 0)"); + + assertThatThrownBy(() -> TrackedFileBuilder.data(40L).sortOrderId(-1)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid sort order ID: -1 (must be >= 0)"); + } + + @Test + public void buildDataFileWithRequiredFieldsOnly() { + TrackedFile trackedFile = + TrackedFileBuilder.data(50L) + .location("s3://bucket/data/file.parquet") + .fileFormat(FileFormat.PARQUET) + .recordCount(2000L) + .fileSizeInBytes(12345L) + .partition(PARTITION_DATA) + .build(); + + assertThat(trackedFile.contentType()).isEqualTo(FileContent.DATA); + assertThat(trackedFile.location()).isEqualTo("s3://bucket/data/file.parquet"); + assertThat(trackedFile.fileFormat()).isEqualTo(FileFormat.PARQUET); + assertThat(trackedFile.recordCount()).isEqualTo(2000L); + assertThat(trackedFile.fileSizeInBytes()).isEqualTo(12345L); + assertThat(trackedFile.partition()).isSameAs(PARTITION_DATA); + + assertThat(trackedFile.tracking().status()).isEqualTo(EntryStatus.ADDED); + assertThat(trackedFile.tracking().snapshotId()).isEqualTo(50L); + assertThat(trackedFile.tracking().dvSnapshotId()).isNull(); + + assertThat(trackedFile.specId()).isNull(); + assertThat(trackedFile.contentStats()).isNull(); + assertThat(trackedFile.sortOrderId()).isNull(); + assertThat(trackedFile.deletionVector()).isNull(); + assertThat(trackedFile.manifestInfo()).isNull(); 
+ assertThat(trackedFile.keyMetadata()).isNull(); + assertThat(trackedFile.splitOffsets()).isNull(); + assertThat(trackedFile.equalityIds()).isNull(); + } + + @Test + public void buildDataFileWithAllFields() { /* Builds a DATA entry with all of the setters chained below, checks that each value reads back from the built entry, expects ADDED tracking whose snapshotId and dvSnapshotId are both 50, and expects manifestInfo and equalityIds to stay null. */ + TrackedFile trackedFile = + TrackedFileBuilder.data(50L) + .location("s3://bucket/data/file.parquet") + .fileFormat(FileFormat.PARQUET) + .recordCount(2000L) + .fileSizeInBytes(12345L) + .specId(7) + .partition(PARTITION_DATA) + .contentStats(CONTENT_STATS) + .sortOrderId(3) + .deletionVector(DELETION_VECTOR) + .keyMetadata(KEY_METADATA) + .splitOffsets(SPLIT_OFFSETS) + .build(); + + assertThat(trackedFile.contentType()).isEqualTo(FileContent.DATA); + assertThat(trackedFile.location()).isEqualTo("s3://bucket/data/file.parquet"); + assertThat(trackedFile.fileFormat()).isEqualTo(FileFormat.PARQUET); + assertThat(trackedFile.recordCount()).isEqualTo(2000L); + assertThat(trackedFile.fileSizeInBytes()).isEqualTo(12345L); + assertThat(trackedFile.specId()).isEqualTo(7); + assertThat(trackedFile.partition()).isSameAs(PARTITION_DATA); + assertThat(trackedFile.contentStats()).isSameAs(CONTENT_STATS); + assertThat(trackedFile.sortOrderId()).isEqualTo(3); + assertThat(trackedFile.deletionVector()).isSameAs(DELETION_VECTOR); + assertThat(trackedFile.keyMetadata()).isEqualTo(KEY_METADATA); + assertThat(trackedFile.splitOffsets()).isEqualTo(SPLIT_OFFSETS); + + assertThat(trackedFile.tracking().status()).isEqualTo(EntryStatus.ADDED); + assertThat(trackedFile.tracking().snapshotId()).isEqualTo(50L); + assertThat(trackedFile.tracking().dvSnapshotId()).isEqualTo(50L); + + // Unsupported fields for data files + assertThat(trackedFile.manifestInfo()).isNull(); + assertThat(trackedFile.equalityIds()).isNull(); + } + + @Test + public void buildEqualityDeleteFileWithRequiredFieldsOnly() { /* Builds an EQUALITY_DELETES entry from location, format, record count, file size, partition and equality ids only; expects ADDED tracking with snapshotId 50 and every field that was not set to be null. */ + TrackedFile trackedFile = + TrackedFileBuilder.equalityDelete(50L) + .location("s3://bucket/data/eq_delete.parquet") + .fileFormat(FileFormat.PARQUET) + .recordCount(2000L) + 
.fileSizeInBytes(12345L) + .partition(PARTITION_DATA) + .equalityIds(ImmutableList.of(1)) + .build(); + + assertThat(trackedFile.contentType()).isEqualTo(FileContent.EQUALITY_DELETES); + assertThat(trackedFile.location()).isEqualTo("s3://bucket/data/eq_delete.parquet"); + assertThat(trackedFile.fileFormat()).isEqualTo(FileFormat.PARQUET); + assertThat(trackedFile.recordCount()).isEqualTo(2000L); + assertThat(trackedFile.fileSizeInBytes()).isEqualTo(12345L); + assertThat(trackedFile.partition()).isSameAs(PARTITION_DATA); + assertThat(trackedFile.equalityIds()).containsExactly(1); + + assertThat(trackedFile.tracking().status()).isEqualTo(EntryStatus.ADDED); + assertThat(trackedFile.tracking().snapshotId()).isEqualTo(50L); + + assertThat(trackedFile.specId()).isNull(); + assertThat(trackedFile.contentStats()).isNull(); + assertThat(trackedFile.sortOrderId()).isNull(); + assertThat(trackedFile.deletionVector()).isNull(); + assertThat(trackedFile.manifestInfo()).isNull(); + assertThat(trackedFile.keyMetadata()).isNull(); + assertThat(trackedFile.splitOffsets()).isNull(); + } + + @Test + public void buildEqualityDeleteFileWithAllFields() { /* Same as the required-fields case but also sets specId, contentStats and keyMetadata, and uses two equality ids; expects sortOrderId, deletionVector, splitOffsets and manifestInfo to remain null. */ + TrackedFile trackedFile = + TrackedFileBuilder.equalityDelete(50L) + .location("s3://bucket/data/eq_delete.parquet") + .fileFormat(FileFormat.PARQUET) + .recordCount(2000L) + .fileSizeInBytes(12345L) + .specId(7) + .partition(PARTITION_DATA) + .contentStats(CONTENT_STATS) + .keyMetadata(KEY_METADATA) + .equalityIds(ImmutableList.of(1, 2)) + .build(); + + assertThat(trackedFile.contentType()).isEqualTo(FileContent.EQUALITY_DELETES); + assertThat(trackedFile.location()).isEqualTo("s3://bucket/data/eq_delete.parquet"); + assertThat(trackedFile.fileFormat()).isEqualTo(FileFormat.PARQUET); + assertThat(trackedFile.recordCount()).isEqualTo(2000L); + assertThat(trackedFile.fileSizeInBytes()).isEqualTo(12345L); + assertThat(trackedFile.specId()).isEqualTo(7); + assertThat(trackedFile.partition()).isSameAs(PARTITION_DATA); + 
assertThat(trackedFile.contentStats()).isSameAs(CONTENT_STATS); + assertThat(trackedFile.keyMetadata()).isEqualTo(KEY_METADATA); + assertThat(trackedFile.equalityIds()).containsExactly(1, 2); + + assertThat(trackedFile.tracking().status()).isEqualTo(EntryStatus.ADDED); + assertThat(trackedFile.tracking().snapshotId()).isEqualTo(50L); + + // Unsupported fields for equality delete files + assertThat(trackedFile.sortOrderId()).isNull(); + assertThat(trackedFile.deletionVector()).isNull(); + assertThat(trackedFile.splitOffsets()).isNull(); + assertThat(trackedFile.manifestInfo()).isNull(); + } + + private static Stream manifestBuilders() { /* Argument source: one (builder, expected content type) pair for a data manifest and one for a delete manifest, both created for snapshot id 50. */ + return Stream.of( + Arguments.of(TrackedFileBuilder.dataManifest(50L), FileContent.DATA_MANIFEST), + Arguments.of(TrackedFileBuilder.deleteManifest(50L), FileContent.DELETE_MANIFEST)); + } + + @ParameterizedTest + @MethodSource("manifestBuilders") + public void buildManifestWithRequiredFieldsOnly( + TrackedFileBuilder builder, FileContent contentType) { /* Runs once per pair from manifestBuilders(): sets location, format, record count, file size, partition and manifestInfo only; expects ADDED tracking with snapshotId 50 and every field that was not set to be null. */ + TrackedFile trackedFile = + builder + .location("s3://bucket/data/manifest.avro") + .fileFormat(FileFormat.AVRO) + .recordCount(420L) + .fileSizeInBytes(556L) + .partition(PARTITION_DATA) + .manifestInfo(MANIFEST_INFO) + .build(); + + assertThat(trackedFile.contentType()).isEqualTo(contentType); + assertThat(trackedFile.location()).isEqualTo("s3://bucket/data/manifest.avro"); + assertThat(trackedFile.fileFormat()).isEqualTo(FileFormat.AVRO); + assertThat(trackedFile.recordCount()).isEqualTo(420L); + assertThat(trackedFile.fileSizeInBytes()).isEqualTo(556L); + assertThat(trackedFile.partition()).isSameAs(PARTITION_DATA); + assertThat(trackedFile.manifestInfo()).isSameAs(MANIFEST_INFO); + + assertThat(trackedFile.tracking().status()).isEqualTo(EntryStatus.ADDED); + assertThat(trackedFile.tracking().snapshotId()).isEqualTo(50L); + + assertThat(trackedFile.specId()).isNull(); + assertThat(trackedFile.contentStats()).isNull(); + assertThat(trackedFile.sortOrderId()).isNull(); + 
assertThat(trackedFile.deletionVector()).isNull(); + assertThat(trackedFile.keyMetadata()).isNull(); + assertThat(trackedFile.splitOffsets()).isNull(); + assertThat(trackedFile.equalityIds()).isNull(); + } + + @ParameterizedTest + @MethodSource("manifestBuilders") + public void buildManifestWithAllFields(TrackedFileBuilder builder, FileContent contentType) { /* Runs once per pair from manifestBuilders(): additionally sets specId, contentStats and keyMetadata; expects sortOrderId, deletionVector, splitOffsets and equalityIds to remain null. */ + TrackedFile trackedFile = + builder + .location("s3://bucket/data/manifest.avro") + .fileFormat(FileFormat.AVRO) + .recordCount(420L) + .fileSizeInBytes(556L) + .specId(7) + .partition(PARTITION_DATA) + .contentStats(CONTENT_STATS) + .keyMetadata(KEY_METADATA) + .manifestInfo(MANIFEST_INFO) + .build(); + + assertThat(trackedFile.contentType()).isEqualTo(contentType); + assertThat(trackedFile.location()).isEqualTo("s3://bucket/data/manifest.avro"); + assertThat(trackedFile.fileFormat()).isEqualTo(FileFormat.AVRO); + assertThat(trackedFile.recordCount()).isEqualTo(420L); + assertThat(trackedFile.fileSizeInBytes()).isEqualTo(556L); + assertThat(trackedFile.specId()).isEqualTo(7); + assertThat(trackedFile.partition()).isSameAs(PARTITION_DATA); + assertThat(trackedFile.contentStats()).isSameAs(CONTENT_STATS); + assertThat(trackedFile.keyMetadata()).isEqualTo(KEY_METADATA); + assertThat(trackedFile.manifestInfo()).isSameAs(MANIFEST_INFO); + + assertThat(trackedFile.tracking().status()).isEqualTo(EntryStatus.ADDED); + assertThat(trackedFile.tracking().snapshotId()).isEqualTo(50L); + + // Unsupported fields for manifests + assertThat(trackedFile.sortOrderId()).isNull(); + assertThat(trackedFile.deletionVector()).isNull(); + assertThat(trackedFile.splitOffsets()).isNull(); + assertThat(trackedFile.equalityIds()).isNull(); + } + + private static Stream manifestSources() { /* Argument source: a data-manifest entry and a delete-manifest entry, each built for snapshot id 10 and passed through entryWithInheritedSeqNums with sequence number 7, paired with the expected content type. */ + return Stream.of( + Arguments.of( + entryWithInheritedSeqNums(sourceDataManifest(10L), 7L), FileContent.DATA_MANIFEST), + Arguments.of( + entryWithInheritedSeqNums(sourceDeleteManifest(10L), 7L), FileContent.DELETE_MANIFEST)); + } + + 
@ParameterizedTest + @MethodSource("manifestSources") + public void buildManifestFromSourceWithDeletedPositions( + TrackedFile source, FileContent contentType) { /* Derives an entry from a manifest source at snapshot 20 with only deletedPositions set; currently expects EXISTING status, the given deleted positions, and null replaced positions. */ + TrackedFile trackedFile = + TrackedFileBuilder.from(source, 20L).deletedPositions(DELETED_POSITIONS).build(); + + // TODO gaborkaszab: when MODIFIED lands: + // - Status should be modified + // - dvSnapshotId should be 20L + assertThat(trackedFile.contentType()).isEqualTo(contentType); + assertThat(trackedFile.tracking().status()).isEqualTo(EntryStatus.EXISTING); + // assertThat(trackedFile.tracking().dvSnapshotId()).isEqualTo(20L); + assertThat(trackedFile.tracking().deletedPositions()).isEqualTo(DELETED_POSITIONS); + assertThat(trackedFile.tracking().replacedPositions()).isNull(); + } + + @ParameterizedTest + @MethodSource("manifestSources") + public void buildManifestFromSourceWithReplacedPositions( + TrackedFile source, FileContent contentType) { /* Mirror of the deleted-positions case: only replacedPositions is set, so deleted positions are expected to be null. */ + TrackedFile trackedFile = + TrackedFileBuilder.from(source, 20L).replacedPositions(REPLACED_POSITIONS).build(); + + // TODO gaborkaszab: when MODIFIED lands: + // - Status should be modified + // - dvSnapshotId should be 20L + assertThat(trackedFile.contentType()).isEqualTo(contentType); + assertThat(trackedFile.tracking().status()).isEqualTo(EntryStatus.EXISTING); + // assertThat(trackedFile.tracking().dvSnapshotId()).isEqualTo(20L); + assertThat(trackedFile.tracking().deletedPositions()).isNull(); + assertThat(trackedFile.tracking().replacedPositions()).isEqualTo(REPLACED_POSITIONS); + } + + @ParameterizedTest + @MethodSource("manifestSources") + public void buildManifestFromSourceClearsPositions(TrackedFile source, FileContent contentType) { /* First builds an intermediate entry (snapshot 15) carrying both position buffers, then builds from that entry at snapshot 20 without setting positions and expects both to be null. */ + TrackedFile sourceWithPositions = + TrackedFileBuilder.from(source, 15L) + .deletedPositions(DELETED_POSITIONS) + .replacedPositions(REPLACED_POSITIONS) + .build(); + + TrackedFile newEntry = TrackedFileBuilder.from(sourceWithPositions, 20L).build(); + + // Building a new entry from this source should not carry 
the positions over + assertThat(newEntry.contentType()).isEqualTo(contentType); + assertThat(newEntry.tracking().status()).isEqualTo(EntryStatus.EXISTING); + assertThat(newEntry.tracking().deletedPositions()).isNull(); + assertThat(newEntry.tracking().replacedPositions()).isNull(); + // TODO gaborkaszab: when MODIFIED lands: dvSnapshotId should be 15L + // assertThat(newEntry.tracking().dvSnapshotId()).isEqualTo(15L); + } + + @Test + public void buildDataFileFromSource() { /* Builds from a data source entry (snapshot 10, sequence number 45) at snapshot 20 with no changes; expects DATA content, EXISTING status, and all fields checked by verifyFieldsAreFromSource to match the source. */ + TrackedFile source = entryWithInheritedSeqNums(sourceData(10L), 45L); + + TrackedFile trackedFile = TrackedFileBuilder.from(source, 20L).build(); + + assertThat(trackedFile.contentType()).isEqualTo(FileContent.DATA); + assertThat(trackedFile.tracking().status()).isEqualTo(EntryStatus.EXISTING); + verifyFieldsAreFromSource(trackedFile, source); + } + + @Test + public void updateDVWhenBuildingDataFileFromSource() { /* Replaces the deletion vector while building from a source at snapshot 20; expects the new DV instance, snapshotId 10 and sequence numbers 45 kept from the source, and dvSnapshotId set to 20. */ + TrackedFile source = entryWithInheritedSeqNums(sourceData(10L), 45L); + + DeletionVector dv = + DeletionVectorStruct.builder() + .location("s3://bucket/data/new_dv.puffin") + .offset(5L) + .sizeInBytes(256L) + .cardinality(40L) + .build(); + + TrackedFile trackedFile = TrackedFileBuilder.from(source, 20L).deletionVector(dv).build(); + + assertThat(trackedFile.deletionVector()).isNotSameAs(source.deletionVector()).isSameAs(dv); + // TODO gaborkaszab: status should be MODIFIED once that's merged + assertThat(trackedFile.tracking().status()).isEqualTo(EntryStatus.EXISTING); + assertThat(trackedFile.tracking().snapshotId()).isEqualTo(10L); + assertThat(trackedFile.tracking().dataSequenceNumber()).isEqualTo(45L); + assertThat(trackedFile.tracking().fileSequenceNumber()).isEqualTo(45L); + assertThat(trackedFile.tracking().dvSnapshotId()).isEqualTo(20L); + } + + @Test + public void deletedFromDataSource() { /* TrackedFileBuilder.deleted on a data source is expected to yield a DATA entry with DELETED status whose remaining checked fields match the source. */ + TrackedFile source = entryWithInheritedSeqNums(sourceData(10L), 45L); + + TrackedFile deleted = TrackedFileBuilder.deleted(source, 20L); + + 
assertThat(deleted.contentType()).isEqualTo(FileContent.DATA); + assertThat(deleted.tracking().status()).isEqualTo(EntryStatus.DELETED); + verifyFieldsAreFromSource(deleted, source); + } + + @Test + public void replacedFromDataSource() { /* TrackedFileBuilder.replaced on a data source is expected to yield a DATA entry with REPLACED status whose remaining checked fields match the source. */ + TrackedFile source = entryWithInheritedSeqNums(sourceData(10L), 45L); + + TrackedFile replaced = TrackedFileBuilder.replaced(source, 20L); + + assertThat(replaced.contentType()).isEqualTo(FileContent.DATA); + assertThat(replaced.tracking().status()).isEqualTo(EntryStatus.REPLACED); + verifyFieldsAreFromSource(replaced, source); + } + + @ParameterizedTest + @MethodSource("manifestSources") + public void deletedFromManifestSource(TrackedFile source, FileContent contentType) { /* Runs for both manifest sources: deleted(...) is expected to keep the content type, set DELETED status, and keep the checked fields from the source. */ + TrackedFile deleted = TrackedFileBuilder.deleted(source, 20L); + + assertThat(deleted.contentType()).isEqualTo(contentType); + assertThat(deleted.tracking().status()).isEqualTo(EntryStatus.DELETED); + verifyFieldsAreFromSource(deleted, source); + } + + @ParameterizedTest + @MethodSource("manifestSources") + public void replacedFromManifestSourceFails(TrackedFile source, FileContent contentType) { /* Runs for both manifest sources: replaced(...) is expected to throw IllegalArgumentException with exactly the message asserted below. */ + assertThatThrownBy(() -> TrackedFileBuilder.replaced(source, 20L)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + "Manifest entries cannot transition to REPLACED, but entry type is: " + contentType); + } + + private static TrackedFile sourceData(long snapshotId) { /* Fixture: a data entry for the given snapshot id with specId, stats, sort order, deletion vector, key metadata and split offsets all set. */ + return TrackedFileBuilder.data(snapshotId) + .location("s3://bucket/data/file.parquet") + .fileFormat(FileFormat.PARQUET) + .recordCount(2000L) + .fileSizeInBytes(12345L) + .partition(PARTITION_DATA) + .specId(7) + .contentStats(CONTENT_STATS) + .sortOrderId(3) + .deletionVector(DELETION_VECTOR) + .keyMetadata(KEY_METADATA) + .splitOffsets(SPLIT_OFFSETS) + .build(); + } + + private static TrackedFile sourceEqualityDelete(long snapshotId) { /* Fixture: an equality-delete entry for the given snapshot id with a single equality id (1) and no other optional setters called. */ + return TrackedFileBuilder.equalityDelete(snapshotId) + .location("s3://bucket/data/eq_delete.parquet") + .fileFormat(FileFormat.PARQUET) + 
.recordCount(2000L) + .fileSizeInBytes(12345L) + .partition(PARTITION_DATA) + .equalityIds(ImmutableList.of(1)) + .build(); + } + + private static TrackedFile sourceDataManifest(long snapshotId) { /* Fixture: a data-manifest entry for the given snapshot id with manifestInfo set and no other optional setters called. */ + return TrackedFileBuilder.dataManifest(snapshotId) + .location("s3://bucket/data/data_manifest.parquet") + .fileFormat(FileFormat.PARQUET) + .recordCount(420L) + .fileSizeInBytes(556L) + .partition(PARTITION_DATA) + .manifestInfo(MANIFEST_INFO) + .build(); + } + + private static TrackedFile sourceDeleteManifest(long snapshotId) { /* Fixture: a delete-manifest entry for the given snapshot id with manifestInfo set and no other optional setters called. */ + return TrackedFileBuilder.deleteManifest(snapshotId) + .location("s3://bucket/data/delete_manifest.parquet") + .fileFormat(FileFormat.PARQUET) + .recordCount(100L) + .fileSizeInBytes(543L) + .partition(PARTITION_DATA) + .manifestInfo(MANIFEST_INFO) + .build(); + } + + private static TrackedFile entryWithInheritedSeqNums(TrackedFile entry, long sequenceNumber) { /* Mutates and returns the same entry: calls inheritFrom on its tracking (cast to TrackingStruct) with an EXISTING tracking that carries sequenceNumber in two constructor slots. NOTE(review): inheritFrom is defined outside this file; presumably it fills in the entry's data and file sequence numbers - confirm. */ + Tracking manifestTrackingToInheritFrom = + new TrackingStruct( + EntryStatus.EXISTING, 123L, sequenceNumber, sequenceNumber, null, null, null, null); + + ((TrackingStruct) entry.tracking()).inheritFrom(manifestTrackingToInheritFrom); + return entry; + } + + private static void verifyFieldsAreFromSource(TrackedFile entry, TrackedFile source) { /* Asserts that the file-level fields plus the tracking sequence numbers, dvSnapshotId and position buffers of entry match source; status and snapshotId are not compared here. */ + assertThat(entry.location()).isEqualTo(source.location()); + assertThat(entry.fileFormat()).isEqualTo(source.fileFormat()); + assertThat(entry.recordCount()).isEqualTo(source.recordCount()); + assertThat(entry.fileSizeInBytes()).isEqualTo(source.fileSizeInBytes()); + assertThat(entry.specId()).isEqualTo(source.specId()); + assertThat(entry.partition()).isSameAs(source.partition()); + assertThat(entry.contentStats()).isSameAs(source.contentStats()); + assertThat(entry.sortOrderId()).isEqualTo(source.sortOrderId()); + assertThat(entry.deletionVector()).isSameAs(source.deletionVector()); + assertThat(entry.keyMetadata()).isEqualTo(source.keyMetadata()); + assertThat(entry.splitOffsets()).isEqualTo(source.splitOffsets()); + 
assertThat(entry.manifestInfo()).isSameAs(source.manifestInfo()); + assertThat(entry.equalityIds()).isSameAs(source.equalityIds()); + + assertThat(entry.tracking().dataSequenceNumber()) + .isEqualTo(source.tracking().dataSequenceNumber()); + assertThat(entry.tracking().fileSequenceNumber()) + .isEqualTo(source.tracking().fileSequenceNumber()); + assertThat(entry.tracking().dvSnapshotId()).isEqualTo(source.tracking().dvSnapshotId()); + assertThat(entry.tracking().deletedPositions()).isEqualTo(source.tracking().deletedPositions()); + assertThat(entry.tracking().replacedPositions()) + .isEqualTo(source.tracking().replacedPositions()); + } +} diff --git a/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java b/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java index 8891db408be5..b0c6d1d911d3 100644 --- a/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java +++ b/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java @@ -331,10 +331,6 @@ void testKryoSerializationRoundTrip() throws IOException { } static TrackedFileStruct createFullTrackedFile() { - TrackingStruct tracking = (TrackingStruct) TrackingBuilder.added(42L).build(); - tracking.setManifestLocation("s3://bucket/manifest.avro"); - tracking.set(MANIFEST_POS_ORDINAL, 3L); - DeletionVectorStruct dv = DeletionVectorStruct.builder() .location("s3://bucket/dv.puffin") @@ -344,19 +340,23 @@ static TrackedFileStruct createFullTrackedFile() { .build(); TrackedFileStruct file = - new TrackedFileStruct( - tracking, - FileContent.DATA, - "s3://bucket/data/file.parquet", - FileFormat.PARQUET, - newPartition(7, "music"), - 100L, - 1024L); - file.set(6, 0); - file.set(9, 1); - file.set(10, dv); - file.set(12, ByteBuffer.wrap(new byte[] {1, 2, 3})); - file.set(13, ImmutableList.of(50L)); + (TrackedFileStruct) + TrackedFileBuilder.data(42L) + .location("s3://bucket/data/file.parquet") + .fileFormat(FileFormat.PARQUET) + .partition(newPartition(7, "music")) + .recordCount(100L) + 
.fileSizeInBytes(1024L) + .specId(0) + .sortOrderId(1) + .deletionVector(dv) + .keyMetadata(ByteBuffer.wrap(new byte[] {1, 2, 3})) + .splitOffsets(ImmutableList.of(50L)) + .build(); + + TrackingStruct tracking = (TrackingStruct) file.tracking(); + tracking.setManifestLocation("s3://bucket/manifest.avro"); + tracking.set(MANIFEST_POS_ORDINAL, 3L); return file; } @@ -416,18 +416,15 @@ static TrackedFileStruct createTrackedFileWithStats() { .withFieldStats(fieldStatsList) .build(); - TrackedFileStruct file = - new TrackedFileStruct( - null, - FileContent.DATA, - "s3://bucket/data/file.parquet", - FileFormat.PARQUET, - new PartitionData(Types.StructType.of()), - 100L, - 1024L); - file.set(6, 0); - file.set(8, stats); - - return file; + return (TrackedFileStruct) + TrackedFileBuilder.data(0L) + .location("s3://bucket/data/file.parquet") + .fileFormat(FileFormat.PARQUET) + .partition(new PartitionData(Types.StructType.of())) + .recordCount(100L) + .fileSizeInBytes(1024L) + .specId(0) + .contentStats(stats) + .build(); } }