diff --git a/api/src/main/java/org/apache/iceberg/types/Types.java b/api/src/main/java/org/apache/iceberg/types/Types.java index ec6076b04fa0..24077b13e892 100644 --- a/api/src/main/java/org/apache/iceberg/types/Types.java +++ b/api/src/main/java/org/apache/iceberg/types/Types.java @@ -680,18 +680,19 @@ public boolean equals(Object o) { } GeographyType that = (GeographyType) o; - return Objects.equals(crs, that.crs) && Objects.equals(algorithm, that.algorithm); + // compare the resolved CRS and algorithm so an explicit default is equal to an omitted one + return crs().equals(that.crs()) && algorithm() == that.algorithm(); } @Override public int hashCode() { - return Objects.hash(GeographyType.class, crs, algorithm); + return Objects.hash(GeographyType.class, crs(), algorithm()); } @Override public String toString() { - if (algorithm != null) { - return String.format("geography(%s, %s)", crs != null ? crs : DEFAULT_CRS, algorithm); + if (algorithm() != DEFAULT_ALGORITHM) { + return String.format("geography(%s, %s)", crs(), algorithm()); } else if (crs != null) { return String.format("geography(%s)", crs); } else { diff --git a/api/src/test/java/org/apache/iceberg/types/TestTypes.java b/api/src/test/java/org/apache/iceberg/types/TestTypes.java index fa5ed4304d3c..c765790569c8 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestTypes.java +++ b/api/src/test/java/org/apache/iceberg/types/TestTypes.java @@ -172,6 +172,32 @@ public void testGeospatialTypeToString() { .isEqualTo("geography(srid:4326, karney)"); assertThat(Types.GeographyType.of(null, EdgeAlgorithm.KARNEY).toString()) .isEqualTo("geography(OGC:CRS84, karney)"); + assertThat( + Types.GeographyType.of(Types.GeographyType.DEFAULT_CRS, EdgeAlgorithm.SPHERICAL) + .toString()) + .isEqualTo("geography"); + assertThat(Types.GeographyType.of("srid:4326", EdgeAlgorithm.SPHERICAL).toString()) + .isEqualTo("geography(srid:4326)"); + } + + @Test + public void testGeospatialTypeDefaultNormalization() { + // the default CRS and edge algorithm normalize so that equivalent type specs are equal + assertThat(Types.GeometryType.of(Types.GeometryType.DEFAULT_CRS)) + .isEqualTo(Types.GeometryType.crs84()); + assertThat(Types.GeographyType.of(Types.GeographyType.DEFAULT_CRS)) + .isEqualTo(Types.GeographyType.crs84()); + assertThat(Types.GeographyType.of(Types.GeographyType.DEFAULT_CRS, EdgeAlgorithm.SPHERICAL)) + .isEqualTo(Types.GeographyType.crs84()) + .hasSameHashCodeAs(Types.GeographyType.crs84()); + assertThat(Types.GeographyType.of("srid:4326", EdgeAlgorithm.SPHERICAL)) + .isEqualTo(Types.GeographyType.of("srid:4326")) + .hasSameHashCodeAs(Types.GeographyType.of("srid:4326")); + assertThat(Types.GeographyType.of("srid:4326", EdgeAlgorithm.SPHERICAL).algorithm()) + .isEqualTo(EdgeAlgorithm.SPHERICAL); + assertThat(Types.GeographyType.of("srid:4326", EdgeAlgorithm.KARNEY)) + .isNotEqualTo(Types.GeographyType.of("srid:4326")); + assertThat(Types.GeographyType.of("srid:4326")).isNotEqualTo(Types.GeographyType.crs84()); } @Test diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/MessageTypeToType.java b/parquet/src/main/java/org/apache/iceberg/parquet/MessageTypeToType.java index 98023bafcb8f..6b985a8bab43 100644 --- a/parquet/src/main/java/org/apache/iceberg/parquet/MessageTypeToType.java +++ b/parquet/src/main/java/org/apache/iceberg/parquet/MessageTypeToType.java @@ -29,9 +29,11 @@ import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.types.EdgeAlgorithm; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; import org.apache.iceberg.types.Types.TimestampType; +import org.apache.parquet.column.schema.EdgeInterpolationAlgorithm; import org.apache.parquet.schema.GroupType; import org.apache.parquet.schema.LogicalTypeAnnotation; import org.apache.parquet.schema.MessageType; @@ -254,6 +256,25 @@ public Optional visit(LogicalTypeAnnotation.JsonLogicalTypeAnnotation json public Optional visit(LogicalTypeAnnotation.BsonLogicalTypeAnnotation bsonType) { return Optional.of(Types.BinaryType.get()); } + + @Override + public Optional visit(LogicalTypeAnnotation.GeometryLogicalTypeAnnotation geometryType) { + String crs = geometryType.getCrs(); + return Optional.of(Types.GeometryType.of(crs != null ? crs : Types.GeometryType.DEFAULT_CRS)); + } + + @Override + public Optional visit( + LogicalTypeAnnotation.GeographyLogicalTypeAnnotation geographyType) { + String crs = geographyType.getCrs(); + EdgeInterpolationAlgorithm algorithm = geographyType.getAlgorithm(); + return Optional.of( + Types.GeographyType.of( + crs != null ? crs : Types.GeographyType.DEFAULT_CRS, + algorithm != null + ? EdgeAlgorithm.fromName(algorithm.name()) + : Types.GeographyType.DEFAULT_ALGORITHM)); + } } private void addAlias(String name, int fieldId) { diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/TypeToMessageType.java b/parquet/src/main/java/org/apache/iceberg/parquet/TypeToMessageType.java index d648cbf0694b..f05001f5f43d 100644 --- a/parquet/src/main/java/org/apache/iceberg/parquet/TypeToMessageType.java +++ b/parquet/src/main/java/org/apache/iceberg/parquet/TypeToMessageType.java @@ -30,12 +30,15 @@ import org.apache.iceberg.Schema; import org.apache.iceberg.avro.AvroSchemaUtil; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.types.EdgeAlgorithm; import org.apache.iceberg.types.Type.NestedType; import org.apache.iceberg.types.Type.PrimitiveType; import org.apache.iceberg.types.Type.TypeID; import org.apache.iceberg.types.TypeUtil; import org.apache.iceberg.types.Types.DecimalType; import org.apache.iceberg.types.Types.FixedType; +import org.apache.iceberg.types.Types.GeographyType; +import org.apache.iceberg.types.Types.GeometryType; import org.apache.iceberg.types.Types.ListType; import org.apache.iceberg.types.Types.MapType; import org.apache.iceberg.types.Types.NestedField; @@ -43,6 +46,7 @@ import org.apache.iceberg.types.Types.TimestampNanoType; import org.apache.iceberg.types.Types.TimestampType; import org.apache.iceberg.variants.Variant; +import org.apache.parquet.column.schema.EdgeInterpolationAlgorithm; import org.apache.parquet.schema.GroupType; import org.apache.parquet.schema.LogicalTypeAnnotation; import org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit; @@ -280,11 +284,33 @@ public Type primitive( .id(id) .named(name); + case GEOMETRY: + GeometryType geometry = (GeometryType) primitive; + return Types.primitive(BINARY, repetition) + .as(LogicalTypeAnnotation.geometryType(geometry.crs())) + .id(id) + .named(name); + + case GEOGRAPHY: + GeographyType geography = (GeographyType) primitive; + return Types.primitive(BINARY, repetition) + .as( + LogicalTypeAnnotation.geographyType( + geography.crs(), toParquet(geography.algorithm()))) + .id(id) + .named(name); + default: throw new UnsupportedOperationException("Unsupported type for Parquet: " + primitive); } } + private static EdgeInterpolationAlgorithm toParquet(EdgeAlgorithm algorithm) { + // Iceberg and Parquet use the same algorithm names (SPHERICAL, VINCENTY, THOMAS, ANDOYER, + // KARNEY) so the algorithm is mapped by name + return EdgeInterpolationAlgorithm.valueOf(algorithm.name()); + } + private static LogicalTypeAnnotation decimalAnnotation(int precision, int scale) { return LogicalTypeAnnotation.decimalType(scale, precision); } diff --git a/parquet/src/test/java/org/apache/iceberg/parquet/TestParquetSchemaUtil.java b/parquet/src/test/java/org/apache/iceberg/parquet/TestParquetSchemaUtil.java index 51bfc1e811f4..b5163cb175b9 100644 --- a/parquet/src/test/java/org/apache/iceberg/parquet/TestParquetSchemaUtil.java +++ b/parquet/src/test/java/org/apache/iceberg/parquet/TestParquetSchemaUtil.java @@ -22,13 +22,17 @@ import static org.apache.iceberg.types.Types.NestedField.required; import static org.assertj.core.api.Assertions.assertThat; +import java.util.List; import java.util.concurrent.atomic.AtomicInteger; import org.apache.iceberg.Schema; import org.apache.iceberg.mapping.MappingUtil; import org.apache.iceberg.mapping.NameMapping; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.types.EdgeAlgorithm; import org.apache.iceberg.types.TypeUtil; import org.apache.iceberg.types.Types; import org.apache.iceberg.variants.Variant; +import org.apache.parquet.column.schema.EdgeInterpolationAlgorithm; import org.apache.parquet.schema.GroupType; import org.apache.parquet.schema.LogicalTypeAnnotation; import org.apache.parquet.schema.MessageType; @@ -338,6 +342,83 @@ public void testVariantTypesWithoutAssigningIds() { .isEqualTo(expectedSchema.asStruct()); } + @Test + public void testGeospatialTypeRoundTrip() { + List fields = + Lists.newArrayList( + required(1, "geom_default", Types.GeometryType.crs84()), + optional(2, "geom_3857", Types.GeometryType.of("EPSG:3857")), + required(3, "geog_default", Types.GeographyType.crs84())); + // cover the by-name algorithm mapping for every edge algorithm in both directions + int nextId = fields.size() + 1; + for (EdgeAlgorithm algorithm : EdgeAlgorithm.values()) { + fields.add( + optional(nextId++, "geog_" + algorithm, Types.GeographyType.of("EPSG:4326", algorithm))); + } + + Schema schema = new Schema(fields); + MessageType messageType = ParquetSchemaUtil.convert(schema, "geo_table"); + Schema actualSchema = ParquetSchemaUtil.convert(messageType); + assertThat(actualSchema.asStruct()) + .as("Schema must round-trip through Parquet geometry/geography logical types") + .isEqualTo(schema.asStruct()); + } + + @Test + public void testGeospatialAnnotationsWithOmittedParameters() { + // unset CRS and algorithm parameters must map to Iceberg's defaults, and explicit values + // (including explicit default values) must be preserved + MessageType messageType = + org.apache.parquet.schema.Types.buildMessage() + .required(PrimitiveTypeName.BINARY) + .as(LogicalTypeAnnotation.geometryType(null)) + .id(1) + .named("geom_bare") + .optional(PrimitiveTypeName.BINARY) + .as(LogicalTypeAnnotation.geometryType("OGC:CRS84")) + .id(2) + .named("geom_explicit_default") + .required(PrimitiveTypeName.BINARY) + .as(LogicalTypeAnnotation.geographyType(null, null)) + .id(3) + .named("geog_bare") + .optional(PrimitiveTypeName.BINARY) + .as(LogicalTypeAnnotation.geographyType("EPSG:4326", null)) + .id(4) + .named("geog_crs_only") + .optional(PrimitiveTypeName.BINARY) + .as(LogicalTypeAnnotation.geographyType(null, EdgeInterpolationAlgorithm.ANDOYER)) + .id(5) + .named("geog_algorithm_only") + .optional(PrimitiveTypeName.BINARY) + .as( + LogicalTypeAnnotation.geographyType( + "EPSG:4326", EdgeInterpolationAlgorithm.SPHERICAL)) + .id(6) + .named("geog_explicit_spherical") + .named("geo_table"); + + Schema expectedSchema = + new Schema( + required(1, "geom_bare", Types.GeometryType.crs84()), + optional(2, "geom_explicit_default", Types.GeometryType.crs84()), + required(3, "geog_bare", Types.GeographyType.crs84()), + optional(4, "geog_crs_only", Types.GeographyType.of("EPSG:4326")), + optional( + 5, + "geog_algorithm_only", + Types.GeographyType.of(Types.GeographyType.DEFAULT_CRS, EdgeAlgorithm.ANDOYER)), + optional( + 6, + "geog_explicit_spherical", + Types.GeographyType.of("EPSG:4326", EdgeAlgorithm.SPHERICAL))); + + Schema actualSchema = ParquetSchemaUtil.convert(messageType); + assertThat(actualSchema.asStruct()) + .as("Geometry and geography annotations must convert to the expected Iceberg types") + .isEqualTo(expectedSchema.asStruct()); + } + @Test public void testSchemaConversionForHiveStyleLists() { String parquetSchemaString =