Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions api/src/main/java/org/apache/iceberg/types/Types.java
Original file line number Diff line number Diff line change
Expand Up @@ -680,18 +680,19 @@ public boolean equals(Object o) {
}

GeographyType that = (GeographyType) o;
return Objects.equals(crs, that.crs) && Objects.equals(algorithm, that.algorithm);
// compare the resolved CRS and algorithm so an explicit default is equal to an omitted one
return crs().equals(that.crs()) && algorithm() == that.algorithm();
}

@Override
public int hashCode() {
return Objects.hash(GeographyType.class, crs, algorithm);
return Objects.hash(GeographyType.class, crs(), algorithm());
}

@Override
public String toString() {
if (algorithm != null) {
return String.format("geography(%s, %s)", crs != null ? crs : DEFAULT_CRS, algorithm);
if (algorithm() != DEFAULT_ALGORITHM) {
return String.format("geography(%s, %s)", crs(), algorithm());
} else if (crs != null) {
return String.format("geography(%s)", crs);
} else {
Expand Down
26 changes: 26 additions & 0 deletions api/src/test/java/org/apache/iceberg/types/TestTypes.java
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,32 @@ public void testGeospatialTypeToString() {
.isEqualTo("geography(srid:4326, karney)");
assertThat(Types.GeographyType.of(null, EdgeAlgorithm.KARNEY).toString())
.isEqualTo("geography(OGC:CRS84, karney)");
assertThat(
Types.GeographyType.of(Types.GeographyType.DEFAULT_CRS, EdgeAlgorithm.SPHERICAL)
.toString())
.isEqualTo("geography");
assertThat(Types.GeographyType.of("srid:4326", EdgeAlgorithm.SPHERICAL).toString())
.isEqualTo("geography(srid:4326)");
}

@Test
public void testGeospatialTypeDefaultNormalization() {
// the default CRS and edge algorithm normalize so that equivalent type specs are equal
assertThat(Types.GeometryType.of(Types.GeometryType.DEFAULT_CRS))
.isEqualTo(Types.GeometryType.crs84());
assertThat(Types.GeographyType.of(Types.GeographyType.DEFAULT_CRS))
.isEqualTo(Types.GeographyType.crs84());
assertThat(Types.GeographyType.of(Types.GeographyType.DEFAULT_CRS, EdgeAlgorithm.SPHERICAL))
.isEqualTo(Types.GeographyType.crs84())
.hasSameHashCodeAs(Types.GeographyType.crs84());
assertThat(Types.GeographyType.of("srid:4326", EdgeAlgorithm.SPHERICAL))
.isEqualTo(Types.GeographyType.of("srid:4326"))
.hasSameHashCodeAs(Types.GeographyType.of("srid:4326"));
assertThat(Types.GeographyType.of("srid:4326", EdgeAlgorithm.SPHERICAL).algorithm())
.isEqualTo(EdgeAlgorithm.SPHERICAL);
assertThat(Types.GeographyType.of("srid:4326", EdgeAlgorithm.KARNEY))
.isNotEqualTo(Types.GeographyType.of("srid:4326"));
assertThat(Types.GeographyType.of("srid:4326")).isNotEqualTo(Types.GeographyType.crs84());
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,11 @@
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
import org.apache.iceberg.types.EdgeAlgorithm;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.types.Types.TimestampType;
import org.apache.parquet.column.schema.EdgeInterpolationAlgorithm;
import org.apache.parquet.schema.GroupType;
import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.MessageType;
Expand Down Expand Up @@ -254,6 +256,25 @@ public Optional<Type> visit(LogicalTypeAnnotation.JsonLogicalTypeAnnotation json
public Optional<Type> visit(LogicalTypeAnnotation.BsonLogicalTypeAnnotation bsonType) {
return Optional.of(Types.BinaryType.get());
}

@Override
public Optional<Type> visit(LogicalTypeAnnotation.GeometryLogicalTypeAnnotation geometryType) {
String crs = geometryType.getCrs();
return Optional.of(Types.GeometryType.of(crs != null ? crs : Types.GeometryType.DEFAULT_CRS));
}

@Override
public Optional<Type> visit(
LogicalTypeAnnotation.GeographyLogicalTypeAnnotation geographyType) {
String crs = geographyType.getCrs();
EdgeInterpolationAlgorithm algorithm = geographyType.getAlgorithm();
return Optional.of(
Types.GeographyType.of(
crs != null ? crs : Types.GeographyType.DEFAULT_CRS,
algorithm != null
? EdgeAlgorithm.fromName(algorithm.name())
: Types.GeographyType.DEFAULT_ALGORITHM));
}
}

private void addAlias(String name, int fieldId) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,19 +30,23 @@
import org.apache.iceberg.Schema;
import org.apache.iceberg.avro.AvroSchemaUtil;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.types.EdgeAlgorithm;
import org.apache.iceberg.types.Type.NestedType;
import org.apache.iceberg.types.Type.PrimitiveType;
import org.apache.iceberg.types.Type.TypeID;
import org.apache.iceberg.types.TypeUtil;
import org.apache.iceberg.types.Types.DecimalType;
import org.apache.iceberg.types.Types.FixedType;
import org.apache.iceberg.types.Types.GeographyType;
import org.apache.iceberg.types.Types.GeometryType;
import org.apache.iceberg.types.Types.ListType;
import org.apache.iceberg.types.Types.MapType;
import org.apache.iceberg.types.Types.NestedField;
import org.apache.iceberg.types.Types.StructType;
import org.apache.iceberg.types.Types.TimestampNanoType;
import org.apache.iceberg.types.Types.TimestampType;
import org.apache.iceberg.variants.Variant;
import org.apache.parquet.column.schema.EdgeInterpolationAlgorithm;
import org.apache.parquet.schema.GroupType;
import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit;
Expand Down Expand Up @@ -280,11 +284,33 @@ public Type primitive(
.id(id)
.named(name);

case GEOMETRY:
GeometryType geometry = (GeometryType) primitive;
return Types.primitive(BINARY, repetition)
.as(LogicalTypeAnnotation.geometryType(geometry.crs()))
.id(id)
.named(name);

case GEOGRAPHY:
GeographyType geography = (GeographyType) primitive;
return Types.primitive(BINARY, repetition)
.as(
LogicalTypeAnnotation.geographyType(
geography.crs(), toParquet(geography.algorithm())))
.id(id)
.named(name);

default:
throw new UnsupportedOperationException("Unsupported type for Parquet: " + primitive);
}
}

private static EdgeInterpolationAlgorithm toParquet(EdgeAlgorithm algorithm) {
// Iceberg and Parquet use the same algorithm names (SPHERICAL, VINCENTY, THOMAS, ANDOYER,
// KARNEY) so the algorithm is mapped by name
return EdgeInterpolationAlgorithm.valueOf(algorithm.name());
}

private static LogicalTypeAnnotation decimalAnnotation(int precision, int scale) {
return LogicalTypeAnnotation.decimalType(scale, precision);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,17 @@
import static org.apache.iceberg.types.Types.NestedField.required;
import static org.assertj.core.api.Assertions.assertThat;

import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.iceberg.Schema;
import org.apache.iceberg.mapping.MappingUtil;
import org.apache.iceberg.mapping.NameMapping;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.types.EdgeAlgorithm;
import org.apache.iceberg.types.TypeUtil;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.variants.Variant;
import org.apache.parquet.column.schema.EdgeInterpolationAlgorithm;
import org.apache.parquet.schema.GroupType;
import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.MessageType;
Expand Down Expand Up @@ -338,6 +342,83 @@ public void testVariantTypesWithoutAssigningIds() {
.isEqualTo(expectedSchema.asStruct());
}

@Test
public void testGeospatialTypeRoundTrip() {
List<Types.NestedField> fields =
Lists.newArrayList(
required(1, "geom_default", Types.GeometryType.crs84()),
optional(2, "geom_3857", Types.GeometryType.of("EPSG:3857")),
required(3, "geog_default", Types.GeographyType.crs84()));
// cover the by-name algorithm mapping for every edge algorithm in both directions
int nextId = fields.size() + 1;
for (EdgeAlgorithm algorithm : EdgeAlgorithm.values()) {
fields.add(
optional(nextId++, "geog_" + algorithm, Types.GeographyType.of("EPSG:4326", algorithm)));
}

Schema schema = new Schema(fields);
MessageType messageType = ParquetSchemaUtil.convert(schema, "geo_table");
Schema actualSchema = ParquetSchemaUtil.convert(messageType);
assertThat(actualSchema.asStruct())
.as("Schema must round-trip through Parquet geometry/geography logical types")
.isEqualTo(schema.asStruct());
}

@Test
public void testGeospatialAnnotationsWithOmittedParameters() {
// unset CRS and algorithm parameters must map to Iceberg's defaults, and explicit values
// (including explicit default values) must be preserved
MessageType messageType =
org.apache.parquet.schema.Types.buildMessage()
.required(PrimitiveTypeName.BINARY)
.as(LogicalTypeAnnotation.geometryType(null))
.id(1)
.named("geom_bare")
.optional(PrimitiveTypeName.BINARY)
.as(LogicalTypeAnnotation.geometryType("OGC:CRS84"))
.id(2)
.named("geom_explicit_default")
.required(PrimitiveTypeName.BINARY)
.as(LogicalTypeAnnotation.geographyType(null, null))
.id(3)
.named("geog_bare")
.optional(PrimitiveTypeName.BINARY)
.as(LogicalTypeAnnotation.geographyType("EPSG:4326", null))
.id(4)
.named("geog_crs_only")
.optional(PrimitiveTypeName.BINARY)
.as(LogicalTypeAnnotation.geographyType(null, EdgeInterpolationAlgorithm.ANDOYER))
.id(5)
.named("geog_algorithm_only")
.optional(PrimitiveTypeName.BINARY)
.as(
LogicalTypeAnnotation.geographyType(
"EPSG:4326", EdgeInterpolationAlgorithm.SPHERICAL))
.id(6)
.named("geog_explicit_spherical")
.named("geo_table");

Schema expectedSchema =
new Schema(
required(1, "geom_bare", Types.GeometryType.crs84()),
optional(2, "geom_explicit_default", Types.GeometryType.crs84()),
required(3, "geog_bare", Types.GeographyType.crs84()),
optional(4, "geog_crs_only", Types.GeographyType.of("EPSG:4326")),
optional(
5,
"geog_algorithm_only",
Types.GeographyType.of(Types.GeographyType.DEFAULT_CRS, EdgeAlgorithm.ANDOYER)),
optional(
6,
"geog_explicit_spherical",
Types.GeographyType.of("EPSG:4326", EdgeAlgorithm.SPHERICAL)));

Schema actualSchema = ParquetSchemaUtil.convert(messageType);
assertThat(actualSchema.asStruct())
.as("Geometry and geography annotations must convert to the expected Iceberg types")
.isEqualTo(expectedSchema.asStruct());
}

@Test
public void testSchemaConversionForHiveStyleLists() {
String parquetSchemaString =
Expand Down