diff --git a/common/src/main/java/org/apache/sedona/common/Predicates.java b/common/src/main/java/org/apache/sedona/common/Predicates.java index 9a030748e55..f6fae94103d 100644 --- a/common/src/main/java/org/apache/sedona/common/Predicates.java +++ b/common/src/main/java/org/apache/sedona/common/Predicates.java @@ -18,6 +18,7 @@ */ package org.apache.sedona.common; +import org.apache.sedona.common.geometryObjects.Box2D; import org.apache.sedona.common.sphere.Spheroid; import org.locationtech.jts.geom.*; import org.locationtech.jts.operation.relate.RelateOp; @@ -27,6 +28,55 @@ public static boolean contains(Geometry leftGeometry, Geometry rightGeometry) { return leftGeometry.contains(rightGeometry); } + /** + * Closed-interval bbox intersection: true if {@code a} and {@code b} overlap on both the + * X and Y axes (matches PostGIS {@code &&} on box2d). Edge- and corner-touching boxes count as + * intersecting. + * + *
<p>Both arguments must have ordered bounds ({@code xmin <= xmax} and {@code ymin <= ymax}). + * Sedona's Box2D type allows inverted bounds ({@code xmin > xmax}) — that ordering is reserved + * for future antimeridian-wraparound semantics on geography bboxes (cf. sedona-db's {@code + * WraparoundInterval}). Until those semantics ship, planar predicates throw on inverted input + * rather than silently returning misleading results. SQL callers get NULL-in/NULL-out + * propagation; this Java entry point throws on null. + */ + public static boolean boxIntersects(Box2D a, Box2D b) { + requireOrderedPlanarBox(a, "a"); + requireOrderedPlanarBox(b, "b"); + return !(a.getXMax() < b.getXMin() + || a.getXMin() > b.getXMax() + || a.getYMax() < b.getYMin() + || a.getYMin() > b.getYMax()); + } + + /** + * True if {@code a} fully contains {@code b} on both the X and Y axes (closed intervals; + * matches PostGIS {@code ~} on box2d). Equal boxes contain each other. + * + * 
<p>Same ordered-bound contract as {@link #boxIntersects(Box2D, Box2D)} — inverted bounds throw + * because planar containment with inverted intervals has no defined meaning until antimeridian + * wraparound semantics ship. + */ + public static boolean boxContains(Box2D a, Box2D b) { + requireOrderedPlanarBox(a, "a"); + requireOrderedPlanarBox(b, "b"); + return a.getXMin() <= b.getXMin() + && a.getYMin() <= b.getYMin() + && a.getXMax() >= b.getXMax() + && a.getYMax() >= b.getYMax(); + } + + private static void requireOrderedPlanarBox(Box2D box, String argName) { + if (box.getXMin() > box.getXMax() || box.getYMin() > box.getYMax()) { + throw new IllegalArgumentException( + "Box2D argument '" + + argName + + "' has inverted bounds (xmin > xmax or ymin > ymax). Planar Box2D predicates " + + "require ordered intervals; inverted bounds are reserved for future antimeridian " + + "wraparound semantics."); + } + } + public static boolean intersects(Geometry leftGeometry, Geometry rightGeometry) { return leftGeometry.intersects(rightGeometry); } diff --git a/common/src/test/java/org/apache/sedona/common/PredicatesTest.java b/common/src/test/java/org/apache/sedona/common/PredicatesTest.java index 8d48ae14026..2c188109292 100644 --- a/common/src/test/java/org/apache/sedona/common/PredicatesTest.java +++ b/common/src/test/java/org/apache/sedona/common/PredicatesTest.java @@ -22,6 +22,7 @@ import static org.apache.sedona.common.Functions.crossesDateLine; import static org.junit.Assert.*; +import org.apache.sedona.common.geometryObjects.Box2D; import org.junit.Test; import org.locationtech.jts.geom.Coordinate; import org.locationtech.jts.geom.Geometry; @@ -32,6 +33,55 @@ public class PredicatesTest extends TestBase { private static final GeometryFactory GEOMETRY_FACTORY = new GeometryFactory(); + @Test + public void testBoxIntersects() { + Box2D a = new Box2D(0.0, 0.0, 5.0, 5.0); + + // Full overlap + assertTrue(Predicates.boxIntersects(a, new Box2D(1.0, 1.0, 2.0, 2.0))); + // 
Partial overlap + assertTrue(Predicates.boxIntersects(a, new Box2D(3.0, 3.0, 7.0, 7.0))); + // Edge-touching (closed intervals) + assertTrue(Predicates.boxIntersects(a, new Box2D(5.0, 0.0, 10.0, 5.0))); + // Corner-touching (closed intervals) + assertTrue(Predicates.boxIntersects(a, new Box2D(5.0, 5.0, 10.0, 10.0))); + // Disjoint on X + assertFalse(Predicates.boxIntersects(a, new Box2D(6.0, 0.0, 10.0, 5.0))); + // Disjoint on Y + assertFalse(Predicates.boxIntersects(a, new Box2D(0.0, 6.0, 5.0, 10.0))); + } + + @Test + public void testBoxContains() { + Box2D outer = new Box2D(0.0, 0.0, 10.0, 10.0); + + assertTrue(Predicates.boxContains(outer, new Box2D(2.0, 2.0, 5.0, 5.0))); + // Boundaries are inclusive + assertTrue(Predicates.boxContains(outer, new Box2D(0.0, 0.0, 10.0, 10.0))); + assertTrue(Predicates.boxContains(outer, new Box2D(0.0, 0.0, 1.0, 1.0))); + // Outside on X + assertFalse(Predicates.boxContains(outer, new Box2D(-1.0, 0.0, 5.0, 5.0))); + // Crosses boundary on X + assertFalse(Predicates.boxContains(outer, new Box2D(5.0, 0.0, 11.0, 5.0))); + } + + @Test + public void testBoxPredicatesRejectInvertedBounds() { + // Box2D allows xmin > xmax (reserved for future antimeridian wraparound); planar predicates + // refuse to evaluate them rather than silently returning misleading results. 
+ Box2D normal = new Box2D(0.0, 0.0, 5.0, 5.0); + Box2D wrapX = new Box2D(170.0, 10.0, -170.0, 20.0); // longitude crosses antimeridian + Box2D wrapY = new Box2D(0.0, 5.0, 5.0, 0.0); // ymin > ymax + + IllegalArgumentException ex1 = + assertThrows(IllegalArgumentException.class, () -> Predicates.boxIntersects(wrapX, normal)); + assertTrue(ex1.getMessage().contains("inverted bounds")); + + IllegalArgumentException ex2 = + assertThrows(IllegalArgumentException.class, () -> Predicates.boxContains(normal, wrapY)); + assertTrue(ex2.getMessage().contains("inverted bounds")); + } + @Test public void testDWithinSuccess() { Geometry point1 = GEOMETRY_FACTORY.createPoint(new Coordinate(1, 1)); diff --git a/flink/src/main/java/org/apache/sedona/flink/Catalog.java b/flink/src/main/java/org/apache/sedona/flink/Catalog.java index ba813a3d3b2..90a5534a92b 100644 --- a/flink/src/main/java/org/apache/sedona/flink/Catalog.java +++ b/flink/src/main/java/org/apache/sedona/flink/Catalog.java @@ -247,6 +247,8 @@ public static UserDefinedFunction[] getFuncs() { public static UserDefinedFunction[] getPredicates() { return new UserDefinedFunction[] { + new Predicates.ST_BoxContains(), + new Predicates.ST_BoxIntersects(), new Predicates.ST_Intersects(), new Predicates.ST_Contains(), new Predicates.ST_Crosses(), diff --git a/flink/src/main/java/org/apache/sedona/flink/expressions/Predicates.java b/flink/src/main/java/org/apache/sedona/flink/expressions/Predicates.java index 289565c99bd..62fe9ba2a65 100644 --- a/flink/src/main/java/org/apache/sedona/flink/expressions/Predicates.java +++ b/flink/src/main/java/org/apache/sedona/flink/expressions/Predicates.java @@ -20,11 +20,49 @@ import org.apache.flink.table.annotation.DataTypeHint; import org.apache.flink.table.functions.ScalarFunction; +import org.apache.sedona.common.geometryObjects.Box2D; +import org.apache.sedona.flink.Box2DTypeSerializer; import org.apache.sedona.flink.GeometryTypeSerializer; import org.locationtech.jts.geom.Geometry; 
public class Predicates { + public static class ST_BoxIntersects extends ScalarFunction { + @DataTypeHint("Boolean") + public Boolean eval( + @DataTypeHint( + value = "RAW", + rawSerializer = Box2DTypeSerializer.class, + bridgedTo = Box2D.class) + Box2D a, + @DataTypeHint( + value = "RAW", + rawSerializer = Box2DTypeSerializer.class, + bridgedTo = Box2D.class) + Box2D b) { + if (a == null || b == null) return null; + return org.apache.sedona.common.Predicates.boxIntersects(a, b); + } + } + + public static class ST_BoxContains extends ScalarFunction { + @DataTypeHint("Boolean") + public Boolean eval( + @DataTypeHint( + value = "RAW", + rawSerializer = Box2DTypeSerializer.class, + bridgedTo = Box2D.class) + Box2D a, + @DataTypeHint( + value = "RAW", + rawSerializer = Box2DTypeSerializer.class, + bridgedTo = Box2D.class) + Box2D b) { + if (a == null || b == null) return null; + return org.apache.sedona.common.Predicates.boxContains(a, b); + } + } + public static class ST_Intersects extends ScalarFunction { /** Constructor for relation checking without duplicate removal */ public ST_Intersects() {} diff --git a/flink/src/test/java/org/apache/sedona/flink/PredicateTest.java b/flink/src/test/java/org/apache/sedona/flink/PredicateTest.java index 6fe137d5cec..a244aca2d40 100644 --- a/flink/src/test/java/org/apache/sedona/flink/PredicateTest.java +++ b/flink/src/test/java/org/apache/sedona/flink/PredicateTest.java @@ -34,6 +34,34 @@ public static void onceExecutedBeforeAll() { initialize(); } + @Test + public void testBoxIntersects() { + Table t = + tableEnv.sqlQuery( + "WITH boxes AS (" + + " SELECT ST_Box2D(ST_GeomFromWKT('POLYGON((0 0, 0 5, 5 5, 5 0, 0 0))')) AS a," + + " ST_Box2D(ST_GeomFromWKT('POLYGON((3 3, 3 7, 7 7, 7 3, 3 3))')) AS overlap," + + " ST_Box2D(ST_GeomFromWKT('POLYGON((6 6, 6 7, 7 7, 7 6, 6 6))')) AS disjoint)" + + " SELECT ST_BoxIntersects(a, overlap), ST_BoxIntersects(a, disjoint) FROM boxes"); + org.apache.flink.types.Row row = first(t); + 
assertEquals(true, row.getField(0)); + assertEquals(false, row.getField(1)); + } + + @Test + public void testBoxContains() { + Table t = + tableEnv.sqlQuery( + "WITH boxes AS (" + + " SELECT ST_Box2D(ST_GeomFromWKT('POLYGON((0 0, 0 10, 10 10, 10 0, 0 0))')) AS outer_box," + + " ST_Box2D(ST_GeomFromWKT('POLYGON((2 2, 2 5, 5 5, 5 2, 2 2))')) AS inner_box," + + " ST_Box2D(ST_GeomFromWKT('POLYGON((5 5, 5 11, 11 11, 11 5, 5 5))')) AS overlap)" + + " SELECT ST_BoxContains(outer_box, inner_box), ST_BoxContains(outer_box, overlap) FROM boxes"); + org.apache.flink.types.Row row = first(t); + assertEquals(true, row.getField(0)); + assertEquals(false, row.getField(1)); + } + @Test public void testIntersects() { Table pointTable = createPointTable(testDataSize); diff --git a/python/sedona/spark/sql/st_predicates.py b/python/sedona/spark/sql/st_predicates.py index a2254aebdf2..b71327f82a7 100644 --- a/python/sedona/spark/sql/st_predicates.py +++ b/python/sedona/spark/sql/st_predicates.py @@ -30,6 +30,38 @@ _call_predicate_function = partial(call_sedona_function, "st_predicates") +@validate_argument_types +def ST_BoxContains(a: ColumnOrName, b: ColumnOrName) -> Column: + """Check whether Box2D a fully contains Box2D b (closed intervals). + + Mirrors PostGIS ``~`` on box2d. NULL on null input. + + :param a: Outer Box2D column. + :type a: ColumnOrName + :param b: Inner Box2D column. + :type b: ColumnOrName + :return: True if a contains b, false otherwise. + :rtype: Column + """ + return _call_predicate_function("ST_BoxContains", (a, b)) + + +@validate_argument_types +def ST_BoxIntersects(a: ColumnOrName, b: ColumnOrName) -> Column: + """Check whether Box2D a and Box2D b share any point (closed intervals). + + Mirrors PostGIS ``&&`` on box2d. NULL on null input. + + :param a: First Box2D column. + :type a: ColumnOrName + :param b: Second Box2D column. + :type b: ColumnOrName + :return: True if a and b overlap, false otherwise. 
+ :rtype: Column + """ + return _call_predicate_function("ST_BoxIntersects", (a, b)) + + @validate_argument_types def ST_Contains(a: ColumnOrName, b: ColumnOrName) -> Column: """Check whether geometry a contains geometry b. diff --git a/python/tests/sql/test_predicate.py b/python/tests/sql/test_predicate.py index 4d731348bd4..dd8f4143088 100644 --- a/python/tests/sql/test_predicate.py +++ b/python/tests/sql/test_predicate.py @@ -26,6 +26,30 @@ class TestPredicate(TestBase): + def test_st_box_intersects_and_contains(self): + df = self.spark.sql(""" + WITH t AS ( + SELECT + ST_Box2D(ST_GeomFromText('POLYGON((0 0, 0 10, 10 10, 10 0, 0 0))')) AS a, + ST_Box2D(ST_GeomFromText('POLYGON((2 2, 2 5, 5 5, 5 2, 2 2))')) AS inside, + ST_Box2D(ST_GeomFromText('POLYGON((5 5, 5 11, 11 11, 11 5, 5 5))')) AS overlap, + ST_Box2D(ST_GeomFromText('POLYGON((11 11, 11 12, 12 12, 12 11, 11 11))')) AS disjoint + ) + SELECT + ST_BoxIntersects(a, inside) AS i_inside, + ST_BoxIntersects(a, overlap) AS i_overlap, + ST_BoxIntersects(a, disjoint) AS i_disjoint, + ST_BoxContains(a, inside) AS c_inside, + ST_BoxContains(a, overlap) AS c_overlap + FROM t + """) + row = df.first() + assert row[0] is True + assert row[1] is True + assert row[2] is False + assert row[3] is True + assert row[4] is False + def test_st_contains(self): point_csv_df = ( self.spark.read.format("csv") diff --git a/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala b/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala index 3ea2ce4235e..8119fb4d064 100644 --- a/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala +++ b/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala @@ -163,6 +163,8 @@ object Catalog extends AbstractCatalog with Logging { // Predicates val predicateExprs: Seq[FunctionDescription] = Seq( + function[ST_BoxContains](), + function[ST_BoxIntersects](), function[ST_Contains](), function[ST_CoveredBy](), function[ST_Covers](), diff --git 
a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Predicates.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Predicates.scala index afee70626e1..22f20fbc03c 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Predicates.scala +++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Predicates.scala @@ -89,6 +89,38 @@ private[apache] case class ST_Contains(inputExpressions: Seq[Expression]) } } +/** + * Closed-interval bbox intersection over two Box2D arguments. Returns true if the boxes overlap + * on both the X and Y axes (matches PostGIS `&&` on box2d). Edge- and corner-touching boxes count + * as intersecting. Throws on inverted bounds (xmin>xmax / ymin>ymax) since planar predicates have + * no defined meaning for inverted intervals; that ordering is reserved for future + * antimeridian-wraparound semantics. + * + * @param inputExpressions + */ +private[apache] case class ST_BoxIntersects(inputExpressions: Seq[Expression]) + extends InferredExpression(Predicates.boxIntersects _) { + + protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) = { + copy(inputExpressions = newChildren) + } +} + +/** + * Closed-interval bbox containment over two Box2D arguments. Returns true if argument `a` fully + * contains argument `b` on both axes (matches PostGIS `~` on box2d). Equal boxes contain each + * other. Throws on inverted bounds for the same reason as ST_BoxIntersects. + * + * @param inputExpressions + */ +private[apache] case class ST_BoxContains(inputExpressions: Seq[Expression]) + extends InferredExpression(Predicates.boxContains _) { + + protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) = { + copy(inputExpressions = newChildren) + } +} + /** * Test if leftGeometry full intersects rightGeometry. Supports both Geometry (JTS) and Geography * (S2) inputs via InferredExpression dual dispatch. 
diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_predicates.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_predicates.scala index ecdb05e849a..cd3ec897444 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_predicates.scala +++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_predicates.scala @@ -24,6 +24,12 @@ import org.apache.spark.sql.Column import org.apache.spark.sql.sedona_sql.DataFrameShims._ object st_predicates { + def ST_BoxContains(a: Column, b: Column): Column = wrapExpression[ST_BoxContains](a, b) + def ST_BoxContains(a: String, b: String): Column = wrapExpression[ST_BoxContains](a, b) + + def ST_BoxIntersects(a: Column, b: Column): Column = wrapExpression[ST_BoxIntersects](a, b) + def ST_BoxIntersects(a: String, b: String): Column = wrapExpression[ST_BoxIntersects](a, b) + def ST_Contains(a: Column, b: Column): Column = wrapExpression[ST_Contains](a, b) def ST_Contains(a: String, b: String): Column = wrapExpression[ST_Contains](a, b) diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/predicateTestScala.scala b/spark/common/src/test/scala/org/apache/sedona/sql/predicateTestScala.scala index 482cb145953..e61a027bd09 100644 --- a/spark/common/src/test/scala/org/apache/sedona/sql/predicateTestScala.scala +++ b/spark/common/src/test/scala/org/apache/sedona/sql/predicateTestScala.scala @@ -25,6 +25,41 @@ class predicateTestScala extends TestBaseScala { describe("Sedona-SQL Predicate Test") { + it("Passed ST_BoxIntersects and ST_BoxContains") { + val df = sparkSession.sql(""" + WITH t AS ( + SELECT + ST_Box2D(ST_GeomFromText('POLYGON((0 0, 0 5, 5 5, 5 0, 0 0))')) AS a, + ST_Box2D(ST_GeomFromText('POLYGON((1 1, 1 2, 2 2, 2 1, 1 1))')) AS inside, + ST_Box2D(ST_GeomFromText('POLYGON((3 3, 3 7, 7 7, 7 3, 3 3))')) AS overlap, + ST_Box2D(ST_GeomFromText('POLYGON((5 0, 5 5, 10 5, 10 0, 5 0))')) AS edge, + 
ST_Box2D(ST_GeomFromText('POLYGON((6 6, 6 7, 7 7, 7 6, 6 6))')) AS disjoint, + ST_Box2D(ST_GeomFromText(NULL)) AS box_null + ) + SELECT + ST_BoxIntersects(a, inside), + ST_BoxIntersects(a, overlap), + ST_BoxIntersects(a, edge), + ST_BoxIntersects(a, disjoint), + ST_BoxIntersects(a, box_null), + ST_BoxContains(a, inside), + ST_BoxContains(a, overlap), + ST_BoxContains(a, a), + ST_BoxContains(a, box_null) + FROM t + """) + val row = df.collect()(0) + assert(row.getBoolean(0)) // intersects: inside + assert(row.getBoolean(1)) // intersects: overlap + assert(row.getBoolean(2)) // intersects: edge-touch + assert(!row.getBoolean(3)) // intersects: disjoint + assert(row.isNullAt(4)) // intersects: NULL propagates + assert(row.getBoolean(5)) // contains: inside + assert(!row.getBoolean(6)) // contains: overlap (extends past) + assert(row.getBoolean(7)) // contains: equal + assert(row.isNullAt(8)) // contains: NULL propagates + } + it("Passed ST_Contains") { var pointCsvDF = sparkSession.read .format("csv")