From fbb3571cd7a5f73cde54521ba6a92d8cab572999 Mon Sep 17 00:00:00 2001
From: Jia Yu
Date: Wed, 20 May 2026 00:18:17 -0700
Subject: [PATCH] [GH-2973] Box3D foundation: value class + UDT + Catalyst plumbing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

First slice of the Box3D Phase 1 epic. Lays down the value class and the UDT
so subsequent slices (scalar constructors, accessors, predicates, aggregate)
can be reviewed independently.

- `Box3D` value class in `common` (six doubles in PostGIS order: xmin, ymin,
  zmin, xmax, ymax, zmax). NaN-Z folds into the z=0 plane per coordinate,
  matching PostGIS's flat-XY-treated-as-XY[Z=0] convention.
- `Box3DUDT` (struct of six non-nullable doubles) + UDT registration via
  `UdtRegistratorWrapper`.
- Catalyst plumbing in `InferredExpression`: Box3D as an `InferrableType` with
  matching argument-extractor, serializer, and Spark-DataType paths.
- `implicits.toBox3D` extension method to deserialise a Box3D from an
  InternalRow.
- `Box3DUDTSuite`: UDT round-trip, JSON schema, and Parquet write/read.

No new SQL functions yet — those land in follow-up slices on this same issue.
Phase 1 is split into 5 PRs (foundation, constructors, accessors + AsText,
predicates, ST_3DExtent aggregate) to keep each review small.
--- .../sedona/common/geometryObjects/Box3D.java | 157 ++++++++++++++++++ .../spark/sql/sedona_sql/UDT/Box3DUDT.scala | 92 ++++++++++ .../UDT/UdtRegistratorWrapper.scala | 3 +- .../expressions/InferredExpression.scala | 18 +- .../sedona_sql/expressions/implicits.scala | 21 ++- .../org/apache/sedona/sql/Box3DUDTSuite.scala | 82 +++++++++ 6 files changed, 369 insertions(+), 4 deletions(-) create mode 100644 common/src/main/java/org/apache/sedona/common/geometryObjects/Box3D.java create mode 100644 spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/UDT/Box3DUDT.scala create mode 100644 spark/common/src/test/scala/org/apache/sedona/sql/Box3DUDTSuite.scala diff --git a/common/src/main/java/org/apache/sedona/common/geometryObjects/Box3D.java b/common/src/main/java/org/apache/sedona/common/geometryObjects/Box3D.java new file mode 100644 index 00000000000..00eb0d31176 --- /dev/null +++ b/common/src/main/java/org/apache/sedona/common/geometryObjects/Box3D.java @@ -0,0 +1,157 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.sedona.common.geometryObjects; + +import java.io.Serializable; +import java.util.Objects; +import org.locationtech.jts.geom.Coordinate; +import org.locationtech.jts.geom.Geometry; + +/** + * Planar 3D bounding box with min/max X, Y, and Z. Storage order matches PostGIS {@code box3d}: + * {@code xmin, ymin, zmin, xmax, ymax, zmax}. + * + * <p>
Absence is represented by SQL NULL at the column level rather than an in-band sentinel. + * Geometries that lack a Z dimension are treated as having {@code z = 0} (matching PostGIS), so the + * bbox of an XY geometry has {@code zmin == zmax == 0} rather than NaN. Predicates require ordered + * bounds ({@code xmin <= xmax}, {@code ymin <= ymax}, {@code zmin <= zmax}); inverted Z has no + * defined planar meaning and there is no wraparound convention for the Z axis. + */ +public final class Box3D implements Serializable { + + private final double xmin; + private final double ymin; + private final double zmin; + private final double xmax; + private final double ymax; + private final double zmax; + + public Box3D(double xmin, double ymin, double zmin, double xmax, double ymax, double zmax) { + this.xmin = xmin; + this.ymin = ymin; + this.zmin = zmin; + this.xmax = xmax; + this.ymax = ymax; + this.zmax = zmax; + } + + /** + * Returns the 3D bbox of {@code geometry}, or {@code null} for null/empty geometry. Z values that + * are NaN (i.e. the coordinate has no Z dimension) are treated as 0, matching PostGIS's + * convention where flat XY geometries get a degenerate Z extent at 0. + */ + public static Box3D fromGeometry(Geometry geometry) { + if (geometry == null || geometry.isEmpty()) { + return null; + } + double xMin = Double.POSITIVE_INFINITY; + double yMin = Double.POSITIVE_INFINITY; + double zMin = Double.POSITIVE_INFINITY; + double xMax = Double.NEGATIVE_INFINITY; + double yMax = Double.NEGATIVE_INFINITY; + double zMax = Double.NEGATIVE_INFINITY; + boolean sawZ = false; + for (Coordinate c : geometry.getCoordinates()) { + xMin = Math.min(xMin, c.x); + xMax = Math.max(xMax, c.x); + yMin = Math.min(yMin, c.y); + yMax = Math.max(yMax, c.y); + double z = c.getZ(); + if (Double.isNaN(z)) { + // PostGIS-compatible: missing Z is folded into the 0 plane on each coord. 
+ zMin = Math.min(zMin, 0.0); + zMax = Math.max(zMax, 0.0); + } else { + sawZ = true; + zMin = Math.min(zMin, z); + zMax = Math.max(zMax, z); + } + } + // If the geometry has no Z at any coordinate, collapse to z=0. + if (!sawZ) { + zMin = 0.0; + zMax = 0.0; + } + return new Box3D(xMin, yMin, zMin, xMax, yMax, zMax); + } + + public double getXMin() { + return xmin; + } + + public double getYMin() { + return ymin; + } + + public double getZMin() { + return zmin; + } + + public double getXMax() { + return xmax; + } + + public double getYMax() { + return ymax; + } + + public double getZMax() { + return zmax; + } + + /** + * Returns the union of {@code this} and {@code other}. {@code other == null} is treated as a + * no-op, returning {@code this}. + */ + public Box3D expandToInclude(Box3D other) { + if (other == null) { + return this; + } + return new Box3D( + Math.min(xmin, other.xmin), + Math.min(ymin, other.ymin), + Math.min(zmin, other.zmin), + Math.max(xmax, other.xmax), + Math.max(ymax, other.ymax), + Math.max(zmax, other.zmax)); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (!(o instanceof Box3D)) return false; + Box3D other = (Box3D) o; + return Double.compare(xmin, other.xmin) == 0 + && Double.compare(ymin, other.ymin) == 0 + && Double.compare(zmin, other.zmin) == 0 + && Double.compare(xmax, other.xmax) == 0 + && Double.compare(ymax, other.ymax) == 0 + && Double.compare(zmax, other.zmax) == 0; + } + + @Override + public int hashCode() { + return Objects.hash(xmin, ymin, zmin, xmax, ymax, zmax); + } + + @Override + public String toString() { + return "BOX3D(" + xmin + " " + ymin + " " + zmin + ", " + xmax + " " + ymax + " " + zmax + ")"; + } +} diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/UDT/Box3DUDT.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/UDT/Box3DUDT.scala new file mode 100644 index 00000000000..ef94699c11f --- /dev/null +++ 
b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/UDT/Box3DUDT.scala @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.spark.sql.sedona_sql.UDT + +import org.apache.sedona.common.geometryObjects.Box3D +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.GenericInternalRow +import org.apache.spark.sql.types._ +import org.json4s.JsonAST.JValue +import org.json4s.JsonDSL._ + +/** + * UDT for [[Box3D]]. Stored as a Spark struct of six non-nullable doubles in PostGIS `box3d` + * order: `xmin`, `ymin`, `zmin`, `xmax`, `ymax`, `zmax`. + */ +class Box3DUDT extends UserDefinedType[Box3D] { + + override def sqlType: DataType = StructType( + Seq( + StructField("xmin", DoubleType, nullable = false), + StructField("ymin", DoubleType, nullable = false), + StructField("zmin", DoubleType, nullable = false), + StructField("xmax", DoubleType, nullable = false), + StructField("ymax", DoubleType, nullable = false), + StructField("zmax", DoubleType, nullable = false))) + + // No `pyUDT` override yet — the Python `Box3DType` class is intentionally out of scope for + // Phase 1 (see #2973). 
It will be added together with the Python bindings follow-up, the + // same way Box2D paired `Box2DUDT.pyUDT` with `python/sedona/spark/sql/types.py::Box2DType`. + + override def userClass: Class[Box3D] = classOf[Box3D] + + override def serialize(obj: Box3D): InternalRow = { + val row = new GenericInternalRow(6) + row.setDouble(0, obj.getXMin) + row.setDouble(1, obj.getYMin) + row.setDouble(2, obj.getZMin) + row.setDouble(3, obj.getXMax) + row.setDouble(4, obj.getYMax) + row.setDouble(5, obj.getZMax) + row + } + + override def deserialize(datum: Any): Box3D = datum match { + case row: InternalRow => + new Box3D( + row.getDouble(0), + row.getDouble(1), + row.getDouble(2), + row.getDouble(3), + row.getDouble(4), + row.getDouble(5)) + } + + override private[sql] def jsonValue: JValue = { + super.jsonValue mapField { + case ("class", _) => "class" -> this.getClass.getName.stripSuffix("$") + case other: Any => other + } + } + + override def equals(other: Any): Boolean = other match { + case _: UserDefinedType[_] => other.isInstanceOf[Box3DUDT] + case _ => false + } + + override def hashCode(): Int = userClass.hashCode() + + override def toString: String = "Box3DUDT" +} + +case object Box3DUDT + extends org.apache.spark.sql.sedona_sql.UDT.Box3DUDT + with scala.Serializable { + def apply(): Box3DUDT = new Box3DUDT() +} diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/UDT/UdtRegistratorWrapper.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/UDT/UdtRegistratorWrapper.scala index cf9a44aa971..9a7194a344f 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/UDT/UdtRegistratorWrapper.scala +++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/UDT/UdtRegistratorWrapper.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.sedona_sql.UDT import org.apache.sedona.common.S2Geography.Geography -import org.apache.sedona.common.geometryObjects.Box2D +import org.apache.sedona.common.geometryObjects.{Box2D, 
Box3D} import org.apache.spark.sql.types.UDTRegistration import org.locationtech.jts.geom.Geometry import org.locationtech.jts.index.SpatialIndex @@ -30,6 +30,7 @@ object UdtRegistratorWrapper { registerIfNotExists(classOf[Geometry].getName, classOf[GeometryUDT].getName) registerIfNotExists(classOf[Geography].getName, classOf[GeographyUDT].getName) registerIfNotExists(classOf[Box2D].getName, classOf[Box2DUDT].getName) + registerIfNotExists(classOf[Box3D].getName, classOf[Box3DUDT].getName) registerIfNotExists(classOf[SpatialIndex].getName, classOf[IndexUDT].getName) } diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/InferredExpression.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/InferredExpression.scala index 8a05bc29445..d9d0aa48c15 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/InferredExpression.scala +++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/InferredExpression.scala @@ -20,12 +20,12 @@ package org.apache.spark.sql.sedona_sql.expressions import org.apache.commons.lang3.StringUtils import org.apache.sedona.common.S2Geography.Geography -import org.apache.sedona.common.geometryObjects.Box2D +import org.apache.sedona.common.geometryObjects.{Box2D, Box3D} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{Expression, ImplicitCastInputTypes} import org.apache.spark.sql.catalyst.util.ArrayData -import org.apache.spark.sql.sedona_sql.UDT.{Box2DUDT, GeographyUDT, GeometryUDT} +import org.apache.spark.sql.sedona_sql.UDT.{Box2DUDT, Box3DUDT, GeographyUDT, GeometryUDT} import org.apache.spark.sql.sedona_sql.expressions.implicits._ import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -167,6 +167,8 @@ object InferrableType { new InferrableType[Array[Geography]] {} implicit val 
box2DInstance: InferrableType[Box2D] = new InferrableType[Box2D] {} + implicit val box3DInstance: InferrableType[Box3D] = + new InferrableType[Box3D] {} implicit val javaDoubleInstance: InferrableType[java.lang.Double] = new InferrableType[java.lang.Double] {} implicit val javaIntegerInstance: InferrableType[java.lang.Integer] = @@ -219,6 +221,8 @@ object InferredTypes { expr.toGeographyArray(input) } else if (t =:= typeOf[Box2D]) { expr => input => expr.toBox2D(input) + } else if (t =:= typeOf[Box3D]) { expr => input => + expr.toBox3D(input) } else if (InferredRasterExpression.isRasterType(t)) { InferredRasterExpression.rasterExtractor } else if (t =:= typeOf[Array[Double]]) { expr => input => @@ -279,6 +283,14 @@ object InferredTypes { } else { null } + } else if (t =:= typeOf[Box3D]) { + val udt = Box3DUDT + output => + if (output != null) { + udt.serialize(output.asInstanceOf[Box3D]) + } else { + null + } } else if (InferredRasterExpression.isRasterType(t)) { InferredRasterExpression.rasterSerializer } else if (t =:= typeOf[String]) { output => @@ -347,6 +359,8 @@ object InferredTypes { DataTypes.createArrayType(GeographyUDT()) } else if (t =:= typeOf[Box2D]) { Box2DUDT() + } else if (t =:= typeOf[Box3D]) { + Box3DUDT() } else if (InferredRasterExpression.isRasterType(t)) { InferredRasterExpression.rasterUDT } else if (InferredRasterExpression.isRasterArrayType(t)) { diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/implicits.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/implicits.scala index 3f1ae1fa409..bdbd88db664 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/implicits.scala +++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/implicits.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.sedona_sql.expressions import org.apache.sedona.common.S2Geography.{Geography, GeographyWKBSerializer} -import 
org.apache.sedona.common.geometryObjects.Box2D +import org.apache.sedona.common.geometryObjects.{Box2D, Box3D} import org.apache.sedona.sql.utils.GeometrySerializer import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Expression @@ -87,6 +87,25 @@ object implicits { } } + def toBox3D(input: InternalRow): Box3D = { + inputExpression match { + case serdeAware: SerdeAware => + serdeAware.evalWithoutSerialization(input).asInstanceOf[Box3D] + case _ => + inputExpression.eval(input) match { + case row: InternalRow => + new Box3D( + row.getDouble(0), + row.getDouble(1), + row.getDouble(2), + row.getDouble(3), + row.getDouble(4), + row.getDouble(5)) + case _ => null + } + } + } + def toGeographyArray(input: InternalRow): Array[Geography] = { inputExpression match { case aware: SerdeAware => diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/Box3DUDTSuite.scala b/spark/common/src/test/scala/org/apache/sedona/sql/Box3DUDTSuite.scala new file mode 100644 index 00000000000..098acf06604 --- /dev/null +++ b/spark/common/src/test/scala/org/apache/sedona/sql/Box3DUDTSuite.scala @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.sedona.sql + +import org.apache.sedona.common.geometryObjects.Box3D +import org.apache.spark.sql.Row +import org.apache.spark.sql.sedona_sql.UDT.Box3DUDT +import org.apache.spark.sql.types.{DataType, IntegerType, StructType, UDTRegistration} +import org.junit.rules.TemporaryFolder +import org.scalatest.BeforeAndAfter + +class Box3DUDTSuite extends TestBaseScala with BeforeAndAfter { + + val tempFolder: TemporaryFolder = new TemporaryFolder + + before { + tempFolder.create() + } + + after { + tempFolder.delete() + } + + describe("Box3DUDT") { + it("registers Box3D via UdtRegistratorWrapper") { + assert(UDTRegistration.exists(classOf[Box3D].getName)) + } + + it("renders and parses a JSON schema round-trip") { + val schema = new StructType().add("box", new Box3DUDT()) + assert(DataType.fromJson(schema.json).asInstanceOf[StructType] == schema) + } + + it("serializes and deserializes a Box3D round-trip") { + val udt = new Box3DUDT() + val box = new Box3D(-10.0, -20.0, -30.0, 30.0, 40.0, 50.0) + assert(udt.deserialize(udt.serialize(box)) == box) + } + + it("case object equals a fresh instance") { + val instance = new Box3DUDT() + assert(Box3DUDT == Box3DUDT) + assert(instance.equals(instance)) + assert(instance.equals(Box3DUDT)) + assert(Box3DUDT.equals(instance)) + assert(instance.hashCode() == Box3DUDT.hashCode()) + } + + it("writes and reads a Box3D column via Parquet") { + val box = new Box3D(1.0, 2.0, 3.0, 4.0, 5.0, 6.0) + val schema = new StructType() + .add("id", IntegerType, nullable = false) + .add("bbox", new Box3DUDT(), nullable = false) + val rdd = sparkSession.sparkContext.parallelize(Seq(Row(1, box))) + val df = sparkSession.createDataFrame(rdd, schema) + + val path = tempFolder.getRoot.getPath + "/box3d-parquet" + df.write.parquet(path) + + val read = sparkSession.read.parquet(path) + val row = read.collect()(0) + assert(row.getAs[Int]("id") == 1) + assert(row.getAs[Box3D]("bbox") == box) + } + } +}