Skip to content

Commit 64570db

Browse files
authored
[SEDONA-478] Make Sedona geometry functions and spatial join working without GeoTools (#1398)
1 parent c35efdb commit 64570db

17 files changed

+193
-54
lines changed

spark/common/src/main/scala/org/apache/sedona/sql/RasterRegistrator.scala

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -19,26 +19,14 @@
1919
package org.apache.sedona.sql
2020

2121
import org.apache.sedona.sql.UDF.RasterUdafCatalog
22+
import org.apache.sedona.sql.utils.GeoToolsCoverageAvailability.{gridClassName, isGeoToolsAvailable}
2223
import org.apache.spark.sql.catalyst.FunctionIdentifier
2324
import org.apache.spark.sql.sedona_sql.UDT.RasterUdtRegistratorWrapper
2425
import org.apache.spark.sql.{SparkSession, functions}
2526
import org.slf4j.{Logger, LoggerFactory}
2627

2728
object RasterRegistrator {
2829
val logger: Logger = LoggerFactory.getLogger(getClass)
29-
private val gridClassName = "org.geotools.coverage.grid.GridCoverage2D"
30-
31-
// Helper method to check if GridCoverage2D is available
32-
private def isGeoToolsAvailable: Boolean = {
33-
try {
34-
Class.forName(gridClassName, true, Thread.currentThread().getContextClassLoader)
35-
true
36-
} catch {
37-
case _: ClassNotFoundException =>
38-
logger.warn("Geotools was not found on the classpath. Raster operations will not be available.")
39-
false
40-
}
41-
}
4230

4331
def registerAll(sparkSession: SparkSession): Unit = {
4432
if (isGeoToolsAvailable) {
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.sedona.sql.utils
20+
21+
import org.apache.sedona.sql.RasterRegistrator.logger
22+
23+
/**
24+
* A helper object to check if GeoTools GridCoverage2D is available on the classpath.
25+
*/
26+
object GeoToolsCoverageAvailability {

  /** Fully qualified name of the GeoTools class whose presence is probed. */
  val gridClassName: String = "org.geotools.coverage.grid.GridCoverage2D"

  /**
   * Whether [[gridClassName]] can be loaded and initialized.
   *
   * The probe is a `lazy val`, so once it has produced a result the class lookup is not
   * repeated. A warning is logged whenever the probe concludes that GeoTools is unusable.
   *
   * @return `true` when the class loads and initializes, `false` when it is missing or
   *         fails to link/initialize.
   */
  lazy val isGeoToolsAvailable: Boolean = {
    // The thread's context class loader may be null, in which case Class.forName would only
    // consult the bootstrap loader and always miss GeoTools; fall back to the loader that
    // loaded this object.
    val contextLoader = Thread.currentThread().getContextClassLoader
    val loader = if (contextLoader != null) contextLoader else getClass.getClassLoader
    try {
      Class.forName(gridClassName, true, loader)
      true
    } catch {
      case _: ClassNotFoundException =>
        logger.warn("Geotools was not found on the classpath. Raster operations will not be available.")
        false
      // Class.forName with initialize = true can also fail with NoClassDefFoundError or
      // ExceptionInInitializerError (both LinkageError) when GeoTools is only partially
      // present. Report that as "unavailable" instead of letting the Error escape.
      case e: LinkageError =>
        logger.warn("Geotools was found on the classpath but could not be loaded. Raster operations will not be available.", e)
        false
    }
  }
}
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.spark.sql.sedona_sql.expressions
20+
21+
import org.apache.spark.sql.catalyst.InternalRow
22+
import org.apache.spark.sql.catalyst.expressions.Expression
23+
import org.apache.spark.sql.catalyst.util.ArrayData
24+
import org.apache.spark.sql.sedona_sql.UDT.RasterUDT
25+
import org.apache.spark.sql.sedona_sql.expressions.raster.implicits.{RasterEnhancer, RasterInputExpressionEnhancer}
26+
import org.apache.spark.sql.types.{ArrayType, DataTypes, UserDefinedType}
27+
28+
import scala.reflect.runtime.universe.{Type, typeOf}
29+
import org.geotools.coverage.grid.GridCoverage2D
30+
31+
/**
 * Raster-specific pieces of the type inference machinery: implicit `InferrableType`
 * evidence for `GridCoverage2D` and `Array[GridCoverage2D]`, type predicates, the
 * matching Spark data types, and the extractor/serializer functions.
 *
 * This object references `GridCoverage2D` directly.
 */
object InferrableRasterTypes {
  // Implicit evidence for a single raster and for an array of rasters.
  implicit val gridCoverage2DInstance: InferrableType[GridCoverage2D] =
    new InferrableType[GridCoverage2D] {}
  implicit val gridCoverage2DArrayInstance: InferrableType[Array[GridCoverage2D]] =
    new InferrableType[Array[GridCoverage2D]] {}

  /** True when `t` is the `GridCoverage2D` type. */
  def isRasterType(t: Type): Boolean = {
    val rasterType = typeOf[GridCoverage2D]
    t =:= rasterType
  }

  /** True when `t` is the `Array[GridCoverage2D]` type. */
  def isRasterArrayType(t: Type): Boolean = {
    val rasterArrayType = typeOf[Array[GridCoverage2D]]
    t =:= rasterArrayType
  }

  /** Spark data type used for a single raster. */
  val rasterUDT: UserDefinedType[_] = RasterUDT

  /** Spark array type whose elements are rasters. */
  val rasterUDTArray: ArrayType = DataTypes.createArrayType(RasterUDT)

  /** Evaluates `expr` against `input` and returns the result of its `toRaster` conversion. */
  def rasterExtractor(expr: Expression)(input: InternalRow): Any = {
    val raster = expr.toRaster(input)
    raster
  }

  /**
   * Serializes a single raster result.
   *
   * @param output a `GridCoverage2D`, or null
   * @return the value produced by `serialize`, or null when `output` is null
   */
  def rasterSerializer(output: Any): Any = {
    if (output == null) {
      null
    } else {
      val coverage = output.asInstanceOf[GridCoverage2D]
      coverage.serialize
    }
  }

  /**
   * Serializes an array of rasters into Spark `ArrayData`.
   *
   * Each raster is disposed right after it has been serialized.
   *
   * @param output an `Array[GridCoverage2D]`, or null
   * @return `ArrayData` holding the serialized rasters, or null when `output` is null
   */
  def rasterArraySerializer(output: Any): Any = {
    if (output == null) {
      null
    } else {
      val coverages = output.asInstanceOf[Array[GridCoverage2D]]
      val payloads = coverages.map { coverage =>
        val bytes = coverage.serialize
        coverage.dispose(true)
        bytes
      }
      ArrayData.toArrayData(payloads)
    }
  }
}

spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/InferredExpression.scala

Lines changed: 12 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,11 @@ import org.apache.spark.sql.catalyst.InternalRow
2222
import org.apache.spark.sql.catalyst.expressions.{Expression, ImplicitCastInputTypes}
2323
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
2424
import org.apache.spark.sql.catalyst.util.ArrayData
25-
import org.apache.spark.sql.sedona_sql.UDT.{GeometryUDT, RasterUDT}
25+
import org.apache.spark.sql.sedona_sql.UDT.GeometryUDT
2626
import org.apache.spark.sql.types.{AbstractDataType, BinaryType, BooleanType, DataType, DataTypes, DoubleType, IntegerType, LongType, StringType}
2727
import org.apache.spark.unsafe.types.UTF8String
2828
import org.locationtech.jts.geom.Geometry
2929
import org.apache.spark.sql.sedona_sql.expressions.implicits._
30-
import org.apache.spark.sql.sedona_sql.expressions.raster.implicits._
31-
import org.geotools.coverage.grid.GridCoverage2D
3230

3331
import scala.collection.convert.ImplicitConversions.`collection AsScalaIterable`
3432
import scala.reflect.runtime.universe.TypeTag
@@ -75,14 +73,10 @@ abstract class InferredExpression(fSeq: InferrableFunction *)
7573
// This is a compile time type shield for the types we are able to infer. Anything
7674
// other than these types will cause a compilation error. This is the Scala
7775
// 2 way of making a union type.
78-
sealed class InferrableType[T: TypeTag]
76+
class InferrableType[T: TypeTag]
7977
object InferrableType {
8078
implicit val geometryInstance: InferrableType[Geometry] =
8179
new InferrableType[Geometry] {}
82-
implicit val gridCoverage2DInstance: InferrableType[GridCoverage2D] =
83-
new InferrableType[GridCoverage2D] {}
84-
implicit val gridCoverage2DArrayInstance: InferrableType[Array[GridCoverage2D]] =
85-
new InferrableType[Array[GridCoverage2D]] {}
8680
implicit val geometryArrayInstance: InferrableType[Array[Geometry]] =
8781
new InferrableType[Array[Geometry]] {}
8882
implicit val javaDoubleInstance: InferrableType[java.lang.Double] =
@@ -127,8 +121,8 @@ object InferredTypes {
127121
expr => input => expr.toGeometry(input)
128122
} else if (t =:= typeOf[Array[Geometry]]) {
129123
expr => input => expr.toGeometryArray(input)
130-
} else if (t =:= typeOf[GridCoverage2D]) {
131-
expr => input => expr.toRaster(input)
124+
} else if (InferredRasterExpression.isRasterType(t)) {
125+
InferredRasterExpression.rasterExtractor
132126
} else if (t =:= typeOf[Array[Double]]) {
133127
expr => input => expr.eval(input).asInstanceOf[ArrayData].toDoubleArray()
134128
} else if (t =:= typeOf[String]) {
@@ -156,14 +150,8 @@ object InferredTypes {
156150
} else {
157151
null
158152
}
159-
} else if (t =:= typeOf[GridCoverage2D]) {
160-
output => {
161-
if (output != null) {
162-
output.asInstanceOf[GridCoverage2D].serialize
163-
} else {
164-
null
165-
}
166-
}
153+
} else if (InferredRasterExpression.isRasterType(t)) {
154+
InferredRasterExpression.rasterSerializer
167155
} else if (t =:= typeOf[String]) {
168156
output =>
169157
if (output != null) {
@@ -194,19 +182,8 @@ object InferredTypes {
194182
} else {
195183
null
196184
}
197-
} else if (t =:= typeOf[Array[GridCoverage2D]]) {
198-
output =>
199-
if (output != null) {
200-
val rasters = output.asInstanceOf[Array[GridCoverage2D]]
201-
val serialized = rasters.map { raster =>
202-
val serialized = raster.serialize
203-
raster.dispose(true)
204-
serialized
205-
}
206-
ArrayData.toArrayData(serialized)
207-
} else {
208-
null
209-
}
185+
} else if (InferredRasterExpression.isRasterArrayType(t)) {
186+
InferredRasterExpression.rasterArraySerializer
210187
} else if (t =:= typeOf[Option[Boolean]]) {
211188
output =>
212189
if (output != null) {
@@ -224,10 +201,10 @@ object InferredTypes {
224201
GeometryUDT
225202
} else if (t =:= typeOf[Array[Geometry]] || t =:= typeOf[java.util.List[Geometry]]) {
226203
DataTypes.createArrayType(GeometryUDT)
227-
} else if (t =:= typeOf[GridCoverage2D]) {
228-
RasterUDT
229-
} else if (t =:= typeOf[Array[GridCoverage2D]]) {
230-
DataTypes.createArrayType(RasterUDT)
204+
} else if (InferredRasterExpression.isRasterType(t)) {
205+
InferredRasterExpression.rasterUDT
206+
} else if (InferredRasterExpression.isRasterArrayType(t)) {
207+
InferredRasterExpression.rasterUDTArray
231208
} else if (t =:= typeOf[java.lang.Double]) {
232209
DoubleType
233210
} else if (t =:= typeOf[java.lang.Integer]) {
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.spark.sql.sedona_sql.expressions
20+
21+
import org.apache.sedona.sql.utils.GeoToolsCoverageAvailability.isGeoToolsAvailable
22+
import org.apache.spark.sql.catalyst.InternalRow
23+
import org.apache.spark.sql.catalyst.expressions.Expression
24+
import org.apache.spark.sql.types.{ArrayType, UserDefinedType}
25+
26+
import scala.reflect.runtime.universe.{Type, typeOf}
27+
28+
/**
 * Guarded entry point to the raster helpers in `InferrableRasterTypes`.
 *
 * Every member consults `isGeoToolsAvailable` first and only touches
 * `InferrableRasterTypes` when that flag is true; otherwise it yields a neutral
 * fallback (`false`, `null`, or a function returning `null`).
 */
object InferredRasterExpression {

  /** True only when GeoTools is available and `t` is the raster type. */
  def isRasterType(t: Type): Boolean = {
    if (isGeoToolsAvailable) InferrableRasterTypes.isRasterType(t) else false
  }

  /** True only when GeoTools is available and `t` is the raster array type. */
  def isRasterArrayType(t: Type): Boolean = {
    if (isGeoToolsAvailable) InferrableRasterTypes.isRasterArrayType(t) else false
  }

  /** The raster UDT, or null when GeoTools is not available. */
  def rasterUDT: UserDefinedType[_] = {
    if (!isGeoToolsAvailable) null else InferrableRasterTypes.rasterUDT
  }

  /** The array-of-raster data type, or null when GeoTools is not available. */
  def rasterUDTArray: ArrayType = {
    if (!isGeoToolsAvailable) null else InferrableRasterTypes.rasterUDTArray
  }

  /** Extracts a raster from an expression; yields null for every input without GeoTools. */
  val rasterExtractor: Expression => InternalRow => Any = {
    if (!isGeoToolsAvailable) {
      _ => _ => null
    } else {
      InferrableRasterTypes.rasterExtractor _
    }
  }

  /** Serializes a single raster; yields null for every input without GeoTools. */
  val rasterSerializer: Any => Any = {
    if (!isGeoToolsAvailable) {
      (_: Any) => null
    } else {
      InferrableRasterTypes.rasterSerializer _
    }
  }

  /** Serializes an array of rasters; yields null for every input without GeoTools. */
  val rasterArraySerializer: Any => Any = {
    if (!isGeoToolsAvailable) {
      (_: Any) => null
    } else {
      InferrableRasterTypes.rasterArraySerializer _
    }
  }
}

spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/implicits.scala

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,10 @@ package org.apache.spark.sql.sedona_sql.expressions
2222
import org.apache.sedona.sql.utils.GeometrySerializer
2323
import org.apache.spark.sql.catalyst.InternalRow
2424
import org.apache.spark.sql.catalyst.expressions.Expression
25-
import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData}
26-
import org.apache.spark.sql.types.{ByteType, DataTypes}
25+
import org.apache.spark.sql.catalyst.util.ArrayData
2726
import org.apache.spark.unsafe.types.UTF8String
2827
import org.locationtech.jts.geom.{Geometry, GeometryFactory, Point}
2928

30-
import java.util
31-
3229
object implicits {
3330

3431
implicit class InputExpressionEnhancer(inputExpression: Expression) {

spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/raster/GeometryFunctions.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import org.apache.sedona.common.raster.GeometryFunctions
2222
import org.apache.spark.sql.catalyst.expressions.Expression
2323
import org.apache.spark.sql.sedona_sql.expressions.InferredExpression
2424
import org.apache.spark.sql.sedona_sql.expressions.InferrableFunctionConverter._
25+
import org.apache.spark.sql.sedona_sql.expressions.InferrableRasterTypes._
2526

2627
case class RS_ConvexHull(inputExpressions: Seq[Expression]) extends InferredExpression(GeometryFunctions.convexHull _) {
2728
protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) = {

spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/raster/MapAlgebra.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import org.apache.spark.sql.catalyst.InternalRow
2323
import org.apache.spark.sql.catalyst.expressions.Expression
2424
import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData}
2525
import org.apache.spark.sql.sedona_sql.expressions.InferrableFunctionConverter._
26+
import org.apache.spark.sql.sedona_sql.expressions.InferrableRasterTypes._
2627
import org.apache.spark.sql.sedona_sql.expressions.InferredExpression
2728

2829
/// Calculate Normalized Difference between two bands

spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/raster/PixelFunctionEditors.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ package org.apache.spark.sql.sedona_sql.expressions.raster
2121
import org.apache.sedona.common.raster.PixelFunctionEditors
2222
import org.apache.spark.sql.catalyst.expressions.Expression
2323
import org.apache.spark.sql.sedona_sql.expressions.InferrableFunctionConverter._
24+
import org.apache.spark.sql.sedona_sql.expressions.InferrableRasterTypes._
2425
import org.apache.spark.sql.sedona_sql.expressions.InferredExpression
2526

2627
case class RS_SetValues(inputExpressions: Seq[Expression]) extends InferredExpression(

spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/raster/PixelFunctions.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
2626
import org.apache.spark.sql.catalyst.util.GenericArrayData
2727
import org.apache.spark.sql.sedona_sql.UDT.{GeometryUDT, RasterUDT}
2828
import org.apache.spark.sql.sedona_sql.expressions.InferrableFunctionConverter._
29+
import org.apache.spark.sql.sedona_sql.expressions.InferrableRasterTypes._
2930
import org.apache.spark.sql.sedona_sql.expressions.InferredExpression
3031
import org.apache.spark.sql.sedona_sql.expressions.raster.implicits.RasterInputExpressionEnhancer
3132
import org.apache.spark.sql.types.{AbstractDataType, ArrayType, DataType, DoubleType, IntegerType, StructType}

0 commit comments

Comments
 (0)