This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new 1b7a804127 [GH-2704] Disable TransformNestedUDTParquet on Spark 4.1+ (#2703)
1b7a804127 is described below
commit 1b7a8041274d75b5280b4eb7d1d1d0e7d1f21111
Author: James Willis <[email protected]>
AuthorDate: Tue Mar 10 12:16:17 2026 -0700
[GH-2704] Disable TransformNestedUDTParquet on Spark 4.1+ (#2703)
---
.../org/apache/sedona/spark/SedonaContext.scala | 20 +++++++++++++++++---
1 file changed, 17 insertions(+), 3 deletions(-)
diff --git a/spark/common/src/main/scala/org/apache/sedona/spark/SedonaContext.scala b/spark/common/src/main/scala/org/apache/sedona/spark/SedonaContext.scala
index b0e46cf6e9..b13f93594c 100644
--- a/spark/common/src/main/scala/org/apache/sedona/spark/SedonaContext.scala
+++ b/spark/common/src/main/scala/org/apache/sedona/spark/SedonaContext.scala
@@ -42,12 +42,26 @@ class InternalApi(
object SedonaContext {
- private def customOptimizationsWithSession(sparkSession: SparkSession) =
- Seq(
- new TransformNestedUDTParquet(sparkSession),
+ private def customOptimizationsWithSession(sparkSession: SparkSession) = {
+ val optimizations = Seq(
new SpatialFilterPushDownForGeoParquet(sparkSession),
new SpatialTemporalFilterPushDownForStacScan(sparkSession))
+ val versionParts =
+ sparkSession.version
+ .split('.')
+ .map(s => scala.util.Try(s.takeWhile(_.isDigit).toInt).getOrElse(0))
+ val major = versionParts.lift(0).getOrElse(0)
+ val minor = versionParts.lift(1).getOrElse(0)
+ if (major < 4 || (major == 4 && minor < 1)) {
+ // SPARK-48942: nested UDTs crash the vectorized Parquet reader on Spark < 4.1.
+ // SPARK-52651 fixes this in Spark 4.1+ by recursively stripping UDTs in ColumnVector.
+ new TransformNestedUDTParquet(sparkSession) +: optimizations
+ } else {
+ optimizations
+ }
+ }
+
def create(sqlContext: SQLContext): SQLContext = {
create(sqlContext.sparkSession)
sqlContext