(incubator-gluten) 09/16: [Fix] Using new interface of ParquetFooterReader

changchen Wed, 31 Dec 2025 00:51:16 -0800

This is an automated email from the ASF dual-hosted git repository.

changchen pushed a commit to branch feature/41
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


commit fd8ea678a64bd1201d43835ca003e184837ef84f
Author: Chang chen <[email protected]>
AuthorDate: Tue Dec 16 14:59:57 2025 +0800

    [Fix] Using new interface of ParquetFooterReader
    
    see https://github.com/apache/spark/pull/52384
---
 .../apache/gluten/utils/ParquetMetadataUtils.scala |  4 +-
 .../parquet/GlutenParquetRowIndexSuite.scala       |  2 +-
 .../parquet/ParquetFooterReaderShim.scala          | 42 +++++++++++++++++++++
 .../parquet/ParquetFooterReaderShim.scala          | 42 +++++++++++++++++++++
 .../parquet/ParquetFooterReaderShim.scala          | 42 +++++++++++++++++++++
 .../parquet/ParquetFooterReaderShim.scala          | 42 +++++++++++++++++++++
 .../parquet/ParquetFooterReaderShim.scala          | 42 +++++++++++++++++++++
 .../parquet/ParquetFooterReaderShim.scala          | 43 ++++++++++++++++++++++
 8 files changed, 256 insertions(+), 3 deletions(-)

diff --git 
a/backends-velox/src/main/scala/org/apache/gluten/utils/ParquetMetadataUtils.scala
 
b/backends-velox/src/main/scala/org/apache/gluten/utils/ParquetMetadataUtils.scala
index 6239ab5ad7..ab76cba4aa 100644
--- 
a/backends-velox/src/main/scala/org/apache/gluten/utils/ParquetMetadataUtils.scala
+++ 
b/backends-velox/src/main/scala/org/apache/gluten/utils/ParquetMetadataUtils.scala
@@ -21,7 +21,7 @@ import org.apache.gluten.sql.shims.SparkShimLoader
 
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.execution.datasources.DataSourceUtils
-import 
org.apache.spark.sql.execution.datasources.parquet.{ParquetFooterReader, 
ParquetOptions}
+import 
org.apache.spark.sql.execution.datasources.parquet.{ParquetFooterReaderShim, 
ParquetOptions}
 
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.{FileSystem, LocatedFileStatus, Path}
@@ -135,7 +135,7 @@ object ParquetMetadataUtils extends Logging {
       parquetOptions: ParquetOptions): Option[String] = {
     val footer =
       try {
-        ParquetFooterReader.readFooter(conf, fileStatus, 
ParquetMetadataConverter.NO_FILTER)
+        ParquetFooterReaderShim.readFooter(conf, fileStatus, 
ParquetMetadataConverter.NO_FILTER)
       } catch {
         case e: Exception if ExceptionUtils.hasCause(e, 
classOf[ParquetCryptoRuntimeException]) =>
           return Some("Encrypted Parquet footer detected.")
diff --git 
a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetRowIndexSuite.scala
 
b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetRowIndexSuite.scala
index 5cf41b7a9e..570b6d5e0c 100644
--- 
a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetRowIndexSuite.scala
+++ 
b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetRowIndexSuite.scala
@@ -42,7 +42,7 @@ class GlutenParquetRowIndexSuite extends ParquetRowIndexSuite 
with GlutenSQLTest
   import testImplicits._
 
   private def readRowGroupRowCounts(path: String): Seq[Long] = {
-    ParquetFooterReader
+    ParquetFooterReaderShim
       .readFooter(
         spark.sessionState.newHadoopConf(),
         new Path(path),
diff --git 
a/shims/spark32/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFooterReaderShim.scala
 
b/shims/spark32/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFooterReaderShim.scala
new file mode 100644
index 0000000000..b1419e5e62
--- /dev/null
+++ 
b/shims/spark32/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFooterReaderShim.scala
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.execution.datasources.parquet
+
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.{FileStatus, Path}
+import org.apache.parquet.format.converter.ParquetMetadataConverter
+import org.apache.parquet.hadoop.metadata.ParquetMetadata
+
+/** Shim layer for ParquetFooterReader to maintain compatibility across different Spark versions. */
+object ParquetFooterReaderShim {
+
+  /** @since Spark 4.1 */
+  def readFooter(
+      configuration: Configuration,
+      fileStatus: FileStatus,
+      filter: ParquetMetadataConverter.MetadataFilter): ParquetMetadata = {
+    ParquetFooterReader.readFooter(configuration, fileStatus, filter)
+  }
+
+  /** @since Spark 4.1 */
+  def readFooter(
+      configuration: Configuration,
+      file: Path,
+      filter: ParquetMetadataConverter.MetadataFilter): ParquetMetadata = {
+    ParquetFooterReader.readFooter(configuration, file, filter)
+  }
+}
diff --git 
a/shims/spark33/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFooterReaderShim.scala
 
b/shims/spark33/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFooterReaderShim.scala
new file mode 100644
index 0000000000..b1419e5e62
--- /dev/null
+++ 
b/shims/spark33/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFooterReaderShim.scala
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.execution.datasources.parquet
+
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.{FileStatus, Path}
+import org.apache.parquet.format.converter.ParquetMetadataConverter
+import org.apache.parquet.hadoop.metadata.ParquetMetadata
+
+/** Shim layer for ParquetFooterReader to maintain compatibility across different Spark versions. */
+object ParquetFooterReaderShim {
+
+  /** @since Spark 4.1 */
+  def readFooter(
+      configuration: Configuration,
+      fileStatus: FileStatus,
+      filter: ParquetMetadataConverter.MetadataFilter): ParquetMetadata = {
+    ParquetFooterReader.readFooter(configuration, fileStatus, filter)
+  }
+
+  /** @since Spark 4.1 */
+  def readFooter(
+      configuration: Configuration,
+      file: Path,
+      filter: ParquetMetadataConverter.MetadataFilter): ParquetMetadata = {
+    ParquetFooterReader.readFooter(configuration, file, filter)
+  }
+}
diff --git 
a/shims/spark34/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFooterReaderShim.scala
 
b/shims/spark34/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFooterReaderShim.scala
new file mode 100644
index 0000000000..b1419e5e62
--- /dev/null
+++ 
b/shims/spark34/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFooterReaderShim.scala
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.execution.datasources.parquet
+
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.{FileStatus, Path}
+import org.apache.parquet.format.converter.ParquetMetadataConverter
+import org.apache.parquet.hadoop.metadata.ParquetMetadata
+
+/** Shim layer for ParquetFooterReader to maintain compatibility across different Spark versions. */
+object ParquetFooterReaderShim {
+
+  /** @since Spark 4.1 */
+  def readFooter(
+      configuration: Configuration,
+      fileStatus: FileStatus,
+      filter: ParquetMetadataConverter.MetadataFilter): ParquetMetadata = {
+    ParquetFooterReader.readFooter(configuration, fileStatus, filter)
+  }
+
+  /** @since Spark 4.1 */
+  def readFooter(
+      configuration: Configuration,
+      file: Path,
+      filter: ParquetMetadataConverter.MetadataFilter): ParquetMetadata = {
+    ParquetFooterReader.readFooter(configuration, file, filter)
+  }
+}
diff --git 
a/shims/spark35/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFooterReaderShim.scala
 
b/shims/spark35/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFooterReaderShim.scala
new file mode 100644
index 0000000000..b1419e5e62
--- /dev/null
+++ 
b/shims/spark35/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFooterReaderShim.scala
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.execution.datasources.parquet
+
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.{FileStatus, Path}
+import org.apache.parquet.format.converter.ParquetMetadataConverter
+import org.apache.parquet.hadoop.metadata.ParquetMetadata
+
+/** Shim layer for ParquetFooterReader to maintain compatibility across different Spark versions. */
+object ParquetFooterReaderShim {
+
+  /** @since Spark 4.1 */
+  def readFooter(
+      configuration: Configuration,
+      fileStatus: FileStatus,
+      filter: ParquetMetadataConverter.MetadataFilter): ParquetMetadata = {
+    ParquetFooterReader.readFooter(configuration, fileStatus, filter)
+  }
+
+  /** @since Spark 4.1 */
+  def readFooter(
+      configuration: Configuration,
+      file: Path,
+      filter: ParquetMetadataConverter.MetadataFilter): ParquetMetadata = {
+    ParquetFooterReader.readFooter(configuration, file, filter)
+  }
+}
diff --git 
a/shims/spark40/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFooterReaderShim.scala
 
b/shims/spark40/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFooterReaderShim.scala
new file mode 100644
index 0000000000..b1419e5e62
--- /dev/null
+++ 
b/shims/spark40/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFooterReaderShim.scala
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.execution.datasources.parquet
+
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.{FileStatus, Path}
+import org.apache.parquet.format.converter.ParquetMetadataConverter
+import org.apache.parquet.hadoop.metadata.ParquetMetadata
+
+/** Shim layer for ParquetFooterReader to maintain compatibility across different Spark versions. */
+object ParquetFooterReaderShim {
+
+  /** @since Spark 4.1 */
+  def readFooter(
+      configuration: Configuration,
+      fileStatus: FileStatus,
+      filter: ParquetMetadataConverter.MetadataFilter): ParquetMetadata = {
+    ParquetFooterReader.readFooter(configuration, fileStatus, filter)
+  }
+
+  /** @since Spark 4.1 */
+  def readFooter(
+      configuration: Configuration,
+      file: Path,
+      filter: ParquetMetadataConverter.MetadataFilter): ParquetMetadata = {
+    ParquetFooterReader.readFooter(configuration, file, filter)
+  }
+}
diff --git 
a/shims/spark41/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFooterReaderShim.scala
 
b/shims/spark41/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFooterReaderShim.scala
new file mode 100644
index 0000000000..f4cc013ad4
--- /dev/null
+++ 
b/shims/spark41/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFooterReaderShim.scala
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.execution.datasources.parquet
+
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.{FileStatus, Path}
+import org.apache.parquet.format.converter.ParquetMetadataConverter
+import org.apache.parquet.hadoop.metadata.ParquetMetadata
+import org.apache.parquet.hadoop.util.HadoopInputFile
+
+/** Shim layer for ParquetFooterReader to maintain compatibility across different Spark versions. */
+object ParquetFooterReaderShim {
+
+  /** @since Spark 4.1 */
+  def readFooter(
+      configuration: Configuration,
+      fileStatus: FileStatus,
+      filter: ParquetMetadataConverter.MetadataFilter): ParquetMetadata = {
+    ParquetFooterReader.readFooter(HadoopInputFile.fromStatus(fileStatus, 
configuration), filter)
+  }
+
+  /** @since Spark 4.1 */
+  def readFooter(
+      configuration: Configuration,
+      file: Path,
+      filter: ParquetMetadataConverter.MetadataFilter): ParquetMetadata = {
+    ParquetFooterReader.readFooter(HadoopInputFile.fromPath(file, 
configuration), filter)
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(incubator-gluten) 09/16: [Fix] Using new interface of ParquetFooterReader

Reply via email to