This is an automated email from the ASF dual-hosted git repository. changchen pushed a commit to branch feature/41 in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
commit fd8ea678a64bd1201d43835ca003e184837ef84f Author: Chang chen <[email protected]> AuthorDate: Tue Dec 16 14:59:57 2025 +0800 [Fix] Using new interface of ParquetFooterReader see https://github.com/apache/spark/pull/52384 --- .../apache/gluten/utils/ParquetMetadataUtils.scala | 4 +- .../parquet/GlutenParquetRowIndexSuite.scala | 2 +- .../parquet/ParquetFooterReaderShim.scala | 42 +++++++++++++++++++++ .../parquet/ParquetFooterReaderShim.scala | 42 +++++++++++++++++++++ .../parquet/ParquetFooterReaderShim.scala | 42 +++++++++++++++++++++ .../parquet/ParquetFooterReaderShim.scala | 42 +++++++++++++++++++++ .../parquet/ParquetFooterReaderShim.scala | 42 +++++++++++++++++++++ .../parquet/ParquetFooterReaderShim.scala | 43 ++++++++++++++++++++++ 8 files changed, 256 insertions(+), 3 deletions(-) diff --git a/backends-velox/src/main/scala/org/apache/gluten/utils/ParquetMetadataUtils.scala b/backends-velox/src/main/scala/org/apache/gluten/utils/ParquetMetadataUtils.scala index 6239ab5ad7..ab76cba4aa 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/utils/ParquetMetadataUtils.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/utils/ParquetMetadataUtils.scala @@ -21,7 +21,7 @@ import org.apache.gluten.sql.shims.SparkShimLoader import org.apache.spark.internal.Logging import org.apache.spark.sql.execution.datasources.DataSourceUtils -import org.apache.spark.sql.execution.datasources.parquet.{ParquetFooterReader, ParquetOptions} +import org.apache.spark.sql.execution.datasources.parquet.{ParquetFooterReaderShim, ParquetOptions} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, LocatedFileStatus, Path} @@ -135,7 +135,7 @@ object ParquetMetadataUtils extends Logging { parquetOptions: ParquetOptions): Option[String] = { val footer = try { - ParquetFooterReader.readFooter(conf, fileStatus, ParquetMetadataConverter.NO_FILTER) + ParquetFooterReaderShim.readFooter(conf, fileStatus, ParquetMetadataConverter.NO_FILTER) } catch { case e: Exception if ExceptionUtils.hasCause(e, classOf[ParquetCryptoRuntimeException]) => return Some("Encrypted Parquet footer detected.") diff --git a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetRowIndexSuite.scala b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetRowIndexSuite.scala index 5cf41b7a9e..570b6d5e0c 100644 --- a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetRowIndexSuite.scala +++ b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetRowIndexSuite.scala @@ -42,7 +42,7 @@ class GlutenParquetRowIndexSuite extends ParquetRowIndexSuite with GlutenSQLTest import testImplicits._ private def readRowGroupRowCounts(path: String): Seq[Long] = { - ParquetFooterReader + ParquetFooterReaderShim .readFooter( spark.sessionState.newHadoopConf(), new Path(path), diff --git a/shims/spark32/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFooterReaderShim.scala b/shims/spark32/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFooterReaderShim.scala new file mode 100644 index 0000000000..b1419e5e62 --- /dev/null +++ b/shims/spark32/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFooterReaderShim.scala @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.execution.datasources.parquet + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileStatus, Path} +import org.apache.parquet.format.converter.ParquetMetadataConverter +import org.apache.parquet.hadoop.metadata.ParquetMetadata + +/** Shim layer for ParquetFooterReader to maintain compatibility across different Spark versions. */ +object ParquetFooterReaderShim { + + /** @since Spark 4.1 */ + def readFooter( + configuration: Configuration, + fileStatus: FileStatus, + filter: ParquetMetadataConverter.MetadataFilter): ParquetMetadata = { + ParquetFooterReader.readFooter(configuration, fileStatus, filter) + } + + /** @since Spark 4.1 */ + def readFooter( + configuration: Configuration, + file: Path, + filter: ParquetMetadataConverter.MetadataFilter): ParquetMetadata = { + ParquetFooterReader.readFooter(configuration, file, filter) + } +} diff --git a/shims/spark33/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFooterReaderShim.scala b/shims/spark33/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFooterReaderShim.scala new file mode 100644 index 0000000000..b1419e5e62 --- /dev/null +++ b/shims/spark33/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFooterReaderShim.scala @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.execution.datasources.parquet + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileStatus, Path} +import org.apache.parquet.format.converter.ParquetMetadataConverter +import org.apache.parquet.hadoop.metadata.ParquetMetadata + +/** Shim layer for ParquetFooterReader to maintain compatibility across different Spark versions. */ +object ParquetFooterReaderShim { + + /** @since Spark 4.1 */ + def readFooter( + configuration: Configuration, + fileStatus: FileStatus, + filter: ParquetMetadataConverter.MetadataFilter): ParquetMetadata = { + ParquetFooterReader.readFooter(configuration, fileStatus, filter) + } + + /** @since Spark 4.1 */ + def readFooter( + configuration: Configuration, + file: Path, + filter: ParquetMetadataConverter.MetadataFilter): ParquetMetadata = { + ParquetFooterReader.readFooter(configuration, file, filter) + } +} diff --git a/shims/spark34/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFooterReaderShim.scala b/shims/spark34/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFooterReaderShim.scala new file mode 100644 index 0000000000..b1419e5e62 --- /dev/null +++ b/shims/spark34/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFooterReaderShim.scala @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.execution.datasources.parquet + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileStatus, Path} +import org.apache.parquet.format.converter.ParquetMetadataConverter +import org.apache.parquet.hadoop.metadata.ParquetMetadata + +/** Shim layer for ParquetFooterReader to maintain compatibility across different Spark versions. */ +object ParquetFooterReaderShim { + + /** @since Spark 4.1 */ + def readFooter( + configuration: Configuration, + fileStatus: FileStatus, + filter: ParquetMetadataConverter.MetadataFilter): ParquetMetadata = { + ParquetFooterReader.readFooter(configuration, fileStatus, filter) + } + + /** @since Spark 4.1 */ + def readFooter( + configuration: Configuration, + file: Path, + filter: ParquetMetadataConverter.MetadataFilter): ParquetMetadata = { + ParquetFooterReader.readFooter(configuration, file, filter) + } +} diff --git a/shims/spark35/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFooterReaderShim.scala b/shims/spark35/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFooterReaderShim.scala new file mode 100644 index 0000000000..b1419e5e62 --- /dev/null +++ b/shims/spark35/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFooterReaderShim.scala @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.execution.datasources.parquet + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileStatus, Path} +import org.apache.parquet.format.converter.ParquetMetadataConverter +import org.apache.parquet.hadoop.metadata.ParquetMetadata + +/** Shim layer for ParquetFooterReader to maintain compatibility across different Spark versions. */ +object ParquetFooterReaderShim { + + /** @since Spark 4.1 */ + def readFooter( + configuration: Configuration, + fileStatus: FileStatus, + filter: ParquetMetadataConverter.MetadataFilter): ParquetMetadata = { + ParquetFooterReader.readFooter(configuration, fileStatus, filter) + } + + /** @since Spark 4.1 */ + def readFooter( + configuration: Configuration, + file: Path, + filter: ParquetMetadataConverter.MetadataFilter): ParquetMetadata = { + ParquetFooterReader.readFooter(configuration, file, filter) + } +} diff --git a/shims/spark40/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFooterReaderShim.scala b/shims/spark40/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFooterReaderShim.scala new file mode 100644 index 0000000000..b1419e5e62 --- /dev/null +++ b/shims/spark40/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFooterReaderShim.scala @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.execution.datasources.parquet + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileStatus, Path} +import org.apache.parquet.format.converter.ParquetMetadataConverter +import org.apache.parquet.hadoop.metadata.ParquetMetadata + +/** Shim layer for ParquetFooterReader to maintain compatibility across different Spark versions. */ +object ParquetFooterReaderShim { + + /** @since Spark 4.1 */ + def readFooter( + configuration: Configuration, + fileStatus: FileStatus, + filter: ParquetMetadataConverter.MetadataFilter): ParquetMetadata = { + ParquetFooterReader.readFooter(configuration, fileStatus, filter) + } + + /** @since Spark 4.1 */ + def readFooter( + configuration: Configuration, + file: Path, + filter: ParquetMetadataConverter.MetadataFilter): ParquetMetadata = { + ParquetFooterReader.readFooter(configuration, file, filter) + } +} diff --git a/shims/spark41/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFooterReaderShim.scala b/shims/spark41/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFooterReaderShim.scala new file mode 100644 index 0000000000..f4cc013ad4 --- /dev/null +++ b/shims/spark41/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFooterReaderShim.scala @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.execution.datasources.parquet + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileStatus, Path} +import org.apache.parquet.format.converter.ParquetMetadataConverter +import org.apache.parquet.hadoop.metadata.ParquetMetadata +import org.apache.parquet.hadoop.util.HadoopInputFile + +/** Shim layer for ParquetFooterReader to maintain compatibility across different Spark versions. */ +object ParquetFooterReaderShim { + + /** @since Spark 4.1 */ + def readFooter( + configuration: Configuration, + fileStatus: FileStatus, + filter: ParquetMetadataConverter.MetadataFilter): ParquetMetadata = { + ParquetFooterReader.readFooter(HadoopInputFile.fromStatus(fileStatus, configuration), filter) + } + + /** @since Spark 4.1 */ + def readFooter( + configuration: Configuration, + file: Path, + filter: ParquetMetadataConverter.MetadataFilter): ParquetMetadata = { + ParquetFooterReader.readFooter(HadoopInputFile.fromPath(file, configuration), filter) + } +} --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
