This is an automated email from the ASF dual-hosted git repository.
yihua pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new d78b702c64d5 fix(spark): use HoodieStorageUtils factory in Spark 4.1
legacy parquet read (#18785)
d78b702c64d5 is described below
commit d78b702c64d5f5dc21cbcd6f64d6f911c620c797
Author: Y Ethan Guo <[email protected]>
AuthorDate: Tue May 19 21:41:29 2026 -0700
fix(spark): use HoodieStorageUtils factory in Spark 4.1 legacy parquet read
(#18785)
---
.../datasources/parquet/Spark41LegacyHoodieParquetFileFormat.scala | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/hudi-spark-datasource/hudi-spark4.1.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark41LegacyHoodieParquetFileFormat.scala b/hudi-spark-datasource/hudi-spark4.1.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark41LegacyHoodieParquetFileFormat.scala
index 014bdae271ce..91b085b693aa 100644
--- a/hudi-spark-datasource/hudi-spark4.1.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark41LegacyHoodieParquetFileFormat.scala
+++ b/hudi-spark-datasource/hudi-spark4.1.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark41LegacyHoodieParquetFileFormat.scala
@@ -24,10 +24,11 @@ import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion
import org.apache.hudi.common.util.InternalSchemaCache
import org.apache.hudi.common.util.StringUtils.isNullOrEmpty
import org.apache.hudi.common.util.collection.Pair
+import org.apache.hudi.hadoop.fs.HadoopFSUtils
import org.apache.hudi.internal.schema.InternalSchema
import org.apache.hudi.internal.schema.action.InternalSchemaMerger
import org.apache.hudi.internal.schema.utils.{InternalSchemaUtils, SerDeHelper}
-import org.apache.hudi.storage.hadoop.HoodieHadoopStorage
+import org.apache.hudi.storage.HoodieStorageUtils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.mapred.FileSplit
@@ -176,7 +177,7 @@ class Spark41LegacyHoodieParquetFileFormat(private val shouldAppendPartitionValu
val fileSchema = if (shouldUseInternalSchema) {
val commitInstantTime = FSUtils.getCommitTime(filePath.getName).toLong;
val validCommits = sharedConf.get(SparkInternalSchemaConverter.HOODIE_VALID_COMMITS_LIST)
- val storage = new HoodieHadoopStorage(tablePath, sharedConf)
+ val storage = HoodieStorageUtils.getStorage(tablePath, HadoopFSUtils.getStorageConf(sharedConf))
//TODO: HARDCODED TIMELINE OBJECT
val layout = TimelineLayout.fromVersion(TimelineLayoutVersion.CURR_LAYOUT_VERSION)
InternalSchemaCache.getInternalSchemaByVersionId(