This is an automated email from the ASF dual-hosted git repository.

yihua pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new c758508b62f [HUDI-7769] Fix Hudi CDC read on Spark 3.3.4 and 3.4.3 (#11242)
c758508b62f is described below

commit c758508b62f0617ac95e33a490dde62cc897ab3a
Author: Y Ethan Guo <ethan.guoyi...@gmail.com>
AuthorDate: Thu May 30 09:29:00 2024 -0700

    [HUDI-7769] Fix Hudi CDC read on Spark 3.3.4 and 3.4.3 (#11242)
---
 .../src/main/scala/org/apache/hudi/cdc/CDCRelation.scala          | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/cdc/CDCRelation.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/cdc/CDCRelation.scala
index 311383a9c32..f298efc8ed4 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/cdc/CDCRelation.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/cdc/CDCRelation.scala
@@ -27,6 +27,7 @@ import org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver}
 import org.apache.hudi.exception.HoodieException
 import org.apache.hudi.internal.schema.InternalSchema
 import org.apache.hudi.{AvroConversionUtils, DataSourceReadOptions, HoodieDataSourceHelper, HoodieTableSchema}
+
 import org.apache.spark.internal.Logging
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.catalyst.InternalRow
@@ -50,6 +51,8 @@ class CDCRelation(
     options: Map[String, String]
 ) extends BaseRelation with PrunedFilteredScan with Logging {
 
+  imbueConfigs(sqlContext)
+
   val spark: SparkSession = sqlContext.sparkSession
 
   val (tableAvroSchema, _) = {
@@ -118,6 +121,11 @@ class CDCRelation(
     )
     cdcRdd.asInstanceOf[RDD[InternalRow]]
   }
+
+  def imbueConfigs(sqlContext: SQLContext): Unit = {
+    // Disable vectorized reading for CDC relation
+    sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.enableVectorizedReader", "false")
+  }
 }
 
 object CDCRelation {

Reply via email to