bvaradar commented on code in PR #8303:
URL: https://github.com/apache/hudi/pull/8303#discussion_r1162253186


##########
docker/demo/sparksql-batch2.commands:
##########
@@ -26,7 +26,8 @@ spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, 
open, close  from s
 spark.sql("select symbol, max(ts) from stock_ticks_mor_rt group by symbol 
HAVING symbol = 'GOOG'").show(100, false)
 spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close  from 
stock_ticks_mor_rt where  symbol = 'GOOG'").show(100, false)
 
- // Copy-On-Write Bootstrapped table
+// Copy-On-Write Bootstrapped table
+spark.sql("set hoodie.bootstrap.data.queries.only=false")

Review Comment:
   Are there any integration tests for bootstrap where we test with this feature 
on?



##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala:
##########
@@ -100,7 +101,7 @@ class DefaultSource extends RelationProvider
       )
     } else {
       Map()
-    }) ++ DataSourceOptionsHelper.parametersWithReadDefaults(optParams)
+    }) ++ 
DataSourceOptionsHelper.parametersWithReadDefaults(sqlContext.getAllConfs.filter(k
 => k._1.startsWith("hoodie.")) ++ optParams)

Review Comment:
   Why is this needed?



##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala:
##########
@@ -270,6 +271,21 @@ object DefaultSource {
     }
   }
 
+  private def resolveHoodieBootstrapRelation(sqlContext: SQLContext,
+                                             globPaths: Seq[Path],
+                                             userSchema: Option[StructType],
+                                             metaClient: HoodieTableMetaClient,
+                                             parameters: Map[String, String]): 
BaseRelation = {
+    val enableFileIndex = HoodieSparkConfUtils.getConfigValue(parameters, 
sqlContext.sparkSession.sessionState.conf,
+      ENABLE_HOODIE_FILE_INDEX.key, 
ENABLE_HOODIE_FILE_INDEX.defaultValue.toString).toBoolean
+    if (!enableFileIndex || globPaths.nonEmpty || 
parameters.getOrElse(HoodieBootstrapConfig.DATA_QUERIES_ONLY.key(), "true") != 
"true") {

Review Comment:
   @jonvex: Wouldn't this change cause user queries that include hoodie 
metadata columns to fail? Can't we just use the userSchema being passed here to 
determine whether any hoodie metadata columns are being queried, and then decide 
the appropriate next steps?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to