bvaradar commented on code in PR #8303: URL: https://github.com/apache/hudi/pull/8303#discussion_r1162253186
########## docker/demo/sparksql-batch2.commands: ########## @@ -26,7 +26,8 @@ spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from s spark.sql("select symbol, max(ts) from stock_ticks_mor_rt group by symbol HAVING symbol = 'GOOG'").show(100, false) spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor_rt where symbol = 'GOOG'").show(100, false) - // Copy-On-Write Bootstrapped table +// Copy-On-Write Bootstrapped table +spark.sql("set hoodie.bootstrap.data.queries.only=false") Review Comment: Are there any integration tests for bootstrap where we test with this feature on? ########## hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala: ########## @@ -100,7 +101,7 @@ class DefaultSource extends RelationProvider ) } else { Map() - }) ++ DataSourceOptionsHelper.parametersWithReadDefaults(optParams) + }) ++ DataSourceOptionsHelper.parametersWithReadDefaults(sqlContext.getAllConfs.filter(k => k._1.startsWith("hoodie.")) ++ optParams) Review Comment: Why is this needed? ########## hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala: ########## @@ -270,6 +271,21 @@ object DefaultSource { } } + private def resolveHoodieBootstrapRelation(sqlContext: SQLContext, + globPaths: Seq[Path], + userSchema: Option[StructType], + metaClient: HoodieTableMetaClient, + parameters: Map[String, String]): BaseRelation = { + val enableFileIndex = HoodieSparkConfUtils.getConfigValue(parameters, sqlContext.sparkSession.sessionState.conf, + ENABLE_HOODIE_FILE_INDEX.key, ENABLE_HOODIE_FILE_INDEX.defaultValue.toString).toBoolean + if (!enableFileIndex || globPaths.nonEmpty || parameters.getOrElse(HoodieBootstrapConfig.DATA_QUERIES_ONLY.key(), "true") != "true") { Review Comment: @jonvex : Wouldn't this change cause user queries which include hoodie metadata columns to fail?
Can't we just use the userSchema being passed here to check whether any hoodie metadata columns are being queried, and then determine the appropriate next steps? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org