bvaradar commented on code in PR #10915:
URL: https://github.com/apache/hudi/pull/10915#discussion_r1600600472


##########
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/cdc/TestCDCDataFrameSuite.scala:
##########
@@ -407,27 +409,32 @@ class TestCDCDataFrameSuite extends HoodieCDCTestBase {
     val inputDF7 = spark.read.json(spark.sparkContext.parallelize(records7, 2))
     inputDF7.write.format("org.apache.hudi")
       .options(options)
+      .option("hoodie.compact.inline", "false")
       .mode(SaveMode.Append)
       .save(basePath)
+    totalInsertedCnt += 7
 
     val records8 = recordsToStrings(dataGen.generateInserts("007", 3)).asScala.toList
     val inputDF8 = spark.read.json(spark.sparkContext.parallelize(records8, 2))
     inputDF8.write.format("org.apache.hudi")
       .options(options)
+      .option("hoodie.compact.inline", "false")
       .mode(SaveMode.Append)
       .save(basePath)
     val instant8 = metaClient.reloadActiveTimeline.lastInstant().get()
     val commitTime8 = instant8.getTimestamp
+    totalInsertedCnt += 3
 
     // 8. Upsert Operation With Clean Operation
-    val records9 = recordsToStrings(dataGen.generateUniqueUpdates("008", 30)).asScala.toList
-    val inputDF9 = spark.read.json(spark.sparkContext.parallelize(records9, 2))
+    val inputDF9 = inputDF6.limit(30) // 30 updates to inserts added after insert overwrite table. if not for this, updates generated from datagne,
+    // could split as inserts and updates from hudi standpoint due to insert overwrite table operation.
     inputDF9.write.format("org.apache.hudi")
       .options(options)
       .option("hoodie.clean.automatic", "true")
-      .option("hoodie.keep.min.commits", "4")
-      .option("hoodie.keep.max.commits", "5")
-      .option("hoodie.clean.commits.retained", "3")
+      .option("hoodie.keep.min.commits", "16")

Review Comment:
   @nsivabalan: Why did we change this test along with this PR? I am not able to follow the comments added inline.
   cc @yihua



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to