codope commented on code in PR #12280:
URL: https://github.com/apache/hudi/pull/12280#discussion_r1850678915
##########
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/command/index/TestSecondaryIndex.scala:
##########
@@ -284,6 +286,50 @@ class TestSecondaryIndex extends HoodieSparkSqlTestBase {
}
}
+ test("Test Secondary Index With Overwrite and Delete Partition") {
+ if (HoodieSparkUtils.gteqSpark3_3) {
+ withTempDir { tmp =>
+ Seq(
+ WriteOperationType.INSERT_OVERWRITE.value(),
+ WriteOperationType.INSERT_OVERWRITE_TABLE.value(),
+ WriteOperationType.DELETE_PARTITION.value()
+ ).foreach { operationType =>
+ val tableName = generateTableName
+ val basePath = s"${tmp.getCanonicalPath}/$tableName"
+ // Step 1: Initial Insertion of Records
+ val dataGen = new HoodieTestDataGenerator()
+ val initialRecords =
recordsToStrings(dataGen.generateInserts(getInstantTime, 50, true)).asScala
+ val initialDf =
spark.read.json(spark.sparkContext.parallelize(initialRecords.toSeq, 2))
+ val hudiOpts = commonOpts ++ Map(TABLE_TYPE.key -> "MERGE_ON_READ",
HoodieWriteConfig.TBL_NAME.key -> tableName)
+ initialDf.write.format("hudi")
+ .options(hudiOpts)
+ .option(OPERATION.key, INSERT_OPERATION_OPT_VAL)
+ .mode(SaveMode.Overwrite)
+ .save(basePath)
+
+ // Step 2: Create table and secondary index on 'rider' column
+ spark.sql(s"CREATE TABLE $tableName USING hudi LOCATION '$basePath'")
+ spark.sql(s"create index idx_rider on $tableName using
secondary_index(rider)")
+
+ // Verify initial state of secondary index
+ val initialKeys = spark.sql(s"select _row_key from $tableName limit
5").collect().map(_.getString(0))
+ validateSecondaryIndex(basePath, tableName, initialKeys)
+
+ // Step 3: Perform Update Operations on Subset of Records
+ val records =
recordsToStrings(dataGen.generateUniqueUpdates(getInstantTime, 10,
HoodieTestDataGenerator.TRIP_FLATTENED_SCHEMA)).asScala
+ val df =
spark.read.json(spark.sparkContext.parallelize(records.toSeq, 2))
+ // Verify secondary index update fails
Review Comment:
Let's also add one more step after this verification: disable the
secondary index and validate that the data table update then goes through.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]