codope commented on code in PR #12280: URL: https://github.com/apache/hudi/pull/12280#discussion_r1850797043
########## hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/command/index/TestSecondaryIndex.scala: ########## @@ -284,6 +286,50 @@ class TestSecondaryIndex extends HoodieSparkSqlTestBase { } } + test("Test Secondary Index With Overwrite and Delete Partition") { + if (HoodieSparkUtils.gteqSpark3_3) { + withTempDir { tmp => + Seq( + WriteOperationType.INSERT_OVERWRITE.value(), + WriteOperationType.INSERT_OVERWRITE_TABLE.value(), + WriteOperationType.DELETE_PARTITION.value() + ).foreach { operationType => + val tableName = generateTableName + val basePath = s"${tmp.getCanonicalPath}/$tableName" + // Step 1: Initial Insertion of Records + val dataGen = new HoodieTestDataGenerator() + val initialRecords = recordsToStrings(dataGen.generateInserts(getInstantTime, 50, true)).asScala + val initialDf = spark.read.json(spark.sparkContext.parallelize(initialRecords.toSeq, 2)) + val hudiOpts = commonOpts ++ Map(TABLE_TYPE.key -> "MERGE_ON_READ", HoodieWriteConfig.TBL_NAME.key -> tableName) + initialDf.write.format("hudi") + .options(hudiOpts) + .option(OPERATION.key, INSERT_OPERATION_OPT_VAL) + .mode(SaveMode.Overwrite) + .save(basePath) + + // Step 2: Create table and secondary index on 'rider' column + spark.sql(s"CREATE TABLE $tableName USING hudi LOCATION '$basePath'") + spark.sql(s"create index idx_rider on $tableName using secondary_index(rider)") + + // Verify initial state of secondary index + val initialKeys = spark.sql(s"select _row_key from $tableName limit 5").collect().map(_.getString(0)) + validateSecondaryIndex(basePath, tableName, initialKeys) + + // Step 3: Perform Update Operations on Subset of Records + val records = recordsToStrings(dataGen.generateUniqueUpdates(getInstantTime, 10, HoodieTestDataGenerator.TRIP_FLATTENED_SCHEMA)).asScala + val df = spark.read.json(spark.sparkContext.parallelize(records.toSeq, 2)) + // Verify secondary index update fails Review Comment: added an update with secondary index disabled and it goes 
through. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org