amrishlal commented on code in PR #9262: URL: https://github.com/apache/hudi/pull/9262#discussion_r1272632308
########## hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala: ########## @@ -1538,7 +1539,52 @@ class TestCOWDataSource extends HoodieSparkClientTestBase with ScalaAssertionSup assertEquals(2, result.count()) assertEquals(0, result.filter(result("id") === 1).count()) } + + /** Test case to verify MAKE_NEW_COLUMNS_NULLABLE config parameter. */ + @Test + def testSchemaEvolutionWithNewColumn(): Unit = { + val df1 = spark.sql("select '1' as event_id, '2' as ts, '3' as version, 'foo' as event_date") + var hudiOptions = Map[String, String]( + HoodieWriteConfig.TBL_NAME.key() -> "test_hudi_merger", + KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key() -> "event_id", + KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key() -> "version,event_date", Review Comment: Fixed. ########## hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala: ########## @@ -1538,7 +1539,52 @@ class TestCOWDataSource extends HoodieSparkClientTestBase with ScalaAssertionSup assertEquals(2, result.count()) assertEquals(0, result.filter(result("id") === 1).count()) } + + /** Test case to verify MAKE_NEW_COLUMNS_NULLABLE config parameter. 
*/ + @Test + def testSchemaEvolutionWithNewColumn(): Unit = { + val df1 = spark.sql("select '1' as event_id, '2' as ts, '3' as version, 'foo' as event_date") + var hudiOptions = Map[String, String]( + HoodieWriteConfig.TBL_NAME.key() -> "test_hudi_merger", + KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key() -> "event_id", + KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key() -> "version,event_date", + DataSourceWriteOptions.OPERATION.key() -> "insert", + HoodieWriteConfig.PRECOMBINE_FIELD_NAME.key() -> "ts", + HoodieWriteConfig.KEYGENERATOR_CLASS_NAME.key() -> "org.apache.hudi.keygen.ComplexKeyGenerator", + KeyGeneratorOptions.HIVE_STYLE_PARTITIONING_ENABLE.key() -> "true", + HiveSyncConfigHolder.HIVE_SYNC_ENABLED.key() -> "false", + HoodieWriteConfig.RECORD_MERGER_IMPLS.key() -> "org.apache.hudi.HoodieSparkRecordMerger" + ) + df1.write.format("org.apache.hudi").options(hudiOptions).mode(SaveMode.Append).save(basePath) + + // Try adding a string column. This operation is expected to throw 'schema not compatible' exception since + // 'MAKE_NEW_COLUMNS_NULLABLE' parameter is 'false' by default. + val df2 = spark.sql("select '2' as event_id, '2' as ts, '3' as version, 'foo' as event_date, 'bar' as add_col") + try { + (df2.write.format("org.apache.hudi").options(hudiOptions).mode("append").save(basePath)) Review Comment: Fixed. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscribe@hudi.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org