renhao created HUDI-2390: ---------------------------- Summary: Create table by Hudi SQL, write data into the table by datasource, Hudi delete command cannot delete data Key: HUDI-2390 URL: https://issues.apache.org/jira/browse/HUDI-2390 Project: Apache Hudi Issue Type: Bug Components: Spark Integration Affects Versions: 0.9.0 Reporter: renhao Attachments: tb_base.7z
Test Case: import org.apache.hudi.QuickstartUtils._ import scala.collection.JavaConversions._ import org.apache.spark.sql.SaveMode._ import org.apache.hudi.DataSourceReadOptions._ import org.apache.hudi.DataSourceWriteOptions._ import org.apache.hudi.config.HoodieWriteConfig._ 1.spark.read.parquet("/tmp/tb_base").createTempView("test1") 2.spark.sql("create table testdb.sql_test_cow (primary_key int, col0 int, col1 string, col2 int, col3 string, col4 double, col5 date, col6 timestamp, col7 int) using hudi partitioned by(col0) options(primaryKey='primary_key', preCombineField='col2')") 3.val base_data = spark.read.parquet("/tmp/tb_base") base_data.write.format("hudi").base_data.write.format("hudi"). option(TABLE_TYPE_OPT_KEY, COW_TABLE_TYPE_OPT_VAL). option(PRECOMBINE_FIELD_OPT_KEY, "col2"). option(RECORDKEY_FIELD_OPT_KEY, "primary_key"). option(PARTITIONPATH_FIELD_OPT_KEY, "col0"). option(KEYGENERATOR_CLASS_OPT_KEY, "org.apache.hudi.keygen.SimpleKeyGenerator"). option(OPERATION_OPT_KEY, "bulk_insert"). option(HIVE_SYNC_ENABLED_OPT_KEY, "true"). option(HIVE_PARTITION_FIELDS_OPT_KEY, "col0"). option(HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY, "org.apache.hudi.hive.MultiPartKeysValueExtractor"). option(HIVE_DATABASE_OPT_KEY, "testdb"). option(HIVE_TABLE_OPT_KEY, "sql_test_cow"). option(HIVE_USE_JDBC_OPT_KEY, "true"). option("hoodie.bulkinsert.shuffle.parallelism", 4). option("hoodie.datasource.write.hive_style_partitioning", "true"). option(TABLE_NAME, "sql_test_cow").mode(Append).save(s"/user/hive/warehouse/testdb.db/sql_test_cow") 4.sql("delete from testdb.sql_test_cow where primary_key = 1") 5.sql("select primary_key,col0,col1,col2,col3,col4,col5,col6,col7 from testdb.sql_test_cow order by primary_key").show(false) Step 4 executes without an exception, but the record is not deleted. !image-2021-09-02-09-52-24-704.png! -- This message was sent by Atlassian Jira (v8.3.4#803005)