loukey_j created HUDI-7134: ------------------------------ Summary: After deleting the field and re-executing the merge, the result is not as expected. Key: HUDI-7134 URL: https://issues.apache.org/jira/browse/HUDI-7134 Project: Apache Hudi Issue Type: Bug Components: spark Affects Versions: 0.14.0 Environment: hudi 0.14 spark 3.2.1 Reporter: loukey_j
{code:java} 1、CREATE TABLE if not exists hudi_ut_schema_evolution (id INT, version INT, name STRING, birthDate TIMESTAMP, inc_day STRING) USING HUDI PARTITIONED BY (inc_day) TBLPROPERTIES (delta.enableChangeDataFeed='true', type='cow', primaryKey='id') 2、merge into hudi_ut_schema_evolution t using ( select 1 as id, 2 as version, 'str_1' as name, cast('2023-01-01 12:12:12.0' as timestamp) as birthDate, '2023-10-02' as inc_day) s on t.id=s.id when matched THEN UPDATE SET * WHEN NOT MATCHED THEN INSERT * 3、ALTER TABLE hudi_ut_schema_evolution ADD COLUMNS (add1 String AFTER id); 4、merge into hudi_ut_schema_evolution t using ( select '1' as add1, 2 as id, 2 as version, 'str_1' as name, cast('2023-01-01 12:12:12.0' as timestamp) as birthDate, '2023-10-02' as inc_day) s on t.id=s.id when matched THEN UPDATE SET * WHEN NOT MATCHED THEN INSERT * 5、ALTER TABLE hudi_ut_schema_evolution DROP COLUMN add1; 6、merge into hudi_ut_schema_evolution t using ( select {color:red}'1' as add1{color}, 3 as id, 2 as version, 'str_1' as name, cast('2023-01-01 12:12:12.0' as timestamp) as birthDate, '2023-10-02' as inc_day) s on t.id=s.id when matched THEN UPDATE SET * WHEN NOT MATCHED THEN INSERT *; 7、select * from hudi_ut_schema_evolution; +-------------------+---------------------+------------------+----------------------+-------------------------------------------------------------------------+----+---+-------+-----+-------------------+----------+ |_hoodie_commit_time|_hoodie_commit_seqno |_hoodie_record_key|_hoodie_partition_path|_hoodie_file_name |add1|id |version|name |birthDate |inc_day | +-------------------+---------------------+------------------+----------------------+-------------------------------------------------------------------------+----+---+-------+-----+-------------------+----------+ |20231122164141030 |20231122164141030_0_0|1 |inc_day=2023-10-02 |9fa5823c-7e29-4330-9b05-dd72e6088d62-0_0-112-98_20231122165413036.parquet|null|1 |2 |str_1|2023-01-01 12:12:12|2023-10-02| |20231122165045413 |20231122165045413_0_1|2 
|inc_day=2023-10-02 |9fa5823c-7e29-4330-9b05-dd72e6088d62-0_0-112-98_20231122165413036.parquet|null|2 |2 |str_1|2023-01-01 12:12:12|2023-10-02| |20231122165413036 |20231122165413036_0_2|3 |inc_day=2023-10-02 |9fa5823c-7e29-4330-9b05-dd72e6088d62-0_0-112-98_20231122165413036.parquet|{color:red}null{color}|3 |2 |str_1|2023-01-01 12:12:12|2023-10-02| +-------------------+---------------------+------------------+----------------------+-------------------------------------------------------------------------+----+---+-------+-----+-------------------+----------+ 8、show create table hudi_ut_schema_evolution; CREATE TABLE unisql.hudi_ut_schema_evolution ( `_hoodie_commit_time` STRING COMMENT '', `_hoodie_commit_seqno` STRING COMMENT '', `_hoodie_record_key` STRING COMMENT '', `_hoodie_partition_path` STRING COMMENT '', `_hoodie_file_name` STRING COMMENT '', {color:red}`add1` STRING, `id` INT,{color} `version` INT, `name` STRING, `birthDate` TIMESTAMP, `inc_day` STRING) PARTITIONED BY (inc_day) TBLPROPERTIES( 'hoodie.query.as.ro.table' = 'false', 'last_commit_completion_time_sync' = '20231122171640801', 'last_commit_time_sync' = '20231122171627218', 'primaryKey' = 'id', 'type' = 'cow') {code} -- This message was sent by Atlassian Jira (v8.20.10#820010)