loukey_j created HUDI-7134:
------------------------------

             Summary: After deleting the field and re-executing the merge, the 
result is not as expected.
                 Key: HUDI-7134
                 URL: https://issues.apache.org/jira/browse/HUDI-7134
             Project: Apache Hudi
          Issue Type: Bug
          Components: spark
    Affects Versions: 0.14.0
         Environment: hudi 0.14 spark 3.2.1
            Reporter: loukey_j


{code:java}
1、CREATE TABLE if not exists hudi_ut_schema_evolution 
(id INT, version INT, name STRING, birthDate TIMESTAMP, inc_day STRING) USING 
HUDI 
PARTITIONED BY (inc_day) TBLPROPERTIES (delta.enableChangeDataFeed='true', 
type='cow', primaryKey='id')

2、merge into hudi_ut_schema_evolution t using ( select 1 as id, 2 as version, 
'str_1' as name, cast('2023-01-01 12:12:12.0' as timestamp) as birthDate, 
'2023-10-02' as inc_day) s  on t.id=s.id when matched THEN UPDATE SET * WHEN 
NOT MATCHED THEN INSERT *

3、ALTER TABLE hudi_ut_schema_evolution ADD COLUMNS (add1 String AFTER id);

4、merge into hudi_ut_schema_evolution t using ( select '1' as add1, 2 as id, 2 
as version, 'str_1' as name, cast('2023-01-01 12:12:12.0' as timestamp) as 
birthDate, '2023-10-02' as inc_day) s  on t.id=s.id when matched THEN UPDATE 
SET * WHEN NOT MATCHED THEN INSERT *
 
5、ALTER TABLE hudi_ut_schema_evolution DROP COLUMN add1;

6、merge into hudi_ut_schema_evolution t using ( select {color:red}'1' as add1{color}, 
3 as id, 2 as version, 'str_1' as name, cast('2023-01-01 12:12:12.0' as timestamp) as 
birthDate, '2023-10-02' as inc_day) s  on t.id=s.id when matched THEN UPDATE SET * WHEN 
NOT MATCHED THEN INSERT *;

7、select * from hudi_ut_schema_evolution;
+-------------------+---------------------+------------------+----------------------+-------------------------------------------------------------------------+----+---+-------+-----+-------------------+----------+
|_hoodie_commit_time|_hoodie_commit_seqno |_hoodie_record_key|_hoodie_partition_path|_hoodie_file_name                                                        |add1|id |version|name |birthDate          |inc_day   |
+-------------------+---------------------+------------------+----------------------+-------------------------------------------------------------------------+----+---+-------+-----+-------------------+----------+
|20231122164141030  |20231122164141030_0_0|1                 |inc_day=2023-10-02    |9fa5823c-7e29-4330-9b05-dd72e6088d62-0_0-112-98_20231122165413036.parquet|null|1  |2      |str_1|2023-01-01 12:12:12|2023-10-02|
|20231122165045413  |20231122165045413_0_1|2                 |inc_day=2023-10-02    |9fa5823c-7e29-4330-9b05-dd72e6088d62-0_0-112-98_20231122165413036.parquet|null|2  |2      |str_1|2023-01-01 12:12:12|2023-10-02|
|20231122165413036  |20231122165413036_0_2|3                 |inc_day=2023-10-02    |9fa5823c-7e29-4330-9b05-dd72e6088d62-0_0-112-98_20231122165413036.parquet|{color:red}null{color}|3  |2      |str_1|2023-01-01 12:12:12|2023-10-02|
+-------------------+---------------------+------------------+----------------------+-------------------------------------------------------------------------+----+---+-------+-----+-------------------+----------+

8、show create table hudi_ut_schema_evolution;
CREATE TABLE unisql.hudi_ut_schema_evolution (
  `_hoodie_commit_time` STRING COMMENT '',
  `_hoodie_commit_seqno` STRING COMMENT '',
  `_hoodie_record_key` STRING COMMENT '',
  `_hoodie_partition_path` STRING COMMENT '',
  `_hoodie_file_name` STRING COMMENT '',
  {color:red}`add1` STRING,
  `id` INT,{color}
  `version` INT,
  `name` STRING,
  `birthDate` TIMESTAMP,
  `inc_day` STRING)
PARTITIONED BY (inc_day)
TBLPROPERTIES(
  'hoodie.query.as.ro.table' = 'false',
  'last_commit_completion_time_sync' = '20231122171640801',
  'last_commit_time_sync' = '20231122171627218',
  'primaryKey' = 'id',
  'type' = 'cow')
{code}




--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to