[ https://issues.apache.org/jira/browse/HUDI-7131?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17788667#comment-17788667 ]

loukey_j commented on HUDI-7131:
--------------------------------

sorry, I didn't notice that I had converted inc_day to a date type. I later 
corrected the SQL and got the same error; the corrected statements to reproduce 
it are below. The root cause is that the schema returned by 
hoodieWriteConfig.getSchema() is incompatible with the schema of the Hudi table.
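
For comparison, the table-side schema can be dumped with TableSchemaResolver, 
the class the issue description below points at. A minimal sketch, assuming a 
Hudi 0.14.x classpath; the base path is a placeholder, and since 
getTableAvroSchemaInternal is private, it goes through the public 
getTableAvroSchema:

import org.apache.avro.Schema;
import org.apache.hadoop.conf.Configuration;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.TableSchemaResolver;

public class TableSchemaLookup {
    public static void main(String[] args) throws Exception {
        // Placeholder: point this at the table's storage location.
        String basePath = "/path/to/hudi_ut_time_traval";
        HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
                .setConf(new Configuration())
                .setBasePath(basePath)
                .build();
        // true = include the _hoodie_* metadata fields, matching the
        // "parquet schema" dump quoted in the description below.
        Schema tableSchema = new TableSchemaResolver(metaClient).getTableAvroSchema(true);
        System.out.println(tableSchema.toString(true));
    }
}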

1. CREATE TABLE if not exists hudi_ut_time_traval
(id INT, version INT, name STRING, birthDate TIMESTAMP, inc_day STRING)
USING HUDI
PARTITIONED BY (inc_day) TBLPROPERTIES (type='cow', primaryKey='id');

2. merge into hudi_ut_time_traval t using (
select 1 as id, 2 as version, 'str_1' as name,
cast('2023-01-01 12:12:12.0' as timestamp) as birthDate,
'2023-10-01' as inc_day -- partition value on first write
) s on t.id=s.id when matched THEN UPDATE SET * WHEN NOT MATCHED THEN INSERT *

3. merge into hudi_ut_time_traval t using (
select 1 as id, 2 as version, 'str_1' as name,
cast('2023-01-01 12:12:12.0' as timestamp) as birthDate,
'2023-10-02' as inc_day -- partition value changes, so the record moves partitions
) s on t.id=s.id when matched THEN UPDATE SET * WHEN NOT MATCHED THEN INSERT *
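
The required-vs-optional conflict in the stack trace quoted below can also be 
reproduced at the Avro level. A minimal, self-contained sketch; the two record 
schemas are hand-written stand-ins mirroring only the id field, not read from 
the real files:

import org.apache.avro.Schema;
import org.apache.avro.SchemaCompatibility;

public class SchemaMismatchDemo {
    public static void main(String[] args) {
        // Writer/file side: Hudi writes each data field as a nullable union.
        Schema fileSchema = new Schema.Parser().parse(
                "{\"type\":\"record\",\"name\":\"rec\",\"fields\":"
              + "[{\"name\":\"id\",\"type\":[\"null\",\"int\"],\"default\":null}]}");
        // Reader side: hoodieWriteConfig.getSchema() declares id as required.
        Schema readerSchema = new Schema.Parser().parse(
                "{\"type\":\"record\",\"name\":\"rec\",\"fields\":"
              + "[{\"name\":\"id\",\"type\":\"int\"}]}");
        // Prints INCOMPATIBLE: a required reader field cannot resolve against a
        // nullable writer field, the Avro analogue of Parquet's
        // "required int32 id != optional int32 id".
        System.out.println(SchemaCompatibility
                .checkReaderWriterCompatibility(readerSchema, fileSchema)
                .getType());
    }
}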

> The requested schema is not compatible with the file schema
> -----------------------------------------------------------
>
>                 Key: HUDI-7131
>                 URL: https://issues.apache.org/jira/browse/HUDI-7131
>             Project: Apache Hudi
>          Issue Type: Bug
>    Affects Versions: 0.14.0
>            Reporter: loukey_j
>            Priority: Critical
>              Labels: core, merge, spark
>
> Using a global index, when a record's partition changes, the merge fails 
> with: The requested schema is not compatible with the file schema...
> Why not use the schema from 
> org.apache.hudi.common.table.TableSchemaResolver#getTableAvroSchemaInternal 
> to read the Hudi data?
>  
> CREATE TABLE if not exists unisql.hudi_ut_time_traval
> (id INT, version INT, name STRING, birthDate TIMESTAMP, inc_day STRING)
> USING HUDI
> PARTITIONED BY (inc_day) TBLPROPERTIES (type='cow', primaryKey='id');
> insert into unisql.hudi_ut_time_traval
> select 1 as id, 1 as version, 'str_1' as name,
> cast('2023-01-01 12:12:12.0' as timestamp) as birthDate,
> cast('2023-10-01' as date) as inc_day;
> select * from hudi_ut_time_traval;
> (1 row, shown vertically)
> _hoodie_commit_time    : 20231122100234339
> _hoodie_commit_seqno   : 20231122100234339_0_0
> _hoodie_record_key     : 1
> _hoodie_partition_path : inc_day=2023-10-01
> _hoodie_file_name      : 8a510742-c060-4d12-898e-70bbd122f2e3-0_0-19-16_20231122100234339.parquet
> id                     : 1
> version                : 1
> name                   : str_1
> birthDate              : 2023-01-01 12:12:12
> inc_day                : 2023-10-01
> merge into hudi_ut_time_traval t using (
> select 1 as id, 2 as version, 'str_1' as name,
> cast('2023-01-01 12:12:12.0' as timestamp) as birthDate,
> cast('2023-10-02' as date) as inc_day
> ) s on t.id=s.id when matched THEN UPDATE SET * WHEN NOT MATCHED THEN INSERT *
> Caused by: org.apache.parquet.io.ParquetDecodingException: The requested schema is not compatible with the file schema. incompatible types: required int32 id != optional int32 id
> at org.apache.parquet.io.ColumnIOFactory$ColumnIOCreatorVisitor.incompatibleSchema(ColumnIOFactory.java:101)
> at org.apache.parquet.io.ColumnIOFactory$ColumnIOCreatorVisitor.visitChildren(ColumnIOFactory.java:81)
> at org.apache.parquet.io.ColumnIOFactory$ColumnIOCreatorVisitor.visit(ColumnIOFactory.java:57)
> at org.apache.parquet.schema.MessageType.accept(MessageType.java:55)
> at org.apache.parquet.io.ColumnIOFactory.getColumnIO(ColumnIOFactory.java:162)
> at org.apache.parquet.hadoop.InternalParquetRecordReader.checkRead(InternalParquetRecordReader.java:135)
> at org.apache.parquet.hadoop.InternalParquetRecordReader.nextKeyValue(InternalParquetRecordReader.java:225)
> parquet schema:
> {
>   "type" : "record",
>   "name" : "hudi_ut_time_traval_record",
>   "namespace" : "hoodie.hudi_ut_time_traval",
>   "fields" : [ {
>     "name" : "_hoodie_commit_time",
>     "type" : [ "null", "string" ],
>     "doc" : "",
>     "default" : null
>   }, {
>     "name" : "_hoodie_commit_seqno",
>     "type" : [ "null", "string" ],
>     "doc" : "",
>     "default" : null
>   }, {
>     "name" : "_hoodie_record_key",
>     "type" : [ "null", "string" ],
>     "doc" : "",
>     "default" : null
>   }, {
>     "name" : "_hoodie_partition_path",
>     "type" : [ "null", "string" ],
>     "doc" : "",
>     "default" : null
>   }, {
>     "name" : "_hoodie_file_name",
>     "type" : [ "null", "string" ],
>     "doc" : "",
>     "default" : null
>   }, {
>     "name" : "id",
>     "type" : [ "null", "int" ],
>     "default" : null
>   }, {
>     "name" : "version",
>     "type" : [ "null", "int" ],
>     "default" : null
>   }, {
>     "name" : "name",
>     "type" : [ "null", "string" ],
>     "default" : null
>   }, {
>     "name" : "birthDate",
>     "type" : [ "null", {
>       "type" : "long",
>       "logicalType" : "timestamp-micros"
>     } ],
>     "default" : null
>   }, {
>     "name" : "inc_day",
>     "type" : [ "null", "string" ],
>     "default" : null
>   } ]
> }
> org.apache.hudi.io.HoodieMergedReadHandle#readerSchema:
> {
>   "type" : "record",
>   "name" : "hudi_ut_time_traval_record",
>   "namespace" : "hoodie.hudi_ut_time_traval",
>   "fields" : [ {
>     "name" : "id",
>     "type" : "int"
>   }, {
>     "name" : "version",
>     "type" : "int"
>   }, {
>     "name" : "name",
>     "type" : "string"
>   }, {
>     "name" : "birthDate",
>     "type" : [ "null", { "type" : "long", "logicalType" : "timestamp-micros" } ],
>     "default" : null
>   }, {
>     "name" : "inc_day",
>     "type" : [ "null", { "type" : "int", "logicalType" : "date" } ],
>     "default" : null
>   } ]
> }
>  


