taiyang-li opened a new issue, #8032:
URL: https://github.com/apache/incubator-gluten/issues/8032

   ### Backend
   
   CH (ClickHouse)
   
   ### Bug description
   
   ``` scala
   test(
       "GLUTEN-8021/8022: fix orc read/write mismatch and parquet" +
         "read exception when written complex column contains null") {
       def table(format: String): String = s"t_8021_$format"
       def create(format: String, table_name: Option[String] = None): String =
         s"""CREATE TABLE ${table_name.getOrElse(table(format))}(
            |-- id int,
            |-- x int,
            |-- y int,
            |-- mp map<string, string>,
            |-- arr array<int>,
            |-- tup struct<x:int, y:int>
            |tup struct<x:int>
            |-- arr_mp array<map<string, string>>,
            |-- mp_arr map<string, array<int>>
            |) stored as $format""".stripMargin
       def insert(format: String, table_name: Option[String] = None): String =
         s"""INSERT OVERWRITE TABLE ${table_name.getOrElse(table(format))}
            |with data_source as (
            |select
            |id,
            |if(id % 3 = 1, null, id+1) as x,
            |if(id % 3 = 1, null, id+2) as y
            |from range(100)
            |)
            |select
            |-- id,
            |-- id, x, y,
            |-- str_to_map(concat('x:', x, ',y:', y)) as mp,
            |-- if(id % 4 = 0, null, array(x, y)) as arr,
            |if(id % 4 = 1, null, struct(x)) as tup
            |-- if(id % 4 = 2, null, array(str_to_map(concat('x:', x, ',y:', 
y)))) as arr_mp,
            |-- if(id % 4 = 3, null, map('x', array(x), 'y', array(y))) as 
mp_arr
            |from
            |data_source;""".stripMargin
   
       nativeWrite2(
         format => (table(format), create(format), insert(format)),
         (table_name, format) => {
           val vanilla_table = s"${table_name}_v"
           val vanilla_create = create(format, Some(vanilla_table))
           vanillaWrite {
             withDestinationTable(vanilla_table, Option(vanilla_create)) {
               checkInsertQuery(insert(format, Some(vanilla_table)), 
checkNative = false)
             }
           }
           val rowsFromOriginTable =
             spark.sql(s"select * from $vanilla_table").collect()
           val dfFromWriteTable =
             spark.sql(s"select * from $table_name")
           checkAnswer(dfFromWriteTable, rowsFromOriginTable)
         }
       )
   
       println("hahha")
     }
   ```
   
   
   The left side shows the content of the natively written ORC file; the right side shows the content of the vanilla-written ORC file.
   
![企业微信截图_6d945f13-fa14-495a-85b9-eda9e336b3b4](https://github.com/user-attachments/assets/1355645f-aaa1-4565-942b-8fc6a3fb4265)
   
   
   ### Spark version
   
   None
   
   ### Spark configurations
   
   _No response_
   
   ### System information
   
   _No response_
   
   ### Relevant logs
   
   _No response_


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to