taiyang-li opened a new issue, #8032:
URL: https://github.com/apache/incubator-gluten/issues/8032
### Backend
CH (ClickHouse)
### Bug description
``` scala
// Reproduction for GLUTEN-8021/8022: writes a table whose only active column is a
// nullable struct (`tup struct<x:int>`) once through `nativeWrite2` and once inside
// `vanillaWrite`, then checks that both tables return the same rows.
//
// NOTE(review): the concatenated test name has no space between "parquet" and "read".
// It is kept as-is so the test identifier does not change.
test(
  "GLUTEN-8021/8022: fix orc read/write mismatch and parquet" +
    "read exception when written complex column contains null") {

  // Default table name for a given storage format, e.g. "t_8021_orc".
  def table(format: String): String = s"t_8021_$format"

  // CREATE TABLE statement for `format`. Every column except `tup` is disabled with a
  // SQL `--` comment, so the created table has the single column `tup struct<x:int>`.
  // `table_name` overrides the default name from `table(format)` when defined.
  def create(format: String, table_name: Option[String] = None): String =
    s"""CREATE TABLE ${table_name.getOrElse(table(format))}(
       |-- id int,
       |-- x int,
       |-- y int,
       |-- mp map<string, string>,
       |-- arr array<int>,
       |-- tup struct<x:int, y:int>
       |tup struct<x:int>
       |-- arr_mp array<map<string, string>>,
       |-- mp_arr map<string, array<int>>
       |) stored as $format""".stripMargin

  // INSERT OVERWRITE statement fed from range(100): `x` and `y` are null when
  // id % 3 = 1, and the whole `tup` struct is null when id % 4 = 1, so the data mixes
  // null structs with structs that hold a null field.
  //
  // Each disabled projection must stay on a single `|--` line: a continuation line
  // without the `|` margin and `--` prefix would become live SQL after stripMargin
  // and break the statement.
  def insert(format: String, table_name: Option[String] = None): String =
    s"""INSERT OVERWRITE TABLE ${table_name.getOrElse(table(format))}
       |with data_source as (
       |select
       |id,
       |if(id % 3 = 1, null, id+1) as x,
       |if(id % 3 = 1, null, id+2) as y
       |from range(100)
       |)
       |select
       |-- id,
       |-- id, x, y,
       |-- str_to_map(concat('x:', x, ',y:', y)) as mp,
       |-- if(id % 4 = 0, null, array(x, y)) as arr,
       |if(id % 4 = 1, null, struct(x)) as tup
       |-- if(id % 4 = 2, null, array(str_to_map(concat('x:', x, ',y:', y)))) as arr_mp,
       |-- if(id % 4 = 3, null, map('x', array(x), 'y', array(y))) as mp_arr
       |from
       |data_source;""".stripMargin

  // NOTE(review): presumably nativeWrite2 runs the (table, create, insert) triple with
  // the native writer for each format and then invokes the callback; confirm against
  // the helper's definition.
  nativeWrite2(
    format => (table(format), create(format), insert(format)),
    (table_name, format) => {
      // Build a reference copy of the table, suffixed "_v", inside `vanillaWrite`.
      val vanilla_table = s"${table_name}_v"
      val vanilla_create = create(format, Some(vanilla_table))
      vanillaWrite {
        withDestinationTable(vanilla_table, Option(vanilla_create)) {
          checkInsertQuery(insert(format, Some(vanilla_table)), checkNative = false)
        }
      }

      // The rows read back from both tables must be identical.
      val rowsFromOriginTable =
        spark.sql(s"select * from $vanilla_table").collect()
      val dfFromWriteTable =
        spark.sql(s"select * from $table_name")
      checkAnswer(dfFromWriteTable, rowsFromOriginTable)
    }
  )
}
```
The left is the content of the natively written ORC file. The right is the
content of the vanilla-written ORC file.

### Spark version
None
### Spark configurations
_No response_
### System information
_No response_
### Relevant logs
_No response_
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]