This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new e4149c6e4c5 [Fix](parquet-reader) Fix null map issue in parquet
reader. (#27777)
e4149c6e4c5 is described below
commit e4149c6e4c5e01c57d00d3ef2748c8dcf751121e
Author: Qi Chen <[email protected]>
AuthorDate: Thu Nov 30 13:55:37 2023 +0800
[Fix](parquet-reader) Fix null map issue in parquet reader. (#27777)
Fix a null map issue in the parquet reader which causes incorrect results
for functions such as `min()` and `max()`.
The null map is shared between the parquet converted src column and the dst
column in order to avoid copying. The tricky part is that the code has to call
the mutable function `doris_nullable_column->get_null_map_column_ptr()`, which
sets `_need_update_has_null = true`. This matters because some operations,
such as agg, call `has_null()`, which sets `_need_update_has_null = false`.
---
.../exec/format/parquet/parquet_column_convert.cpp | 6 +++++-
.../hive/scripts/create_preinstalled_table.hql | 13 +++++++++++++
...f6-107e-43c7-a69b-abd2fc641da7.c000.snappy.parquet | Bin 0 -> 2003651 bytes
.../data/external_table_p0/hive/test_hive_parquet.out | 12 ++++++++++++
.../external_table_p0/hive/test_hive_parquet.groovy | 16 ++++++++++++++++
5 files changed, 46 insertions(+), 1 deletion(-)
diff --git a/be/src/vec/exec/format/parquet/parquet_column_convert.cpp
b/be/src/vec/exec/format/parquet/parquet_column_convert.cpp
index 34b6da3e571..28ba92b8680 100644
--- a/be/src/vec/exec/format/parquet/parquet_column_convert.cpp
+++ b/be/src/vec/exec/format/parquet/parquet_column_convert.cpp
@@ -71,7 +71,11 @@ ColumnPtr get_column(tparquet::Type::type
parquet_physical_type, PrimitiveType s
}
if (*need_convert && doris_type->is_nullable()) {
- auto doris_nullable_column = static_cast<const
ColumnNullable*>(doris_column.get());
+ // In order to share null map between parquet converted src column and
dst column to avoid copying. It is very tricky that will
+ // call mutable function
`doris_nullable_column->get_null_map_column_ptr()` which will set
`_need_update_has_null = true`.
+ // Because some operations such as agg will call `has_null()` to set
`_need_update_has_null = false`.
+ auto doris_nullable_column =
+ const_cast<ColumnNullable*>(static_cast<const
ColumnNullable*>(doris_column.get()));
ans_column = ColumnNullable::create(ans_column,
doris_nullable_column->get_null_map_column_ptr());
}
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql
b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql
index 4e80d7466d2..dcaaa321e78 100644
---
a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql
+++
b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql
@@ -1788,6 +1788,19 @@ LOCATION
msck repair table partition_table;
+CREATE TABLE `parquet_decimal90_table`(
+ `decimal_col` decimal(9,0))
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+ '/user/doris/preinstalled_data/parquet_table/parquet_decimal90_table';
+
+msck repair table parquet_decimal90_table;
+
show tables;
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal90_table/part-00000-d883d7f6-107e-43c7-a69b-abd2fc641da7.c000.snappy.parquet
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal90_table/part-00000-d883d7f6-107e-43c7-a69b-abd2fc641da7.c000.snappy.parquet
new file mode 100644
index 00000000000..9f28d71e504
Binary files /dev/null and
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_decimal90_table/part-00000-d883d7f6-107e-43c7-a69b-abd2fc641da7.c000.snappy.parquet
differ
diff --git a/regression-test/data/external_table_p0/hive/test_hive_parquet.out
b/regression-test/data/external_table_p0/hive/test_hive_parquet.out
index f444323407e..a54a25520da 100644
--- a/regression-test/data/external_table_p0/hive/test_hive_parquet.out
+++ b/regression-test/data/external_table_p0/hive/test_hive_parquet.out
@@ -10133,3 +10133,15 @@ rus moscow 996
us chicago 1995
us washington 1999
+-- !q21_max --
+-115249949
+
+-- !q21_min --
+-999999917
+
+-- !q21_sum --
+-247549496714217
+
+-- !q21_avg --
+-495084140.9042
+
diff --git
a/regression-test/suites/external_table_p0/hive/test_hive_parquet.groovy
b/regression-test/suites/external_table_p0/hive/test_hive_parquet.groovy
index 2daac80d71e..c60e1a4f0a6 100644
--- a/regression-test/suites/external_table_p0/hive/test_hive_parquet.groovy
+++ b/regression-test/suites/external_table_p0/hive/test_hive_parquet.groovy
@@ -139,6 +139,21 @@ suite("test_hive_parquet",
"p0,external,hive,external_docker,external_docker_hiv
"""
}
+ def q21 = {
+ qt_q21_max """
+ select max(decimal_col) from parquet_decimal90_table;
+ """
+ qt_q21_min """
+ select min(decimal_col) from parquet_decimal90_table;
+ """
+ qt_q21_sum """
+ select sum(decimal_col) from parquet_decimal90_table;
+ """
+ qt_q21_avg """
+ select avg(decimal_col) from parquet_decimal90_table;
+ """
+ }
+
String enabled = context.config.otherConfigs.get("enableHiveTest")
if (enabled != null && enabled.equalsIgnoreCase("true")) {
try {
@@ -175,6 +190,7 @@ suite("test_hive_parquet",
"p0,external,hive,external_docker,external_docker_hiv
q18()
q19()
q20()
+ q21()
sql """explain physical plan select l_partkey from partition_table
where (nation != 'cn' or city !='beijing') and (l_quantity >
28 or l_extendedprice > 30000)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]