This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 8c237e82a3c [Bug](exec) fix intersections/differences bug (#34675)
8c237e82a3c is described below
commit 8c237e82a3c7329dbc2a06380c7d9e3a1d112cd2
Author: HappenLee <[email protected]>
AuthorDate: Sat May 11 11:27:56 2024 +0800
[Bug](exec) fix intersections/differences bug (#34675)
---
be/src/pipeline/exec/set_sink_operator.cpp | 27 ++++++-------
be/src/pipeline/exec/set_source_operator.cpp | 9 +----
be/src/pipeline/pipeline_x/dependency.h | 12 +++++-
be/src/vec/exec/vset_operation_node.cpp | 47 ++++++++--------------
.../data/correctness_p0/test_set_operation.out | 1 +
.../correctness_p0/test_set_operation.groovy | 2 +
6 files changed, 43 insertions(+), 55 deletions(-)
diff --git a/be/src/pipeline/exec/set_sink_operator.cpp
b/be/src/pipeline/exec/set_sink_operator.cpp
index 2042e3eb1a1..b5774276fdf 100644
--- a/be/src/pipeline/exec/set_sink_operator.cpp
+++ b/be/src/pipeline/exec/set_sink_operator.cpp
@@ -140,19 +140,13 @@ Status
SetSinkOperatorX<is_intersect>::_extract_build_column(
block.get_by_position(result_col_id).column =
block.get_by_position(result_col_id).column->convert_to_full_column_if_const();
- const auto* column = block.get_by_position(result_col_id).column.get();
-
- if (const auto* nullable =
check_and_get_column<vectorized::ColumnNullable>(*column)) {
- const auto& col_nested = nullable->get_nested_column();
- if (local_state._shared_state->build_not_ignore_null[i]) {
- raw_ptrs[i] = nullable;
- } else {
- raw_ptrs[i] = &col_nested;
- }
-
- } else {
- raw_ptrs[i] = column;
+ if (local_state._shared_state->build_not_ignore_null[i]) {
+ block.get_by_position(result_col_id).column =
+ make_nullable(block.get_by_position(result_col_id).column);
}
+
+ const auto* column = block.get_by_position(result_col_id).column.get();
+ raw_ptrs[i] = column;
DCHECK_GE(result_col_id, 0);
local_state._shared_state->build_col_idx.insert({result_col_id, i});
}
@@ -191,11 +185,14 @@ Status
SetSinkLocalState<is_intersect>::open(RuntimeState* state) {
auto& parent = _parent->cast<Parent>();
DCHECK(parent._cur_child_id == 0);
auto& child_exprs_lists = _shared_state->child_exprs_lists;
-
+
_shared_state->build_not_ignore_null.resize(child_exprs_lists[parent._cur_child_id].size());
_shared_state->hash_table_variants =
std::make_unique<vectorized::SetHashTableVariants>();
- for (const auto& ctx : child_exprs_lists[parent._cur_child_id]) {
-
_shared_state->build_not_ignore_null.push_back(ctx->root()->is_nullable());
+ for (const auto& ctl : child_exprs_lists) {
+ for (int i = 0; i < ctl.size(); ++i) {
+ _shared_state->build_not_ignore_null[i] =
+ _shared_state->build_not_ignore_null[i] ||
ctl[i]->root()->is_nullable();
+ }
}
_shared_state->hash_table_init();
return Status::OK();
diff --git a/be/src/pipeline/exec/set_source_operator.cpp
b/be/src/pipeline/exec/set_source_operator.cpp
index 88d38d325af..61fd428b492 100644
--- a/be/src/pipeline/exec/set_source_operator.cpp
+++ b/be/src/pipeline/exec/set_source_operator.cpp
@@ -177,14 +177,7 @@ void SetSourceOperatorX<is_intersect>::_add_result_columns(
auto it = value.begin();
for (auto idx = build_col_idx.begin(); idx != build_col_idx.end(); ++idx) {
auto& column = *build_block.get_by_position(idx->first).column;
- if (local_state._mutable_cols[idx->second]->is_nullable() ^
column.is_nullable()) {
- DCHECK(local_state._mutable_cols[idx->second]->is_nullable());
-
((vectorized::ColumnNullable*)(local_state._mutable_cols[idx->second].get()))
- ->insert_from_not_nullable(column, it->row_num);
-
- } else {
- local_state._mutable_cols[idx->second]->insert_from(column,
it->row_num);
- }
+ local_state._mutable_cols[idx->second]->insert_from(column,
it->row_num);
}
block_size++;
}
diff --git a/be/src/pipeline/pipeline_x/dependency.h
b/be/src/pipeline/pipeline_x/dependency.h
index 19099b32f8a..693bde10f36 100644
--- a/be/src/pipeline/pipeline_x/dependency.h
+++ b/be/src/pipeline/pipeline_x/dependency.h
@@ -689,8 +689,18 @@ public:
}
return;
}
+
+ // here need to change type to nullable, because some case eg:
+ // (select 0) intersect (select null) the build side hash table should
not
+ // ignore null value.
+ std::vector<DataTypePtr> data_types;
+ for (const auto& ctx : child_exprs_lists[0]) {
+ data_types.emplace_back(build_not_ignore_null[0]
+ ?
make_nullable(ctx->root()->data_type())
+ : ctx->root()->data_type());
+ }
if (!try_get_hash_map_context_fixed<NormalHashMap, HashCRC32,
RowRefListWithFlags>(
- *hash_table_variants, child_exprs_lists[0])) {
+ *hash_table_variants, data_types)) {
hash_table_variants->emplace<SetSerializedHashTableContext>();
}
}
diff --git a/be/src/vec/exec/vset_operation_node.cpp
b/be/src/vec/exec/vset_operation_node.cpp
index c4802bee4cf..209e755636e 100644
--- a/be/src/vec/exec/vset_operation_node.cpp
+++ b/be/src/vec/exec/vset_operation_node.cpp
@@ -155,21 +155,20 @@ Status
VSetOperationNode<is_intersect>::prepare(RuntimeState* state) {
auto column_nums = _child_expr_lists[0].size();
DCHECK_EQ(output_data_types.size(), column_nums)
<< output_data_types.size() << " " << column_nums;
- // the nullable is not depend on child, it's should use _row_descriptor
from FE plan
- // some case all not nullable column from children, but maybe need output
nullable.
- vector<bool> nullable_flags(column_nums, false);
- for (int i = 0; i < column_nums; ++i) {
- nullable_flags[i] = output_data_types[i]->is_nullable();
- }
-
for (int i = 0; i < _child_expr_lists.size(); ++i) {
RETURN_IF_ERROR(VExpr::prepare(_child_expr_lists[i], state,
child(i)->row_desc()));
}
for (int i = 0; i < _child_expr_lists[0].size(); ++i) {
const auto& ctx = _child_expr_lists[0][i];
- _build_not_ignore_null.push_back(ctx->root()->is_nullable());
- _left_table_data_types.push_back(nullable_flags[i] ?
make_nullable(ctx->root()->data_type())
- :
ctx->root()->data_type());
+ // the nullable is not depend on child, it's should use
_row_descriptor from FE plan
+ // some case all not nullable column from children, but maybe need
output nullable.
+ if (output_data_types[i]->is_nullable()) {
+ _build_not_ignore_null.push_back(true);
+
_left_table_data_types.push_back(make_nullable(ctx->root()->data_type()));
+ } else {
+ _build_not_ignore_null.push_back(false);
+ _left_table_data_types.push_back(ctx->root()->data_type());
+ }
}
hash_table_init();
@@ -213,7 +212,7 @@ void VSetOperationNode<is_intersect>::hash_table_init() {
return;
}
if (!try_get_hash_map_context_fixed<NormalHashMap, HashCRC32,
RowRefListWithFlags>(
- *_hash_table_variants, _child_expr_lists[0])) {
+ *_hash_table_variants, _left_table_data_types)) {
_hash_table_variants->emplace<SetSerializedHashTableContext>();
}
}
@@ -337,14 +336,7 @@ void
VSetOperationNode<is_intersect>::add_result_columns(RowRefListWithFlags& va
auto it = value.begin();
for (auto idx = _build_col_idx.begin(); idx != _build_col_idx.end();
++idx) {
const auto& column = *_build_block.get_by_position(idx->first).column;
- if (_mutable_cols[idx->second]->is_nullable() ^ column.is_nullable()) {
- DCHECK(_mutable_cols[idx->second]->is_nullable());
- ((ColumnNullable*)(_mutable_cols[idx->second].get()))
- ->insert_from_not_nullable(column, it->row_num);
-
- } else {
- _mutable_cols[idx->second]->insert_from(column, it->row_num);
- }
+ _mutable_cols[idx->second]->insert_from(column, it->row_num);
}
block_size++;
}
@@ -427,19 +419,12 @@ Status
VSetOperationNode<is_intersect>::extract_build_column(Block& block,
block.get_by_position(result_col_id).column =
block.get_by_position(result_col_id).column->convert_to_full_column_if_const();
- const auto* column = block.get_by_position(result_col_id).column.get();
-
- if (const auto* nullable =
check_and_get_column<ColumnNullable>(*column)) {
- const auto& col_nested = nullable->get_nested_column();
- if (_build_not_ignore_null[i]) {
- raw_ptrs[i] = nullable;
- } else {
- raw_ptrs[i] = &col_nested;
- }
-
- } else {
- raw_ptrs[i] = column;
+ if (_build_not_ignore_null[i]) {
+ block.get_by_position(result_col_id).column =
+ make_nullable(block.get_by_position(result_col_id).column);
}
+ const auto* column = block.get_by_position(result_col_id).column.get();
+ raw_ptrs[i] = column;
DCHECK_GE(result_col_id, 0);
_build_col_idx.insert({result_col_id, i});
}
diff --git a/regression-test/data/correctness_p0/test_set_operation.out
b/regression-test/data/correctness_p0/test_set_operation.out
index f6d9f32d4cc..09fa8314065 100644
--- a/regression-test/data/correctness_p0/test_set_operation.out
+++ b/regression-test/data/correctness_p0/test_set_operation.out
@@ -10,3 +10,4 @@ bbbb
aaaa
bbbb
+-- !select1 --
diff --git a/regression-test/suites/correctness_p0/test_set_operation.groovy
b/regression-test/suites/correctness_p0/test_set_operation.groovy
index e3e28a9e3a4..5ee6348a037 100644
--- a/regression-test/suites/correctness_p0/test_set_operation.groovy
+++ b/regression-test/suites/correctness_p0/test_set_operation.groovy
@@ -125,4 +125,6 @@ suite("test_set_operation") {
"""
qt_select1 """ SELECT DISTINCT * FROM((SELECT sku_code FROM test_B)
INTERSECT (SELECT sku_code FROM test_B) UNION (SELECT sku_code FROM test_A)) as
t order by 1; """
+ qt_select1 """ (select 0) intersect (select null); """
+
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]