This is an automated email from the ASF dual-hosted git repository.
zhangstar333 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 9687932d57d [refactor](function) improve compoundPred optimization
work with children is nullable (#26160)
9687932d57d is described below
commit 9687932d57dabba28b010b7f84b3a76a73c6da43
Author: zhangstar333 <[email protected]>
AuthorDate: Tue Nov 7 13:52:10 2023 +0800
[refactor](function) improve compoundPred optimization work with children
is nullable (#26160)
before this optimization work has limit, it's children must not nullable.
---
be/src/vec/exprs/vcompound_pred.h | 231 +++++++++++++------
.../data/correctness_p0/test_and_or.out | 247 +++++++++++++++++++++
.../suites/correctness_p0/test_and_or.groovy | 97 ++++++++
3 files changed, 502 insertions(+), 73 deletions(-)
diff --git a/be/src/vec/exprs/vcompound_pred.h
b/be/src/vec/exprs/vcompound_pred.h
index 94994f7d941..708b3093a06 100644
--- a/be/src/vec/exprs/vcompound_pred.h
+++ b/be/src/vec/exprs/vcompound_pred.h
@@ -19,10 +19,12 @@
#include <gen_cpp/Opcodes_types.h>
#include "common/status.h"
+#include "gutil/integral_types.h"
#include "util/simd/bits.h"
#include "vec/columns/column.h"
#include "vec/columns/columns_number.h"
#include "vec/common/assert_cast.h"
+#include "vec/data_types/data_type_number.h"
#include "vec/exprs/vectorized_fn_call.h"
#include "vec/exprs/vexpr.h"
@@ -52,86 +54,166 @@ public:
const std::string& expr_name() const override { return _expr_name; }
Status execute(VExprContext* context, Block* block, int* result_column_id)
override {
- if (children().size() == 1 || !_all_child_is_compound_and_not_const()
||
- _children[0]->is_nullable() || _children[1]->is_nullable()) {
- // TODO:
- // When the child is nullable, make the optimization also take
effect, and the processing of this piece may be more complicated
- // https://dev.mysql.com/doc/refman/8.0/en/logical-operators.html
+ if (children().size() == 1 || !_all_child_is_compound_and_not_const())
{
return VectorizedFnCall::execute(context, block, result_column_id);
}
int lhs_id = -1;
int rhs_id = -1;
RETURN_IF_ERROR(_children[0]->execute(context, block, &lhs_id));
- ColumnPtr lhs_column =
-
block->get_by_position(lhs_id).column->convert_to_full_column_if_const();
-
+ ColumnPtr lhs_column = block->get_by_position(lhs_id).column;
size_t size = lhs_column->size();
- uint8* __restrict data = _get_raw_data(lhs_column);
- int filted = simd::count_zero_num((int8_t*)data, size);
- bool full = filted == 0;
- bool empty = filted == size;
+ bool lhs_is_nullable = lhs_column->is_nullable();
+ auto [lhs_data_column, lhs_null_map] =
+ _get_raw_data_and_null_map(lhs_column, lhs_is_nullable);
+ int filted = simd::count_zero_num((int8_t*)lhs_data_column, size);
+ bool lhs_all_true = (filted == 0);
+ bool lhs_all_false = (filted == size);
+
+ bool lhs_all_is_not_null = false;
+ if (lhs_is_nullable) {
+ filted = simd::count_zero_num((int8_t*)lhs_null_map, size);
+ lhs_all_is_not_null = (filted == size);
+ }
ColumnPtr rhs_column = nullptr;
- uint8* __restrict data_rhs = nullptr;
- bool full_rhs = false;
- bool empty_rhs = false;
+ uint8* __restrict rhs_data_column = nullptr;
+ uint8* __restrict rhs_null_map = nullptr;
+ bool rhs_is_nullable = false;
+ bool rhs_all_true = false;
+ bool rhs_all_false = false;
+ bool rhs_all_is_not_null = false;
+ bool result_is_nullable = _data_type->is_nullable();
auto get_rhs_colum = [&]() {
if (rhs_id == -1) {
RETURN_IF_ERROR(_children[1]->execute(context, block,
&rhs_id));
- rhs_column =
-
block->get_by_position(rhs_id).column->convert_to_full_column_if_const();
- data_rhs = _get_raw_data(rhs_column);
- int filted = simd::count_zero_num((int8_t*)data_rhs, size);
- full_rhs = filted == 0;
- empty_rhs = filted == size;
+ rhs_column = block->get_by_position(rhs_id).column;
+ rhs_is_nullable = rhs_column->is_nullable();
+ auto rhs_nullable_column =
_get_raw_data_and_null_map(rhs_column, rhs_is_nullable);
+ rhs_data_column = rhs_nullable_column.first;
+ rhs_null_map = rhs_nullable_column.second;
+ int filted = simd::count_zero_num((int8_t*)rhs_data_column,
size);
+ rhs_all_true = (filted == 0);
+ rhs_all_false = (filted == size);
+ if (rhs_is_nullable) {
+ filted = simd::count_zero_num((int8_t*)rhs_null_map, size);
+ rhs_all_is_not_null = (filted == size);
+ }
}
return Status::OK();
};
+ auto return_result_column_id = [&](ColumnPtr res_column, int res_id)
-> int {
+ if (result_is_nullable && !res_column->is_nullable()) {
+ auto result_column =
+ ColumnNullable::create(res_column,
ColumnUInt8::create(size, 0));
+ res_id = block->columns();
+ block->insert({std::move(result_column), _data_type,
_expr_name});
+ }
+ return res_id;
+ };
+
+ auto create_null_map_column = [&](ColumnPtr null_map_column,
+ uint8* __restrict null_map_data) {
+ if (null_map_data == nullptr) {
+ null_map_column = ColumnUInt8::create(size, 0);
+ null_map_data =
assert_cast<ColumnUInt8*>(null_map_column->assume_mutable().get())
+ ->get_data()
+ .data();
+ }
+ return null_map_data;
+ };
+
+ auto vector_vector_null = [&]<bool is_and_op>() {
+ auto col_res = ColumnUInt8::create(size);
+ auto col_nulls = ColumnUInt8::create(size);
+ auto* __restrict res_datas =
assert_cast<ColumnUInt8*>(col_res)->get_data().data();
+ auto* __restrict res_nulls =
assert_cast<ColumnUInt8*>(col_nulls)->get_data().data();
+ ColumnPtr temp_null_map = nullptr;
+ // maybe both children are nullable / or one of children is
nullable
+ lhs_null_map = create_null_map_column(temp_null_map, lhs_null_map);
+ rhs_null_map = create_null_map_column(temp_null_map, rhs_null_map);
+
+ if constexpr (is_and_op) {
+ for (size_t i = 0; i < size; ++i) {
+ res_nulls[i] = apply_and_null(lhs_data_column[i],
lhs_null_map[i],
+ rhs_data_column[i],
rhs_null_map[i]);
+ res_datas[i] = lhs_data_column[i] & rhs_data_column[i];
+ }
+ } else {
+ for (size_t i = 0; i < size; ++i) {
+ res_nulls[i] = apply_or_null(lhs_data_column[i],
lhs_null_map[i],
+ rhs_data_column[i],
rhs_null_map[i]);
+ res_datas[i] = lhs_data_column[i] | rhs_data_column[i];
+ }
+ }
+ auto result_column = ColumnNullable::create(std::move(col_res),
std::move(col_nulls));
+ *result_column_id = block->columns();
+ block->insert({std::move(result_column), _data_type, _expr_name});
+ };
+
+ // false and NULL ----> 0
+ // true and NULL ----> NULL
if (_op == TExprOpcode::COMPOUND_AND) {
- if (empty) {
- // empty and any = empty, return lhs
- *result_column_id = lhs_id;
+ //1. not null column: all data is false
+ //2. nullable column: null map all is not null
+ if ((lhs_all_false && !lhs_is_nullable) || (lhs_all_false &&
lhs_all_is_not_null)) {
+ // false and any = false, return lhs
+ *result_column_id = return_result_column_id(lhs_column,
lhs_id);
} else {
RETURN_IF_ERROR(get_rhs_colum());
- if (full) {
- // full and any = any, return rhs
- *result_column_id = rhs_id;
- } else if (empty_rhs) {
- // any and empty = empty, return rhs
- *result_column_id = rhs_id;
- } else if (full_rhs) {
- // any and full = any, return lhs
- *result_column_id = lhs_id;
+ if ((lhs_all_true && !lhs_is_nullable) || //not null column
+ (lhs_all_true && lhs_all_is_not_null)) { //nullable column
+ // true and any = any, return rhs
+ *result_column_id = return_result_column_id(rhs_column,
rhs_id);
+ } else if ((rhs_all_false && !rhs_is_nullable) ||
+ (rhs_all_false && rhs_all_is_not_null)) {
+ // any and false = false, return rhs
+ *result_column_id = return_result_column_id(rhs_column,
rhs_id);
+ } else if ((rhs_all_true && !rhs_is_nullable) ||
+ (rhs_all_true && rhs_all_is_not_null)) {
+ // any and true = any, return lhs
+ *result_column_id = return_result_column_id(lhs_column,
lhs_id);
} else {
- *result_column_id = lhs_id;
- for (size_t i = 0; i < size; i++) {
- data[i] &= data_rhs[i];
+ if (!result_is_nullable) {
+ *result_column_id = lhs_id;
+ for (size_t i = 0; i < size; i++) {
+ lhs_data_column[i] &= rhs_data_column[i];
+ }
+ } else {
+ vector_vector_null.template operator()<true>();
}
}
}
} else if (_op == TExprOpcode::COMPOUND_OR) {
- if (full) {
- // full or any = full, return lhs
- *result_column_id = lhs_id;
+ // true or NULL ----> 1
+ // false or NULL ----> NULL
+ if ((lhs_all_true && !lhs_is_nullable) || (lhs_all_true &&
lhs_all_is_not_null)) {
+ // true or any = true, return lhs
+ *result_column_id = return_result_column_id(lhs_column,
lhs_id);
} else {
RETURN_IF_ERROR(get_rhs_colum());
- if (empty) {
- // empty or any = any, return rhs
- *result_column_id = rhs_id;
- } else if (full_rhs) {
- // any or full = full, return rhs
- *result_column_id = rhs_id;
- } else if (empty_rhs) {
- // any or empty = any, return lhs
- *result_column_id = lhs_id;
+ if ((lhs_all_false && !lhs_is_nullable) || (lhs_all_false &&
lhs_all_is_not_null)) {
+ // false or any = any, return rhs
+ *result_column_id = return_result_column_id(rhs_column,
rhs_id);
+ } else if ((rhs_all_true && !rhs_is_nullable) ||
+ (rhs_all_true && rhs_all_is_not_null)) {
+ // any or true = true, return rhs
+ *result_column_id = return_result_column_id(rhs_column,
rhs_id);
+ } else if ((rhs_all_false && !rhs_is_nullable) ||
+ (rhs_all_false && rhs_all_is_not_null)) {
+ // any or false = any, return lhs
+ *result_column_id = return_result_column_id(lhs_column,
lhs_id);
} else {
- *result_column_id = lhs_id;
- for (size_t i = 0; i < size; i++) {
- data[i] |= data_rhs[i];
+ if (!result_is_nullable) {
+ *result_column_id = lhs_id;
+ for (size_t i = 0; i < size; i++) {
+ lhs_data_column[i] |= rhs_data_column[i];
+ }
+ } else {
+ vector_vector_null.template operator()<false>();
}
}
}
@@ -145,6 +227,15 @@ public:
bool is_compound_predicate() const override { return true; }
private:
+ static inline constexpr uint8 apply_and_null(UInt8 a, UInt8 l_null, UInt8
b, UInt8 r_null) {
+ // (<> && false) is false, (true && NULL) is NULL
+ return (l_null & r_null) | (r_null & (l_null ^ a)) | (l_null & (r_null
^ b));
+ }
+ static inline constexpr uint8 apply_or_null(UInt8 a, UInt8 l_null, UInt8
b, UInt8 r_null) {
+ // (<> || true) is true, (false || NULL) is NULL
+ return (l_null & r_null) | (r_null & (r_null ^ a)) | (l_null & (l_null
^ b));
+ }
+
bool _all_child_is_compound_and_not_const() const {
for (auto child : _children) {
// we can make sure non const compound predicate's return column
is allow modifyied locally.
@@ -155,29 +246,23 @@ private:
return true;
}
- uint8* _get_raw_data(ColumnPtr column) const {
- if (column->is_nullable()) {
- return assert_cast<ColumnUInt8*>(
-
assert_cast<ColumnNullable*>(column->assume_mutable().get())
- ->get_nested_column_ptr()
- .get())
- ->get_data()
- .data();
- } else {
- return
assert_cast<ColumnUInt8*>(column->assume_mutable().get())->get_data().data();
- }
- }
-
- uint8* _get_null_map(ColumnPtr column) const {
- if (column->is_nullable()) {
- return assert_cast<ColumnUInt8*>(
-
assert_cast<ColumnNullable*>(column->assume_mutable().get())
- ->get_null_map_column_ptr()
- .get())
- ->get_data()
- .data();
+ std::pair<uint8*, uint8*> _get_raw_data_and_null_map(ColumnPtr column,
+ bool nullable_column)
const {
+ if (nullable_column) {
+ auto* nullable_column =
assert_cast<ColumnNullable*>(column->assume_mutable().get());
+ auto* data_column =
+
assert_cast<ColumnUInt8*>(nullable_column->get_nested_column_ptr().get())
+ ->get_data()
+ .data();
+ auto* null_map =
+
assert_cast<ColumnUInt8*>(nullable_column->get_null_map_column_ptr().get())
+ ->get_data()
+ .data();
+ return std::make_pair(data_column, null_map);
} else {
- return nullptr;
+ auto* data_column =
+
assert_cast<ColumnUInt8*>(column->assume_mutable().get())->get_data().data();
+ return std::make_pair(data_column, nullptr);
}
}
diff --git a/regression-test/data/correctness_p0/test_and_or.out
b/regression-test/data/correctness_p0/test_and_or.out
new file mode 100644
index 00000000000..535d01c2273
--- /dev/null
+++ b/regression-test/data/correctness_p0/test_and_or.out
@@ -0,0 +1,247 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !select1 --
+1 \N true false true false true true
+2 \N true false true false false false
+3 \N true false true false false true
+4 \N true false true false true false
+
+-- !select3 --
+false
+false
+false
+false
+
+-- !select3 --
+false
+false
+false
+false
+
+-- !select3 --
+true
+true
+true
+true
+
+-- !select3 --
+true
+true
+true
+true
+
+-- !select3 --
+\N
+\N
+false
+false
+
+-- !select3 --
+false
+false
+false
+false
+
+-- !select3 --
+false
+true
+true
+true
+
+-- !select3 --
+\N
+\N
+\N
+\N
+
+-- !select3 --
+false
+false
+true
+true
+
+-- !select3 --
+false
+false
+false
+true
+
+-- !select3 --
+false
+false
+true
+true
+
+-- !select3 --
+false
+false
+true
+true
+
+-- !select3 --
+false
+false
+false
+false
+
+-- !select3 --
+\N
+\N
+\N
+\N
+
+-- !select3 --
+true
+true
+true
+true
+
+-- !select3 --
+true
+true
+true
+true
+
+-- !select3 --
+false
+false
+true
+true
+
+-- !select3 --
+\N
+\N
+\N
+\N
+
+-- !select3 --
+false
+true
+true
+true
+
+-- !select3 --
+false
+false
+false
+true
+
+-- !select3 --
+false
+false
+false
+true
+
+-- !select3 --
+false
+false
+true
+true
+
+-- !select1 --
+false
+false
+false
+false
+
+-- !select2 --
+false
+false
+false
+false
+
+-- !select3 --
+true
+true
+true
+true
+
+-- !select4 --
+true
+true
+true
+true
+
+-- !select5 --
+false
+true
+true
+true
+
+-- !select6 --
+false
+false
+true
+true
+
+-- !select7 --
+false
+false
+false
+false
+
+-- !select8 --
+\N
+\N
+\N
+\N
+
+-- !select9 --
+false
+false
+true
+true
+
+-- !select10 --
+false
+false
+false
+false
+
+-- !select11 --
+\N
+\N
+\N
+\N
+
+-- !select12 --
+\N
+\N
+\N
+\N
+
+-- !select13 --
+\N
+\N
+\N
+\N
+
+-- !select14 --
+\N
+\N
+\N
+\N
+
+-- !select15 --
+false
+false
+false
+true
+
+-- !select16 --
+false
+false
+true
+true
+
+-- !select17 --
+false
+false
+true
+true
+
+-- !select18 --
+false
+false
+false
+false
+
diff --git a/regression-test/suites/correctness_p0/test_and_or.groovy
b/regression-test/suites/correctness_p0/test_and_or.groovy
new file mode 100644
index 00000000000..8ad00122baf
--- /dev/null
+++ b/regression-test/suites/correctness_p0/test_and_or.groovy
@@ -0,0 +1,97 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// The cases is copied from https://github.com/trinodb/trino/tree/master
+//
/testing/trino-product-tests/src/main/resources/sql-tests/testcases/aggregate
+// and modified by Doris.
+
+suite("test_and_or") {
+ def tableName = "test_and_or"
+
+ sql """ DROP TABLE IF EXISTS ${tableName} """
+ sql """
+ CREATE TABLE `test_and_or` (
+ `id` int(11) NULL,
+ `k_null` boolean NULL,
+ `k_true` boolean NULL,
+ `k_false` boolean NULL,
+ `k_true_not` boolean not NULL,
+ `k_false_not` boolean not NULL,
+ `k_true_false` boolean not NULL,
+ `k_true_false_not` boolean not NULL
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`id`) BUCKETS 10
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "min_load_replica_num" = "-1",
+ "is_being_synced" = "false",
+ "storage_format" = "V2",
+ "light_schema_change" = "true",
+ "disable_auto_compaction" = "false",
+ "enable_single_replica_compaction" = "false"
+ );
+ """
+
+ sql """ insert into test_and_or
values(1,NULL,true,false,true,false,true,true);"""
+ sql "insert into test_and_or
values(2,NULL,true,false,true,false,false,false);"
+ sql "insert into test_and_or
values(3,NULL,true,false,true,false,false,true);"
+ sql "insert into test_and_or
values(4,NULL,true,false,true,false,true,false);"
+ qt_select1 """select * from test_and_or order by 1,2,3;"""
+ qt_select3 """ select (k_false_not and non_nullable(k_false)) and (k_null
or k_true) from test_and_or order by 1; """
+ qt_select3 """ select (k_null and k_false) and (k_null or k_true) from
test_and_or order by 1; """
+ qt_select3 """ select (k_null or k_true) and (k_false_not or k_true_not)
from test_and_or order by 1; """
+ qt_select3 """ select (k_true_not or k_false_not) and (k_null or k_true)
from test_and_or order by 1; """
+ qt_select3 """ select (k_null or k_false) and (k_false_not or k_true_not
and k_true_false_not) from test_and_or order by 1; """
+ qt_select3 """ select (k_null or k_false) and (k_false_not or k_true_not
and k_false) from test_and_or order by 1; """
+ qt_select3 """ select (k_true_false_not or k_true_false) and (k_true_not
or k_true) from test_and_or order by 1; """
+ qt_select3 """ select (k_null or k_false) and (k_false_not or k_true_not)
from test_and_or order by 1; """
+ qt_select3 """ select (k_true_not and k_true_false_not or k_true_false)
and (k_false_not or k_true_false_not) from test_and_or order by 1; """
+ qt_select3 """ select (k_true_false or k_false) and (k_false_not or
k_true_false_not) from test_and_or order by 1; """
+ qt_select3 """ select (k_true_false_not and k_true_not) or (k_false and
k_true) from test_and_or order by 1; """
+ qt_select3 """ select (k_true_false_not and k_true_not) or (k_false and
k_true) from test_and_or order by 1; """
+ qt_select3 """ select (k_true_false and k_false) or (k_false_not and
k_true_false_not) from test_and_or order by 1; """
+ qt_select3 """ select (k_false_not and k_true_not) or (k_null and k_true)
from test_and_or order by 1; """
+ qt_select3 """ select (k_false_not and k_true or k_true_false_not) or
(k_true_not and non_nullable(k_true)) from test_and_or order by 1; """
+ qt_select3 """ select (k_false_not and k_true_not or k_true_false_not) or
(k_true_not and k_true) from test_and_or order by 1; """
+ qt_select3 """ select (k_true_false_not and k_true_not) or (k_false and
k_true) from test_and_or order by 1; """
+ qt_select3 """ select (k_null and k_true) or (k_false_not and k_false)
from test_and_or order by 1; """
+ qt_select3 """ select (k_false_not and k_true_not or k_true_false) or
(k_true_not and k_true_false_not) from test_and_or order by 1; """
+ qt_select3 """ select (k_true_false and k_true_false_not) or (k_false_not
and k_true_not or k_false) from test_and_or order by 1; """
+ qt_select3 """ select (k_true_false and k_true_false_not) or (k_false_not
and k_true_not or k_false) from test_and_or order by 1; """
+ qt_select3 """ select (k_true_false and k_true_false_not) or (cast(id as
boolean) and (k_true_false_not)) from test_and_or order by 1; """
+
+ qt_select1 """ select (k_false_not and k_false_not) and (k_null or k_true)
from test_and_or order by 1;"""
+ qt_select2 """ select (k_null and k_false) and (k_null or k_true) from
test_and_or order by 1;"""
+ qt_select3 """ select (k_null or k_true) and (k_false_not or k_true_not)
from test_and_or order by 1;"""
+ qt_select4 """ select (k_true_not or k_false_not) and (k_null or k_true)
from test_and_or order by 1;"""
+ qt_select5 """ select (k_true_false_not or k_true_false) and (k_true_not
or k_false_not) from test_and_or order by 1;"""
+ qt_select6 """ select (k_true_not or k_true_false) and (k_true_false_not
or k_false_not) from test_and_or order by 1;"""
+ qt_select7 """ select (k_true_not and k_false_not) and (k_true_false_not
or k_true) from test_and_or order by 1;"""
+ qt_select8 """ select (k_null or k_false) and (k_false_not or k_true_not)
from test_and_or order by 1; """
+ qt_select9 """ select (k_false and k_true_not) or (k_true and
k_true_false_not) from test_and_or order by 1;"""
+ qt_select10 """ select (k_false_not and k_true_not) or (k_null and
k_false) from test_and_or order by 1;"""
+ qt_select11 """ select (k_false_not and k_false_not) or (k_null and
k_true) from test_and_or order by 1;"""
+ qt_select12 """ select (k_null and k_false) or (k_null and k_true) from
test_and_or order by 1;"""
+ qt_select13 """ select (k_null and k_true) or (k_false_not and k_true_not)
from test_and_or order by 1;"""
+ qt_select14 """ select (k_true_not and k_false_not) or (k_null and k_true)
from test_and_or order by 1;"""
+ qt_select15 """ select (k_true_false_not and k_true_false) or (k_true_not
and k_false_not) from test_and_or order by 1;"""
+ qt_select16 """ select (k_true_not and k_true_false) or(k_true_false_not
and k_false_not) from test_and_or order by 1;"""
+ qt_select17 """ select (k_true_not and k_false_not) or (k_true_false_not
and k_true) from test_and_or order by 1;"""
+ qt_select18 """ select (k_null and k_false) or (k_false_not and
k_true_not) from test_and_or order by 1; """
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]