This is an automated email from the ASF dual-hosted git repository.
panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new bcd641877f [Enhancement](scan) disable build key range and filters
when push down agg work (#14248)
bcd641877f is described below
commit bcd641877fc85ebc334c48f3912dcaa7f296bc45
Author: Pxl <[email protected]>
AuthorDate: Mon Nov 21 12:47:57 2022 +0800
[Enhancement](scan) disable build key range and filters when push down agg
work (#14248)
disable build key range and filters when push down agg work
---
be/src/exec/olap_common.h | 183 ++++++++++++---------
be/src/vec/exec/scan/new_olap_scan_node.cpp | 110 ++++++++-----
be/test/exec/olap_common_test.cpp | 14 +-
.../test_aggregate_all_functions.out | 16 +-
.../test_aggregate_all_functions.groovy | 8 +-
run-be-ut.sh | 2 +-
6 files changed, 203 insertions(+), 130 deletions(-)
diff --git a/be/src/exec/olap_common.h b/be/src/exec/olap_common.h
index 1823f8cbae..605ab9dda0 100644
--- a/be/src/exec/olap_common.h
+++ b/be/src/exec/olap_common.h
@@ -18,6 +18,7 @@
#pragma once
#include <boost/lexical_cast.hpp>
+#include <cstdint>
#include <map>
#include <sstream>
#include <string>
@@ -25,6 +26,7 @@
#include <variant>
#include "exec/olap_utils.h"
+#include "olap/olap_common.h"
#include "olap/tuple.h"
#include "runtime/primitive_type.h"
#include "runtime/type_limit.h"
@@ -112,7 +114,13 @@ public:
bool convert_to_avg_range_value(std::vector<OlapTuple>& begin_scan_keys,
std::vector<OlapTuple>& end_scan_keys,
bool& begin_include,
- bool& end_include, bool* eos, int32_t
max_scan_key_num);
+ bool& end_include, int32_t
max_scan_key_num);
+
+ bool convert_to_close_range(std::vector<OlapTuple>& begin_scan_keys,
+ std::vector<OlapTuple>& end_scan_keys, bool&
begin_include,
+ bool& end_include);
+
+ constexpr bool is_reject_split_type() const { return
_is_reject_split_type; }
bool has_intersection(ColumnValueRange<primitive_type>& range);
@@ -317,6 +325,16 @@ private:
bool _contain_null;
int _precision;
int _scale;
+
+ static constexpr bool _is_reject_split_type = primitive_type ==
PrimitiveType::TYPE_LARGEINT ||
+ primitive_type ==
PrimitiveType::TYPE_DECIMALV2 ||
+ primitive_type ==
PrimitiveType::TYPE_HLL ||
+ primitive_type ==
PrimitiveType::TYPE_VARCHAR ||
+ primitive_type ==
PrimitiveType::TYPE_CHAR ||
+ primitive_type ==
PrimitiveType::TYPE_STRING ||
+ primitive_type ==
PrimitiveType::TYPE_BOOLEAN ||
+ primitive_type ==
PrimitiveType::TYPE_DATETIME ||
+ primitive_type ==
PrimitiveType::TYPE_DATETIMEV2;
};
class OlapScanKeys {
@@ -517,112 +535,123 @@ size_t
ColumnValueRange<primitive_type>::get_convertible_fixed_value_size() cons
return _high_value - _low_value;
}
+// The return value indicates whether eos.
template <PrimitiveType primitive_type>
-bool ColumnValueRange<primitive_type>::convert_to_avg_range_value(
+bool ColumnValueRange<primitive_type>::convert_to_close_range(
std::vector<OlapTuple>& begin_scan_keys, std::vector<OlapTuple>&
end_scan_keys,
- bool& begin_include, bool& end_include, bool* eos, int32_t
max_scan_key_num) {
- constexpr bool reject_type = primitive_type ==
PrimitiveType::TYPE_LARGEINT ||
- primitive_type ==
PrimitiveType::TYPE_DECIMALV2 ||
- primitive_type == PrimitiveType::TYPE_HLL ||
- primitive_type == PrimitiveType::TYPE_VARCHAR
||
- primitive_type == PrimitiveType::TYPE_CHAR ||
- primitive_type == PrimitiveType::TYPE_STRING
||
- primitive_type == PrimitiveType::TYPE_BOOLEAN
||
- primitive_type ==
PrimitiveType::TYPE_DATETIME ||
- primitive_type ==
PrimitiveType::TYPE_DATETIMEV2;
- begin_include = is_begin_include();
- end_include = is_end_include();
- bool is_empty_range = false;
- if constexpr (reject_type) {
- begin_scan_keys.emplace_back();
- begin_scan_keys.back().add_value(
- cast_to_string<primitive_type, CppType>(get_range_min_value(),
scale()),
- contain_null());
- end_scan_keys.emplace_back();
- end_scan_keys.back().add_value(
- cast_to_string<primitive_type, CppType>(get_range_max_value(),
scale()));
- return true;
- } else if (is_low_value_mininum() && is_high_value_maximum()) {
- // Do not split the range of whole range. TODO: figure out why the code
- // execute here
- begin_scan_keys.emplace_back();
- begin_scan_keys.back().add_value(
- cast_to_string<primitive_type, CppType>(get_range_min_value(),
scale()),
- contain_null());
- end_scan_keys.emplace_back();
- end_scan_keys.back().add_value(
- cast_to_string<primitive_type, CppType>(get_range_max_value(),
scale()));
- return true;
- } else {
- CppType min_value = get_range_min_value();
- CppType max_value = get_range_max_value();
+ bool& begin_include, bool& end_include) {
+ if constexpr (!_is_reject_split_type) {
+ begin_include = true;
+ end_include = true;
- if (contain_null()) {
- begin_scan_keys.emplace_back();
- begin_scan_keys.back().add_null();
- end_scan_keys.emplace_back();
- end_scan_keys.back().add_null();
- }
+ bool is_empty = false;
if (!is_begin_include()) {
- if (min_value == TYPE_MAX) {
- is_empty_range = true;
+ if (_low_value == TYPE_MIN) {
+ is_empty = true;
} else {
- begin_include = true;
- ++min_value;
+ ++_low_value;
}
}
if (!is_end_include()) {
- if (max_value == TYPE_MIN) {
- is_empty_range = true;
+ if (_high_value == TYPE_MAX) {
+ is_empty = true;
} else {
- end_include = true;
- --max_value;
+ --_high_value;
}
}
- if (begin_include && end_include && min_value > max_value) {
- is_empty_range = true;
+ if (_high_value < _low_value) {
+ is_empty = true;
}
- if (is_empty_range) {
- if (!contain_null()) {
- begin_scan_keys.clear();
- end_scan_keys.clear();
- *eos = true;
- }
- return false;
+
+ if (is_empty && !contain_null()) {
+ begin_scan_keys.clear();
+ end_scan_keys.clear();
+ return true;
}
+ }
+ return false;
+}
- int128_t range_size = is_fixed_value_convertible() ?
(int128_t)max_value - min_value : 0;
- size_t step_size = range_size / max_scan_key_num;
+// The return value indicates whether the split result is range or fixed value.
+template <PrimitiveType primitive_type>
+bool ColumnValueRange<primitive_type>::convert_to_avg_range_value(
+ std::vector<OlapTuple>& begin_scan_keys, std::vector<OlapTuple>&
end_scan_keys,
+ bool& begin_include, bool& end_include, int32_t max_scan_key_num) {
+ if constexpr (!_is_reject_split_type) {
+ auto no_split = [&]() -> bool {
+ begin_scan_keys.emplace_back();
+ begin_scan_keys.back().add_value(
+ cast_to_string<primitive_type,
CppType>(get_range_min_value(), scale()),
+ contain_null());
+ end_scan_keys.emplace_back();
+ end_scan_keys.back().add_value(
+ cast_to_string<primitive_type,
CppType>(get_range_max_value(), scale()));
+ return true;
+ };
- auto current = min_value;
+ CppType min_value = get_range_min_value();
+ CppType max_value = get_range_max_value();
if constexpr (primitive_type == PrimitiveType::TYPE_DATE) {
- current.set_type(TimeType::TIME_DATE);
+ min_value.set_type(TimeType::TIME_DATE);
+ max_value.set_type(TimeType::TIME_DATE);
}
- while (current <= max_value) {
+ if (contain_null()) {
+ begin_scan_keys.emplace_back();
+ begin_scan_keys.back().add_null();
+ end_scan_keys.emplace_back();
+ end_scan_keys.back().add_null();
+ }
+
+ if (min_value > max_value || max_scan_key_num == 1) {
+ return no_split();
+ }
+
+ auto cast = [](const CppType& value) {
+ if constexpr (primitive_type == PrimitiveType::TYPE_DATE ||
+ primitive_type == PrimitiveType::TYPE_DATEV2) {
+ return value;
+ } else {
+ return (int128_t)value;
+ }
+ };
+
+ // When CppType is date, we can not convert it to integer number and
calculate distance.
+ // In other cases, we convert element to int128 to avoid overflow.
+ size_t step_size = (cast(max_value) - min_value) / max_scan_key_num;
+
+ constexpr size_t MAX_STEP_SIZE = 1 << 20;
+ // When the step size is too large, each range is unlikely to actually
contain data.
+ if (step_size > MAX_STEP_SIZE) {
+ return no_split();
+ }
+
+ while (true) {
begin_scan_keys.emplace_back();
begin_scan_keys.back().add_value(
- cast_to_string<primitive_type, CppType>(current, scale()));
+ cast_to_string<primitive_type, CppType>(min_value,
scale()));
- if ((int128_t)max_value - current < step_size) {
- current = max_value;
+ if (cast(max_value) - min_value < step_size) {
+ min_value = max_value;
} else {
- current += step_size;
+ min_value += step_size;
}
end_scan_keys.emplace_back();
end_scan_keys.back().add_value(
- cast_to_string<primitive_type, CppType>(current, scale()));
+ cast_to_string<primitive_type, CppType>(min_value,
scale()));
- if (current == max_value) {
+ if (min_value == max_value) {
break;
}
- ++current;
+ ++min_value;
}
+
return step_size != 0;
}
+ return false;
}
template <PrimitiveType primitive_type>
@@ -945,9 +974,13 @@ Status
OlapScanKeys::extend_scan_key(ColumnValueRange<primitive_type>& range,
}
}
} else {
- if (_begin_scan_keys.empty() && range.is_fixed_value_convertible() &&
_is_convertible) {
+ if (_begin_scan_keys.empty() && range.is_fixed_value_convertible() &&
_is_convertible &&
+ !range.is_reject_split_type()) {
+ *eos |= range.convert_to_close_range(_begin_scan_keys,
_end_scan_keys, _begin_include,
+ _end_include);
+
if (range.convert_to_avg_range_value(_begin_scan_keys,
_end_scan_keys, _begin_include,
- _end_include, eos,
max_scan_key_num)) {
+ _end_include,
max_scan_key_num)) {
_has_range_value = true;
}
return Status::OK();
diff --git a/be/src/vec/exec/scan/new_olap_scan_node.cpp
b/be/src/vec/exec/scan/new_olap_scan_node.cpp
index 26a609864f..407021b891 100644
--- a/be/src/vec/exec/scan/new_olap_scan_node.cpp
+++ b/be/src/vec/exec/scan/new_olap_scan_node.cpp
@@ -17,6 +17,7 @@
#include "vec/exec/scan/new_olap_scan_node.h"
+#include "common/status.h"
#include "olap/storage_engine.h"
#include "olap/tablet.h"
#include "util/to_string.h"
@@ -135,6 +136,18 @@ static std::string olap_filters_to_string(const
std::vector<doris::TCondition>&
return filters_string;
}
+inline std::string push_down_agg_to_string(const TPushAggOp::type& op) {
+ if (op == TPushAggOp::MINMAX) {
+ return "MINMAX";
+ } else if (op == TPushAggOp::COUNT) {
+ return "COUNT";
+ } else if (op == TPushAggOp::MIX) {
+ return "MIX";
+ } else {
+ return "NONE";
+ }
+}
+
static std::string tablets_id_to_string(
const std::vector<std::unique_ptr<TPaloScanRange>>& scan_ranges) {
if (scan_ranges.empty()) {
@@ -160,57 +173,64 @@ Status NewOlapScanNode::_process_conjuncts() {
}
Status NewOlapScanNode::_build_key_ranges_and_filters() {
- const std::vector<std::string>& column_names =
_olap_scan_node.key_column_name;
- const std::vector<TPrimitiveType::type>& column_types =
_olap_scan_node.key_column_type;
- DCHECK(column_types.size() == column_names.size());
-
- // 1. construct scan key except last olap engine short key
- _scan_keys.set_is_convertible(limit() == -1);
-
- // we use `exact_range` to identify a key range is an exact range or not
when we convert
- // it to `_scan_keys`. If `exact_range` is true, we can just discard it
from `_olap_filters`.
- bool exact_range = true;
- bool eos = false;
- for (int column_index = 0;
- column_index < column_names.size() && !_scan_keys.has_range_value()
&& !eos;
- ++column_index) {
- auto iter = _colname_to_value_range.find(column_names[column_index]);
- if (_colname_to_value_range.end() == iter) {
- break;
- }
+ if (!_olap_scan_node.__isset.push_down_agg_type_opt ||
+ _olap_scan_node.push_down_agg_type_opt == TPushAggOp::NONE) {
+ const std::vector<std::string>& column_names =
_olap_scan_node.key_column_name;
+ const std::vector<TPrimitiveType::type>& column_types =
_olap_scan_node.key_column_type;
+ DCHECK(column_types.size() == column_names.size());
+
+ // 1. construct scan key except last olap engine short key
+ _scan_keys.set_is_convertible(limit() == -1);
+
+ // we use `exact_range` to identify a key range is an exact range or
not when we convert
+ // it to `_scan_keys`. If `exact_range` is true, we can just discard
it from `_olap_filters`.
+ bool exact_range = true;
+ bool eos = false;
+ for (int column_index = 0;
+ column_index < column_names.size() &&
!_scan_keys.has_range_value() && !eos;
+ ++column_index) {
+ auto iter =
_colname_to_value_range.find(column_names[column_index]);
+ if (_colname_to_value_range.end() == iter) {
+ break;
+ }
- RETURN_IF_ERROR(std::visit(
- [&](auto&& range) {
- // make a copy or range and pass to extend_scan_key, keep
the range unchanged
- // because extend_scan_key method may change the first
parameter.
- // but the original range may be converted to olap
filters, if it's not a exact_range.
- auto temp_range = range;
- if (range.get_fixed_value_size() <=
_max_pushdown_conditions_per_column) {
- RETURN_IF_ERROR(_scan_keys.extend_scan_key(temp_range,
_max_scan_key_num,
-
&exact_range, &eos));
- if (exact_range) {
- _colname_to_value_range.erase(iter->first);
+ RETURN_IF_ERROR(std::visit(
+ [&](auto&& range) {
+ // make a copy of range and pass to extend_scan_key,
keep the range unchanged
+ // because extend_scan_key method may change the first
parameter.
+ // but the original range may be converted to olap
filters, if it's not a exact_range.
+ auto temp_range = range;
+ if (range.get_fixed_value_size() <=
_max_pushdown_conditions_per_column) {
+ RETURN_IF_ERROR(_scan_keys.extend_scan_key(
+ temp_range, _max_scan_key_num,
&exact_range, &eos));
+ if (exact_range) {
+ _colname_to_value_range.erase(iter->first);
+ }
}
- }
- return Status::OK();
- },
- iter->second));
- }
- _eos |= eos;
+ return Status::OK();
+ },
+ iter->second));
+ }
+ _eos |= eos;
- for (auto& iter : _colname_to_value_range) {
- std::vector<TCondition> filters;
- std::visit([&](auto&& range) { range.to_olap_filter(filters); },
iter.second);
+ for (auto& iter : _colname_to_value_range) {
+ std::vector<TCondition> filters;
+ std::visit([&](auto&& range) { range.to_olap_filter(filters); },
iter.second);
- for (const auto& filter : filters) {
- _olap_filters.push_back(filter);
+ for (const auto& filter : filters) {
+ _olap_filters.push_back(filter);
+ }
}
- }
- // Append value ranges in "_not_in_value_ranges"
- for (auto& range : _not_in_value_ranges) {
- std::visit([&](auto&& the_range) {
the_range.to_in_condition(_olap_filters, false); },
- range);
+ // Append value ranges in "_not_in_value_ranges"
+ for (auto& range : _not_in_value_ranges) {
+ std::visit([&](auto&& the_range) {
the_range.to_in_condition(_olap_filters, false); },
+ range);
+ }
+ } else {
+ _runtime_profile->add_info_string(
+ "PushDownAggregate",
+
push_down_agg_to_string(_olap_scan_node.push_down_agg_type_opt));
}
if (_state->enable_profile()) {
diff --git a/be/test/exec/olap_common_test.cpp
b/be/test/exec/olap_common_test.cpp
index 7ed520e84c..adf152bf0f 100644
--- a/be/test/exec/olap_common_test.cpp
+++ b/be/test/exec/olap_common_test.cpp
@@ -630,9 +630,9 @@ TEST_F(OlapScanKeysTest, EachtypeTest) {
EXPECT_EQ(exact_range, true);
scan_keys.get_key_range(&key_range);
// contain null, [-128, 127]
- EXPECT_EQ(key_range.size(), 1);
- EXPECT_EQ(OlapScanKeys::to_print_key(key_range[0]->begin_scan_range),
"null(-128)");
- EXPECT_EQ(OlapScanKeys::to_print_key(key_range[0]->end_scan_range),
"127");
+ EXPECT_EQ(key_range.size(), 257);
+ EXPECT_EQ(OlapScanKeys::to_print_key(key_range[1]->begin_scan_range),
"-128");
+ EXPECT_EQ(OlapScanKeys::to_print_key(key_range[256]->end_scan_range),
"127");
EXPECT_TRUE(range.add_range(FILTER_LESS, 50).ok());
scan_keys.clear();
@@ -655,9 +655,9 @@ TEST_F(OlapScanKeysTest, EachtypeTest) {
EXPECT_TRUE(scan_keys.extend_scan_key(range, max_scan_key,
&exact_range, &eos).ok());
EXPECT_EQ(exact_range, true);
scan_keys.get_key_range(&key_range);
- EXPECT_EQ(key_range.size(), 1);
- EXPECT_EQ(OlapScanKeys::to_print_key(key_range[0]->begin_scan_range),
"null(-32768)");
- EXPECT_EQ(OlapScanKeys::to_print_key(key_range[0]->end_scan_range),
"32767");
+ EXPECT_EQ(key_range.size(), 49);
+ EXPECT_EQ(OlapScanKeys::to_print_key(key_range[1]->begin_scan_range),
"-32768");
+
EXPECT_EQ(OlapScanKeys::to_print_key(key_range[max_scan_key]->end_scan_range),
"32767");
EXPECT_TRUE(range.add_range(FILTER_LARGER, 0).ok());
scan_keys.clear();
@@ -678,7 +678,7 @@ TEST_F(OlapScanKeysTest, EachtypeTest) {
scan_keys.get_key_range(&key_range);
EXPECT_EQ(key_range.size(), max_scan_key);
- EXPECT_EQ(OlapScanKeys::to_print_key(key_range[0]->begin_scan_range),
"1");
+ EXPECT_EQ(OlapScanKeys::to_print_key(key_range[0]->begin_scan_range),
"2");
EXPECT_EQ(OlapScanKeys::to_print_key(key_range[max_scan_key -
1]->end_scan_range), "32765");
}
}
diff --git
a/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_all_functions.out
b/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_all_functions.out
index 41a9dda202..a66a5bc843 100644
---
a/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_all_functions.out
+++
b/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_all_functions.out
@@ -223,5 +223,19 @@ beijing chengdu shanghai
2
-- !select44 --
-6 3975 2003 33035710 25819.948 78965.368
4449.5830001831055
+6 3975 2003 33035710 25819.948000000 78965.368
4449.5830001831055
+
+-- !select45 --
+1 10
+2 8
+2 441
+3 10
+5 29
+6 101
+
+-- !select46 --
+5 29
+
+-- !select47 --
+6
diff --git
a/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_all_functions.groovy
b/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_all_functions.groovy
index 29efba378f..bfdaf70d67 100644
---
a/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_all_functions.groovy
+++
b/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_all_functions.groovy
@@ -469,5 +469,11 @@ suite("test_aggregate_all_functions") {
sql "DROP TABLE IF EXISTS ${tableName_10}"
- qt_select44 """ select sum(distinct k1), sum(distinct k2), sum(distinct
k3), sum(distinct cast(k4 as largeint)), sum(distinct k5), sum(distinct k8),
sum(distinct k9) from test_query_db.test """
+ qt_select44 """select sum(distinct k1), sum(distinct k2), sum(distinct
k3), sum(distinct cast(k4 as largeint)), sum(distinct k5), sum(distinct k8),
sum(distinct k9) from test_query_db.test """
+
+ qt_select45 """select * from ${tableName_12} order by id,level"""
+
+ qt_select46 """select * from ${tableName_12} where id>=5 and id <=5 and
level >10 order by id,level;"""
+
+ qt_select47 """select count(*) from ${tableName_12}"""
}
diff --git a/run-be-ut.sh b/run-be-ut.sh
index fdbd59fd01..f66d65a64e 100755
--- a/run-be-ut.sh
+++ b/run-be-ut.sh
@@ -72,7 +72,7 @@ fi
eval set -- "${OPTS}"
-PARALLEL="$(($(nproc) / 2 + 1))"
+PARALLEL="$(($(nproc) / 5 + 1))"
CLEAN=0
RUN=0
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]