This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 3e2461838a Revert "[improvement](scanner) Remove the predicate that is
always true for the segment (#25366) (#25427)" (#25440)
3e2461838a is described below
commit 3e2461838a25226e821552c555dcef63edf34b00
Author: Kang <[email protected]>
AuthorDate: Sat Oct 14 17:53:03 2023 +0800
Revert "[improvement](scanner) Remove the predicate that is always true for
the segment (#25366) (#25427)" (#25440)
---
be/src/common/config.cpp | 2 -
be/src/common/config.h | 3 -
be/src/olap/column_predicate.h | 4 --
be/src/olap/comparison_predicate.h | 25 --------
be/src/olap/rowset/segment_v2/column_reader.cpp | 26 ---------
be/src/olap/rowset/segment_v2/column_reader.h | 3 -
be/src/olap/rowset/segment_v2/segment.cpp | 20 +------
.../query_p0/test_select_with_predicate_prune.out | 25 --------
.../test_select_with_predicate_prune.groovy | 67 ----------------------
9 files changed, 1 insertion(+), 174 deletions(-)
diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index f2c726a33b..63ba1a1288 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1084,8 +1084,6 @@ DEFINE_mInt32(tablet_schema_cache_recycle_interval,
"86400");
DEFINE_Bool(exit_on_exception, "false")
-DEFINE_Bool(ignore_always_true_predicate_for_segment, "true");
-
// clang-format off
#ifdef BE_TEST
// test s3
diff --git a/be/src/common/config.h b/be/src/common/config.h
index 06f9a18fcd..7c4ab39a40 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1141,9 +1141,6 @@ DECLARE_mInt32(tablet_schema_cache_recycle_interval);
// Use `LOG(FATAL)` to replace `throw` when true
DECLARE_mBool(exit_on_exception);
-// Remove predicate that is always true for a segment.
-DECLARE_Bool(ignore_always_true_predicate_for_segment);
-
#ifdef BE_TEST
// test s3
DECLARE_String(test_s3_resource);
diff --git a/be/src/olap/column_predicate.h b/be/src/olap/column_predicate.h
index 05e84999a8..b98156f5fb 100644
--- a/be/src/olap/column_predicate.h
+++ b/be/src/olap/column_predicate.h
@@ -173,10 +173,6 @@ public:
return true;
}
- virtual bool is_always_true(const std::pair<WrapperField*, WrapperField*>&
statistic) const {
- return false;
- }
-
virtual bool evaluate_del(const std::pair<WrapperField*, WrapperField*>&
statistic) const {
return false;
}
diff --git a/be/src/olap/comparison_predicate.h
b/be/src/olap/comparison_predicate.h
index fddc554f51..04dfd5dc5c 100644
--- a/be/src/olap/comparison_predicate.h
+++ b/be/src/olap/comparison_predicate.h
@@ -158,8 +158,6 @@ public:
return _operator(*reinterpret_cast<const
T*>(statistic.ELE->cell_ptr()), _value); \
}
- using WarpperFieldType = std::conditional_t<Type == TYPE_DATE, uint24_t,
T>;
-
bool evaluate_and(const std::pair<WrapperField*, WrapperField*>&
statistic) const override {
if (statistic.first->is_null()) {
return true;
@@ -204,29 +202,6 @@ public:
}
}
- bool is_always_true(const std::pair<WrapperField*, WrapperField*>&
statistic) const override {
- if (statistic.first->is_null() || statistic.second->is_null()) {
- return false;
- }
-
- T tmp_min_value {};
- T tmp_max_value {};
- memcpy((char*)(&tmp_min_value), statistic.first->cell_ptr(),
sizeof(WarpperFieldType));
- memcpy((char*)(&tmp_max_value), statistic.second->cell_ptr(),
sizeof(WarpperFieldType));
-
- if constexpr (PT == PredicateType::LT) {
- return _value > tmp_max_value;
- } else if constexpr (PT == PredicateType::LE) {
- return _value >= tmp_max_value;
- } else if constexpr (PT == PredicateType::GT) {
- return _value < tmp_min_value;
- } else if constexpr (PT == PredicateType::GE) {
- return _value <= tmp_min_value;
- }
-
- return false;
- }
-
bool evaluate_del(const std::pair<WrapperField*, WrapperField*>&
statistic) const override {
if (statistic.first->is_null() || statistic.second->is_null()) {
return false;
diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp
b/be/src/olap/rowset/segment_v2/column_reader.cpp
index d9a074e290..b1b817f545 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@@ -30,7 +30,6 @@
#include "io/fs/file_reader.h"
#include "olap/block_column_predicate.h"
#include "olap/column_predicate.h"
-#include "olap/comparison_predicate.h"
#include "olap/decimal12.h"
#include "olap/inverted_index_parser.h"
#include "olap/iterators.h"
@@ -340,31 +339,6 @@ bool ColumnReader::match_condition(const
AndBlockColumnPredicate* col_predicates
col_predicates);
}
-bool ColumnReader::prune_predicates_by_zone_map(std::vector<ColumnPredicate*>&
predicates,
- const int column_id) const {
- if (_zone_map_index == nullptr) {
- return false;
- }
-
- FieldType type = _type_info->type();
- std::unique_ptr<WrapperField> min_value(WrapperField::create_by_type(type,
_meta_length));
- std::unique_ptr<WrapperField> max_value(WrapperField::create_by_type(type,
_meta_length));
- _parse_zone_map(*_segment_zone_map, min_value.get(), max_value.get());
-
- auto pruned = false;
- for (auto it = predicates.begin(); it != predicates.end();) {
- auto predicate = *it;
- if (predicate->column_id() == column_id &&
- predicate->is_always_true({min_value.get(), max_value.get()})) {
- pruned = true;
- it = predicates.erase(it);
- } else {
- ++it;
- }
- }
- return pruned;
-}
-
void ColumnReader::_parse_zone_map(const ZoneMapPB& zone_map, WrapperField*
min_value_container,
WrapperField* max_value_container) const {
// min value and max value are valid if has_not_null is true
diff --git a/be/src/olap/rowset/segment_v2/column_reader.h
b/be/src/olap/rowset/segment_v2/column_reader.h
index 7964555ade..174aabdefa 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.h
+++ b/be/src/olap/rowset/segment_v2/column_reader.h
@@ -162,9 +162,6 @@ public:
bool is_empty() const { return _num_rows == 0; }
- bool prune_predicates_by_zone_map(std::vector<ColumnPredicate*>&
predicates,
- const int column_id) const;
-
CompressionTypePB get_compression() const { return _meta_compression; }
uint64_t num_rows() const { return _num_rows; }
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp
b/be/src/olap/rowset/segment_v2/segment.cpp
index ddce80bcc3..153ed92517 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -147,6 +147,7 @@ Status Segment::new_iterator(SchemaSPtr schema, const
StorageReadOptions& read_o
return Status::OK();
}
}
+
if (read_options.use_topn_opt) {
auto query_ctx = read_options.runtime_state->get_query_ctx();
auto runtime_predicate =
query_ctx->get_runtime_predicate().get_predictate();
@@ -174,25 +175,6 @@ Status Segment::new_iterator(SchemaSPtr schema, const
StorageReadOptions& read_o
iter->reset(new SegmentIterator(this->shared_from_this(), schema));
}
- if (config::ignore_always_true_predicate_for_segment &&
- read_options.io_ctx.reader_type == ReaderType::READER_QUERY &&
- !read_options.column_predicates.empty()) {
- auto pruned_predicates = read_options.column_predicates;
- auto pruned = false;
- for (auto& it : _column_readers) {
- if (it.second->prune_predicates_by_zone_map(pruned_predicates,
it.first)) {
- pruned = true;
- }
- }
-
- if (pruned) {
- auto options_with_pruned_predicates = read_options;
- options_with_pruned_predicates.column_predicates =
pruned_predicates;
- LOG(INFO) << "column_predicates pruned from " <<
read_options.column_predicates.size()
- << " to " << pruned_predicates.size();
- return iter->get()->init(options_with_pruned_predicates);
- }
- }
return iter->get()->init(read_options);
}
diff --git a/regression-test/data/query_p0/test_select_with_predicate_prune.out
b/regression-test/data/query_p0/test_select_with_predicate_prune.out
deleted file mode 100644
index 2e1fad8749..0000000000
--- a/regression-test/data/query_p0/test_select_with_predicate_prune.out
+++ /dev/null
@@ -1,25 +0,0 @@
--- This file is automatically generated. You should know what you did if you
want to edit this
--- !select1 --
-1 jerry 2020-10-01
-2 tom 2020-10-02
-3 jack 2020-10-01
-4 tony 2020-10-02
-
--- !select2 --
-1 jerry 2020-10-01
-3 jack 2020-10-01
-
--- !select3 --
-
--- !select4 --
-1 jerry 2020-10-01
-2 tom 2020-10-02
-3 jack 2020-10-01
-4 tony 2020-10-02
-
--- !select5 --
-2 tom 2020-10-02
-4 tony 2020-10-02
-
--- !select6 --
-
diff --git
a/regression-test/suites/query_p0/test_select_with_predicate_prune.groovy
b/regression-test/suites/query_p0/test_select_with_predicate_prune.groovy
deleted file mode 100644
index 768e04b4c3..0000000000
--- a/regression-test/suites/query_p0/test_select_with_predicate_prune.groovy
+++ /dev/null
@@ -1,67 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-suite("test_select_with_predicate_prune") {
- sql """
- drop table if exists `test_select_with_predicate_prune`;
- """
- sql """
- CREATE TABLE IF NOT EXISTS `test_select_with_predicate_prune` (
- id int,
- name string,
- birthday date not null
- )
- duplicate key(`id`)
- AUTO PARTITION BY LIST (`birthday`)()
- DISTRIBUTED BY HASH(`id`) buckets 1
- PROPERTIES
- (
- "replication_allocation" = "tag.location.default: 1"
- );
- """
-
- sql """
- insert into test_select_with_predicate_prune values (1, 'jerry',
'2020-10-01'), (2, 'tom', '2020-10-02');
- """
- sql """
- insert into test_select_with_predicate_prune values (3, 'jack',
'2020-10-01'), (4, 'tony', '2020-10-02');
- """
-
- qt_select1 """
- select * from test_select_with_predicate_prune where birthday <
'2020-10-03' order by id;
- """
-
- qt_select2 """
- select * from test_select_with_predicate_prune where birthday <
'2020-10-02' order by id;
- """
-
- qt_select3 """
- select * from test_select_with_predicate_prune where birthday <
'2020-10-01' order by id;
- """
-
-
- qt_select4 """
- select * from test_select_with_predicate_prune where birthday >
'2020-09-30' order by id;
- """
-
- qt_select5 """
- select * from test_select_with_predicate_prune where birthday >
'2020-10-01' order by id;
- """
-
- qt_select6 """
- select * from test_select_with_predicate_prune where birthday >
'2020-10-02' order by id;
- """
-}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]