This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new 88926d2b7ee [fix](orc) check all the cases before build_search_argument (#44615) (#44802) 88926d2b7ee is described below commit 88926d2b7eecca61627ac8fbdf06f226f41b9fcf Author: Mingyu Chen (Rayner) <morning...@163.com> AuthorDate: Sat Nov 30 09:17:45 2024 +0800 [fix](orc) check all the cases before build_search_argument (#44615) (#44802) cherry-pick #44615 Co-authored-by: Socrates <suyit...@selectdb.com> --- be/src/vec/exec/format/orc/vorc_reader.cpp | 176 ++++++++++++++--------------- be/src/vec/exec/format/orc/vorc_reader.h | 18 ++- be/test/vec/exec/orc_reader_test.cpp | 57 +++++----- 3 files changed, 131 insertions(+), 120 deletions(-) diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index c649ef68617..df8ec52efc2 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -34,6 +34,7 @@ #include <memory> #include <ostream> #include <tuple> +#include <utility> #include "cctz/civil_time.h" #include "cctz/time_zone.h" @@ -567,12 +568,14 @@ std::tuple<bool, orc::Literal> convert_to_orc_literal(const orc::Type* type, std::tuple<bool, orc::Literal, orc::PredicateDataType> OrcReader::_make_orc_literal( const VSlotRef* slot_ref, const VLiteral* literal) { + DCHECK(_col_name_to_file_col_name_low_case.contains(slot_ref->expr_name())); auto file_col_name_low_case = _col_name_to_file_col_name_low_case[slot_ref->expr_name()]; if (!_type_map.contains(file_col_name_low_case)) { // TODO: this is for acid table LOG(WARNING) << "Column " << slot_ref->expr_name() << " not found in _type_map"; return std::make_tuple(false, orc::Literal(false), orc::PredicateDataType::LONG); } + DCHECK(_type_map.contains(file_col_name_low_case)); const auto* orc_type = _type_map[file_col_name_low_case]; if (!TYPEKIND_TO_PREDICATE_TYPE.contains(orc_type->getKind())) { LOG(WARNING) << "Unsupported Push Down Orc Type [TypeKind=" << orc_type->getKind() << "]"; @@ -624,15 +627,37 @@ std::tuple<bool, orc::Literal, orc::PredicateDataType> OrcReader::_make_orc_lite } } -// check if the slot of expr can be pushed down to orc reader +// check if the slot of expr can be pushed down to orc reader and make orc predicate type bool OrcReader::_check_slot_can_push_down(const VExprSPtr& expr) { if (!expr->children()[0]->is_slot_ref()) { return false; } const auto* slot_ref = static_cast<const VSlotRef*>(expr->children()[0].get()); // check if the slot exists in orc file and not partition column - return _col_name_to_file_col_name.contains(slot_ref->expr_name()) && - !_lazy_read_ctx.predicate_partition_columns.contains(slot_ref->expr_name()); + if (!_col_name_to_file_col_name.contains(slot_ref->expr_name()) || + _lazy_read_ctx.predicate_partition_columns.contains(slot_ref->expr_name())) { + return false; + } + auto [valid, _, predicate_type] = _make_orc_literal(slot_ref, nullptr); + if (valid) { + _vslot_ref_to_orc_predicate_data_type[slot_ref] = predicate_type; + } + return valid; +} + +// check if the literal of expr can be pushed down to orc reader and make orc literal +bool OrcReader::_check_literal_can_push_down(const VExprSPtr& expr, uint16_t child_id) { + if (!expr->children()[child_id]->is_literal()) { + return false; + } + // the slot has been checked in _check_slot_can_push_down before calling this function + const auto* slot_ref = static_cast<const VSlotRef*>(expr->children()[0].get()); + const auto* literal = static_cast<const VLiteral*>(expr->children()[child_id].get()); + auto [valid, orc_literal, _] = _make_orc_literal(slot_ref, literal); + if (valid) { + _vliteral_to_orc_literal.insert(std::make_pair(literal, orc_literal)); + } + return valid; } // check if there are rest children of expr can be pushed down to orc reader @@ -642,7 +667,7 @@ bool OrcReader::_check_rest_children_can_push_down(const VExprSPtr& expr) { } for (size_t i = 1; i < expr->children().size(); ++i) { - if (!expr->children()[i]->is_literal()) { + if (!_check_literal_can_push_down(expr, i)) { return false; } } @@ -651,7 +676,10 @@ bool OrcReader::_check_rest_children_can_push_down(const VExprSPtr& expr) { // check if the expr can be pushed down to orc reader bool OrcReader::_check_expr_can_push_down(const VExprSPtr& expr) { - DCHECK(expr != nullptr); + if (expr == nullptr) { + return false; + } + switch (expr->op()) { case TExprOpcode::COMPOUND_AND: // at least one child can be pushed down @@ -693,198 +721,167 @@ bool OrcReader::_check_expr_can_push_down(const VExprSPtr& expr) { } } -bool OrcReader::_build_less_than(const VExprSPtr& expr, +void OrcReader::_build_less_than(const VExprSPtr& expr, std::unique_ptr<orc::SearchArgumentBuilder>& builder) { DCHECK(expr->children().size() == 2); DCHECK(expr->children()[0]->is_slot_ref()); DCHECK(expr->children()[1]->is_literal()); const auto* slot_ref = static_cast<const VSlotRef*>(expr->children()[0].get()); const auto* literal = static_cast<const VLiteral*>(expr->children()[1].get()); - auto [valid, orc_literal, predicate_type] = _make_orc_literal(slot_ref, literal); - if (!valid) { - return false; - } + DCHECK(_vslot_ref_to_orc_predicate_data_type.contains(slot_ref)); + auto predicate_type = _vslot_ref_to_orc_predicate_data_type[slot_ref]; + DCHECK(_vliteral_to_orc_literal.contains(literal)); + auto orc_literal = _vliteral_to_orc_literal.find(literal)->second; builder->lessThan(slot_ref->expr_name(), predicate_type, orc_literal); - return true; } -bool OrcReader::_build_less_than_equals(const VExprSPtr& expr, +void OrcReader::_build_less_than_equals(const VExprSPtr& expr, std::unique_ptr<orc::SearchArgumentBuilder>& builder) { DCHECK(expr->children().size() == 2); DCHECK(expr->children()[0]->is_slot_ref()); DCHECK(expr->children()[1]->is_literal()); const auto* slot_ref = static_cast<const VSlotRef*>(expr->children()[0].get()); const auto* literal = static_cast<const VLiteral*>(expr->children()[1].get()); - auto [valid, orc_literal, predicate_type] = _make_orc_literal(slot_ref, literal); - if (!valid) { - return false; - } + DCHECK(_vslot_ref_to_orc_predicate_data_type.contains(slot_ref)); + auto predicate_type = _vslot_ref_to_orc_predicate_data_type[slot_ref]; + DCHECK(_vliteral_to_orc_literal.contains(literal)); + auto orc_literal = _vliteral_to_orc_literal.find(literal)->second; builder->lessThanEquals(slot_ref->expr_name(), predicate_type, orc_literal); - return true; } -bool OrcReader::_build_equals(const VExprSPtr& expr, +void OrcReader::_build_equals(const VExprSPtr& expr, std::unique_ptr<orc::SearchArgumentBuilder>& builder) { DCHECK(expr->children().size() == 2); DCHECK(expr->children()[0]->is_slot_ref()); DCHECK(expr->children()[1]->is_literal()); const auto* slot_ref = static_cast<const VSlotRef*>(expr->children()[0].get()); const auto* literal = static_cast<const VLiteral*>(expr->children()[1].get()); - auto [valid, orc_literal, predicate_type] = _make_orc_literal(slot_ref, literal); - if (!valid) { - return false; - } + DCHECK(_vslot_ref_to_orc_predicate_data_type.contains(slot_ref)); + auto predicate_type = _vslot_ref_to_orc_predicate_data_type[slot_ref]; + DCHECK(_vliteral_to_orc_literal.contains(literal)); + auto orc_literal = _vliteral_to_orc_literal.find(literal)->second; builder->equals(slot_ref->expr_name(), predicate_type, orc_literal); - return true; } -bool OrcReader::_build_filter_in(const VExprSPtr& expr, +void OrcReader::_build_filter_in(const VExprSPtr& expr, std::unique_ptr<orc::SearchArgumentBuilder>& builder) { DCHECK(expr->children().size() >= 2); DCHECK(expr->children()[0]->is_slot_ref()); const auto* slot_ref = static_cast<const VSlotRef*>(expr->children()[0].get()); std::vector<orc::Literal> literals; - orc::PredicateDataType predicate_type = orc::PredicateDataType::LONG; + DCHECK(_vslot_ref_to_orc_predicate_data_type.contains(slot_ref)); + orc::PredicateDataType predicate_type = _vslot_ref_to_orc_predicate_data_type[slot_ref]; for (size_t i = 1; i < expr->children().size(); ++i) { DCHECK(expr->children()[i]->is_literal()); const auto* literal = static_cast<const VLiteral*>(expr->children()[i].get()); - auto [valid, orc_literal, type] = _make_orc_literal(slot_ref, literal); - if (!valid) { - return false; - } + DCHECK(_vliteral_to_orc_literal.contains(literal)); + auto orc_literal = _vliteral_to_orc_literal.find(literal)->second; literals.emplace_back(orc_literal); - predicate_type = type; } DCHECK(!literals.empty()); builder->in(slot_ref->expr_name(), predicate_type, literals); - return true; } -bool OrcReader::_build_is_null(const VExprSPtr& expr, +void OrcReader::_build_is_null(const VExprSPtr& expr, std::unique_ptr<orc::SearchArgumentBuilder>& builder) { DCHECK(expr->children().size() == 1); DCHECK(expr->children()[0]->is_slot_ref()); const auto* slot_ref = static_cast<const VSlotRef*>(expr->children()[0].get()); - auto [valid, _, predicate_type] = _make_orc_literal(slot_ref, nullptr); + DCHECK(_vslot_ref_to_orc_predicate_data_type.contains(slot_ref)); + auto predicate_type = _vslot_ref_to_orc_predicate_data_type[slot_ref]; builder->isNull(slot_ref->expr_name(), predicate_type); - return true; } bool OrcReader::_build_search_argument(const VExprSPtr& expr, std::unique_ptr<orc::SearchArgumentBuilder>& builder) { - if (expr == nullptr) { - return false; - } - - // if expr can not be pushed down, skip it and continue to next expr + // OPTIMIZE: check expr only once if (!_check_expr_can_push_down(expr)) { return false; } - switch (expr->op()) { case TExprOpcode::COMPOUND_AND: { - bool at_least_one_can_push_down = false; builder->startAnd(); + bool at_least_one_can_push_down = false; for (const auto& child : expr->children()) { if (_build_search_argument(child, builder)) { at_least_one_can_push_down = true; } } - if (!at_least_one_can_push_down) { - // if all exprs can not be pushed down, builder->end() will throw exception - return false; - } + DCHECK(at_least_one_can_push_down); builder->end(); break; } - case TExprOpcode::COMPOUND_OR: + case TExprOpcode::COMPOUND_OR: { builder->startOr(); + bool all_can_push_down = true; for (const auto& child : expr->children()) { if (!_build_search_argument(child, builder)) { - return false; + all_can_push_down = false; } } + DCHECK(all_can_push_down); builder->end(); break; - case TExprOpcode::COMPOUND_NOT: - builder->startNot(); + } + case TExprOpcode::COMPOUND_NOT: { DCHECK_EQ(expr->children().size(), 1); - if (!_build_search_argument(expr->children()[0], builder)) { - return false; - } + builder->startNot(); + auto res = _build_search_argument(expr->children()[0], builder); + DCHECK(res); builder->end(); break; + } case TExprOpcode::GE: builder->startNot(); - if (!_build_less_than(expr, builder)) { - return false; - } + _build_less_than(expr, builder); builder->end(); break; case TExprOpcode::GT: builder->startNot(); - if (!_build_less_than_equals(expr, builder)) { - return false; - } + _build_less_than_equals(expr, builder); builder->end(); break; case TExprOpcode::LE: - if (!_build_less_than_equals(expr, builder)) { - return false; - } + _build_less_than_equals(expr, builder); break; case TExprOpcode::LT: - if (!_build_less_than(expr, builder)) { - return false; - } + _build_less_than(expr, builder); break; case TExprOpcode::EQ: - if (!_build_equals(expr, builder)) { - return false; - } + _build_equals(expr, builder); break; case TExprOpcode::NE: builder->startNot(); - if (!_build_equals(expr, builder)) { - return false; - } + _build_equals(expr, builder); builder->end(); break; case TExprOpcode::FILTER_IN: - if (!_build_filter_in(expr, builder)) { - return false; - } + _build_filter_in(expr, builder); break; case TExprOpcode::FILTER_NOT_IN: builder->startNot(); - if (!_build_filter_in(expr, builder)) { - return false; - } + _build_filter_in(expr, builder); builder->end(); break; // is null and is not null is represented as function call - case TExprOpcode::INVALID_OPCODE: { + case TExprOpcode::INVALID_OPCODE: DCHECK(expr->node_type() == TExprNodeType::FUNCTION_CALL); if (expr->fn().name.function_name == "is_null_pred") { - if (!_build_is_null(expr, builder)) { - return false; - } + _build_is_null(expr, builder); } else if (expr->fn().name.function_name == "is_not_null_pred") { builder->startNot(); - if (!_build_is_null(expr, builder)) { - return false; - } + _build_is_null(expr, builder); builder->end(); } else { + // should not reach here, because _check_expr_can_push_down has already checked __builtin_unreachable(); } break; - } - default: { + + default: // should not reach here, because _check_expr_can_push_down has already checked __builtin_unreachable(); } - } return true; } @@ -898,6 +895,8 @@ bool OrcReader::_init_search_argument(const VExprContextSPtrs& conjuncts) { bool at_least_one_can_push_down = false; builder->startAnd(); for (const auto& expr_ctx : conjuncts) { + _vslot_ref_to_orc_predicate_data_type.clear(); + _vliteral_to_orc_literal.clear(); if (_build_search_argument(expr_ctx->root(), builder)) { at_least_one_can_push_down = true; } @@ -943,7 +942,7 @@ Status OrcReader::set_fill_columns( visit_slot(child.get()); } } else if (VInPredicate* in_predicate = typeid_cast<VInPredicate*>(filter_impl)) { - if (in_predicate->children().size() > 0) { + if (!in_predicate->children().empty()) { visit_slot(in_predicate->children()[0].get()); } } else { @@ -1179,7 +1178,8 @@ Status OrcReader::_fill_partition_columns( if (num_deserialized != rows) { return Status::InternalError( "Failed to fill partition column: {}={} ." - "Number of rows expected to be written : {}, number of rows actually written : " + "Number of rows expected to be written : {}, number of rows actually " + "written : " "{}", slot_desc->col_name(), value, num_deserialized, rows); } diff --git a/be/src/vec/exec/format/orc/vorc_reader.h b/be/src/vec/exec/format/orc/vorc_reader.h index 8c73957e79e..0dd19077bcf 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.h +++ b/be/src/vec/exec/format/orc/vorc_reader.h @@ -41,6 +41,7 @@ #include "orc/Reader.hh" #include "orc/Type.hh" #include "orc/Vector.hh" +#include "orc/sargs/Literal.hh" #include "runtime/types.h" #include "util/runtime_profile.h" #include "vec/aggregate_functions/aggregate_function.h" @@ -288,23 +289,27 @@ private: bool* is_hive1_orc); static bool _check_acid_schema(const orc::Type& type); static const orc::Type& _remove_acid(const orc::Type& type); + + // functions for building search argument until _init_search_argument std::tuple<bool, orc::Literal, orc::PredicateDataType> _make_orc_literal( const VSlotRef* slot_ref, const VLiteral* literal); bool _check_slot_can_push_down(const VExprSPtr& expr); + bool _check_literal_can_push_down(const VExprSPtr& expr, uint16_t child_id); bool _check_rest_children_can_push_down(const VExprSPtr& expr); bool _check_expr_can_push_down(const VExprSPtr& expr); - bool _build_less_than(const VExprSPtr& expr, + void _build_less_than(const VExprSPtr& expr, std::unique_ptr<orc::SearchArgumentBuilder>& builder); - bool _build_less_than_equals(const VExprSPtr& expr, + void _build_less_than_equals(const VExprSPtr& expr, std::unique_ptr<orc::SearchArgumentBuilder>& builder); - bool _build_equals(const VExprSPtr& expr, std::unique_ptr<orc::SearchArgumentBuilder>& builder); - bool _build_filter_in(const VExprSPtr& expr, + void _build_equals(const VExprSPtr& expr, std::unique_ptr<orc::SearchArgumentBuilder>& builder); + void _build_filter_in(const VExprSPtr& expr, std::unique_ptr<orc::SearchArgumentBuilder>& builder); - bool _build_is_null(const VExprSPtr& expr, + void _build_is_null(const VExprSPtr& expr, std::unique_ptr<orc::SearchArgumentBuilder>& builder); bool _build_search_argument(const VExprSPtr& expr, std::unique_ptr<orc::SearchArgumentBuilder>& builder); bool _init_search_argument(const VExprContextSPtrs& conjuncts); + void _init_bloom_filter( std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range); void _init_system_properties(); @@ -644,6 +649,9 @@ private: std::unordered_map<std::string, std::string> _table_col_to_file_col; //support iceberg position delete . std::vector<int64_t>* _position_delete_ordered_rowids = nullptr; + std::unordered_map<const VSlotRef*, orc::PredicateDataType> + _vslot_ref_to_orc_predicate_data_type; + std::unordered_map<const VLiteral*, orc::Literal> _vliteral_to_orc_literal; }; class ORCFileInputStream : public orc::InputStream, public ProfileCollector { diff --git a/be/test/vec/exec/orc_reader_test.cpp b/be/test/vec/exec/orc_reader_test.cpp index ec5bd9b519d..f2bba434368 100644 --- a/be/test/vec/exec/orc_reader_test.cpp +++ b/be/test/vec/exec/orc_reader_test.cpp @@ -94,33 +94,34 @@ private: TEST_F(OrcReaderTest, test_build_search_argument) { ExecEnv::GetInstance()->set_orc_memory_pool(new ORCMemoryPool()); - std:: - vector<std::string> - exprs = - { - // select count(o_orderkey) from tpch1_orc.orders where o_orderkey < 100 or o_orderkey > 5999900 or o_orderkey in (1000000, 2000000, 3000000); - R"|({"1":{"lst":["rec",13,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]} [...] - // select count(o_orderkey) from tpch1_orc.orders where o_orderkey is null or (o_orderkey between 100 and 1000 and o_orderkey not in (200, 300, 400)); - R"|({"1":{"lst":["rec",16,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"4":{"i32":1},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"is_null_pred"}}},"2":{"i32":0},"3":{"lst":["rec",1,{"1":{"lst":["rec",1,{"1":{"i32" [...] - // select count(o_orderkey) from tpch1_orc.orders where o_orderkey is null or (o_orderkey between 1000000 and 1200000 and o_orderkey != 1100000); - R"|({"1":{"lst":["rec",14,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"4":{"i32":1},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"is_null_pred"}}},"2":{"i32":0},"3":{"lst":["rec",1,{"1":{"lst":["rec",1,{"1":{"i32" [...] - // SELECT count(o_orderkey) FROM tpch1_orc.orders WHERE o_orderkey IN (1000000, 2000000, 3000000) OR (o_orderdate >= '1994-01-01' AND o_orderdate <= '1994-12-31'); - R"|({"1":{"lst":["rec",13,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":11},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":5},"4":{"i32":4},"11":{"rec":{"1":{"tf":0}}},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0}, [...] - // select count(o_orderkey) from tpch1_orc.orders where o_orderkey < 2 or (o_comment like '%delayed%' and o_orderpriority = '1-URGENT'); - R"|({"1":{"lst":["rec",11,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":11},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"lt"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{" [...] - // select count(o_orderkey) from tpch1_orc.orders where o_orderkey < 1 + 1; - R"|({"1":{"lst":["rec",3,{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":11},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"lt"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec", [...] - // SELECT count(o_orderkey) FROM tpch1_orc.orders WHERE o_comment LIKE '%delayed%' OR o_orderpriority = '1-URGENT'; - R"|({"1":{"lst":["rec",7,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"like"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{ [...] - // select count(o_orderkey) from tpch1_orc.orders where o_orderkey between 1 and 100 or random() > 0.5; - R"|({"1":{"lst":["rec",11,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":2},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]} [...] - // select count(o_orderkey) from tpch1_orc.orders where lower(o_orderpriority) = '1-urgent'; - R"|({"1":{"lst":["rec",4,{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":9},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"eq"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":65533}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":65533}}}}]},"3":{"i64": [...] - // select count(o_orderkey) from tpch1_orc.orders where o_orderkey * 2 < 60; - R"|({"1":{"lst":["rec",5,{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":11},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"lt"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec", [...] - // select count(o_orderkey) from tpch1_orc.orders where o_orderdate is not null; - R"|({"1":{"lst":["rec",4,{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":9},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"eq"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":27},"3":{"i32":18},"4":{"i32":0}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":27},"3":{"i32":18},"4":{"i3 [...] - }; + std::vector<std::string> + exprs = + { + // select count(o_orderkey) from tpch1_orc.orders where o_orderkey < 100 or o_orderkey > 5999900 or o_orderkey in (1000000, 2000000, 3000000); + R"|({"1":{"lst":["rec",13,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i [...] + // select count(o_orderkey) from tpch1_orc.orders where o_orderkey is null or (o_orderkey between 100 and 1000 and o_orderkey not in (200, 300, 400)); + R"|({"1":{"lst":["rec",16,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"4":{"i32":1},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"is_null_pred"}}},"2":{"i32":0},"3":{"lst":["rec",1,{"1":{"lst":["rec",1,{"1":{"i32":0},"2": [...] + // select count(o_orderkey) from tpch1_orc.orders where o_orderkey is null or (o_orderkey between 1000000 and 1200000 and o_orderkey != 1100000); + R"|({"1":{"lst":["rec",14,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"4":{"i32":1},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"is_null_pred"}}},"2":{"i32":0},"3":{"lst":["rec",1,{"1":{"lst":["rec",1,{"1":{"i32":0},"2": [...] + // SELECT count(o_orderkey) FROM tpch1_orc.orders WHERE o_orderkey IN (1000000, 2000000, 3000000) OR (o_orderdate >= '1994-01-01' AND o_orderdate <= '1994-12-31'); + R"|({"1":{"lst":["rec",13,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":11},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":5},"4":{"i32":4},"11":{"rec":{"1":{"tf":0}}},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"re [...] + // select count(o_orderkey) from tpch1_orc.orders where o_orderkey < 2 or (o_comment like '%delayed%' and o_orderpriority = '1-URGENT'); + R"|({"1":{"lst":["rec",11,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":11},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"lt"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0}, [...] + // select count(o_orderkey) from tpch1_orc.orders where o_orderkey < 2 or (o_totalprice < 173665.47 and o_custkey >= 36901); + R"|({"1":{"lst":["rec",11,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":11},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"lt"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0}, [...] + // select count(o_orderkey) from tpch1_orc.orders where o_orderkey < 1 + 1; + R"|({"1":{"lst":["rec",3,{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":11},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"lt"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{ [...] + // SELECT count(o_orderkey) FROM tpch1_orc.orders WHERE o_comment LIKE '%delayed%' OR o_orderpriority = '1-URGENT'; + R"|({"1":{"lst":["rec",7,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"like"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{" [...] + // select count(o_orderkey) from tpch1_orc.orders where o_orderkey between 1 and 100 or random() > 0.5; + R"|({"1":{"lst":["rec",11,{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":3},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":6},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":2},"4":{"i32":2},"20":{"i32":-1},"29":{"tf":1}},{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i [...] + // select count(o_orderkey) from tpch1_orc.orders where lower(o_orderpriority) = '1-urgent'; + R"|({"1":{"lst":["rec",4,{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":9},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"eq"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":65533}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":15},"2":{"i32":65533}}}}]},"3":{"i64":-1}}]}," [...] + // select count(o_orderkey) from tpch1_orc.orders where o_orderkey * 2 < 60; + R"|({"1":{"lst":["rec",5,{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":11},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"lt"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]},"3":{"i64":-1}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{ [...] + // select count(o_orderkey) from tpch1_orc.orders where o_orderdate is not null; + R"|({"1":{"lst":["rec",4,{"1":{"i32":2},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":2}}}}]},"3":{"i64":-1}}},"3":{"i32":9},"4":{"i32":2},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"eq"}}},"2":{"i32":0},"3":{"lst":["rec",2,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":27},"3":{"i32":18},"4":{"i32":0}}}}]},"3":{"i64":-1}},{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":27},"3":{"i32":18},"4":{"i32":0}}}} [...] + }; std::vector<std::string> result_search_arguments = { "leaf-0 = (o_orderkey < 100), leaf-1 = (o_orderkey <= 5999900), leaf-2 " "= (o_orderkey " @@ -139,6 +140,8 @@ TEST_F(OrcReaderTest, test_build_search_argument) { "(or leaf-0 leaf-2))", "leaf-0 = (o_orderkey < 2), leaf-1 = (o_orderpriority = 1-URGENT), expr = (or leaf-0 " "leaf-1)", + "leaf-0 = (o_orderkey < 2), leaf-1 = (o_custkey < 36901), expr = (or leaf-0 (not " + "leaf-1))", "leaf-0 = (o_orderkey < 2), expr = leaf-0", CANNOT_PUSH_DOWN_ERROR, CANNOT_PUSH_DOWN_ERROR, --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org