This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch branch-2.1-lakehouse in repository https://gitbox.apache.org/repos/asf/doris.git
commit 844c38d850dc857933d1d860b6124d62d121a49a Author: Socrates <[email protected]> AuthorDate: Mon Feb 10 10:03:54 2025 +0800 [enhance](runtime filter) implement partition pruning in runtime filter (#47025) This PR implements partition pruning through runtime filters. When executing a SQL query like: ```sql SELECT count(*) FROM int_partition_table WHERE partition_col = ( SELECT partition_col FROM int_partition_table GROUP BY partition_col HAVING count(*) > 0 ORDER BY partition_col DESC LIMIT 1 ) ``` During execution, the backend (BE) will receive a dynamic runtime filter condition `partition_col = xxx`. Since partition_col is a partitioning column, we can use its value to determine if the partition can be pruned. Additionally, this mechanism also supports filtering queries like: ```sql SELECT count(*) FROM int_partition_table WHERE func(partition_col) = xxx ``` If func cannot be evaluated at the frontend (FE), the frontend will not perform partition pruning. However, since the backend can compute func, this mechanism allows us to handle pruning scenarios that are not possible at the frontend, providing a more efficient pruning process on the backend side. 
--- be/src/vec/core/block.cpp | 4 +- be/src/vec/exec/scan/vfile_scanner.cpp | 167 ++++++++++++++++++++- be/src/vec/exec/scan/vfile_scanner.h | 12 +- .../scripts/create_preinstalled_scripts/run74.hql | 44 ++++++ .../partition_col=2025-01-01/000000_0 | Bin 0 -> 587 bytes .../partition_col=2025-01-02/000000_0 | Bin 0 -> 581 bytes .../partition_col=2025-01-03/000000_0 | Bin 0 -> 600 bytes .../partition_col=100.01/000000_0 | Bin 0 -> 587 bytes .../partition_col=200.02/000000_0 | Bin 0 -> 581 bytes .../partition_col=300.03/000000_0 | Bin 0 -> 600 bytes .../int_partition_table/partition_col=1/000000_0 | Bin 0 -> 587 bytes .../int_partition_table/partition_col=2/000000_0 | Bin 0 -> 581 bytes .../int_partition_table/partition_col=3/000000_0 | Bin 0 -> 600 bytes .../partition_col=A/000000_0 | Bin 0 -> 587 bytes .../partition_col=B/000000_0 | Bin 0 -> 581 bytes .../partition_col=C/000000_0 | Bin 0 -> 600 bytes .../java/org/apache/doris/qe/SessionVariable.java | 17 +++ gensrc/thrift/PaloInternalService.thrift | 2 + .../test_hive_runtime_filter_partition_pruning.out | Bin 0 -> 1101 bytes ...st_hive_runtime_filter_partition_pruning.groovy | 107 +++++++++++++ 20 files changed, 344 insertions(+), 9 deletions(-) diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp index 8d73cd12144..3750284f04f 100644 --- a/be/src/vec/core/block.cpp +++ b/be/src/vec/core/block.cpp @@ -148,7 +148,7 @@ void Block::initialize_index_by_name() { void Block::insert(size_t position, const ColumnWithTypeAndName& elem) { if (position > data.size()) { throw Exception(ErrorCode::INTERNAL_ERROR, - "invalid input position, position={}, data.size{}, names={}", position, + "invalid input position, position={}, data.size={}, names={}", position, data.size(), dump_names()); } @@ -165,7 +165,7 @@ void Block::insert(size_t position, const ColumnWithTypeAndName& elem) { void Block::insert(size_t position, ColumnWithTypeAndName&& elem) { if (position > data.size()) { throw 
Exception(ErrorCode::INTERNAL_ERROR, - "invalid input position, position={}, data.size{}, names={}", position, + "invalid input position, position={}, data.size={}, names={}", position, data.size(), dump_names()); } diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp b/be/src/vec/exec/scan/vfile_scanner.cpp index ce14f685127..6aaeed86cc3 100644 --- a/be/src/vec/exec/scan/vfile_scanner.cpp +++ b/be/src/vec/exec/scan/vfile_scanner.cpp @@ -20,23 +20,30 @@ #include <fmt/format.h> #include <gen_cpp/Exprs_types.h> #include <gen_cpp/Metrics_types.h> +#include <gen_cpp/Opcodes_types.h> #include <gen_cpp/PaloInternalService_types.h> #include <gen_cpp/PlanNodes_types.h> +#include <glog/logging.h> +#include <algorithm> #include <boost/iterator/iterator_facade.hpp> #include <iterator> #include <map> +#include <ranges> #include <tuple> +#include <unordered_map> #include <utility> #include "common/compiler_util.h" // IWYU pragma: keep #include "common/config.h" #include "common/logging.h" #include "common/object_pool.h" +#include "common/status.h" #include "io/cache/block/block_file_cache_profile.h" #include "runtime/descriptors.h" #include "runtime/runtime_state.h" #include "runtime/types.h" +#include "util/runtime_profile.h" #include "vec/aggregate_functions/aggregate_function.h" #include "vec/columns/column.h" #include "vec/columns/column_nullable.h" @@ -67,6 +74,7 @@ #include "vec/exec/scan/vscan_node.h" #include "vec/exprs/vexpr.h" #include "vec/exprs/vexpr_context.h" +#include "vec/exprs/vexpr_fwd.h" #include "vec/exprs/vslot_ref.h" #include "vec/functions/function.h" #include "vec/functions/function_string.h" @@ -161,6 +169,8 @@ Status VFileScanner::prepare( ADD_TIMER_WITH_LEVEL(_parent->_scanner_profile, "FileScannerPreFilterTimer", 1); _convert_to_output_block_timer = ADD_TIMER_WITH_LEVEL( _parent->_scanner_profile, "FileScannerConvertOuputBlockTime", 1); + _runtime_filter_partition_prune_timer = ADD_TIMER_WITH_LEVEL( + _parent->_scanner_profile, 
"FileScannerRuntimeFilterPartitionPruningTime", 1); _empty_file_counter = ADD_COUNTER_WITH_LEVEL(_parent->_scanner_profile, "EmptyFileNum", TUnit::UNIT, 1); _not_found_file_counter = ADD_COUNTER_WITH_LEVEL(_parent->_scanner_profile, @@ -169,6 +179,9 @@ Status VFileScanner::prepare( ADD_COUNTER_WITH_LEVEL(_parent->_scanner_profile, "FileNumber", TUnit::UNIT, 1); _has_fully_rf_file_counter = ADD_COUNTER_WITH_LEVEL(_parent->_scanner_profile, "HasFullyRfFileNumber", TUnit::UNIT, 1); + _runtime_filter_partition_pruned_range_counter = + ADD_COUNTER_WITH_LEVEL(_parent->_scanner_profile, + "RuntimeFilterPartitionPrunedRangeNum", TUnit::UNIT, 1); } else { _get_block_timer = ADD_TIMER_WITH_LEVEL(_local_state->scanner_profile(), "FileScannerGetBlockTime", 1); @@ -184,6 +197,8 @@ Status VFileScanner::prepare( "FileScannerPreFilterTimer", 1); _convert_to_output_block_timer = ADD_TIMER_WITH_LEVEL( _local_state->scanner_profile(), "FileScannerConvertOuputBlockTime", 1); + _runtime_filter_partition_prune_timer = ADD_TIMER_WITH_LEVEL( + _local_state->scanner_profile(), "FileScannerRuntimeFilterPartitionPruningTime", 1); _empty_file_counter = ADD_COUNTER_WITH_LEVEL(_local_state->scanner_profile(), "EmptyFileNum", TUnit::UNIT, 1); _not_found_file_counter = ADD_COUNTER_WITH_LEVEL(_local_state->scanner_profile(), @@ -192,6 +207,9 @@ Status VFileScanner::prepare( TUnit::UNIT, 1); _has_fully_rf_file_counter = ADD_COUNTER_WITH_LEVEL(_local_state->scanner_profile(), "HasFullyRfFileNumber", TUnit::UNIT, 1); + _runtime_filter_partition_pruned_range_counter = + ADD_COUNTER_WITH_LEVEL(_local_state->scanner_profile(), + "RuntimeFilterPartitionPrunedRangeNum", TUnit::UNIT, 1); } _file_cache_statistics.reset(new io::FileCacheStatistics()); @@ -231,6 +249,113 @@ Status VFileScanner::prepare( return Status::OK(); } +// check if the expr is a partition pruning expr +bool VFileScanner::_check_partition_prune_expr(const VExprSPtr& expr) { + if (expr->is_slot_ref()) { + auto* slot_ref = 
static_cast<VSlotRef*>(expr.get()); + return _partition_slot_index_map.find(slot_ref->slot_id()) != + _partition_slot_index_map.end(); + } + if (expr->is_literal()) { + return true; + } + return std::ranges::all_of(expr->children(), [this](const auto& child) { + return _check_partition_prune_expr(child); + }); +} + +void VFileScanner::_init_runtime_filter_partition_prune_ctxs() { + _runtime_filter_partition_prune_ctxs.clear(); + for (auto& conjunct : _conjuncts) { + auto impl = conjunct->root()->get_impl(); + // If impl is not null, which means this a conjuncts from runtime filter. + auto expr = impl ? impl : conjunct->root(); + if (_check_partition_prune_expr(expr)) { + _runtime_filter_partition_prune_ctxs.emplace_back(conjunct); + } + } +} + +void VFileScanner::_init_runtime_filter_partition_prune_block() { + // init block with empty column + for (auto const* slot_desc : _real_tuple_desc->slots()) { + if (!slot_desc->need_materialize()) { + // should be ignored from reading + continue; + } + _runtime_filter_partition_prune_block.insert( + ColumnWithTypeAndName(slot_desc->get_empty_mutable_column(), + slot_desc->get_data_type_ptr(), slot_desc->col_name())); + } +} + +Status VFileScanner::_process_runtime_filters_partition_prune(bool& can_filter_all) { + SCOPED_TIMER(_runtime_filter_partition_prune_timer); + if (_runtime_filter_partition_prune_ctxs.empty() || _partition_col_descs.empty()) { + return Status::OK(); + } + size_t partition_value_column_size = 1; + + // 1. Get partition key values to string columns. 
+ std::unordered_map<SlotId, MutableColumnPtr> parititon_slot_id_to_column; + for (auto const& partition_col_desc : _partition_col_descs) { + const auto& [partition_value, partition_slot_desc] = partition_col_desc.second; + auto test_serde = partition_slot_desc->get_data_type_ptr()->get_serde(); + auto partition_value_column = partition_slot_desc->get_data_type_ptr()->create_column(); + auto* col_ptr = static_cast<IColumn*>(partition_value_column.get()); + Slice slice(partition_value.data(), partition_value.size()); + int num_deserialized = 0; + RETURN_IF_ERROR(test_serde->deserialize_column_from_fixed_json( + *col_ptr, slice, partition_value_column_size, &num_deserialized, {})); + parititon_slot_id_to_column[partition_slot_desc->id()] = std::move(partition_value_column); + } + + // 2. Fill _runtime_filter_partition_prune_block from the partition column, then execute conjuncts and filter block. + // 2.1 Fill _runtime_filter_partition_prune_block from the partition column to match the conjuncts executing. 
+ size_t index = 0; + bool first_column_filled = false; + for (auto const* slot_desc : _real_tuple_desc->slots()) { + if (!slot_desc->need_materialize()) { + // should be ignored from reading + continue; + } + if (parititon_slot_id_to_column.find(slot_desc->id()) != + parititon_slot_id_to_column.end()) { + auto data_type = slot_desc->get_data_type_ptr(); + auto partition_value_column = std::move(parititon_slot_id_to_column[slot_desc->id()]); + if (data_type->is_nullable()) { + _runtime_filter_partition_prune_block.insert( + index, ColumnWithTypeAndName( + ColumnNullable::create( + std::move(partition_value_column), + ColumnUInt8::create(partition_value_column_size, 0)), + data_type, slot_desc->col_name())); + } else { + _runtime_filter_partition_prune_block.insert( + index, ColumnWithTypeAndName(std::move(partition_value_column), data_type, + slot_desc->col_name())); + } + if (index == 0) { + first_column_filled = true; + } + } + index++; + } + + // 2.2 Execute conjuncts. + if (!first_column_filled) { + // VExprContext.execute has an optimization, the filtering is executed when block->rows() > 0 + // The following process may be tricky and time-consuming, but we have no other way. 
+ _runtime_filter_partition_prune_block.get_by_position(0).column->assume_mutable()->resize( + partition_value_column_size); + } + IColumn::Filter result_filter(_runtime_filter_partition_prune_block.rows(), 1); + RETURN_IF_ERROR(VExprContext::execute_conjuncts(_runtime_filter_partition_prune_ctxs, nullptr, + &_runtime_filter_partition_prune_block, + &result_filter, &can_filter_all)); + return Status::OK(); +} + Status VFileScanner::_process_conjuncts_for_dict_filter() { _slot_id_to_filter_conjuncts.clear(); _not_single_slot_filter_conjuncts.clear(); @@ -294,6 +419,11 @@ Status VFileScanner::open(RuntimeState* state) { RETURN_IF_ERROR(_split_source->get_next(&_first_scan_range, &_current_range)); if (_first_scan_range) { RETURN_IF_ERROR(_init_expr_ctxes()); + if (_state->query_options().enable_runtime_filter_partition_prune && + !_partition_slot_index_map.empty()) { + _init_runtime_filter_partition_prune_ctxs(); + _init_runtime_filter_partition_prune_block(); + } } else { // there's no scan range in split source. stop scanner directly. 
_scanner_eof = true; @@ -775,6 +905,29 @@ Status VFileScanner::_get_next_reader() { const TFileRangeDesc& range = _current_range; _current_range_path = range.path; + if (!_partition_slot_descs.empty()) { + // we need get partition columns first for runtime filter partition pruning + RETURN_IF_ERROR(_generate_parititon_columns()); + + if (_state->query_options().enable_runtime_filter_partition_prune) { + // if enable_runtime_filter_partition_prune is true, we need to check whether this range can be filtered out + // by runtime filter partition prune + if (_push_down_conjuncts.size() < _conjuncts.size()) { + // there are new runtime filters, need to re-init runtime filter partition pruning ctxs + _init_runtime_filter_partition_prune_ctxs(); + } + + bool can_filter_all = false; + RETURN_IF_ERROR(_process_runtime_filters_partition_prune(can_filter_all)); + if (can_filter_all) { + // this range can be filtered out by runtime filter partition pruning + // so we need to skip this range + COUNTER_UPDATE(_runtime_filter_partition_pruned_range_counter, 1); + continue; + } + } + } + // create reader for specific format Status init_status; // for compatibility, if format_type is not set in range, use the format type of params @@ -1018,7 +1171,8 @@ Status VFileScanner::_get_next_reader() { _missing_cols.clear(); RETURN_IF_ERROR(_cur_reader->get_columns(&_name_to_col_type, &_missing_cols)); _cur_reader->set_push_down_agg_type(_get_push_down_agg_type()); - RETURN_IF_ERROR(_generate_fill_columns()); + RETURN_IF_ERROR(_generate_missing_columns()); + RETURN_IF_ERROR(_cur_reader->set_fill_columns(_partition_col_descs, _missing_col_descs)); if (VLOG_NOTICE_IS_ON && !_missing_cols.empty() && _is_load) { fmt::memory_buffer col_buf; for (auto& col : _missing_cols) { @@ -1048,10 +1202,8 @@ Status VFileScanner::_get_next_reader() { return Status::OK(); } -Status VFileScanner::_generate_fill_columns() { +Status VFileScanner::_generate_parititon_columns() { _partition_col_descs.clear(); - 
_missing_col_descs.clear(); - const TFileRangeDesc& range = _current_range; if (range.__isset.columns_from_path && !_partition_slot_descs.empty()) { for (const auto& slot_desc : _partition_slot_descs) { @@ -1072,7 +1224,11 @@ Status VFileScanner::_generate_fill_columns() { } } } + return Status::OK(); +} +Status VFileScanner::_generate_missing_columns() { + _missing_col_descs.clear(); if (!_missing_cols.empty()) { for (auto slot_desc : _real_tuple_desc->slots()) { if (!slot_desc->is_materialized()) { @@ -1090,8 +1246,7 @@ Status VFileScanner::_generate_fill_columns() { _missing_col_descs.emplace(slot_desc->col_name(), it->second); } } - - return _cur_reader->set_fill_columns(_partition_col_descs, _missing_col_descs); + return Status::OK(); } Status VFileScanner::_init_expr_ctxes() { diff --git a/be/src/vec/exec/scan/vfile_scanner.h b/be/src/vec/exec/scan/vfile_scanner.h index 1c6d903a87f..edb710ba494 100644 --- a/be/src/vec/exec/scan/vfile_scanner.h +++ b/be/src/vec/exec/scan/vfile_scanner.h @@ -38,6 +38,7 @@ #include "vec/core/block.h" #include "vec/exec/format/generic_reader.h" #include "vec/exec/scan/vscanner.h" +#include "vec/exprs/vexpr_fwd.h" namespace doris { class RuntimeState; @@ -167,6 +168,8 @@ protected: Block _src_block; VExprContextSPtrs _push_down_conjuncts; + VExprContextSPtrs _runtime_filter_partition_prune_ctxs; + Block _runtime_filter_partition_prune_block; std::unique_ptr<io::FileCacheStatistics> _file_cache_statistics; std::unique_ptr<io::IOContext> _io_ctx; @@ -183,10 +186,12 @@ private: RuntimeProfile::Counter* _fill_missing_columns_timer = nullptr; RuntimeProfile::Counter* _pre_filter_timer = nullptr; RuntimeProfile::Counter* _convert_to_output_block_timer = nullptr; + RuntimeProfile::Counter* _runtime_filter_partition_prune_timer = nullptr; RuntimeProfile::Counter* _empty_file_counter = nullptr; RuntimeProfile::Counter* _not_found_file_counter = nullptr; RuntimeProfile::Counter* _file_counter = nullptr; RuntimeProfile::Counter* 
_has_fully_rf_file_counter = nullptr; + RuntimeProfile::Counter* _runtime_filter_partition_pruned_range_counter = nullptr; const std::unordered_map<std::string, int>* _col_name_to_slot_id = nullptr; // single slot filter conjuncts @@ -214,8 +219,13 @@ private: Status _convert_to_output_block(Block* block); Status _truncate_char_or_varchar_columns(Block* block); void _truncate_char_or_varchar_column(Block* block, int idx, int len); - Status _generate_fill_columns(); Status _handle_dynamic_block(Block* block); + Status _generate_parititon_columns(); + Status _generate_missing_columns(); + bool _check_partition_prune_expr(const VExprSPtr& expr); + void _init_runtime_filter_partition_prune_ctxs(); + void _init_runtime_filter_partition_prune_block(); + Status _process_runtime_filters_partition_prune(bool& is_partition_pruned); Status _process_conjuncts_for_dict_filter(); Status _process_late_arrival_conjuncts(); void _get_slot_ids(VExpr* expr, std::vector<int>* slot_ids); diff --git a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run74.hql b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run74.hql new file mode 100644 index 00000000000..51e6c19ebab --- /dev/null +++ b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run74.hql @@ -0,0 +1,44 @@ +create database if not exists partition_tables; +use partition_tables; + +CREATE TABLE decimal_partition_table ( + id INT, + name STRING, + value FLOAT +) +PARTITIONED BY (partition_col DECIMAL(10, 2)) +STORED AS PARQUET +LOCATION '/user/doris/preinstalled_data/partition_tables/decimal_partition_table'; + +CREATE TABLE int_partition_table ( + id INT, + name STRING, + value FLOAT +) +PARTITIONED BY (partition_col INT) +STORED AS PARQUET +LOCATION '/user/doris/preinstalled_data/partition_tables/int_partition_table'; + +CREATE TABLE string_partition_table ( + id INT, + name STRING, + value FLOAT +) +PARTITIONED BY (partition_col STRING) +STORED AS 
PARQUET +LOCATION '/user/doris/preinstalled_data/partition_tables/string_partition_table'; + +CREATE TABLE date_partition_table ( + id INT, + name STRING, + value FLOAT +) +PARTITIONED BY (partition_col DATE) +STORED AS PARQUET +LOCATION '/user/doris/preinstalled_data/partition_tables/date_partition_table'; + + +msck repair table decimal_partition_table; +msck repair table int_partition_table; +msck repair table string_partition_table; +msck repair table date_partition_table; diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/date_partition_table/partition_col=2025-01-01/000000_0 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/date_partition_table/partition_col=2025-01-01/000000_0 new file mode 100644 index 00000000000..db221ac2e90 Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/date_partition_table/partition_col=2025-01-01/000000_0 differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/date_partition_table/partition_col=2025-01-02/000000_0 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/date_partition_table/partition_col=2025-01-02/000000_0 new file mode 100644 index 00000000000..1ea988102c7 Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/date_partition_table/partition_col=2025-01-02/000000_0 differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/date_partition_table/partition_col=2025-01-03/000000_0 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/date_partition_table/partition_col=2025-01-03/000000_0 new file mode 100644 index 00000000000..e7c5200f081 Binary files /dev/null and 
b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/date_partition_table/partition_col=2025-01-03/000000_0 differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/decimal_partition_table/partition_col=100.01/000000_0 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/decimal_partition_table/partition_col=100.01/000000_0 new file mode 100644 index 00000000000..db221ac2e90 Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/decimal_partition_table/partition_col=100.01/000000_0 differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/decimal_partition_table/partition_col=200.02/000000_0 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/decimal_partition_table/partition_col=200.02/000000_0 new file mode 100644 index 00000000000..1ea988102c7 Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/decimal_partition_table/partition_col=200.02/000000_0 differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/decimal_partition_table/partition_col=300.03/000000_0 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/decimal_partition_table/partition_col=300.03/000000_0 new file mode 100644 index 00000000000..e7c5200f081 Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/decimal_partition_table/partition_col=300.03/000000_0 differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/int_partition_table/partition_col=1/000000_0 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/int_partition_table/partition_col=1/000000_0 new file mode 100644 index 
00000000000..db221ac2e90 Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/int_partition_table/partition_col=1/000000_0 differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/int_partition_table/partition_col=2/000000_0 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/int_partition_table/partition_col=2/000000_0 new file mode 100644 index 00000000000..1ea988102c7 Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/int_partition_table/partition_col=2/000000_0 differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/int_partition_table/partition_col=3/000000_0 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/int_partition_table/partition_col=3/000000_0 new file mode 100644 index 00000000000..e7c5200f081 Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/int_partition_table/partition_col=3/000000_0 differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table/partition_col=A/000000_0 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table/partition_col=A/000000_0 new file mode 100644 index 00000000000..db221ac2e90 Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table/partition_col=A/000000_0 differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table/partition_col=B/000000_0 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table/partition_col=B/000000_0 new file mode 100644 index 00000000000..1ea988102c7 Binary 
files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table/partition_col=B/000000_0 differ diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table/partition_col=C/000000_0 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table/partition_col=C/000000_0 new file mode 100644 index 00000000000..e7c5200f081 Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table/partition_col=C/000000_0 differ diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index ea4921bef85..76c23913a2c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -332,6 +332,9 @@ public class SessionVariable implements Serializable, Writable { public static final String ENABLE_RUNTIME_FILTER_PRUNE = "enable_runtime_filter_prune"; + public static final String ENABLE_RUNTIME_FILTER_PARTITION_PRUNE = + "enable_runtime_filter_partition_prune"; + static final String SESSION_CONTEXT = "session_context"; public static final String DEFAULT_ORDER_BY_LIMIT = "default_order_by_limit"; @@ -1395,6 +1398,9 @@ public class SessionVariable implements Serializable, Writable { @VariableMgr.VarAttr(name = ENABLE_RUNTIME_FILTER_PRUNE, needForward = true) public boolean enableRuntimeFilterPrune = true; + @VariableMgr.VarAttr(name = ENABLE_RUNTIME_FILTER_PARTITION_PRUNE, needForward = true, fuzzy = true) + public boolean enableRuntimeFilterPartitionPrune = true; + /** * The client can pass some special information by setting this session variable in the format: "k1:v1;k2:v2". * For example, trace_id can be passed to trace the query request sent by the user. 
@@ -2357,6 +2363,8 @@ public class SessionVariable implements Serializable, Writable { this.runtimeFilterType = 1 << randomInt; this.enableParallelScan = Config.pull_request_id % 2 == 0 ? randomInt % 2 == 0 : randomInt % 1 == 0; + this.enableRuntimeFilterPrune = (randomInt % 10) == 0; + this.enableRuntimeFilterPartitionPrune = (randomInt % 2) == 0; switch (randomInt) { case 0: this.parallelScanMaxScannersCount = 32; @@ -3488,6 +3496,14 @@ public class SessionVariable implements Serializable, Writable { this.enableRuntimeFilterPrune = enableRuntimeFilterPrune; } + public boolean isEnableRuntimeFilterPartitionPrune() { + return enableRuntimeFilterPartitionPrune; + } + + public void setEnableRuntimeFilterPartitionPrune(boolean enableRuntimeFilterPartitionPrune) { + this.enableRuntimeFilterPartitionPrune = enableRuntimeFilterPartitionPrune; + } + public void setFragmentTransmissionCompressionCodec(String codec) { this.fragmentTransmissionCompressionCodec = codec; } @@ -3813,6 +3829,7 @@ public class SessionVariable implements Serializable, Writable { tResult.setOrcOnceMaxReadBytes(orcOnceMaxReadBytes); tResult.setNewIsIpAddressInRange(newIsIpAddressInRange); + tResult.setEnableRuntimeFilterPartitionPrune(enableRuntimeFilterPartitionPrune); return tResult; } diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift index c612826836e..ee1069b3e15 100644 --- a/gensrc/thrift/PaloInternalService.thrift +++ b/gensrc/thrift/PaloInternalService.thrift @@ -341,6 +341,8 @@ struct TQueryOptions { 146: optional bool fuzzy_disable_runtime_filter_in_be = false; + 150: optional bool enable_runtime_filter_partition_prune = true; + // upgrade options. keep them same in every branch. 
200: optional bool new_is_ip_address_in_range = false; diff --git a/regression-test/data/external_table_p0/hive/test_hive_runtime_filter_partition_pruning.out b/regression-test/data/external_table_p0/hive/test_hive_runtime_filter_partition_pruning.out new file mode 100644 index 00000000000..5e24012544d Binary files /dev/null and b/regression-test/data/external_table_p0/hive/test_hive_runtime_filter_partition_pruning.out differ diff --git a/regression-test/suites/external_table_p0/hive/test_hive_runtime_filter_partition_pruning.groovy b/regression-test/suites/external_table_p0/hive/test_hive_runtime_filter_partition_pruning.groovy new file mode 100644 index 00000000000..77a127605ce --- /dev/null +++ b/regression-test/suites/external_table_p0/hive/test_hive_runtime_filter_partition_pruning.groovy @@ -0,0 +1,107 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_hive_runtime_filter_partition_pruning", "p0,external,hive,external_docker,external_docker_hive") { + def test_runtime_filter_partition_pruning = { + qt_runtime_filter_partition_pruning_decimal1 """ + select count(*) from decimal_partition_table where partition_col = + (select partition_col from decimal_partition_table + group by partition_col having count(*) > 0 + order by partition_col desc limit 1); + """ + qt_runtime_filter_partition_pruning_decimal2 """ + select count(*) from decimal_partition_table where partition_col in + (select partition_col from decimal_partition_table + group by partition_col having count(*) > 0 + order by partition_col desc limit 2); + """ + qt_runtime_filter_partition_pruning_decimal3 """ + select count(*) from decimal_partition_table where abs(partition_col) = + (select partition_col from decimal_partition_table + group by partition_col having count(*) > 0 + order by partition_col desc limit 1); + """ + qt_runtime_filter_partition_pruning_int1 """ + select count(*) from int_partition_table where partition_col = + (select partition_col from int_partition_table + group by partition_col having count(*) > 0 + order by partition_col desc limit 1); + """ + qt_runtime_filter_partition_pruning_int2 """ + select count(*) from int_partition_table where partition_col in + (select partition_col from int_partition_table + group by partition_col having count(*) > 0 + order by partition_col desc limit 2); + """ + qt_runtime_filter_partition_pruning_int3 """ + select count(*) from int_partition_table where abs(partition_col) = + (select partition_col from int_partition_table + group by partition_col having count(*) > 0 + order by partition_col desc limit 1); + """ + qt_runtime_filter_partition_pruning_string1 """ + select count(*) from string_partition_table where partition_col = + (select partition_col from string_partition_table + group by partition_col having count(*) > 0 + order by partition_col desc limit 1); + """ + 
qt_runtime_filter_partition_pruning_string2 """ + select count(*) from string_partition_table where partition_col in + (select partition_col from string_partition_table + group by partition_col having count(*) > 0 + order by partition_col desc limit 2); + """ + qt_runtime_filter_partition_pruning_date1 """ + select count(*) from date_partition_table where partition_col = + (select partition_col from date_partition_table + group by partition_col having count(*) > 0 + order by partition_col desc limit 1); + """ + qt_runtime_filter_partition_pruning_decimal2 """ + select count(*) from date_partition_table where partition_col in + (select partition_col from date_partition_table + group by partition_col having count(*) > 0 + order by partition_col desc limit 2); + """ + } + + String enabled = context.config.otherConfigs.get("enableHiveTest") + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("diable Hive test.") + return; + } + + for (String hivePrefix : ["hive2", "hive3"]) { + try { + String hms_port = context.config.otherConfigs.get(hivePrefix + "HmsPort") + String catalog_name = "${hivePrefix}_test_hive_runtime_filter_partition_pruning" + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + + sql """drop catalog if exists ${catalog_name}""" + sql """create catalog if not exists ${catalog_name} properties ( + "type"="hms", + 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}' + );""" + sql """use `${catalog_name}`.`partition_tables`""" + + test_runtime_filter_partition_pruning() + + } finally { + } + } +} + --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
