This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 4bf8d7af2ba30bebe97a38bb8b76c92914767df5 Author: HappenLee <[email protected]> AuthorDate: Mon Jun 26 16:39:45 2023 +0800 [Bug](RuntimeFiter) Fix bf error change the murmurhash to crc32 in regression test p2 (#21167) --- be/src/agent/be_exec_version_manager.h | 1 + be/src/olap/bloom_filter_predicate.h | 44 +++++++++++++++++++++++++--------- 2 files changed, 34 insertions(+), 11 deletions(-) diff --git a/be/src/agent/be_exec_version_manager.h b/be/src/agent/be_exec_version_manager.h index 0491a038c8..657ebab02d 100644 --- a/be/src/agent/be_exec_version_manager.h +++ b/be/src/agent/be_exec_version_manager.h @@ -55,6 +55,7 @@ private: * 2: start from doris 2.0 * a. function month/day/hour/minute/second's return type is changed to smaller type. * b. in order to solve agg of sum/count is not compatibility during the upgrade process + * c. change the string hash method in runtime filter * */ inline const int BeExecVersionManager::max_be_exec_version = 2; diff --git a/be/src/olap/bloom_filter_predicate.h b/be/src/olap/bloom_filter_predicate.h index 99debfa94b..885927d3f5 100644 --- a/be/src/olap/bloom_filter_predicate.h +++ b/be/src/olap/bloom_filter_predicate.h @@ -63,6 +63,17 @@ private: DCHECK(null_map); } + uint24_t tmp_uint24_value; + auto get_cell_value = [&tmp_uint24_value](auto& data) { + if constexpr (std::is_same_v<std::decay_t<decltype(data)>, uint32_t> && + T == PrimitiveType::TYPE_DATE) { + memcpy((char*)(&tmp_uint24_value), (char*)(&data), sizeof(uint24_t)); + return (const char*)&tmp_uint24_value; + } else { + return (const char*)&data; + } + }; + uint16_t new_size = 0; if (column.is_column_dictionary()) { auto* dict_col = reinterpret_cast<const vectorized::ColumnDictI32*>(&column); @@ -90,6 +101,28 @@ private: } } } + } else if (is_string_type(T) && _be_exec_version >= 2) { + auto& pred_col = + reinterpret_cast< + const vectorized::PredicateColumnType<PredicateEvaluateType<T>>*>( + &column) + ->get_data(); + + auto pred_col_data = pred_col.data(); + 
const bool is_dense_column = pred_col.size() == size; + for (uint16_t i = 0; i < size; i++) { + uint16_t idx = is_dense_column ? i : sel[i]; + if constexpr (is_nullable) { + if (!null_map[idx] && + _specific_filter->find_crc32_hash(get_cell_value(pred_col_data[idx]))) { + sel[new_size++] = idx; + } + } else { + if (_specific_filter->find_crc32_hash(get_cell_value(pred_col_data[idx]))) { + sel[new_size++] = idx; + } + } + } } else if (IRuntimeFilter::enable_use_batch(_be_exec_version > 0, T)) { const auto& data = reinterpret_cast< @@ -99,17 +132,6 @@ private: new_size = _specific_filter->find_fixed_len_olap_engine((char*)data.data(), null_map, sel, size, data.size() != size); } else { - uint24_t tmp_uint24_value; - auto get_cell_value = [&tmp_uint24_value](auto& data) { - if constexpr (std::is_same_v<std::decay_t<decltype(data)>, uint32_t> && - T == PrimitiveType::TYPE_DATE) { - memcpy((char*)(&tmp_uint24_value), (char*)(&data), sizeof(uint24_t)); - return (const char*)&tmp_uint24_value; - } else { - return (const char*)&data; - } - }; - auto& pred_col = reinterpret_cast< const vectorized::PredicateColumnType<PredicateEvaluateType<T>>*>( --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
