This is an automated email from the ASF dual-hosted git repository.
lide pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
new 00bd7afb93 [Feature](add bitmap udaf) add the bitmap intersection and
difference set for mixed calculation of udaf (#15588) (#19778)
00bd7afb93 is described below
commit 00bd7afb93e128950418f3d721aa4c2e9d014601
Author: yuxuan-luo <[email protected]>
AuthorDate: Thu May 18 14:59:26 2023 +0800
[Feature](add bitmap udaf) add the bitmap intersection and difference set
for mixed calculation of udaf (#15588) (#19778)
Co-authored-by: zhbinbin <[email protected]>
Co-authored-by: zhangbinbin05 <[email protected]>
(cherry picked from commit ff9e03e2bf8ca5b4cdd6fe44b129db868e84ddb4)
---
be/src/util/bitmap_expr_calculation.h | 216 +++++++++++++++++++++
be/src/util/bitmap_intersect.h | 5 +-
.../aggregate_function_orthogonal_bitmap.cpp | 19 ++
.../aggregate_function_orthogonal_bitmap.h | 97 +++++++++
.../apache/doris/analysis/FunctionCallExpr.java | 4 +-
.../apache/doris/catalog/AggregateFunction.java | 1 +
.../java/org/apache/doris/catalog/FunctionSet.java | 43 ++++
7 files changed, 381 insertions(+), 4 deletions(-)
diff --git a/be/src/util/bitmap_expr_calculation.h
b/be/src/util/bitmap_expr_calculation.h
new file mode 100644
index 0000000000..24784cc957
--- /dev/null
+++ b/be/src/util/bitmap_expr_calculation.h
@@ -0,0 +1,216 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#pragma once
+#include <stack>
+#include <string>
+
+#include "util/bitmap_intersect.h"
+
+namespace doris {
+
+// Compute the intersection union difference set of two or more bitmaps
+// Usage: orthogonal_bitmap_parse_calculate(bitmap_column, filter_column,
input_string)
+// Example: orthogonal_bitmap_expr_calculate(user_id, event, '(A|B)&(C-D)'),
meaning find the intersection union difference set of user_id in all A/B/C/D 4
bitmaps
+// Operation symbol:
+// the operator '|' stands for union, the operator '&' stands for
intersection, the operator '-' indicates the difference set, the operator '^'
stands for xor
+class BitmapExprCalculation : public BitmapIntersect<std::string> {
+public:
+ BitmapExprCalculation() = default;
+
+ explicit BitmapExprCalculation(const char* src) { deserialize(src); }
+
+ void bitmap_calculation_init(std::string& input_str) {
+ _polish = reverse_polish(input_str);
+ std::string bitmap_key;
+ for (int i = 0; i < _polish.length(); i++) {
+ char c = _polish.at(i);
+ if (c != '&' && c != '|' && c != '^' && c != '-' && c != ' ' && c
!= '\\') {
+ bitmap_key += c;
+ } else if (i != 0 && _polish.at(i - 1) == '\\') {
+ bitmap_key += c;
+ } else if (c == '\\') {
+ continue;
+ } else {
+ if (bitmap_key.length() > 0) {
+ add_key(bitmap_key);
+ bitmap_key.clear();
+ }
+ }
+ }
+ if (bitmap_key.length() > 0) {
+ add_key(bitmap_key);
+ bitmap_key.clear();
+ }
+ }
+
+ BitmapValue bitmap_calculate() {
+ std::stack<BitmapValue> values;
+ std::string bitmap_key;
+ for (int i = 0; i < _polish.length(); i++) {
+ char c = _polish.at(i);
+ if (c == ' ') {
+ if (bitmap_key.length() > 0) {
+ values.push(_bitmaps[bitmap_key]);
+ bitmap_key.clear();
+ }
+ } else if (c != '&' && c != '|' && c != '^' && c != '-' && c !=
'\\') {
+ bitmap_key += c;
+ } else if (i != 0 && _polish.at(i - 1) == '\\') {
+ bitmap_key += c;
+ } else if (c == '\\') {
+ continue;
+ } else {
+ if (bitmap_key.length() > 0) {
+ values.push(_bitmaps[bitmap_key]);
+ bitmap_key.clear();
+ }
+ if (values.size() >= 2) {
+ BitmapValue op_a = values.top();
+ values.pop();
+ BitmapValue op_b = values.top();
+ values.pop();
+ BitmapValue cal_result;
+ bitmap_calculate(op_a, op_b, c, cal_result);
+ values.push(cal_result);
+ }
+ }
+ }
+ BitmapValue result;
+ if (bitmap_key.length() > 0) {
+ result |= _bitmaps[bitmap_key];
+ } else if (!values.empty()) {
+ result |= values.top();
+ }
+ return result;
+ }
+
+ // calculate the bitmap value by expr bitmap calculate
+ int64_t bitmap_calculate_count() {
+ if (_bitmaps.empty()) {
+ return 0;
+ }
+ return bitmap_calculate().cardinality();
+ }
+
+private:
+ constexpr int priority(char c) {
+ switch (c) {
+ case '&':
+ return 1;
+ case '|':
+ return 1;
+ case '^':
+ return 1;
+ case '-':
+ return 1;
+ default:
+ return 0;
+ }
+ }
+
+ template <class T>
+ std::string print_stack(std::stack<T>& stack) {
+ std::string result;
+ while (!stack.empty()) {
+ result = stack.top() + result;
+ stack.pop();
+ }
+ return result;
+ }
+
+ std::string reverse_polish(const std::string& input_str) {
+ std::stack<char> polish;
+ std::stack<char> op_stack;
+ bool last_is_char = false;
+ for (int i = 0; i < input_str.length(); i++) {
+ char cur_char = input_str.at(i);
+ if (cur_char != '&' && cur_char != '|' && cur_char != '^' &&
cur_char != '-' &&
+ cur_char != '(' && cur_char != ')' && cur_char != ' ' &&
cur_char != '\t') {
+ if (!last_is_char) {
+ polish.push(' ');
+ }
+ polish.push(cur_char);
+ last_is_char = true;
+ continue;
+ } else if (i != 0 && input_str.at(i - 1) == '\\') {
+ polish.push(cur_char);
+ last_is_char = true;
+ continue;
+ } else if (cur_char == ' ' || cur_char == '\t') {
+ last_is_char = false;
+ continue;
+ } else if (cur_char == '(') {
+ op_stack.push(cur_char);
+ } else if (!op_stack.empty() && cur_char == ')') {
+ while (!op_stack.empty() && op_stack.top() != '(') {
+ polish.push(op_stack.top());
+ op_stack.pop();
+ }
+ op_stack.pop();
+ } else {
+ if (!op_stack.empty() && op_stack.top() == '(') {
+ op_stack.push(cur_char);
+ } else {
+ if (!op_stack.empty() && priority(cur_char) >
priority(op_stack.top())) {
+ op_stack.push(cur_char);
+ } else {
+ while (!op_stack.empty()) {
+ if (op_stack.top() == '(') {
+ break;
+ }
+ if (priority(cur_char) <=
priority(op_stack.top())) {
+ polish.push(op_stack.top());
+ op_stack.pop();
+ } else {
+ break;
+ }
+ }
+ op_stack.push(cur_char);
+ }
+ }
+ }
+ last_is_char = false;
+ }
+
+ while (!op_stack.empty()) {
+ polish.push(op_stack.top());
+ op_stack.pop();
+ }
+ return print_stack(polish);
+ }
+
+ void bitmap_calculate(BitmapValue& op_a, BitmapValue& op_b, char op,
BitmapValue& result) {
+ result |= op_b;
+ switch (op) {
+ case '&':
+ result &= op_a;
+ break;
+ case '|':
+ result |= op_a;
+ break;
+ case '-':
+ result -= op_a;
+ break;
+ case '^':
+ result ^= op_a;
+ break;
+ }
+ }
+
+ std::string _polish;
+};
+} // namespace doris
diff --git a/be/src/util/bitmap_intersect.h b/be/src/util/bitmap_intersect.h
index 94ff8dc283..a6f3428acb 100644
--- a/be/src/util/bitmap_intersect.h
+++ b/be/src/util/bitmap_intersect.h
@@ -172,7 +172,6 @@ inline void Helper::read_from<std::string>(const char**
src, std::string* result
*src += length;
}
// read_from end
-
} // namespace detail
// Calculate the intersection of two or more bitmaps
@@ -262,7 +261,7 @@ public:
}
}
-private:
+protected:
std::map<T, BitmapValue> _bitmaps;
};
@@ -349,7 +348,7 @@ public:
}
}
-private:
+protected:
phmap::flat_hash_map<std::string, BitmapValue> _bitmaps;
};
diff --git
a/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.cpp
b/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.cpp
index 4194befae2..f3327eff1f 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.cpp
+++ b/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.cpp
@@ -68,6 +68,18 @@ AggregateFunctionPtr
create_aggregate_function_orthogonal_bitmap_intersect_count
name, argument_types, parameters, result_is_nullable);
}
+AggregateFunctionPtr
create_aggregate_function_orthogonal_bitmap_expr_calculate(
+ const std::string& name, const DataTypes& argument_types, const Array&
parameters, bool result_is_nullable) {
+ return create_aggregate_function_orthogonal<AggOrthBitMapExprCal>(name,
argument_types, parameters,
+
result_is_nullable);
+}
+
+AggregateFunctionPtr
create_aggregate_function_orthogonal_bitmap_expr_calculate_count(
+ const std::string& name, const DataTypes& argument_types, const Array&
parameters, bool result_is_nullable) {
+ return
create_aggregate_function_orthogonal<AggOrthBitMapExprCalCount>(name,
argument_types, parameters,
+
result_is_nullable);
+}
+
AggregateFunctionPtr create_aggregate_function_intersect_count(const
std::string& name,
const
DataTypes& argument_types,
const Array&
parameters,
@@ -94,5 +106,12 @@ void
register_aggregate_function_orthogonal_bitmap(AggregateFunctionSimpleFactor
create_aggregate_function_orthogonal_bitmap_union_count);
factory.register_function("intersect_count",
create_aggregate_function_intersect_count);
+
+ factory.register_function("orthogonal_bitmap_expr_calculate",
+
create_aggregate_function_orthogonal_bitmap_expr_calculate);
+
+ factory.register_function(
+ "orthogonal_bitmap_expr_calculate_count",
+ create_aggregate_function_orthogonal_bitmap_expr_calculate_count);
}
} // namespace doris::vectorized
\ No newline at end of file
diff --git
a/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.h
b/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.h
index 0c8b8b9b20..1dd5f6a93d 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.h
@@ -18,6 +18,7 @@
#pragma once
#include "exprs/bitmap_function.h"
+#include "util/bitmap_expr_calculation.h"
#include "util/bitmap_intersect.h"
#include "util/bitmap_value.h"
#include "vec/aggregate_functions/aggregate_function.h"
@@ -178,6 +179,102 @@ private:
Int64 result = 0;
};
+template <typename T>
+struct AggOrthBitmapExprCalBaseData {
+public:
+ using ColVecData = std::conditional_t<IsNumber<T>, ColumnVector<T>,
ColumnString>;
+
+ void add(const IColumn** columns, size_t row_num) {
+ const auto& bitmap_col = static_cast<const ColumnBitmap&>(*columns[0]);
+ const auto& data_col = static_cast<const ColVecData&>(*columns[1]);
+ const auto& bitmap_value = bitmap_col.get_element(row_num);
+ std::string update_key = data_col.get_data_at(row_num).to_string();
+ bitmap_expr_cal.update(update_key, bitmap_value);
+ }
+
+ void init_add_key(const IColumn** columns, size_t row_num, int
argument_size) {
+ if (first_init) {
+ DCHECK(argument_size > 1);
+ const auto& col = static_cast<const ColVecData&>(*columns[2]);
+ std::string expr = col.get_data_at(row_num).to_string();
+ bitmap_expr_cal.bitmap_calculation_init(expr);
+ first_init = false;
+ }
+ }
+
+protected:
+ doris::BitmapExprCalculation bitmap_expr_cal;
+ bool first_init = true;
+};
+
+template <typename T>
+struct AggOrthBitMapExprCal : public AggOrthBitmapExprCalBaseData<T> {
+public:
+ static constexpr auto name = "orthogonal_bitmap_expr_calculate";
+
+ static DataTypePtr get_return_type() { return
std::make_shared<DataTypeBitMap>(); }
+
+ void merge(const AggOrthBitMapExprCal& rhs) {
+ if (rhs.first_init) {
+ return;
+ }
+ result |= rhs.result;
+ }
+
+ void write(BufferWritable& buf) {
+ write_binary(AggOrthBitmapExprCalBaseData<T>::first_init, buf);
+ result =
AggOrthBitmapExprCalBaseData<T>::bitmap_expr_cal.bitmap_calculate();
+ DataTypeBitMap::serialize_as_stream(result, buf);
+ }
+
+ void read(BufferReadable& buf) {
+ read_binary(AggOrthBitmapExprCalBaseData<T>::first_init, buf);
+ DataTypeBitMap::deserialize_as_stream(result, buf);
+ }
+
+ void get(IColumn& to) const {
+ auto& column = static_cast<ColumnBitmap&>(to);
+ column.get_data().emplace_back(result);
+ }
+
+private:
+ BitmapValue result;
+};
+
+template <typename T>
+struct AggOrthBitMapExprCalCount : public AggOrthBitmapExprCalBaseData<T> {
+public:
+ static constexpr auto name = "orthogonal_bitmap_expr_calculate_count";
+
+ static DataTypePtr get_return_type() { return
std::make_shared<DataTypeInt64>(); }
+
+ void merge(const AggOrthBitMapExprCalCount& rhs) {
+ if (rhs.first_init) {
+ return;
+ }
+ result += rhs.result;
+ }
+
+ void write(BufferWritable& buf) {
+ write_binary(AggOrthBitmapExprCalBaseData<T>::first_init, buf);
+ result =
AggOrthBitmapExprCalBaseData<T>::bitmap_expr_cal.bitmap_calculate_count();
+ write_binary(result, buf);
+ }
+
+ void read(BufferReadable& buf) {
+ read_binary(AggOrthBitmapExprCalBaseData<T>::first_init, buf);
+ read_binary(result, buf);
+ }
+
+ void get(IColumn& to) const {
+ auto& column = static_cast<ColumnVector<Int64>&>(to);
+ column.get_data().emplace_back(result);
+ }
+
+private:
+ int64_t result = 0;
+};
+
template <typename T>
struct OrthBitmapUnionCountData {
static constexpr auto name = "orthogonal_bitmap_union_count";
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
index 8ef4b36c88..c9f25a3f8b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
@@ -797,7 +797,9 @@ public class FunctionCallExpr extends Expr {
if (fnName.getFunction().equalsIgnoreCase(FunctionSet.INTERSECT_COUNT)
|| fnName.getFunction()
.equalsIgnoreCase(FunctionSet.ORTHOGONAL_BITMAP_INTERSECT) ||
fnName.getFunction()
-
.equalsIgnoreCase(FunctionSet.ORTHOGONAL_BITMAP_INTERSECT_COUNT)) {
+
.equalsIgnoreCase(FunctionSet.ORTHOGONAL_BITMAP_INTERSECT_COUNT) ||
fnName.getFunction()
+
.equalsIgnoreCase(FunctionSet.ORTHOGONAL_BITMAP_EXPR_CALCULATE_COUNT) ||
fnName.getFunction()
+
.equalsIgnoreCase(FunctionSet.ORTHOGONAL_BITMAP_EXPR_CALCULATE)) {
if (children.size() <= 2) {
throw new AnalysisException(fnName + "(bitmap_column,
column_to_filter, filter_values) "
+ "function requires at least three parameters");
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateFunction.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateFunction.java
index 09cb22b59e..5e2a44ea23 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateFunction.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateFunction.java
@@ -50,6 +50,7 @@ public class AggregateFunction extends Function {
"dense_rank", "multi_distinct_count", "multi_distinct_sum",
FunctionSet.HLL_UNION_AGG,
FunctionSet.HLL_UNION, FunctionSet.HLL_RAW_AGG,
FunctionSet.BITMAP_UNION, FunctionSet.BITMAP_INTERSECT,
FunctionSet.ORTHOGONAL_BITMAP_INTERSECT,
FunctionSet.ORTHOGONAL_BITMAP_INTERSECT_COUNT,
+ FunctionSet.ORTHOGONAL_BITMAP_EXPR_CALCULATE_COUNT,
FunctionSet.ORTHOGONAL_BITMAP_EXPR_CALCULATE,
FunctionSet.INTERSECT_COUNT,
FunctionSet.ORTHOGONAL_BITMAP_UNION_COUNT,
FunctionSet.COUNT, "approx_count_distinct", "ndv",
FunctionSet.BITMAP_UNION_INT,
FunctionSet.BITMAP_UNION_COUNT, "ndv_no_finalize",
FunctionSet.WINDOW_FUNNEL, FunctionSet.RETENTION,
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
index b6c69c7708..079fa823e9 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
@@ -925,6 +925,8 @@ public class FunctionSet<T> {
public static final String ORTHOGONAL_BITMAP_INTERSECT =
"orthogonal_bitmap_intersect";
public static final String ORTHOGONAL_BITMAP_INTERSECT_COUNT =
"orthogonal_bitmap_intersect_count";
public static final String ORTHOGONAL_BITMAP_UNION_COUNT =
"orthogonal_bitmap_union_count";
+ public static final String ORTHOGONAL_BITMAP_EXPR_CALCULATE_COUNT =
"orthogonal_bitmap_expr_calculate_count";
+ public static final String ORTHOGONAL_BITMAP_EXPR_CALCULATE =
"orthogonal_bitmap_expr_calculate";
public static final String QUANTILE_UNION = "quantile_union";
//TODO(weixiang): is quantile_percent can be replaced by approx_percentile?
@@ -2394,6 +2396,47 @@ public class FunctionSet<T> {
Lists.newArrayList(Type.BITMAP, t, t), Type.BIGINT,
Type.BITMAP, true, "", "", "", "", "", "", "",
true, false, true, true));
}
+
+ Type[] ntypes = {Type.CHAR, Type.VARCHAR, Type.STRING};
+ for (Type t : ntypes) {
+
addBuiltin(AggregateFunction.createBuiltin(ORTHOGONAL_BITMAP_EXPR_CALCULATE,
+ Lists.newArrayList(Type.BITMAP, t, Type.STRING),
+ Type.BITMAP,
+ Type.VARCHAR,
+ true,
+
"_ZN5doris15BitmapFunctions37orthogonal_bitmap_expr_calculate_initEPN9doris_udf15FunctionContextEPNS1_9StringValE",
+
"_ZN5doris15BitmapFunctions39orthogonal_bitmap_expr_calculate_updateEPN9doris_udf15FunctionContextERKNS1_9StringValES6_iPS5_S7_",
+
"_ZN5doris15BitmapFunctions12bitmap_unionEPN9doris_udf15FunctionContextERKNS1_9StringValEPS4_",
+
"_ZN5doris15BitmapFunctions42orthogonal_bitmap_expr_calculate_serializeEPN9doris_udf15FunctionContextERKNS1_9StringValE",
+ "",
+ "",
+
"_ZN5doris15BitmapFunctions16bitmap_serializeEPN9doris_udf15FunctionContextERKNS1_9StringValE",
+ true, false, true));
+
+
addBuiltin(AggregateFunction.createBuiltin(ORTHOGONAL_BITMAP_EXPR_CALCULATE_COUNT,
+ Lists.newArrayList(Type.BITMAP, t, Type.STRING),
+ Type.BIGINT,
+ Type.VARCHAR,
+ true,
+
"_ZN5doris15BitmapFunctions43orthogonal_bitmap_expr_calculate_count_initEPN9doris_udf15FunctionContextEPNS1_9StringValE",
+
"_ZN5doris15BitmapFunctions39orthogonal_bitmap_expr_calculate_updateEPN9doris_udf15FunctionContextERKNS1_9StringValES6_iPS5_S7_",
+
"_ZN5doris15BitmapFunctions29orthogonal_bitmap_count_mergeEPN9doris_udf15FunctionContextERKNS1_9StringValEPS4_",
+
"_ZN5doris15BitmapFunctions48orthogonal_bitmap_expr_calculate_count_serializeEPN9doris_udf15FunctionContextERKNS1_9StringValE",
+ "",
+ "",
+
"_ZN5doris15BitmapFunctions32orthogonal_bitmap_count_finalizeEPN9doris_udf15FunctionContextERKNS1_9StringValE",
+ true, false, true));
+
+ //vec ORTHOGONAL_BITMAP_EXPR_CALCULATE and
ORTHOGONAL_BITMAP_EXPR_CALCULATE_COUNT
+ addBuiltin(
+
AggregateFunction.createBuiltin(ORTHOGONAL_BITMAP_EXPR_CALCULATE,
Lists.newArrayList(Type.BITMAP, t, Type.STRING),
+ Type.BITMAP, Type.BITMAP, true, "", "", "", "",
"", "", "", true, false, true, true));
+
+
addBuiltin(AggregateFunction.createBuiltin(ORTHOGONAL_BITMAP_EXPR_CALCULATE_COUNT,
+ Lists.newArrayList(Type.BITMAP, t, Type.STRING),
Type.BIGINT, Type.BITMAP, true, "", "", "", "", "", "", "",
+ true, false, true, true));
+ }
+
// bitmap
addBuiltin(AggregateFunction.createBuiltin(BITMAP_UNION,
Lists.newArrayList(Type.BITMAP),
Type.BITMAP,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]