This is an automated email from the ASF dual-hosted git repository. kangpinghuang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push: new 0d66e6b Support bitmap_intersect (#3571) 0d66e6b is described below commit 0d66e6bd1578eba0e9a58cf591f05a83f9e2b334 Author: EmmyMiao87 <522274...@qq.com> AuthorDate: Wed May 20 21:12:02 2020 +0800 Support bitmap_intersect (#3571) * Support bitmap_intersect Support aggregate function Bitmap Intersect, it is mainly used to take intersection of grouped data. The function 'bitmap_intersect(expr)' calculates the intersection of bitmap columns and returns a bitmap object. The definition is as follows: FunctionName: bitmap_intersect, InputType: bitmap, OutputType: bitmap The scenario is as follows: Query which users satisfy the three tags a, b, and c at the same time. ``` select bitmap_to_string(bitmap_intersect(user_id)) from ( select bitmap_union(user_id) user_id from bitmap_intersect_test where tag in ('a', 'b', 'c') group by tag ) a ``` Closed #3552. * Add docs of bitmap_union and bitmap_intersect * Support null of bitmap_intersect --- be/src/exprs/bitmap_function.cpp | 31 +++++++++++ be/src/exprs/bitmap_function.h | 6 ++- be/test/exprs/bitmap_function_test.cpp | 34 ++++++++++++ docs/.vuepress/sidebar/en.js | 2 + docs/.vuepress/sidebar/zh-CN.js | 2 + .../bitmap-functions/bitmap_intersect.md | 61 +++++++++++++++++++++ .../sql-functions/bitmap-functions/bitmap_union.md | 58 ++++++++++++++++++++ .../bitmap-functions/bitmap_intersect.md | 62 ++++++++++++++++++++++ .../sql-functions/bitmap-functions/bitmap_union.md | 58 ++++++++++++++++++++ .../apache/doris/analysis/FunctionCallExpr.java | 3 +- .../java/org/apache/doris/catalog/FunctionSet.java | 11 ++++ 11 files changed, 326 insertions(+), 2 deletions(-) diff --git a/be/src/exprs/bitmap_function.cpp b/be/src/exprs/bitmap_function.cpp index 09fdd14..0d9bf25 100644 --- a/be/src/exprs/bitmap_function.cpp +++ b/be/src/exprs/bitmap_function.cpp @@ -302,6 +302,31 @@ void BitmapFunctions::bitmap_union(FunctionContext* ctx, const StringVal& src, S } } +// 
the dst value could be null +void BitmapFunctions::nullable_bitmap_init(FunctionContext* ctx, StringVal* dst) { + dst->is_null = true; +} + +void BitmapFunctions::bitmap_intersect(FunctionContext* ctx, const StringVal& src, StringVal* dst) { + if (src.is_null) { + return; + } + // if dst is null, the src input is the first value + if (dst->is_null) { + dst->is_null = false; + dst->len = sizeof(BitmapValue); + dst->ptr = (uint8_t*)new BitmapValue((char*) src.ptr); + return; + } + auto dst_bitmap = reinterpret_cast<BitmapValue*>(dst->ptr); + // zero size means the src input is a agg object + if (src.len == 0) { + (*dst_bitmap) &= *reinterpret_cast<BitmapValue*>(src.ptr); + } else { + (*dst_bitmap) &= BitmapValue((char*) src.ptr); + } +} + BigIntVal BitmapFunctions::bitmap_count(FunctionContext* ctx, const StringVal& src) { if (src.is_null) { return 0; @@ -343,12 +368,17 @@ StringVal BitmapFunctions::bitmap_hash(doris_udf::FunctionContext* ctx, const do } StringVal BitmapFunctions::bitmap_serialize(FunctionContext* ctx, const StringVal& src) { + if (src.is_null) { + return src; + } + auto src_bitmap = reinterpret_cast<BitmapValue*>(src.ptr); StringVal result = serialize(ctx, src_bitmap); delete src_bitmap; return result; } +// This is a init function for intersect_count not for bitmap_intersect. 
template<typename T, typename ValType> void BitmapFunctions::bitmap_intersect_init(FunctionContext* ctx, StringVal* dst) { dst->is_null = false; @@ -510,6 +540,7 @@ template void BitmapFunctions::bitmap_update_int<IntVal>( template void BitmapFunctions::bitmap_update_int<BigIntVal>( FunctionContext* ctx, const BigIntVal& src, StringVal* dst); +// this is init function for intersect_count not for bitmap_intersect template void BitmapFunctions::bitmap_intersect_init<int8_t, TinyIntVal>( FunctionContext* ctx, StringVal* dst); template void BitmapFunctions::bitmap_intersect_init<int16_t, SmallIntVal>( diff --git a/be/src/exprs/bitmap_function.h b/be/src/exprs/bitmap_function.h index b69fc49..5d86228 100644 --- a/be/src/exprs/bitmap_function.h +++ b/be/src/exprs/bitmap_function.h @@ -51,6 +51,9 @@ public: static BigIntVal bitmap_get_value(FunctionContext* ctx, const StringVal& src); static void bitmap_union(FunctionContext* ctx, const StringVal& src, StringVal* dst); + // the dst value could be null + static void nullable_bitmap_init(FunctionContext* ctx, StringVal* dst); + static void bitmap_intersect(FunctionContext* ctx, const StringVal& src, StringVal* dst); static BigIntVal bitmap_count(FunctionContext* ctx, const StringVal& src); static StringVal bitmap_serialize(FunctionContext* ctx, const StringVal& src); @@ -68,8 +71,9 @@ public: static BooleanVal bitmap_contains(FunctionContext* ctx, const StringVal& src, const BigIntVal& input); static BooleanVal bitmap_has_any(FunctionContext* ctx, const StringVal& lhs, const StringVal& rhs); - // bitmap_intersect + // intersect count template<typename T, typename ValType> + // this is init function for intersect_count not for bitmap_intersect static void bitmap_intersect_init(FunctionContext* ctx, StringVal* dst); template<typename T, typename ValType> static void bitmap_intersect_update(FunctionContext* ctx, const StringVal& src, const ValType& key, diff --git a/be/test/exprs/bitmap_function_test.cpp 
b/be/test/exprs/bitmap_function_test.cpp index 16239d9..17b377f 100644 --- a/be/test/exprs/bitmap_function_test.cpp +++ b/be/test/exprs/bitmap_function_test.cpp @@ -172,6 +172,39 @@ TEST_F(BitmapFunctionsTest, bitmap_union) { ASSERT_EQ(expected, result); } +// test bitmap_intersect +TEST_F(BitmapFunctionsTest, bitmap_intersect) { + StringVal dst; + BitmapFunctions::bitmap_intersect_init_real(ctx, &dst); + + BitmapValue bitmap1(1); + bitmap1.add(2); + bitmap1.add(3); + StringVal src1 = convert_bitmap_to_string(ctx, bitmap1); + BitmapFunctions::bitmap_intersect(ctx, src1, &dst); + + BitmapValue bitmap2(1); + bitmap2.add(2); + StringVal src2 = convert_bitmap_to_string(ctx, bitmap2); + BitmapFunctions::bitmap_intersect(ctx, src2, &dst); + + StringVal serialized = BitmapFunctions::bitmap_serialize(ctx, dst); + BigIntVal result = BitmapFunctions::bitmap_count(ctx, serialized); + BigIntVal expected(2); + ASSERT_EQ(expected, result); +} + +// test bitmap_intersect with null dst +TEST_F(BitmapFunctionsTest, bitmap_intersect_empty) { + StringVal dst; + BitmapFunctions::bitmap_intersect_init_real(ctx, &dst); + + StringVal serialized = BitmapFunctions::bitmap_serialize(ctx, dst); + BigIntVal result = BitmapFunctions::bitmap_count(ctx, serialized); + BigIntVal expected(0); + ASSERT_EQ(expected, result); +} + TEST_F(BitmapFunctionsTest, bitmap_count) { BitmapValue bitmap(1024); bitmap.add(1); @@ -186,6 +219,7 @@ TEST_F(BitmapFunctionsTest, bitmap_count) { ASSERT_EQ(BigIntVal(0), null_bitmap); } +// test intersect_count template<typename ValType, typename ValueType> void test_bitmap_intersect(FunctionContext* ctx, ValType key1, ValType key2) { StringVal bitmap_column("placeholder"); diff --git a/docs/.vuepress/sidebar/en.js b/docs/.vuepress/sidebar/en.js index e7ade2c..f293bb8 100644 --- a/docs/.vuepress/sidebar/en.js +++ b/docs/.vuepress/sidebar/en.js @@ -259,6 +259,8 @@ module.exports = [ "bitmap_or", "bitmap_to_string", "to_bitmap", + "bitmap_intersect", + "bitmap_union", ], 
}, { diff --git a/docs/.vuepress/sidebar/zh-CN.js b/docs/.vuepress/sidebar/zh-CN.js index 880c340..9c4fde2 100644 --- a/docs/.vuepress/sidebar/zh-CN.js +++ b/docs/.vuepress/sidebar/zh-CN.js @@ -271,6 +271,8 @@ module.exports = [ "bitmap_or", "bitmap_to_string", "to_bitmap", + "bitmap_intersect", + "bitmap_union", ], }, { diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_intersect.md b/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_intersect.md new file mode 100644 index 0000000..374441a --- /dev/null +++ b/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_intersect.md @@ -0,0 +1,61 @@ +--- +{ + "title": "bitmap_intersect", + "language": "en" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +# bitmap_intersect +## description + +Aggregation function, used to calculate the bitmap intersection after grouping. Common usage scenarios such as: calculating user retention rate. + +### Syntax + +`BITMAP BITMAP_INTERSECT(BITMAP value)` + +Enter a set of bitmap values, find the intersection of the set of bitmap values, and return. 
+ +## example + +Table schema + +``` +KeysType: AGG_KEY +Columns: tag varchar, date datetime, user_id bitmap bitmap_union +``` + +``` +Find the retention of users between 2020-05-18 and 2020-05-19 under different tags. +mysql> select tag, bitmap_intersect(user_id) from (select tag, date, bitmap_union(user_id) user_id from table where date in ('2020-05-18', '2020-05-19') group by tag, date) a group by tag; +``` + +Used in combination with the bitmap_to_string function to obtain the specific data of the intersection + +``` +Who are the users retained under different tags between 2020-05-18 and 2020-05-19? +mysql> select tag, bitmap_to_string(bitmap_intersect(user_id)) from (select tag, date, bitmap_union(user_id) user_id from table where date in ('2020-05-18', '2020-05-19') group by tag, date) a group by tag; +``` + +## keyword + + BITMAP_INTERSECT, BITMAP diff --git a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_union.md b/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_union.md new file mode 100644 index 0000000..4e92f97 --- /dev/null +++ b/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_union.md @@ -0,0 +1,58 @@ +--- +{ + "title": "bitmap_union", + "language": "en" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. 
See the License for the +specific language governing permissions and limitations +under the License. +--> + +# bitmap_union +## description + +Aggregate function, used to calculate the grouped bitmap union. Common usage scenarios such as: calculating PV, UV. + +### Syntax + +`BITMAP BITMAP_UNION(BITMAP value)` + +Enter a set of bitmap values, find the union of this set of bitmap values, and return. + +## example + +``` +mysql> select page_id, bitmap_union(user_id) from table group by page_id; +``` + +Combined with the bitmap_count function, the UV (unique visitor) count of the web page can be obtained + +``` +mysql> select page_id, bitmap_count(bitmap_union(user_id)) from table group by page_id; +``` + +When the user_id field is int, the above query is semantically equivalent to + +``` +mysql> select page_id, count(distinct user_id) from table group by page_id; +``` + +## keyword + + BITMAP_UNION, BITMAP diff --git a/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_intersect.md b/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_intersect.md new file mode 100644 index 0000000..3b71de4 --- /dev/null +++ b/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_intersect.md @@ -0,0 +1,62 @@ +--- +{ + "title": "bitmap_intersect", + "language": "zh-CN" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. 
See the License for the +specific language governing permissions and limitations +under the License. +--> + +# bitmap_intersect +## description + +聚合函数,用于计算分组后的 bitmap 交集。常见使用场景如:计算用户留存率。 + +### Syntax + +`BITMAP BITMAP_INTERSECT(BITMAP value)` + +输入一组 bitmap 值,求这一组 bitmap 值的交集,并返回。 + +## example + +表结构 + +``` +KeysType: AGG_KEY +Columns: tag varchar, date datetime, user_id bitmap bitmap_union + +``` + +``` +求今天和昨天不同 tag 下的用户留存 +mysql> select tag, bitmap_intersect(user_id) from (select tag, date, bitmap_union(user_id) user_id from table where date in ('2020-05-18', '2020-05-19') group by tag, date) a group by tag; +``` + +和 bitmap_to_string 函数组合使用可以获取交集的具体数据 + +``` +求今天和昨天不同 tag 下留存的用户都是哪些 +mysql> select tag, bitmap_to_string(bitmap_intersect(user_id)) from (select tag, date, bitmap_union(user_id) user_id from table where date in ('2020-05-18', '2020-05-19') group by tag, date) a group by tag; +``` + +## keyword + + BITMAP_INTERSECT, BITMAP diff --git a/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_union.md b/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_union.md new file mode 100644 index 0000000..295e118 --- /dev/null +++ b/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_union.md @@ -0,0 +1,58 @@ +--- +{ + "title": "bitmap_union", + "language": "zh-CN" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +# bitmap_union +## description + +聚合函数,用于计算分组后的 bitmap 并集。常见使用场景如:计算PV,UV。 + +### Syntax + +`BITMAP BITMAP_UNION(BITMAP value)` + +输入一组 bitmap 值,求这一组 bitmap 值的并集,并返回。 + +## example + +``` +mysql> select page_id, bitmap_union(user_id) from table group by page_id; +``` + +和 bitmap_count 函数组合使用可以求得网页的 UV 数据 + +``` +mysql> select page_id, bitmap_count(bitmap_union(user_id)) from table group by page_id; +``` + +当 user_id 字段为 int 时,上面查询语义等同于 + +``` +mysql> select page_id, count(distinct user_id) from table group by page_id; +``` + +## keyword + + BITMAP_UNION, BITMAP diff --git a/fe/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java b/fe/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java index d185e40..5709e04 100644 --- a/fe/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java +++ b/fe/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java @@ -407,7 +407,8 @@ public class FunctionCallExpr extends Expr { if (fnName.getFunction().equalsIgnoreCase(FunctionSet.BITMAP_COUNT) || fnName.getFunction().equalsIgnoreCase(FunctionSet.BITMAP_UNION) - || fnName.getFunction().equalsIgnoreCase(FunctionSet.BITMAP_UNION_COUNT)) { + || fnName.getFunction().equalsIgnoreCase(FunctionSet.BITMAP_UNION_COUNT) + || fnName.getFunction().equalsIgnoreCase(FunctionSet.BITMAP_INTERSECT)) { if (children.size() != 1) { throw new AnalysisException(fnName + " function could only have one child"); } diff --git a/fe/src/main/java/org/apache/doris/catalog/FunctionSet.java b/fe/src/main/java/org/apache/doris/catalog/FunctionSet.java index 26b744c..256a461 100644 --- 
a/fe/src/main/java/org/apache/doris/catalog/FunctionSet.java +++ b/fe/src/main/java/org/apache/doris/catalog/FunctionSet.java @@ -540,6 +540,7 @@ public class FunctionSet { public static final String BITMAP_UNION_INT = "bitmap_union_int"; public static final String BITMAP_COUNT = "bitmap_count"; public static final String INTERSECT_COUNT = "intersect_count"; + public static final String BITMAP_INTERSECT = "bitmap_intersect"; private static final Map<Type, String> BITMAP_UNION_INT_SYMBOL = ImmutableMap.<Type, String>builder() @@ -1144,6 +1145,7 @@ public class FunctionSet { null, false, true, false)); } + // bitmap addBuiltin(AggregateFunction.createBuiltin(BITMAP_UNION, Lists.newArrayList(Type.BITMAP), Type.BITMAP, Type.VARCHAR, @@ -1165,6 +1167,15 @@ public class FunctionSet { null, "_ZN5doris15BitmapFunctions15bitmap_finalizeEPN9doris_udf15FunctionContextERKNS1_9StringValE", true, true, true)); + // TODO(ml): supply function symbol + addBuiltin(AggregateFunction.createBuiltin(BITMAP_INTERSECT, Lists.newArrayList(Type.BITMAP), + Type.BITMAP, Type.VARCHAR, + "_ZN5doris15BitmapFunctions20nullable_bitmap_initEPN9doris_udf15FunctionContextEPNS1_9StringValE", + "_ZN5doris15BitmapFunctions16bitmap_intersectEPN9doris_udf15FunctionContextERKNS1_9StringValEPS4_", + "_ZN5doris15BitmapFunctions16bitmap_intersectEPN9doris_udf15FunctionContextERKNS1_9StringValEPS4_", + "_ZN5doris15BitmapFunctions16bitmap_serializeEPN9doris_udf15FunctionContextERKNS1_9StringValE", + "_ZN5doris15BitmapFunctions16bitmap_serializeEPN9doris_udf15FunctionContextERKNS1_9StringValE", + true, false, true)); //PercentileApprox addBuiltin(AggregateFunction.createBuiltin("percentile_approx", --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org