This is an automated email from the ASF dual-hosted git repository.

kangpinghuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 0d66e6b  Support bitmap_intersect (#3571)
0d66e6b is described below

commit 0d66e6bd1578eba0e9a58cf591f05a83f9e2b334
Author: EmmyMiao87 <522274...@qq.com>
AuthorDate: Wed May 20 21:12:02 2020 +0800

    Support bitmap_intersect (#3571)
    
    * Support bitmap_intersect
    
    Support aggregate function Bitmap Intersect, which is mainly used to take 
the intersection of grouped data.
    The function 'bitmap_intersect(expr)' calculates the intersection of bitmap 
columns and returns a bitmap object.
    The definition is as follows:
    FunctionName: bitmap_intersect,
    InputType: bitmap,
    OutputType: bitmap
    
    The scenario is as follows:
    Query which users satisfy the three tags a, b, and c at the same time.
    
    ```
    select bitmap_to_string(bitmap_intersect(user_id)) from
    (
        select bitmap_union(user_id) user_id from bitmap_intersect_test
        where tag in ('a', 'b', 'c')
        group by tag
    ) a
    ```
    Closed #3552.
    
    * Add docs of bitmap_union and bitmap_intersect
    
    * Support null of bitmap_intersect
---
 be/src/exprs/bitmap_function.cpp                   | 31 +++++++++++
 be/src/exprs/bitmap_function.h                     |  6 ++-
 be/test/exprs/bitmap_function_test.cpp             | 34 ++++++++++++
 docs/.vuepress/sidebar/en.js                       |  2 +
 docs/.vuepress/sidebar/zh-CN.js                    |  2 +
 .../bitmap-functions/bitmap_intersect.md           | 61 +++++++++++++++++++++
 .../sql-functions/bitmap-functions/bitmap_union.md | 58 ++++++++++++++++++++
 .../bitmap-functions/bitmap_intersect.md           | 62 ++++++++++++++++++++++
 .../sql-functions/bitmap-functions/bitmap_union.md | 58 ++++++++++++++++++++
 .../apache/doris/analysis/FunctionCallExpr.java    |  3 +-
 .../java/org/apache/doris/catalog/FunctionSet.java | 11 ++++
 11 files changed, 326 insertions(+), 2 deletions(-)

diff --git a/be/src/exprs/bitmap_function.cpp b/be/src/exprs/bitmap_function.cpp
index 09fdd14..0d9bf25 100644
--- a/be/src/exprs/bitmap_function.cpp
+++ b/be/src/exprs/bitmap_function.cpp
@@ -302,6 +302,31 @@ void BitmapFunctions::bitmap_union(FunctionContext* ctx, 
const StringVal& src, S
     }
 }
 
+// the dst value could be null
+void BitmapFunctions::nullable_bitmap_init(FunctionContext* ctx, StringVal* 
dst) {
+    dst->is_null = true;
+}
+
+void BitmapFunctions::bitmap_intersect(FunctionContext* ctx, const StringVal& 
src, StringVal* dst) {
+    if (src.is_null) {
+        return;
+    }
+    // if dst is null, the src input is the first value
+    if (dst->is_null) {
+        dst->is_null = false;
+        dst->len = sizeof(BitmapValue);
+        dst->ptr = (uint8_t*)new BitmapValue((char*) src.ptr);
+        return;
+    }
+    auto dst_bitmap = reinterpret_cast<BitmapValue*>(dst->ptr);
+    // zero size means the src input is an agg object
+    if (src.len == 0) {
+        (*dst_bitmap) &= *reinterpret_cast<BitmapValue*>(src.ptr);
+    } else {
+        (*dst_bitmap) &= BitmapValue((char*) src.ptr);
+    }
+}
+
 BigIntVal BitmapFunctions::bitmap_count(FunctionContext* ctx, const StringVal& 
src) {
     if (src.is_null) {
         return 0;
@@ -343,12 +368,17 @@ StringVal 
BitmapFunctions::bitmap_hash(doris_udf::FunctionContext* ctx, const do
 }
 
 StringVal BitmapFunctions::bitmap_serialize(FunctionContext* ctx, const 
StringVal& src) {
+    if (src.is_null) {
+        return src;
+    }
+
     auto src_bitmap = reinterpret_cast<BitmapValue*>(src.ptr);
     StringVal result = serialize(ctx, src_bitmap);
     delete src_bitmap;
     return result;
 }
 
+// This is an init function for intersect_count, not for bitmap_intersect.
 template<typename T, typename ValType>
 void BitmapFunctions::bitmap_intersect_init(FunctionContext* ctx, StringVal* 
dst) {
     dst->is_null = false;
@@ -510,6 +540,7 @@ template void BitmapFunctions::bitmap_update_int<IntVal>(
 template void BitmapFunctions::bitmap_update_int<BigIntVal>(
         FunctionContext* ctx, const BigIntVal& src, StringVal* dst);
 
+// this is the init function for intersect_count, not for bitmap_intersect
 template void BitmapFunctions::bitmap_intersect_init<int8_t, TinyIntVal>(
     FunctionContext* ctx, StringVal* dst);
 template void BitmapFunctions::bitmap_intersect_init<int16_t, SmallIntVal>(
diff --git a/be/src/exprs/bitmap_function.h b/be/src/exprs/bitmap_function.h
index b69fc49..5d86228 100644
--- a/be/src/exprs/bitmap_function.h
+++ b/be/src/exprs/bitmap_function.h
@@ -51,6 +51,9 @@ public:
     static BigIntVal bitmap_get_value(FunctionContext* ctx, const StringVal& 
src);
 
     static void bitmap_union(FunctionContext* ctx, const StringVal& src, 
StringVal* dst);
+    // the dst value could be null
+    static void nullable_bitmap_init(FunctionContext* ctx, StringVal* dst);
+    static void bitmap_intersect(FunctionContext* ctx, const StringVal& src, 
StringVal* dst);
     static BigIntVal bitmap_count(FunctionContext* ctx, const StringVal& src);
 
     static StringVal bitmap_serialize(FunctionContext* ctx, const StringVal& 
src);
@@ -68,8 +71,9 @@ public:
     static BooleanVal bitmap_contains(FunctionContext* ctx, const StringVal& 
src, const BigIntVal& input);
     static BooleanVal bitmap_has_any(FunctionContext* ctx, const StringVal& 
lhs, const StringVal& rhs);
 
-    // bitmap_intersect
+    // intersect count
     template<typename T, typename ValType>
+    // this is the init function for intersect_count, not for bitmap_intersect
     static void bitmap_intersect_init(FunctionContext* ctx, StringVal* dst);
     template<typename T, typename ValType>
     static void bitmap_intersect_update(FunctionContext* ctx, const StringVal& 
src, const ValType& key,
diff --git a/be/test/exprs/bitmap_function_test.cpp 
b/be/test/exprs/bitmap_function_test.cpp
index 16239d9..17b377f 100644
--- a/be/test/exprs/bitmap_function_test.cpp
+++ b/be/test/exprs/bitmap_function_test.cpp
@@ -172,6 +172,39 @@ TEST_F(BitmapFunctionsTest, bitmap_union) {
     ASSERT_EQ(expected, result);
 }
 
+// test bitmap_intersect
+TEST_F(BitmapFunctionsTest, bitmap_intersect) {
+    StringVal dst;
+    BitmapFunctions::bitmap_intersect_init_real(ctx, &dst);
+
+    BitmapValue bitmap1(1);
+    bitmap1.add(2);
+    bitmap1.add(3);
+    StringVal src1 = convert_bitmap_to_string(ctx, bitmap1);
+    BitmapFunctions::bitmap_intersect(ctx, src1, &dst);
+
+    BitmapValue bitmap2(1);
+    bitmap2.add(2);
+    StringVal src2 = convert_bitmap_to_string(ctx, bitmap2);
+    BitmapFunctions::bitmap_intersect(ctx, src2, &dst);
+
+    StringVal serialized = BitmapFunctions::bitmap_serialize(ctx, dst);
+    BigIntVal result = BitmapFunctions::bitmap_count(ctx, serialized);
+    BigIntVal expected(2);
+    ASSERT_EQ(expected, result);    
+}
+
+// test bitmap_intersect with null dst
+TEST_F(BitmapFunctionsTest, bitmap_intersect_empty) {
+    StringVal dst;
+    BitmapFunctions::bitmap_intersect_init_real(ctx, &dst);
+
+    StringVal serialized = BitmapFunctions::bitmap_serialize(ctx, dst);
+    BigIntVal result = BitmapFunctions::bitmap_count(ctx, serialized);
+    BigIntVal expected(0);
+    ASSERT_EQ(expected, result);    
+}
+
 TEST_F(BitmapFunctionsTest, bitmap_count) {
     BitmapValue bitmap(1024);
     bitmap.add(1);
@@ -186,6 +219,7 @@ TEST_F(BitmapFunctionsTest, bitmap_count) {
     ASSERT_EQ(BigIntVal(0), null_bitmap);
 }
 
+// test intersect_count
 template<typename ValType, typename ValueType>
 void test_bitmap_intersect(FunctionContext* ctx, ValType key1, ValType key2) {
     StringVal bitmap_column("placeholder");
diff --git a/docs/.vuepress/sidebar/en.js b/docs/.vuepress/sidebar/en.js
index e7ade2c..f293bb8 100644
--- a/docs/.vuepress/sidebar/en.js
+++ b/docs/.vuepress/sidebar/en.js
@@ -259,6 +259,8 @@ module.exports = [
               "bitmap_or",
               "bitmap_to_string",
               "to_bitmap",
+              "bitmap_intersect",
+              "bitmap_union",
             ],
           },
           {
diff --git a/docs/.vuepress/sidebar/zh-CN.js b/docs/.vuepress/sidebar/zh-CN.js
index 880c340..9c4fde2 100644
--- a/docs/.vuepress/sidebar/zh-CN.js
+++ b/docs/.vuepress/sidebar/zh-CN.js
@@ -271,6 +271,8 @@ module.exports = [
               "bitmap_or",
               "bitmap_to_string",
               "to_bitmap",
+              "bitmap_intersect",
+              "bitmap_union",
             ],
           },
           {
diff --git 
a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_intersect.md 
b/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_intersect.md
new file mode 100644
index 0000000..374441a
--- /dev/null
+++ b/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_intersect.md
@@ -0,0 +1,61 @@
+---
+{
+    "title": "bitmap_intersect",
+    "language": "en"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+# bitmap_intersect
+## description
+
+Aggregate function, used to calculate the bitmap intersection after 
grouping. A common usage scenario is calculating the user retention rate.
+
+### Syntax
+
+`BITMAP BITMAP_INTERSECT(BITMAP value)`
+
+Takes a set of bitmap values as input, computes the intersection of those 
bitmap values, and returns the result.
+
+## example
+
+Table schema
+
+```
+KeysType: AGG_KEY
+Columns: tag varchar, date datetime, user_id bitmap bitmap_union
+```
+
+```
+Find the retention of users between 2020-05-18 and 2020-05-19 under different 
tags.
+mysql> select tag, bitmap_intersect(user_id) from (select tag, date, 
bitmap_union(user_id) user_id from table where date in ('2020-05-18', 
'2020-05-19') group by tag, date) a group by tag;
+```
+
+Used in combination with the bitmap_to_string function to obtain the specific 
data of the intersection
+
+```
+Who are the users retained under different tags between 2020-05-18 and 
2020-05-19?
+mysql> select tag, bitmap_to_string(bitmap_intersect(user_id)) from (select 
tag, date, bitmap_union(user_id) user_id from table where date in 
('2020-05-18', '2020-05-19') group by tag, date) a group by tag;
+```
+
+## keyword
+
+    BITMAP_INTERSECT, BITMAP
diff --git 
a/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_union.md 
b/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_union.md
new file mode 100644
index 0000000..4e92f97
--- /dev/null
+++ b/docs/en/sql-reference/sql-functions/bitmap-functions/bitmap_union.md
@@ -0,0 +1,58 @@
+---
+{
+    "title": "bitmap_union",
+    "language": "en"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+# bitmap_union
+## description
+
+Aggregate function, used to calculate the bitmap union after grouping. Common 
usage scenarios include calculating PV and UV.
+
+### Syntax
+
+`BITMAP BITMAP_UNION(BITMAP value)`
+
+Takes a set of bitmap values as input, computes the union of those bitmap 
values, and returns the result.
+
+## example
+
+```
+mysql> select page_id, bitmap_union(user_id) from table group by page_id;
+```
+
+Combined with the bitmap_count function, the UV data of the web page can be 
obtained
+
+```
+mysql> select page_id, bitmap_count(bitmap_union(user_id)) from table group by 
page_id;
+```
+
+When the user_id field is int, the above query semantics is equivalent to
+
+```
+mysql> select page_id, count(distinct user_id) from table group by page_id;
+```
+
+## keyword
+
+    BITMAP_UNION, BITMAP
diff --git 
a/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_intersect.md 
b/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_intersect.md
new file mode 100644
index 0000000..3b71de4
--- /dev/null
+++ 
b/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_intersect.md
@@ -0,0 +1,62 @@
+---
+{
+    "title": "bitmap_intersect",
+    "language": "zh-CN"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+# bitmap_intersect
+## description
+
+聚合函数,用于计算分组后的 bitmap 交集。常见使用场景如:计算用户留存率。
+
+### Syntax
+
+`BITMAP BITMAP_INTERSECT(BITMAP value)`
+
+输入一组 bitmap 值,求这一组 bitmap 值的交集,并返回。
+
+## example
+
+表结构
+
+```
+KeysType: AGG_KEY
+Columns: tag varchar, date datetime, user_id bitmap bitmap_union
+
+```
+
+```
+求今天和昨天不同 tag 下的用户留存
+mysql> select tag, bitmap_intersect(user_id) from (select tag, date, 
bitmap_union(user_id) user_id from table where date in ('2020-05-18', 
'2020-05-19') group by tag, date) a group by tag;
+```
+
+和 bitmap_to_string 函数组合使用可以获取交集的具体数据
+
+```
+求今天和昨天不同 tag 下留存的用户都是哪些
+mysql> select tag, bitmap_to_string(bitmap_intersect(user_id)) from (select 
tag, date, bitmap_union(user_id) user_id from table where date in 
('2020-05-18', '2020-05-19') group by tag, date) a group by tag;
+```
+
+## keyword
+
+    BITMAP_INTERSECT, BITMAP
diff --git 
a/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_union.md 
b/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_union.md
new file mode 100644
index 0000000..295e118
--- /dev/null
+++ b/docs/zh-CN/sql-reference/sql-functions/bitmap-functions/bitmap_union.md
@@ -0,0 +1,58 @@
+---
+{
+    "title": "bitmap_union",
+    "language": "zh-CN"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+# bitmap_union
+## description
+
+聚合函数,用于计算分组后的 bitmap 并集。常见使用场景如:计算PV,UV。
+
+### Syntax
+
+`BITMAP BITMAP_UNION(BITMAP value)`
+
+输入一组 bitmap 值,求这一组 bitmap 值的并集,并返回。
+
+## example
+
+```
+mysql> select page_id, bitmap_union(user_id) from table group by page_id;
+```
+
+和 bitmap_count 函数组合使用可以求得网页的 UV 数据
+
+```
+mysql> select page_id, bitmap_count(bitmap_union(user_id)) from table group by 
page_id;
+```
+
+当 user_id 字段为 int 时,上面查询语义等同于
+
+```
+mysql> select page_id, count(distinct user_id) from table group by page_id;
+```
+
+## keyword
+
+    BITMAP_UNION, BITMAP
diff --git a/fe/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java 
b/fe/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
index d185e40..5709e04 100644
--- a/fe/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
+++ b/fe/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
@@ -407,7 +407,8 @@ public class FunctionCallExpr extends Expr {
 
         if (fnName.getFunction().equalsIgnoreCase(FunctionSet.BITMAP_COUNT)
                 || 
fnName.getFunction().equalsIgnoreCase(FunctionSet.BITMAP_UNION)
-                || 
fnName.getFunction().equalsIgnoreCase(FunctionSet.BITMAP_UNION_COUNT)) {
+                || 
fnName.getFunction().equalsIgnoreCase(FunctionSet.BITMAP_UNION_COUNT)
+                || 
fnName.getFunction().equalsIgnoreCase(FunctionSet.BITMAP_INTERSECT)) {
             if (children.size() != 1) {
                 throw new AnalysisException(fnName + " function could only 
have one child");
             }
diff --git a/fe/src/main/java/org/apache/doris/catalog/FunctionSet.java 
b/fe/src/main/java/org/apache/doris/catalog/FunctionSet.java
index 26b744c..256a461 100644
--- a/fe/src/main/java/org/apache/doris/catalog/FunctionSet.java
+++ b/fe/src/main/java/org/apache/doris/catalog/FunctionSet.java
@@ -540,6 +540,7 @@ public class FunctionSet {
     public static final String BITMAP_UNION_INT = "bitmap_union_int";
     public static final String BITMAP_COUNT = "bitmap_count";
     public static final String INTERSECT_COUNT = "intersect_count";
+    public static final String BITMAP_INTERSECT = "bitmap_intersect";
 
     private static final Map<Type, String> BITMAP_UNION_INT_SYMBOL =
             ImmutableMap.<Type, String>builder()
@@ -1144,6 +1145,7 @@ public class FunctionSet {
                     null, false, true, false));
         }
 
+        // bitmap
         addBuiltin(AggregateFunction.createBuiltin(BITMAP_UNION, 
Lists.newArrayList(Type.BITMAP),
                 Type.BITMAP,
                 Type.VARCHAR,
@@ -1165,6 +1167,15 @@ public class FunctionSet {
                 null,
                 
"_ZN5doris15BitmapFunctions15bitmap_finalizeEPN9doris_udf15FunctionContextERKNS1_9StringValE",
                 true, true, true));
+        // TODO(ml): supply function symbol
+        addBuiltin(AggregateFunction.createBuiltin(BITMAP_INTERSECT, 
Lists.newArrayList(Type.BITMAP),
+                Type.BITMAP, Type.VARCHAR,
+                
"_ZN5doris15BitmapFunctions20nullable_bitmap_initEPN9doris_udf15FunctionContextEPNS1_9StringValE",
+                
"_ZN5doris15BitmapFunctions16bitmap_intersectEPN9doris_udf15FunctionContextERKNS1_9StringValEPS4_",
+                
"_ZN5doris15BitmapFunctions16bitmap_intersectEPN9doris_udf15FunctionContextERKNS1_9StringValEPS4_",
+                
"_ZN5doris15BitmapFunctions16bitmap_serializeEPN9doris_udf15FunctionContextERKNS1_9StringValE",
+                
"_ZN5doris15BitmapFunctions16bitmap_serializeEPN9doris_udf15FunctionContextERKNS1_9StringValE",
+                true, false, true));
 
         //PercentileApprox
         addBuiltin(AggregateFunction.createBuiltin("percentile_approx",


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to