kangkaisen commented on a change in pull request #2418: Add  intersect_count 
UDAF
URL: https://github.com/apache/incubator-doris/pull/2418#discussion_r356956589
 
 

 ##########
 File path: be/src/exprs/bitmap_function.cpp
 ##########
 @@ -22,6 +22,256 @@
 #include "util/string_parser.hpp"
 
 namespace doris {
+
+namespace detail {
+
+// Byte widths of the serialized forms produced by write_to() and consumed by
+// read_from() below.
+// A DateTimeValue is stored as its packed_time followed by its type.
+const int DATETIME_PACKED_TIME_BYTE_SIZE = 8;
+const int DATETIME_TYPE_BYTE_SIZE = 4;
+
+// A DecimalV2Value is stored as its raw __int128 value.
+const int DECIMAL_BYTE_SIZE = 16;
+
+// get_val start
+// Unwraps a UDF value wrapper (ValType) into the plain value type T.
+// NULL wrappers are not supported: every overload asserts !is_null in debug
+// builds, so callers have to filter NULLs out beforehand.
+template<typename ValType, typename T>
+T get_val(const ValType& x) {
+    DCHECK(!x.is_null);
+    return x.val;
+}
+
+// StringVal -> StringValue, via StringValue::from_string_val().
+template<>
+StringValue get_val(const StringVal& x) {
+    DCHECK(!x.is_null);
+    return StringValue::from_string_val(x);
+}
+
+// DateTimeVal -> DateTimeValue, via DateTimeValue::from_datetime_val().
+template<>
+DateTimeValue get_val(const DateTimeVal& x) {
+    // Same precondition as the other overloads.
+    DCHECK(!x.is_null);
+    return DateTimeValue::from_datetime_val(x);
+}
+
+// DecimalV2Val -> DecimalV2Value, via DecimalV2Value::from_decimal_val().
+template<>
+DecimalV2Value get_val(const DecimalV2Val& x) {
+    // Same precondition as the other overloads.
+    DCHECK(!x.is_null);
+    return DecimalV2Value::from_decimal_val(x);
+}
+// get_val end
+
+// serialize_size start
+// Number of bytes write_to() emits for a value of the given type.
+// Fallback for types without a specialization: the in-memory size of T.
+template<typename T>
+int32_t serialize_size(const T&) {
+    return static_cast<int32_t>(sizeof(T));
+}
+
+// DateTimeValue: packed time followed by the type tag.
+template<>
+int32_t serialize_size(const DateTimeValue&) {
+    return DATETIME_TYPE_BYTE_SIZE + DATETIME_PACKED_TIME_BYTE_SIZE;
+}
+
+// DecimalV2Value: the raw 128-bit value.
+template<>
+int32_t serialize_size(const DecimalV2Value&) {
+    return DECIMAL_BYTE_SIZE;
+}
+
+// StringValue: an int32_t length prefix followed by the bytes themselves.
+template<>
+int32_t serialize_size(const StringValue& v) {
+    return static_cast<int32_t>(sizeof(int32_t) + v.len);
+}
+// serialize_size end
+
+// write_to start
+// Serializes `v` at `dest` and returns the position just past the written
+// bytes. The caller must provide at least serialize_size(v) writable bytes.
+//
+// `dest` is an arbitrary byte offset inside a larger buffer (for example right
+// after a variable-length string), so every field is copied with memcpy rather
+// than stored through a casted pointer, which would be an unaligned,
+// type-punned store.
+
+// Fallback: copies the object representation of T.
+template<typename T>
+char* write_to(const T& v, char* dest) {
+    memcpy(dest, &v, sizeof(T));
+    return dest + sizeof(T);
+}
+
+// DateTimeValue: 8-byte packed_time followed by the 4-byte type.
+template<>
+char* write_to(const DateTimeValue& v, char* dest) {
+    DateTimeVal value;
+    v.to_datetime_val(&value);
+
+    const int64_t packed_time = value.packed_time;
+    memcpy(dest, &packed_time, DATETIME_PACKED_TIME_BYTE_SIZE);
+    dest += DATETIME_PACKED_TIME_BYTE_SIZE;
+
+    const int32_t type = value.type;
+    memcpy(dest, &type, DATETIME_TYPE_BYTE_SIZE);
+    dest += DATETIME_TYPE_BYTE_SIZE;
+    return dest;
+}
+
+// DecimalV2Value: the raw __int128 value.
+template<>
+char* write_to(const DecimalV2Value& v, char* dest) {
+    const __int128 value = v.value();
+    memcpy(dest, &value, DECIMAL_BYTE_SIZE);
+    return dest + DECIMAL_BYTE_SIZE;
+}
+
+// StringValue: int32_t length prefix followed by the string bytes.
+template<>
+char* write_to(const StringValue& v, char* dest) {
+    const int32_t length = static_cast<int32_t>(v.len);
+    memcpy(dest, &length, sizeof(length));
+    dest += sizeof(length);
+
+    memcpy(dest, v.ptr, v.len);
+    dest += v.len;
+    return dest;
+}
+// write_to end
+
+// read_from start
+// Decodes one value written by write_to() from `*src` into `*result` and
+// advances `*src` past the consumed bytes. The input is trusted: no bounds
+// checking is performed.
+//
+// `*src` is an arbitrary byte offset inside a larger buffer, so every field is
+// loaded with memcpy rather than read through a casted pointer, which would be
+// an unaligned, type-punned load.
+
+// Fallback: copies sizeof(T) bytes into the object representation of T.
+template<typename T>
+void read_from(const char** src, T* result) {
+    memcpy(result, *src, sizeof(T));
+    *src += sizeof(T);
+}
+
+// DateTimeValue: 8-byte packed_time followed by the 4-byte type.
+template<>
+void read_from(const char** src, DateTimeValue* result) {
+    int64_t packed_time = 0;
+    memcpy(&packed_time, *src, DATETIME_PACKED_TIME_BYTE_SIZE);
+    *src += DATETIME_PACKED_TIME_BYTE_SIZE;
+
+    int32_t type = 0;
+    memcpy(&type, *src, DATETIME_TYPE_BYTE_SIZE);
+    *src += DATETIME_TYPE_BYTE_SIZE;
+
+    DateTimeVal value;
+    value.is_null = false;
+    value.packed_time = packed_time;
+    value.type = type;
+    *result = DateTimeValue::from_datetime_val(value);
+}
+
+// DecimalV2Value: the raw __int128 value.
+template<>
+void read_from(const char** src, DecimalV2Value* result) {
+    __int128 v = 0;
+    memcpy(&v, *src, DECIMAL_BYTE_SIZE);
+    *src += DECIMAL_BYTE_SIZE;
+    *result = DecimalV2Value(v);
+}
+
+// StringValue: int32_t length prefix followed by the string bytes.
+// The result is constructed from a pointer into the source buffer.
+// NOTE(review): presumably StringValue does not copy, so the buffer must
+// outlive the result - confirm against StringValue's constructor.
+template<>
+void read_from(const char** src, StringValue* result) {
+    int32_t length = 0;
+    memcpy(&length, *src, sizeof(length));
+    *src += sizeof(length);
+
+    // The constructor is called with a mutable char*; make the const removal explicit.
+    *result = StringValue(const_cast<char*>(*src), length);
+    *src += length;
+}
+// read_from end
+
+} // namespace detail
+
+template<typename T>
+struct BitmapRetention {
+public:
+    // Creates an empty state: no keys registered and no bitmaps collected.
+    BitmapRetention() {}
+
+    // Rebuilds the state from a buffer in the layout written by serialize().
+    explicit BitmapRetention(const char* src) {
+        deserialize(src);
+    }
+
+    // Registers a key to track; update() ignores keys that were never added.
+    void add_key(T key) {
+        _keys.push_back(key);
+    }
+
+    // Folds `bitmap` into the bitmap stored for `key`.
+    // Keys that were not registered through add_key() are ignored. The first
+    // bitmap seen for a key is copied in; later ones are merged into it.
+    void update(const T& key, const RoaringBitmap& bitmap) {
+        if (std::find(_keys.begin(), _keys.end(), key) == _keys.end()) {
+            return;
+        }
+
+        // Look the entry up once and reuse the iterator for the merge.
+        auto it = _bitmaps.find(key);
+        if (it != _bitmaps.end()) {
+            it->second.merge(bitmap);
+        } else {
+            _bitmaps[key] = bitmap;
+        }
+    }
+
+    // Combines another partial state into this one, key by key.
+    // Only keys listed in _keys are considered; keys for which `other` holds
+    // no bitmap are skipped.
+    void merge(const BitmapRetention& other) {
+        // Note, in merge phase, this _keys is empty and other's _keys is whole
+        if (_keys.empty()) {
+            _keys = other._keys;
+        }
+
+        for (auto& key : _keys) {
+            auto theirs = other._bitmaps.find(key);
+            if (theirs == other._bitmaps.end()) {
+                continue;
+            }
+
+            auto mine = _bitmaps.find(key);
+            if (mine == _bitmaps.end()) {
+                _bitmaps[key] = theirs->second;
+            } else {
+                mine->second.merge(theirs->second);
+            }
+        }
+    }
+
+    // Returns the cardinality of the intersection of the bitmaps of all
+    // registered keys, or 0 when no key is registered or any key has no bitmap.
+    // The intersection is computed in place: the bitmap stored for the first
+    // key is overwritten with the intersection.
+    int64_t result() {
+        if (_keys.empty()) {
+            return 0;
+        }
+
+        // if any specified key not in map, the intersection must be 0
+        for (auto& key : _keys) {
+            if (_bitmaps.find(key) == _bitmaps.end()) {
+                return 0;
+            }
+        }
+
+        // Every key is present, so operator[] below never inserts.
+        RoaringBitmap& intersection = _bitmaps[_keys[0]];
+        // size_t index: _keys.size() is unsigned.
+        for (size_t i = 1; i < _keys.size(); ++i) {
+            intersection.intersect(_bitmaps[_keys[i]]);
+        }
+
+        return intersection.cardinality();
+    }
+
+    // Number of bytes serialize() writes: two int32_t counters, every key, and
+    // every (key, bitmap) pair.
+    size_t size() {
+        size_t total = sizeof(int32_t); // number of keys
+        for (auto& key : _keys) {
+            total += detail::serialize_size(key);
+        }
+
+        total += sizeof(int32_t); // number of bitmaps
+        for (auto& entry : _bitmaps) {
+            total += detail::serialize_size(entry.first);
+            total += entry.second.size();
+        }
+        return total;
+    }
+
+    // Writes the state to `dest` as:
+    //   [int32 key count][key]...[int32 bitmap count]([key][bitmap bytes])...
+    // must call size() first
+    // NOTE(review): presumably so that `dest` can be allocated with at least
+    // size() bytes - confirm against the caller.
+    void serialize(char* dest) {
+        char* writer = dest;
+
+        // The counters are copied with memcpy: `writer` may sit at an
+        // unaligned offset, notably after variable-length keys.
+        const int32_t keys_size = static_cast<int32_t>(_keys.size());
+        memcpy(writer, &keys_size, sizeof(keys_size));
+        writer += sizeof(keys_size);
+        for (auto& key : _keys) {
+            writer = detail::write_to(key, writer);
+        }
+
+        const int32_t bitmaps_size = static_cast<int32_t>(_bitmaps.size());
+        memcpy(writer, &bitmaps_size, sizeof(bitmaps_size));
+        writer += sizeof(bitmaps_size);
+        for (auto& kv : _bitmaps) {
+            writer = detail::write_to(kv.first, writer);
+            kv.second.serialize(writer);
+            writer += kv.second.size();
+        }
+    }
+
+    // Loads the state encoded at `src`, in the layout written by serialize():
+    //   [int32 key count][key]...[int32 bitmap count]([key][bitmap bytes])...
+    // Existing keys and bitmaps are not cleared first. The input is trusted:
+    // no bounds checking is performed.
+    void deserialize(const char* src) {
+        const char* reader = src;
+
+        // The counters are loaded with memcpy: `reader` may sit at an
+        // unaligned offset, notably after variable-length keys.
+        int32_t keys_size = 0;
+        memcpy(&keys_size, reader, sizeof(keys_size));
+        reader += sizeof(keys_size);
+        for (int32_t i = 0; i < keys_size; ++i) {
+            T value;
+            detail::read_from(&reader, &value);
+            _keys.push_back(value);
+        }
+
+        int32_t bitmaps_size = 0;
+        memcpy(&bitmaps_size, reader, sizeof(bitmaps_size));
+        reader += sizeof(bitmaps_size);
+        for (int32_t i = 0; i < bitmaps_size; ++i) {
+            T key;
+            detail::read_from(&reader, &key);
+            RoaringBitmap bitmap(reader);
+            // Skip the bytes the bitmap was decoded from, mirroring serialize().
+            reader += bitmap.size();
+            _bitmaps[key] = bitmap;
+        }
+    }
+
+private:
+    // Bitmap collected per key; an entry exists only once update(), merge() or
+    // deserialize() has stored one for that key.
+    std::map<T, RoaringBitmap> _bitmaps;
+    // Keys registered through add_key(), or copied in by merge()/deserialize().
+    // update() ignores keys not listed here, and result() returns 0 unless
+    // every listed key has a bitmap.
+    std::vector<T> _keys;
 
 Review comment:
   `_keys` holds the constant arguments; it is a superset of the bitmap keys. I 
will rename it.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to