This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
new 4b9c87d3bf [improvement](bitmap) Use shared_ptr in BitmapValue to
avoid deep copying (#19101) (#21271)
4b9c87d3bf is described below
commit 4b9c87d3bf0f73c9c7f2329df8b2e6fc52230b3f
Author: Jerry Hu <[email protected]>
AuthorDate: Tue Jul 11 18:34:20 2023 +0800
[improvement](bitmap) Use shared_ptr in BitmapValue to avoid deep copying
(#19101) (#21271)
Currently bitmapvalue type is copied between columns, it cost a lot of
memory. Use a shared ptr in bitmap value to avoid copy data.
---
be/src/util/bitmap_value.h | 227 +++++++++++++++++++++++++------------
be/test/util/bitmap_value_test.cpp | 22 ++++
2 files changed, 178 insertions(+), 71 deletions(-)
diff --git a/be/src/util/bitmap_value.h b/be/src/util/bitmap_value.h
index 21fa5f217e..f2c869f862 100644
--- a/be/src/util/bitmap_value.h
+++ b/be/src/util/bitmap_value.h
@@ -1144,10 +1144,10 @@ class BitmapValueIterator;
class BitmapValue {
public:
// Construct an empty bitmap.
- BitmapValue() : _type(EMPTY) {}
+ BitmapValue() : _type(EMPTY), _is_shared(false) {}
// Construct a bitmap with one element.
- explicit BitmapValue(uint64_t value) : _sv(value), _type(SINGLE) {}
+ explicit BitmapValue(uint64_t value) : _sv(value), _type(SINGLE),
_is_shared(false) {}
// Construct a bitmap from serialized data.
explicit BitmapValue(const char* src) {
@@ -1155,8 +1155,48 @@ public:
DCHECK(res);
}
+ BitmapValue(const BitmapValue& other) {
+ _type = other._type;
+ _sv = other._sv;
+ _bitmap = other._bitmap;
+ _is_shared = true;
+ // should also set other's state to shared, so that other bitmap value
will
+ // create a new bitmap when it wants to modify it.
+ const_cast<BitmapValue&>(other)._is_shared = true;
+ }
+
+ BitmapValue(BitmapValue&& other) {
+ _type = other._type;
+ _sv = other._sv;
+ _is_shared = other._is_shared;
+ _bitmap = std::move(other._bitmap);
+ }
+
+ BitmapValue& operator=(const BitmapValue& other) {
+ _type = other._type;
+ _sv = other._sv;
+ _bitmap = other._bitmap;
+ _is_shared = true;
+ // should also set other's state to shared, so that other bitmap value
will
+ // create a new bitmap when it wants to modify it.
+ const_cast<BitmapValue&>(other)._is_shared = true;
+ return *this;
+ }
+
+ BitmapValue& operator=(BitmapValue&& other) {
+ if (this == &other) {
+ return *this;
+ }
+
+ _type = other._type;
+ _sv = other._sv;
+ _bitmap = std::move(other._bitmap);
+ _is_shared = other._is_shared;
+ return *this;
+ }
+
// Construct a bitmap from given elements.
- explicit BitmapValue(const std::vector<uint64_t>& bits) {
+ explicit BitmapValue(const std::vector<uint64_t>& bits) :
_is_shared(false) {
switch (bits.size()) {
case 0:
_type = EMPTY;
@@ -1167,7 +1207,8 @@ public:
break;
default:
_type = BITMAP;
- _bitmap.addMany(bits.size(), &bits[0]);
+ _prepare_bitmap_for_write();
+ _bitmap->addMany(bits.size(), &bits[0]);
}
}
@@ -1182,12 +1223,15 @@ public:
if (_sv == value) {
break;
}
- _bitmap.add(_sv);
- _bitmap.add(value);
+ _prepare_bitmap_for_write();
+ _bitmap->add(_sv);
+ _bitmap->add(value);
_type = BITMAP;
break;
case BITMAP:
- _bitmap.add(value);
+ _prepare_bitmap_for_write();
+ _bitmap->add(value);
+ break;
}
}
@@ -1202,7 +1246,8 @@ public:
}
break;
case BITMAP:
- _bitmap.remove(value);
+ _prepare_bitmap_for_write();
+ _bitmap->remove(value);
_convert_to_smaller_type();
}
}
@@ -1220,12 +1265,13 @@ public:
case EMPTY:
break;
case SINGLE:
- if (rhs._bitmap.contains(_sv)) {
+ if (rhs._bitmap->contains(_sv)) {
_type = EMPTY;
}
break;
case BITMAP:
- _bitmap -= rhs._bitmap;
+ _prepare_bitmap_for_write();
+ *_bitmap -= *rhs._bitmap;
_convert_to_smaller_type();
break;
}
@@ -1250,15 +1296,21 @@ public:
switch (_type) {
case EMPTY:
_bitmap = rhs._bitmap;
+ const_cast<BitmapValue&>(rhs)._is_shared = true;
+ _is_shared = true;
_type = BITMAP;
break;
case SINGLE:
_bitmap = rhs._bitmap;
- _bitmap.add(_sv);
+ const_cast<BitmapValue&>(rhs)._is_shared = true;
+ _is_shared = true;
+ _prepare_bitmap_for_write();
+ _bitmap->add(_sv);
_type = BITMAP;
break;
case BITMAP:
- _bitmap |= rhs._bitmap;
+ _prepare_bitmap_for_write();
+ *_bitmap |= *rhs._bitmap;
}
break;
}
@@ -1277,24 +1329,25 @@ public:
single_values.push_back(value->_sv);
break;
case BITMAP:
- bitmaps.push_back(&value->_bitmap);
+ bitmaps.push_back(value->_bitmap.get());
break;
}
}
if (!bitmaps.empty()) {
+ _prepare_bitmap_for_write();
switch (_type) {
case EMPTY:
- _bitmap = detail::Roaring64Map::fastunion(bitmaps.size(),
bitmaps.data());
+ *_bitmap = detail::Roaring64Map::fastunion(bitmaps.size(),
bitmaps.data());
_type = BITMAP;
break;
case SINGLE:
- _bitmap = detail::Roaring64Map::fastunion(bitmaps.size(),
bitmaps.data());
- _bitmap.add(_sv);
+ *_bitmap = detail::Roaring64Map::fastunion(bitmaps.size(),
bitmaps.data());
+ _bitmap->add(_sv);
_type = BITMAP;
break;
case BITMAP:
- _bitmap |= detail::Roaring64Map::fastunion(bitmaps.size(),
bitmaps.data());
+ *_bitmap |= detail::Roaring64Map::fastunion(bitmaps.size(),
bitmaps.data());
break;
}
}
@@ -1303,9 +1356,10 @@ public:
_sv = single_values[0];
_type = SINGLE;
} else if (!single_values.empty()) {
- _bitmap.addMany(single_values.size(), single_values.data());
+ _prepare_bitmap_for_write();
+ _bitmap->addMany(single_values.size(), single_values.data());
if (_type == SINGLE) {
- _bitmap.add(_sv);
+ _bitmap->add(_sv);
}
_type = BITMAP;
}
@@ -1322,7 +1376,7 @@ public:
switch (rhs._type) {
case EMPTY:
_type = EMPTY;
- _bitmap.clear();
+ _bitmap.reset();
break;
case SINGLE:
switch (_type) {
@@ -1334,13 +1388,13 @@ public:
}
break;
case BITMAP:
- if (!_bitmap.contains(rhs._sv)) {
+ if (!_bitmap->contains(rhs._sv)) {
_type = EMPTY;
} else {
_type = SINGLE;
_sv = rhs._sv;
}
- _bitmap.clear();
+ _bitmap.reset();
break;
}
break;
@@ -1349,12 +1403,13 @@ public:
case EMPTY:
break;
case SINGLE:
- if (!rhs._bitmap.contains(_sv)) {
+ if (!rhs._bitmap->contains(_sv)) {
_type = EMPTY;
}
break;
case BITMAP:
- _bitmap &= rhs._bitmap;
+ _prepare_bitmap_for_write();
+ *_bitmap &= *rhs._bitmap;
_convert_to_smaller_type();
break;
}
@@ -1380,16 +1435,17 @@ public:
case SINGLE:
if (_sv == rhs._sv) {
_type = EMPTY;
- _bitmap.clear();
+ _bitmap.reset();
} else {
add(rhs._sv);
}
break;
case BITMAP:
- if (!_bitmap.contains(rhs._sv)) {
+ if (!_bitmap->contains(rhs._sv)) {
add(rhs._sv);
} else {
- _bitmap.remove(rhs._sv);
+ _prepare_bitmap_for_write();
+ _bitmap->remove(rhs._sv);
}
break;
}
@@ -1398,19 +1454,25 @@ public:
switch (_type) {
case EMPTY:
_bitmap = rhs._bitmap;
+ const_cast<BitmapValue&>(rhs)._is_shared = true;
+ _is_shared = true;
_type = BITMAP;
break;
case SINGLE:
_bitmap = rhs._bitmap;
+ const_cast<BitmapValue&>(rhs)._is_shared = true;
+ _is_shared = true;
_type = BITMAP;
- if (!rhs._bitmap.contains(_sv)) {
- _bitmap.add(_sv);
+ _prepare_bitmap_for_write();
+ if (!rhs._bitmap->contains(_sv)) {
+ _bitmap->add(_sv);
} else {
- _bitmap.remove(_sv);
+ _bitmap->remove(_sv);
}
break;
case BITMAP:
- _bitmap ^= rhs._bitmap;
+ _prepare_bitmap_for_write();
+ *_bitmap ^= *rhs._bitmap;
_convert_to_smaller_type();
break;
}
@@ -1427,7 +1489,7 @@ public:
case SINGLE:
return _sv == x;
case BITMAP:
- return _bitmap.contains(x);
+ return _bitmap->contains(x);
}
return false;
}
@@ -1442,7 +1504,7 @@ public:
case SINGLE:
return 1;
case BITMAP:
- return _bitmap.cardinality();
+ return _bitmap->cardinality();
}
return 0;
}
@@ -1458,16 +1520,16 @@ public:
case SINGLE:
return _sv == rhs._sv;
case BITMAP:
- return _bitmap.contains(rhs._sv);
+ return _bitmap->contains(rhs._sv);
}
case BITMAP:
switch (_type) {
case EMPTY:
return 0;
case SINGLE:
- return rhs._bitmap.contains(_sv);
+ return rhs._bitmap->contains(_sv);
case BITMAP:
- return _bitmap.andCardinality(rhs._bitmap);
+ return _bitmap->andCardinality(*rhs._bitmap);
}
}
return 0;
@@ -1484,16 +1546,16 @@ public:
case SINGLE:
return 1 + (_sv != rhs._sv);
case BITMAP:
- return cardinality() + !_bitmap.contains(rhs._sv);
+ return cardinality() + !_bitmap->contains(rhs._sv);
}
case BITMAP:
switch (_type) {
case EMPTY:
return rhs.cardinality();
case SINGLE:
- return rhs.cardinality() + !rhs._bitmap.contains(_sv);
+ return rhs.cardinality() + !rhs._bitmap->contains(_sv);
case BITMAP:
- return _bitmap.orCardinality(rhs._bitmap);
+ return _bitmap->orCardinality(*rhs._bitmap);
}
}
return 0;
@@ -1510,16 +1572,16 @@ public:
case SINGLE:
return 2 - 2 * (_sv == rhs._sv);
case BITMAP:
- return cardinality() + 1 - 2 * (_bitmap.contains(rhs._sv));
+ return cardinality() + 1 - 2 * (_bitmap->contains(rhs._sv));
}
case BITMAP:
switch (_type) {
case EMPTY:
return rhs.cardinality();
case SINGLE:
- return rhs.cardinality() + 1 - 2 * (rhs._bitmap.contains(_sv));
+ return rhs.cardinality() + 1 - 2 *
(rhs._bitmap->contains(_sv));
case BITMAP:
- return _bitmap.xorCardinality(rhs._bitmap);
+ return _bitmap->xorCardinality(*rhs._bitmap);
}
}
return 0;
@@ -1536,16 +1598,16 @@ public:
case SINGLE:
return 1 - _sv == rhs._sv;
case BITMAP:
- return cardinality() - _bitmap.contains(rhs._sv);
+ return cardinality() - _bitmap->contains(rhs._sv);
}
case BITMAP:
switch (_type) {
case EMPTY:
return 0;
case SINGLE:
- return !rhs._bitmap.contains(_sv);
+ return !rhs._bitmap->contains(_sv);
case BITMAP:
- return _bitmap.andnotCardinality(rhs._bitmap);
+ return _bitmap->andnotCardinality(*rhs._bitmap);
}
}
return 0;
@@ -1567,9 +1629,9 @@ public:
}
break;
case BITMAP:
- _bitmap.runOptimize();
- _bitmap.shrinkToFit();
- res = _bitmap.getSizeInBytes();
+ _bitmap->runOptimize();
+ _bitmap->shrinkToFit();
+ res = _bitmap->getSizeInBytes();
break;
}
return res;
@@ -1592,7 +1654,7 @@ public:
}
break;
case BITMAP:
- _bitmap.write(dst);
+ _bitmap->write(dst);
break;
}
}
@@ -1615,7 +1677,8 @@ public:
case BitmapTypeCode::BITMAP32:
case BitmapTypeCode::BITMAP64:
_type = BITMAP;
- _bitmap = detail::Roaring64Map::read(src);
+ _prepare_bitmap_for_write();
+ *_bitmap = detail::Roaring64Map::read(src);
break;
default:
LOG(ERROR) << "BitmapTypeCode invalid, should between: " <<
BitmapTypeCode::EMPTY
@@ -1631,7 +1694,7 @@ public:
case SINGLE:
return doris_udf::BigIntVal(_sv);
case BITMAP:
- return doris_udf::BigIntVal(_bitmap.minimum());
+ return doris_udf::BigIntVal(_bitmap->minimum());
default:
return doris_udf::BigIntVal::null();
}
@@ -1653,7 +1716,7 @@ public:
} iter_ctx;
iter_ctx.ss = &ss;
- _bitmap.iterate(
+ _bitmap->iterate(
[](uint64_t value, void* c) -> bool {
auto ctx = reinterpret_cast<IterCtx*>(c);
if (ctx->first) {
@@ -1676,18 +1739,18 @@ public:
case SINGLE:
return doris_udf::BigIntVal(_sv);
case BITMAP:
- return doris_udf::BigIntVal(_bitmap.maximum());
+ return doris_udf::BigIntVal(_bitmap->maximum());
default:
return doris_udf::BigIntVal::null();
}
}
uint64_t max(bool* empty) const {
- return min_or_max(empty, [&]() { return _bitmap.maximum(); });
+ return min_or_max(empty, [&]() { return _bitmap->maximum(); });
}
uint64_t min(bool* empty) const {
- return min_or_max(empty, [&]() { return _bitmap.minimum(); });
+ return min_or_max(empty, [&]() { return _bitmap->minimum(); });
}
bool empty() const { return _type == EMPTY; }
@@ -1711,7 +1774,7 @@ public:
}
case BITMAP: {
int64_t count = 0;
- for (auto it = _bitmap.begin(); it != _bitmap.end(); ++it) {
+ for (auto it = _bitmap->begin(); it != _bitmap->end(); ++it) {
if (*it < range_start) {
continue;
}
@@ -1750,7 +1813,7 @@ public:
}
case BITMAP: {
int64_t count = 0;
- for (auto it = _bitmap.begin(); it != _bitmap.end(); ++it) {
+ for (auto it = _bitmap->begin(); it != _bitmap->end(); ++it) {
if (*it < range_start) {
continue;
}
@@ -1786,23 +1849,23 @@ public:
}
}
case BITMAP: {
- if (std::abs(offset) >= _bitmap.cardinality()) {
+ if (std::abs(offset) >= _bitmap->cardinality()) {
return 0;
}
}
}
int64_t abs_offset = offset;
if (offset < 0) {
- abs_offset = _bitmap.cardinality() + offset;
+ abs_offset = _bitmap->cardinality() + offset;
}
int64_t count = 0;
int64_t offset_count = 0;
- auto it = _bitmap.begin();
- for (; it != _bitmap.end() && offset_count < abs_offset; ++it) {
+ auto it = _bitmap->begin();
+ for (; it != _bitmap->end() && offset_count < abs_offset; ++it) {
++offset_count;
}
- for (; it != _bitmap.end() && count < limit; ++it, ++count) {
+ for (; it != _bitmap->end() && count < limit; ++it, ++count) {
ret_bitmap->add(*it);
}
return count;
@@ -1818,7 +1881,7 @@ public:
break;
}
case BITMAP: {
- for (auto it = _bitmap.begin(); it != _bitmap.end(); ++it) {
+ for (auto it = _bitmap->begin(); it != _bitmap->end(); ++it) {
data.emplace_back(*it);
}
break;
@@ -1828,7 +1891,7 @@ public:
void clear() {
_type = EMPTY;
- _bitmap.clear();
+ _bitmap.reset();
_sv = 0;
}
@@ -1843,15 +1906,15 @@ public:
private:
void _convert_to_smaller_type() {
if (_type == BITMAP) {
- uint64_t c = _bitmap.cardinality();
+ uint64_t c = _bitmap->cardinality();
if (c > 1) return;
if (c == 0) {
_type = EMPTY;
} else {
_type = SINGLE;
- _sv = _bitmap.minimum();
+ _sv = _bitmap->minimum();
}
- _bitmap.clear();
+ _bitmap.reset();
}
}
@@ -1874,14 +1937,36 @@ private:
return result;
}
+ void _prepare_bitmap_for_write() {
+ if (!_bitmap) {
+ _bitmap = std::make_shared<detail::Roaring64Map>();
+ _is_shared = false;
+ return;
+ }
+
+ if (!_is_shared) {
+ // the state is not shared, not need to check use count any more
+ return;
+ }
+
+ if (_bitmap.use_count() > 1) {
+ auto new_one = std::make_shared<detail::Roaring64Map>();
+ *new_one = *_bitmap;
+ _bitmap = new_one;
+ }
+ _is_shared = false;
+ }
+
enum BitmapDataType {
EMPTY = 0,
SINGLE = 1, // single element
BITMAP = 2 // more than one elements
};
- uint64_t _sv = 0; // store the single value when _type ==
SINGLE
- detail::Roaring64Map _bitmap; // used when _type == BITMAP
- BitmapDataType _type;
+ uint64_t _sv = 0; // store the single value
when _type == SINGLE
+ std::shared_ptr<detail::Roaring64Map> _bitmap; // used when _type == BITMAP
+ BitmapDataType _type {EMPTY};
+ // Indicate whether the state is shared among multi BitmapValue object
+ bool _is_shared = true;
};
// A simple implement of bitmap value iterator(Read only)
@@ -1903,7 +1988,7 @@ public:
_sv = _bitmap._sv;
break;
case BitmapValue::BitmapDataType::BITMAP:
- _iter = new
detail::Roaring64MapSetBitForwardIterator(_bitmap._bitmap, _end);
+ _iter = new
detail::Roaring64MapSetBitForwardIterator(*_bitmap._bitmap, _end);
break;
default:
CHECK(false) << _bitmap._type;
diff --git a/be/test/util/bitmap_value_test.cpp
b/be/test/util/bitmap_value_test.cpp
index ee950744db..5964927d4d 100644
--- a/be/test/util/bitmap_value_test.cpp
+++ b/be/test/util/bitmap_value_test.cpp
@@ -27,6 +27,28 @@
namespace doris {
using roaring::Roaring;
+TEST(BitmapValueTest, copy) {
+ BitmapValue empty;
+ BitmapValue single(1024);
+ BitmapValue bitmap({1024, 1025, 1026});
+
+ BitmapValue copied = bitmap;
+ EXPECT_TRUE(copied.contains(1024));
+ EXPECT_TRUE(copied.contains(1025));
+ EXPECT_TRUE(copied.contains(1026));
+ copied &= single;
+
+ // value of copied changed.
+ EXPECT_TRUE(copied.contains(1024));
+ EXPECT_FALSE(copied.contains(1025));
+ EXPECT_FALSE(copied.contains(1026));
+
+ // value of bitmap not changed.
+ EXPECT_TRUE(bitmap.contains(1024));
+ EXPECT_TRUE(bitmap.contains(1025));
+ EXPECT_TRUE(bitmap.contains(1026));
+}
+
TEST(BitmapValueTest, bitmap_union) {
BitmapValue empty;
BitmapValue single(1024);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]