This is an automated email from the ASF dual-hosted git repository.
xiaofeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/brpc.git
The following commit(s) were added to refs/heads/master by this push:
new 8b5571fd Fix multi FlatMap scale and size (#2669)
8b5571fd is described below
commit 8b5571fdfdc07fb7412404fc912e63e5cae1da7a
Author: Bright Chen <[email protected]>
AuthorDate: Thu Jul 11 18:23:39 2024 +0800
Fix multi FlatMap scale and size (#2669)
---
src/butil/bit_array.h | 26 ++++----
src/butil/containers/flat_map.h | 32 ++++++++-
src/butil/containers/flat_map_inl.h | 129 ++++++++++++++++++++++++------------
test/flat_map_unittest.cpp | 29 +++++++-
4 files changed, 158 insertions(+), 58 deletions(-)
diff --git a/src/butil/bit_array.h b/src/butil/bit_array.h
index 54beb398..3bcc6948 100644
--- a/src/butil/bit_array.h
+++ b/src/butil/bit_array.h
@@ -28,18 +28,22 @@
namespace butil {
+#define BIT_ARRAY_LEN(nbit) (((nbit) + 63 ) / 64 * 8)
+
// Create an array with at least |nbit| bits. The array is not cleared.
-inline uint64_t* bit_array_malloc(size_t nbit)
-{
+inline uint64_t* bit_array_malloc(size_t nbit) {
if (!nbit) {
return NULL;
}
- return (uint64_t*)malloc((nbit + 63 ) / 64 * 8/*different from /8*/);
+ return (uint64_t*)malloc(BIT_ARRAY_LEN(nbit)/*different from /8*/);
+}
+
+inline void bit_array_free(uint64_t* array) {
+ free(array);
}
// Set bit 0 ~ nbit-1 of |array| to be 0
-inline void bit_array_clear(uint64_t* array, size_t nbit)
-{
+inline void bit_array_clear(uint64_t* array, size_t nbit) {
const size_t off = (nbit >> 6);
memset(array, 0, off * 8);
const size_t last = (off << 6);
@@ -49,22 +53,19 @@ inline void bit_array_clear(uint64_t* array, size_t nbit)
}
// Set i-th bit (from left, counting from 0) of |array| to be 1
-inline void bit_array_set(uint64_t* array, size_t i)
-{
+inline void bit_array_set(uint64_t* array, size_t i) {
const size_t off = (i >> 6);
array[off] |= (((uint64_t)1) << (i - (off << 6)));
}
// Set i-th bit (from left, counting from 0) of |array| to be 0
-inline void bit_array_unset(uint64_t* array, size_t i)
-{
+inline void bit_array_unset(uint64_t* array, size_t i) {
const size_t off = (i >> 6);
array[off] &= ~(((uint64_t)1) << (i - (off << 6)));
}
// Get i-th bit (from left, counting from 0) of |array|
-inline uint64_t bit_array_get(const uint64_t* array, size_t i)
-{
+inline uint64_t bit_array_get(const uint64_t* array, size_t i) {
const size_t off = (i >> 6);
return (array[off] & (((uint64_t)1) << (i - (off << 6))));
}
@@ -72,8 +73,7 @@ inline uint64_t bit_array_get(const uint64_t* array, size_t i)
// Find index of first 1-bit from bit |begin| to |end| in |array|.
// Returns |end| if all bits are 0.
// This function is of O(nbit) complexity.
-inline size_t bit_array_first1(const uint64_t* array, size_t begin, size_t end)
-{
+inline size_t bit_array_first1(const uint64_t* array, size_t begin, size_t end) {
size_t off1 = (begin >> 6);
const size_t first = (off1 << 6);
if (first != begin) {
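For context, the bit_array helpers above form a small fixed-width bitset API: BIT_ARRAY_LEN rounds up to whole 64-bit words, so BIT_ARRAY_LEN(100) = ((100 + 63) / 64) * 8 = 16 bytes. A minimal usage sketch, assuming only the functions shown in this diff:

    #include "butil/bit_array.h"

    void bit_array_demo() {
        const size_t nbit = 100;
        // At least 100 bits (16 bytes); contents are uninitialized.
        uint64_t* array = butil::bit_array_malloc(nbit);
        if (array == NULL) {
            return;
        }
        butil::bit_array_clear(array, nbit);   // zero bits 0..99
        butil::bit_array_set(array, 42);       // bit 42 := 1
        if (butil::bit_array_get(array, 42)) { // non-zero iff the bit is set
            // Scans [0, 100) for the first 1-bit; returns 42 here,
            // and would return 100 (the end) if all bits were 0.
            size_t first = butil::bit_array_first1(array, 0, nbit);
            (void)first;
        }
        butil::bit_array_free(array);          // new counterpart of bit_array_malloc
    }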
diff --git a/src/butil/containers/flat_map.h b/src/butil/containers/flat_map.h
index f0d1d1ca..40c44c59 100644
--- a/src/butil/containers/flat_map.h
+++ b/src/butil/containers/flat_map.h
@@ -104,6 +104,7 @@
#include "butil/containers/hash_tables.h" // hash<>
#include "butil/bit_array.h" // bit_array_*
#include "butil/strings/string_piece.h" // StringPiece
+#include "butil/memory/scope_guard.h"
namespace butil {
@@ -290,6 +291,18 @@ private:
template <typename _Map, typename _Element> friend class FlatMapIterator;
template <typename _Map, typename _Element> friend class SparseFlatMapIterator;
+ struct NewBucketsInfo {
+ NewBucketsInfo()
+ : buckets(NULL), thumbnail(NULL), nbucket(0) {}
+ NewBucketsInfo(Bucket* b, uint64_t* t, size_t n)
+ : buckets(b), thumbnail(t), nbucket(n) {}
+ Bucket* buckets;
+ uint64_t* thumbnail;
+ size_t nbucket;
+ };
+
+ NewBucketsInfo new_buckets_and_thumbnail(size_t size, size_t new_nbucket);
+
// For `_Multi=true'.
// Insert a new default-constructed associated with |key| always.
// If size()*100/bucket_count() is more than load_factor,
@@ -299,8 +312,23 @@ template <typename _Map, typename _Element> friend class SparseFlatMapIterator;
typename std::enable_if<Multi, mapped_type&>::type operator[](const key_type& key);
// True if buckets need to be resized before holding `size' elements.
- inline bool is_too_crowded(size_t size) const
- { return size * 100 >= _nbucket * _load_factor; }
+ bool is_too_crowded(size_t size) const {
+ return is_too_crowded(size, _nbucket, _load_factor);
+ }
+ static bool is_too_crowded(size_t size, size_t nbucket, u_int load_factor) {
+ return size * 100 >= nbucket * load_factor;
+ }
+
+ static void init_buckets_and_thumbnail(
+ Bucket* buckets, uint64_t* thumbnail, size_t nbucket) {
+ for (size_t i = 0; i < nbucket; ++i) {
+ buckets[i].set_invalid();
+ }
+ buckets[nbucket].next = NULL;
+ if (_Sparse) {
+ bit_array_clear(thumbnail, nbucket);
+ }
+ }
size_t _size;
size_t _nbucket;
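A quick worked example of the load-factor predicate above (numbers are illustrative): with nbucket = 32 and load_factor = 80, the map counts as too crowded once size * 100 >= 32 * 80 = 2560, i.e. at size = 26, so a resize triggers when the table is roughly 80% full. A standalone sketch of the same check, with u_int replaced by unsigned for portability:

    #include <cassert>
    #include <cstddef>

    // Mirrors the static is_too_crowded() overload from the diff.
    static bool is_too_crowded(size_t size, size_t nbucket, unsigned load_factor) {
        return size * 100 >= nbucket * load_factor;
    }

    int main() {
        assert(!is_too_crowded(25, 32, 80)); // 2500 < 2560: still fine
        assert(is_too_crowded(26, 32, 80));  // 2600 >= 2560: must resize
        return 0;
    }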
diff --git a/src/butil/containers/flat_map_inl.h b/src/butil/containers/flat_map_inl.h
index 5218fab9..f37baba1 100644
--- a/src/butil/containers/flat_map_inl.h
+++ b/src/butil/containers/flat_map_inl.h
@@ -242,7 +242,7 @@ FlatMap<_K, _T, _H, _E, _S, _A, _M>::~FlatMap() {
clear();
get_allocator().Free(_buckets);
_buckets = NULL;
- free(_thumbnail);
+ bit_array_free(_thumbnail);
_thumbnail = NULL;
_nbucket = 0;
_load_factor = 0;
@@ -272,35 +272,32 @@ FlatMap<_K, _T, _H, _E, _S, _A, _M>::operator=(const FlatMap<_K, _T, _H, _E, _S,
if (!rhs.initialized()) {
return *this;
}
- bool need_copy = !rhs.empty();
_load_factor = rhs._load_factor;
if (_buckets == NULL || is_too_crowded(rhs._size)) {
- get_allocator().Free(_buckets);
- _nbucket = rhs._nbucket;
- // note: need an extra bucket to let iterator know where buckets end
- _buckets = (Bucket*)get_allocator().Alloc(sizeof(Bucket) * (_nbucket + 1/*note*/));
- if (NULL == _buckets) {
- LOG(ERROR) << "Fail to new _buckets";
+ NewBucketsInfo info = new_buckets_and_thumbnail(_size, rhs._nbucket);
+ if (0 == info.nbucket) {
+ LOG(ERROR) << "Invalid nbucket=0";
return *this;
}
- // If no need to copy, set buckets invalid.
- if (!need_copy) {
- for (size_t i = 0; i < _nbucket; ++i) {
- _buckets[i].set_invalid();
- }
- _buckets[_nbucket].next = NULL;
+ if (NULL == info.buckets) {
+ LOG(ERROR) << "Fail to new buckets";
+ return *this;
}
+ if (_S && NULL == info.thumbnail) {
+ LOG(ERROR) << "Fail to new thumbnail";
+ return *this;
+ }
+
+ _nbucket = info.nbucket;
+ get_allocator().Free(_buckets);
+ _buckets = info.buckets;
if (_S) {
- free(_thumbnail);
- _thumbnail = bit_array_malloc(_nbucket);
- if (NULL == _thumbnail) {
- LOG(ERROR) << "Fail to new _thumbnail";
- return *this;
- }
- bit_array_clear(_thumbnail, _nbucket);
+ bit_array_free(_thumbnail);
+ _thumbnail = info.thumbnail;
}
}
- if (!need_copy) {
+ if (rhs.empty()) {
+ // No need to copy, return directly.
return *this;
}
if (_nbucket == rhs._nbucket) {
@@ -327,7 +324,7 @@ FlatMap<_K, _T, _H, _E, _S, _A, _M>::operator=(const FlatMap<_K, _T, _H, _E, _S,
_size = rhs._size;
} else {
for (const_iterator it = rhs.begin(); it != rhs.end(); ++it) {
- operator[](Element::first_ref_from_value(*it)) =
+ operator[](Element::first_ref_from_value(*it)) =
Element::second_ref_from_value(*it);
}
}
@@ -341,7 +338,7 @@ int FlatMap<_K, _T, _H, _E, _S, _A, _M>::init(size_t nbucket, u_int load_factor)
return -1;
}
if (nbucket == 0) {
- LOG(WARNING) << "Fail to init FlatMap, nbucket=" << nbucket;
+ LOG(WARNING) << "Fail to init FlatMap, nbucket=" << nbucket;
return -1;
}
if (load_factor < 10 || load_factor > 100) {
@@ -349,25 +346,25 @@ int FlatMap<_K, _T, _H, _E, _S, _A, _M>::init(size_t nbucket, u_int load_factor)
return -1;
}
_size = 0;
- _nbucket = flatmap_round(nbucket);
_load_factor = load_factor;
-
- _buckets = (Bucket*)get_allocator().Alloc(sizeof(Bucket) * (_nbucket + 1));
- if (NULL == _buckets) {
- LOG(ERROR) << "Fail to new _buckets";
+
+ NewBucketsInfo info = new_buckets_and_thumbnail(_size, nbucket);
+ if (0 == info.nbucket) {
+ LOG(ERROR) << "Invalid nbucket=0";
return -1;
}
- for (size_t i = 0; i < _nbucket; ++i) {
- _buckets[i].set_invalid();
+ if (NULL == info.buckets) {
+ LOG(ERROR) << "Fail to new buckets";
+ return -1;
}
- _buckets[_nbucket].next = NULL;
-
+ if (_S && NULL == info.thumbnail) {
+ LOG(ERROR) << "Fail to new thumbnail";
+ return -1;
+ }
+ _nbucket = info.nbucket;
+ _buckets = info.buckets;
if (_S) {
- _thumbnail = bit_array_malloc(_nbucket);
- if (NULL == _thumbnail) {
- LOG(ERROR) << "Fail to new _thumbnail";
- return -1;
- }
+ _thumbnail = info.thumbnail;
bit_array_clear(_thumbnail, _nbucket);
}
return 0;
@@ -625,7 +622,7 @@ FlatMap<_K, _T, _H, _E, _S, _A, _M>::operator[](const key_type& key) {
return first_node.element().second_ref();
}
Bucket *p = &first_node;
- while (1) {
+ while (true) {
if (_eql(p->element().first_ref(), key)) {
return p->element().second_ref();
}
@@ -659,6 +656,24 @@ FlatMap<_K, _T, _H, _E, _S, _A, _M>::operator[](const key_type& key) {
new (&first_node) Bucket(key);
return first_node.element().second_ref();
}
+ if (is_too_crowded(_size)) {
+ Bucket *p = &first_node;
+ bool need_scale = false;
+ while (NULL != p) {
+ // Increase the capacity of buckets when a hash
+ // collision occurs and the map is crowded.
+ if (!_eql(p->element().first_ref(), key)) {
+ need_scale = true;
+ break;
+ }
+ p = p->next;
+ }
+ if (need_scale && resize(_nbucket + 1)) {
+ return operator[](key);
+ }
+ // Failing to resize is OK.
+ }
+ ++_size;
Bucket* newp = new (_pool.get()) Bucket(key);
newp->next = first_node.next;
first_node.next = newp;
@@ -720,7 +735,7 @@ bool FlatMap<_K, _T, _H, _E, _S, _A, _M>::resize(size_t nbucket2) {
return false;
}
- // NOTE: following functors must be kept after resizing otherwise the
+ // NOTE: following functors must be kept after resizing otherwise the
// internal state is lost.
FlatMap new_map(_hashfn, _eql, get_allocator());
if (new_map.init(nbucket2, _load_factor) != 0) {
@@ -728,7 +743,7 @@ bool FlatMap<_K, _T, _H, _E, _S, _A, _M>::resize(size_t nbucket2) {
return false;
}
for (iterator it = begin(); it != end(); ++it) {
- new_map[Element::first_ref_from_value(*it)] =
+ new_map[Element::first_ref_from_value(*it)] =
Element::second_movable_ref_from_value(*it);
}
new_map.swap(*this);
@@ -751,6 +766,38 @@ BucketInfo FlatMap<_K, _T, _H, _E, _S, _A, _M>::bucket_info() const {
return info;
}
+template <typename _K, typename _T, typename _H, typename _E, bool _S, typename _A, bool _M>
+typename FlatMap<_K, _T, _H, _E, _S, _A, _M>::NewBucketsInfo
+FlatMap<_K, _T, _H, _E, _S, _A, _M>::new_buckets_and_thumbnail(size_t size,
+ size_t new_nbucket) {
+ do {
+ new_nbucket = flatmap_round(new_nbucket);
+ } while (is_too_crowded(size, new_nbucket, _load_factor));
+ // Note: need an extra bucket to let iterator know where buckets end.
+ auto buckets = (Bucket*)get_allocator().Alloc(sizeof(Bucket) * (
+ new_nbucket + 1/*note*/));
+ auto guard = MakeScopeGuard([buckets, this]() {
+ get_allocator().Free(buckets);
+ });
+ if (NULL == buckets) {
+ LOG(FATAL) << "Fail to new Buckets";
+ return {};
+ }
+
+ uint64_t* thumbnail = NULL;
+ if (_S) {
+ thumbnail = bit_array_malloc(new_nbucket);
+ if (NULL == thumbnail) {
+ LOG(FATAL) << "Fail to new thumbnail";
+ return {};
+ }
+ }
+
+ guard.dismiss();
+ init_buckets_and_thumbnail(buckets, thumbnail, new_nbucket);
+ return { buckets, thumbnail, new_nbucket };
+}
+
inline std::ostream& operator<<(std::ostream& os, const BucketInfo& info) {
return os << "{maxb=" << info.longest_length
<< " avgb=" << info.average_length << '}';
diff --git a/test/flat_map_unittest.cpp b/test/flat_map_unittest.cpp
index 5d9850d7..c3368d36 100644
--- a/test/flat_map_unittest.cpp
+++ b/test/flat_map_unittest.cpp
@@ -1437,6 +1437,10 @@ TEST_F(FlatMapTest, copy) {
m3 = m2;
ASSERT_TRUE(m3.initialized());
+ ASSERT_TRUE(m3.seek(1));
+ ASSERT_TRUE(m3.seek(2));
+ ASSERT_FALSE(m3.seek(3));
+
m3.clear();
ASSERT_TRUE(m3.initialized());
ASSERT_TRUE(m3.empty());
@@ -1451,26 +1455,30 @@ TEST_F(FlatMapTest, multi) {
g_foo_copy_ctor = 0;
g_foo_assign = 0;
butil::MultiFlatMap<int, Foo> map;
- int bucket_count = 32;
+ size_t bucket_count = 32;
ASSERT_EQ(0, map.init(bucket_count));
ASSERT_EQ(0, g_foo_ctor);
ASSERT_EQ(0, g_foo_copy_ctor);
ASSERT_EQ(0, g_foo_assign);
Foo& f1 = map[1];
+ ASSERT_EQ(1UL, map.size());
ASSERT_EQ(1, g_foo_ctor);
ASSERT_EQ(0, g_foo_copy_ctor);
ASSERT_EQ(0, g_foo_assign);
Foo& f2 = map[1];
+ ASSERT_EQ(2UL, map.size());
ASSERT_EQ(2, g_foo_ctor);
ASSERT_EQ(0, g_foo_copy_ctor);
ASSERT_EQ(0, g_foo_assign);
Foo f3;
Foo& f4 = *map.insert(1, f3);
+ ASSERT_EQ(3UL, map.size());
ASSERT_EQ(4, g_foo_ctor);
ASSERT_EQ(0, g_foo_copy_ctor);
ASSERT_EQ(1, g_foo_assign);
ASSERT_EQ(&f1, map.seek(1));
std::vector<Foo*> f_vec = map.seek_all(1);
+ ASSERT_EQ(3UL, f_vec.size());
ASSERT_NE(f_vec.end(), std::find(f_vec.begin(), f_vec.end(), &f1));
ASSERT_NE(f_vec.end(), std::find(f_vec.begin(), f_vec.end(), &f2));
ASSERT_NE(f_vec.end(), std::find(f_vec.begin(), f_vec.end(), &f4));
@@ -1479,9 +1487,10 @@ TEST_F(FlatMapTest, multi) {
int same_bucket_key = 1 + bucket_count;
butil::DefaultHasher<int> hasher;
ASSERT_EQ(butil::flatmap_mod(hasher(1), bucket_count),
- butil::flatmap_mod(hasher(same_bucket_key), bucket_count));
+ butil::flatmap_mod(hasher(same_bucket_key), bucket_count));
ASSERT_EQ(0, map.erase(same_bucket_key));
Foo& f5 = map[same_bucket_key];
+ ASSERT_EQ(4UL, map.size());
ASSERT_EQ(&f5, map.seek(same_bucket_key));
ASSERT_EQ(1UL, map.seek_all(same_bucket_key).size());
ASSERT_EQ(5, g_foo_ctor);
@@ -1489,15 +1498,31 @@ TEST_F(FlatMapTest, multi) {
ASSERT_EQ(1, g_foo_assign);
ASSERT_EQ(&f5, map.seek(same_bucket_key));
ASSERT_EQ(3u, map.erase(1));
+ ASSERT_EQ(1UL, map.size());
ASSERT_EQ(nullptr, map.seek(1));
ASSERT_TRUE(map.seek_all(1).empty());
// Value node of same_bucket_key is the last one in the bucket,
// so it has been moved to the first node.
ASSERT_EQ(&f1, map.seek(same_bucket_key));
ASSERT_EQ(1UL, map.erase(same_bucket_key));
+ ASSERT_EQ(0UL, map.size());
ASSERT_EQ(nullptr, map.seek(same_bucket_key));
ASSERT_TRUE(map.seek_all(same_bucket_key).empty());
+ // Increase the capacity of buckets when hash collisions occur and the map is crowded.
+ for (size_t i = 0; i < bucket_count + 1; ++i) {
+ map[i] = Foo();
+ }
+ ASSERT_EQ(bucket_count + 1, map.size());
+ ASSERT_EQ(butil::flatmap_round(bucket_count + 1), map.bucket_count());
+
+ // No need to increase the capacity of buckets when the key is already in the map.
+ for (size_t i = 0; i < bucket_count + 1; ++i) {
+ map[1] = Foo();
+ }
+ ASSERT_EQ((bucket_count + 1) * 2, map.size());
+ ASSERT_EQ(butil::flatmap_round(bucket_count + 1), map.bucket_count());
+
// Zeroize POD values.
butil::MultiFlatMap<int, Bar> map2;
ASSERT_EQ(0, map2.init(32));
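The updated test above pins down the new MultiFlatMap semantics: size() counts every stored value (duplicates included), and the table only grows when a crowded map sees a genuine collision between different keys. A minimal usage sketch along the same lines (key and capacity values are illustrative):

    #include <vector>
    #include "butil/containers/flat_map.h"

    int multi_map_demo() {
        butil::MultiFlatMap<int, int> map;
        if (map.init(32) != 0) {
            return -1;
        }
        map[1] = 10; // first value under key 1
        map[1] = 20; // second value under the same key; both are kept
        // size() is now 2: one per stored value, not per distinct key.
        if (map.size() != 2) {
            return -1;
        }
        // seek_all() returns pointers to every value stored under the key.
        std::vector<int*> vals = map.seek_all(1);
        return (int)vals.size(); // 2
    }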
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]