This is an automated email from the ASF dual-hosted git repository.

xiaofeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/brpc.git


The following commit(s) were added to refs/heads/master by this push:
     new 8b5571fd Fix multi FlatMap scale and size (#2669)
8b5571fd is described below

commit 8b5571fdfdc07fb7412404fc912e63e5cae1da7a
Author: Bright Chen <chenguangmin...@foxmail.com>
AuthorDate: Thu Jul 11 18:23:39 2024 +0800

    Fix multi FlatMap scale and size (#2669)
---
 src/butil/bit_array.h               |  26 ++++----
 src/butil/containers/flat_map.h     |  32 ++++++++-
 src/butil/containers/flat_map_inl.h | 129 ++++++++++++++++++++++++------------
 test/flat_map_unittest.cpp          |  29 +++++++-
 4 files changed, 158 insertions(+), 58 deletions(-)
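
In short: a MultiFlatMap did not count duplicate-key insertions made through
operator[] in size() and never grew its bucket array when a crowded map hit a
genuine hash collision; bucket allocation in init() and copy-assignment is also
consolidated into a shared new_buckets_and_thumbnail() helper. A minimal sketch
of the fixed size() behavior, assuming only the public API exercised by the
updated unittest below:

    #include "butil/containers/flat_map.h"

    int main() {
        butil::MultiFlatMap<int, int> m;
        m.init(32);   // 32 buckets; load factor left at its default
        m[1] = 10;    // first node under key 1
        m[1] = 20;    // duplicate key: a second node is chained
        // Before this commit the duplicate was not counted; now size() is 2.
        return m.size() == 2 ? 0 : 1;
    }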

diff --git a/src/butil/bit_array.h b/src/butil/bit_array.h
index 54beb398..3bcc6948 100644
--- a/src/butil/bit_array.h
+++ b/src/butil/bit_array.h
@@ -28,18 +28,22 @@
 
 namespace butil {
 
+#define BIT_ARRAY_LEN(nbit) (((nbit) + 63 ) / 64 * 8)
+
 // Create an array with at least |nbit| bits. The array is not cleared.
-inline uint64_t* bit_array_malloc(size_t nbit)
-{
+inline uint64_t* bit_array_malloc(size_t nbit) {
     if (!nbit) {
         return NULL;
     }
-    return (uint64_t*)malloc((nbit + 63 ) / 64 * 8/*different from /8*/);
+    return (uint64_t*)malloc(BIT_ARRAY_LEN(nbit)/*different from /8*/);
+}
+
+inline void bit_array_free(uint64_t* array) {
+    free(array);
 }
 
 // Set bit 0 ~ nbit-1 of |array| to be 0
-inline void bit_array_clear(uint64_t* array, size_t nbit)
-{
+inline void bit_array_clear(uint64_t* array, size_t nbit) {
     const size_t off = (nbit >> 6);
     memset(array, 0, off * 8);
     const size_t last = (off << 6);
@@ -49,22 +53,19 @@ inline void bit_array_clear(uint64_t* array, size_t nbit)
 }
 
 // Set i-th bit (from left, counting from 0) of |array| to be 1
-inline void bit_array_set(uint64_t* array, size_t i)
-{
+inline void bit_array_set(uint64_t* array, size_t i) {
     const size_t off = (i >> 6);
     array[off] |= (((uint64_t)1) << (i - (off << 6)));
 }
 
 // Set i-th bit (from left, counting from 0) of |array| to be 0
-inline void bit_array_unset(uint64_t* array, size_t i)
-{
+inline void bit_array_unset(uint64_t* array, size_t i) {
     const size_t off = (i >> 6);
     array[off] &= ~(((uint64_t)1) << (i - (off << 6)));
 }
 
 // Get i-th bit (from left, counting from 0) of |array|
-inline uint64_t bit_array_get(const uint64_t* array, size_t i)
-{
+inline uint64_t bit_array_get(const uint64_t* array, size_t i) {
     const size_t off = (i >> 6);
     return (array[off] & (((uint64_t)1) << (i - (off << 6))));
 }
@@ -72,8 +73,7 @@ inline uint64_t bit_array_get(const uint64_t* array, size_t i)
 // Find index of first 1-bit from bit |begin| to |end| in |array|.
 // Returns |end| if all bits are 0.
 // This function is of O(nbit) complexity.
-inline size_t bit_array_first1(const uint64_t* array, size_t begin, size_t end)
-{
+inline size_t bit_array_first1(const uint64_t* array, size_t begin, size_t end) {
     size_t off1 = (begin >> 6);
     const size_t first = (off1 << 6);
     if (first != begin) {
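
A quick check on the new BIT_ARRAY_LEN macro: it rounds the bit count up to
whole 64-bit words and yields a size in bytes, which is why the code notes it
is "different from /8". Illustrative values (a compile-time sketch, assuming
C++11 static_assert):

    // BIT_ARRAY_LEN(nbit) == ((nbit + 63) / 64) * 8 bytes
    static_assert(BIT_ARRAY_LEN(1)  == 8,  "1 bit still takes one uint64_t");
    static_assert(BIT_ARRAY_LEN(64) == 8,  "64 bits fill exactly one word");
    static_assert(BIT_ARRAY_LEN(65) == 16, "65 bits spill into a second word");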
diff --git a/src/butil/containers/flat_map.h b/src/butil/containers/flat_map.h
index f0d1d1ca..40c44c59 100644
--- a/src/butil/containers/flat_map.h
+++ b/src/butil/containers/flat_map.h
@@ -104,6 +104,7 @@
 #include "butil/containers/hash_tables.h"          // hash<>
 #include "butil/bit_array.h"                       // bit_array_*
 #include "butil/strings/string_piece.h"            // StringPiece
+#include "butil/memory/scope_guard.h"
 
 namespace butil {
 
@@ -290,6 +291,18 @@ private:
 template <typename _Map, typename _Element> friend class FlatMapIterator;
 template <typename _Map, typename _Element> friend class SparseFlatMapIterator;
 
+    struct NewBucketsInfo {
+        NewBucketsInfo()
+            : buckets(NULL), thumbnail(NULL), nbucket(0) {}
+        NewBucketsInfo(Bucket* b, uint64_t* t, size_t n)
+            : buckets(b), thumbnail(t), nbucket(n) {}
+        Bucket* buckets;
+        uint64_t* thumbnail;
+        size_t nbucket;
+    };
+
+    NewBucketsInfo new_buckets_and_thumbnail(size_t size, size_t new_nbucket);
+
     // For `_Multi=true'.
     // Insert a new default-constructed associated with |key| always.
     // If size()*100/bucket_count() is more than load_factor,
@@ -299,8 +312,23 @@ template <typename _Map, typename _Element> friend class SparseFlatMapIterator;
     typename std::enable_if<Multi, mapped_type&>::type operator[](const key_type& key);
 
     // True if buckets need to be resized before holding `size' elements.
-    inline bool is_too_crowded(size_t size) const
-    { return size * 100 >= _nbucket * _load_factor; }
+    bool is_too_crowded(size_t size) const {
+        return is_too_crowded(size, _nbucket, _load_factor);
+    }
+    static bool is_too_crowded(size_t size, size_t nbucket, u_int load_factor) {
+        return size * 100 >= nbucket * load_factor;
+    }
+
+    static void init_buckets_and_thumbnail(
+        Bucket* buckets, uint64_t* thumbnail, size_t nbucket) {
+        for (size_t i = 0; i < nbucket; ++i) {
+            buckets[i].set_invalid();
+        }
+        buckets[nbucket].next = NULL;
+        if (_Sparse) {
+            bit_array_clear(thumbnail, nbucket);
+        }
+    }
         
     size_t _size;
     size_t _nbucket;
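
The refactored is_too_crowded() keeps the original integer arithmetic: a map
holding |size| elements is crowded once size * 100 >= nbucket * load_factor,
with load_factor constrained to [10, 100] by init(). A hypothetical standalone
mirror of the check, for illustration:

    static bool too_crowded(size_t size, size_t nbucket, unsigned load_factor) {
        return size * 100 >= nbucket * load_factor;
    }
    // With load_factor 80: too_crowded(25, 32, 80) -> false (2500 < 2560)
    //                      too_crowded(26, 32, 80) -> true  (2600 >= 2560)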
diff --git a/src/butil/containers/flat_map_inl.h b/src/butil/containers/flat_map_inl.h
index 5218fab9..f37baba1 100644
--- a/src/butil/containers/flat_map_inl.h
+++ b/src/butil/containers/flat_map_inl.h
@@ -242,7 +242,7 @@ FlatMap<_K, _T, _H, _E, _S, _A, _M>::~FlatMap() {
     clear();
     get_allocator().Free(_buckets);
     _buckets = NULL;
-    free(_thumbnail);
+    bit_array_free(_thumbnail);
     _thumbnail = NULL;
     _nbucket = 0;
     _load_factor = 0;
@@ -272,35 +272,32 @@ FlatMap<_K, _T, _H, _E, _S, _A, _M>::operator=(const FlatMap<_K, _T, _H, _E, _S,
     if (!rhs.initialized()) {
         return *this;
     }
-    bool need_copy = !rhs.empty();
     _load_factor = rhs._load_factor;
     if (_buckets == NULL || is_too_crowded(rhs._size)) {
-        get_allocator().Free(_buckets);
-        _nbucket = rhs._nbucket;
-        // note: need an extra bucket to let iterator know where buckets end
-        _buckets = (Bucket*)get_allocator().Alloc(sizeof(Bucket) * (_nbucket + 1/*note*/));
-        if (NULL == _buckets) {
-            LOG(ERROR) << "Fail to new _buckets";
+        NewBucketsInfo info = new_buckets_and_thumbnail(_size, rhs._nbucket);
+        if (0 == info.nbucket) {
+            LOG(ERROR) << "Invalid nbucket=0";
             return *this;
         }
-        // If no need to copy, set buckets invalid.
-        if (!need_copy) {
-            for (size_t i = 0; i < _nbucket; ++i) {
-                _buckets[i].set_invalid();
-            }
-            _buckets[_nbucket].next = NULL;
+        if (NULL == info.buckets) {
+            LOG(ERROR) << "Fail to new buckets";
+            return *this;
         }
+        if (_S && NULL == info.thumbnail) {
+            LOG(ERROR) << "Fail to new thumbnail";
+            return *this;
+        }
+
+        _nbucket = info.nbucket;
+        get_allocator().Free(_buckets);
+        _buckets = info.buckets;
         if (_S) {
-            free(_thumbnail);
-            _thumbnail = bit_array_malloc(_nbucket);
-            if (NULL == _thumbnail) {
-                LOG(ERROR) << "Fail to new _thumbnail";
-                return *this;
-            }
-            bit_array_clear(_thumbnail, _nbucket);
+            bit_array_free(_thumbnail);
+            _thumbnail = info.thumbnail;
         }
     }
-    if (!need_copy) {
+    if (rhs.empty()) {
+        // No need to copy; return directly.
         return *this;
     }
     if (_nbucket == rhs._nbucket) {
@@ -327,7 +324,7 @@ FlatMap<_K, _T, _H, _E, _S, _A, _M>::operator=(const FlatMap<_K, _T, _H, _E, _S,
         _size = rhs._size;
     } else {
         for (const_iterator it = rhs.begin(); it != rhs.end(); ++it) {
-            operator[](Element::first_ref_from_value(*it)) = 
+            operator[](Element::first_ref_from_value(*it)) =
                 Element::second_ref_from_value(*it);
         }
     }
@@ -341,7 +338,7 @@ int FlatMap<_K, _T, _H, _E, _S, _A, _M>::init(size_t nbucket, u_int load_factor)
         return -1;
     }
     if (nbucket == 0) {
-        LOG(WARNING) << "Fail to init FlatMap, nbucket=" << nbucket; 
+        LOG(WARNING) << "Fail to init FlatMap, nbucket=" << nbucket;
         return -1;
     }
     if (load_factor < 10 || load_factor > 100) {
@@ -349,25 +346,25 @@ int FlatMap<_K, _T, _H, _E, _S, _A, _M>::init(size_t nbucket, u_int load_factor)
         return -1;
     }
     _size = 0;
-    _nbucket = flatmap_round(nbucket);
     _load_factor = load_factor;
-                                
-    _buckets = (Bucket*)get_allocator().Alloc(sizeof(Bucket) * (_nbucket + 1));
-    if (NULL == _buckets) {
-        LOG(ERROR) << "Fail to new _buckets";
+
+    NewBucketsInfo info = new_buckets_and_thumbnail(_size, nbucket);
+    if (0 == info.nbucket) {
+        LOG(ERROR) << "Invalid nbucket=0";
         return -1;
     }
-    for (size_t i = 0; i < _nbucket; ++i) {
-        _buckets[i].set_invalid();
+    if (NULL == info.buckets) {
+        LOG(ERROR) << "Fail to new buckets";
+        return -1;
     }
-    _buckets[_nbucket].next = NULL;
-
+    if (_S && NULL == info.thumbnail) {
+        LOG(ERROR) << "Fail to new thumbnail";
+        return -1;
+    }
+    _nbucket = info.nbucket;
+    _buckets = info.buckets;
     if (_S) {
-        _thumbnail = bit_array_malloc(_nbucket);
-        if (NULL == _thumbnail) {
-            LOG(ERROR) << "Fail to new _thumbnail";
-            return -1;
-        }
+        _thumbnail = info.thumbnail;
         bit_array_clear(_thumbnail, _nbucket);
     }
     return 0;
@@ -625,7 +622,7 @@ FlatMap<_K, _T, _H, _E, _S, _A, _M>::operator[](const key_type& key) {
         return first_node.element().second_ref();
     }
     Bucket *p = &first_node;
-    while (1) {
+    while (true) {
         if (_eql(p->element().first_ref(), key)) {
             return p->element().second_ref();
         }
@@ -659,6 +656,24 @@ FlatMap<_K, _T, _H, _E, _S, _A, _M>::operator[](const key_type& key) {
         new (&first_node) Bucket(key);
         return first_node.element().second_ref();
     }
+    if (is_too_crowded(_size)) {
+        Bucket *p = &first_node;
+        bool need_scale = false;
+        while (NULL != p) {
+            // Increase the bucket capacity when a hash
+            // collision occurs and the map is crowded.
+            if (!_eql(p->element().first_ref(), key)) {
+                need_scale = true;
+                break;
+            }
+            p = p->next;
+        }
+        if (need_scale && resize(_nbucket + 1)) {
+            return operator[](key);
+        }
+        // Failing to resize is OK.
+    }
+    ++_size;
     Bucket* newp = new (_pool.get()) Bucket(key);
     newp->next = first_node.next;
     first_node.next = newp;
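
A note on the scaling path just added to the _Multi operator[]: growing the
table only helps when the chain actually mixes keys. Duplicates of a single key
land in the same bucket at any table size, so resizing can never shorten a
chain made purely of duplicates; that is what the need_scale scan detects. The
same test written as a hypothetical standalone helper:

    // Scan the chain at |key|'s bucket; only a node with a different key
    // proves a genuine collision that a resize could split apart.
    template <typename Bucket, typename Eql, typename K>
    bool chain_has_collision(const Bucket* p, const Eql& eql, const K& key) {
        for (; p != NULL; p = p->next) {
            if (!eql(p->element().first_ref(), key)) {
                return true;   // mixed keys: worth resizing
            }
        }
        return false;          // all duplicates: resizing cannot help
    }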
@@ -720,7 +735,7 @@ bool FlatMap<_K, _T, _H, _E, _S, _A, _M>::resize(size_t nbucket2) {
         return false;
     }
 
-    // NOTE: following functors must be kept after resizing otherwise the 
+    // NOTE: following functors must be kept after resizing otherwise the
     // internal state is lost.
     FlatMap new_map(_hashfn, _eql, get_allocator());
     if (new_map.init(nbucket2, _load_factor) != 0) {
@@ -728,7 +743,7 @@ bool FlatMap<_K, _T, _H, _E, _S, _A, _M>::resize(size_t nbucket2) {
         return false;
     }
     for (iterator it = begin(); it != end(); ++it) {
-        new_map[Element::first_ref_from_value(*it)] = 
+        new_map[Element::first_ref_from_value(*it)] =
             Element::second_movable_ref_from_value(*it);
     }
     new_map.swap(*this);
@@ -751,6 +766,38 @@ BucketInfo FlatMap<_K, _T, _H, _E, _S, _A, _M>::bucket_info() const {
     return info;
 }
 
+template <typename _K, typename _T, typename _H, typename _E, bool _S, typename _A, bool _M>
+typename FlatMap<_K, _T, _H, _E, _S, _A, _M>::NewBucketsInfo
+FlatMap<_K, _T, _H, _E, _S, _A, _M>::new_buckets_and_thumbnail(size_t size,
+                                                               size_t new_nbucket) {
+    do {
+        new_nbucket = flatmap_round(new_nbucket);
+    } while (is_too_crowded(size, new_nbucket, _load_factor));
+    // Note: need an extra bucket to let iterator know where buckets end.
+    auto buckets = (Bucket*)get_allocator().Alloc(sizeof(Bucket) * (
+        new_nbucket + 1/*note*/));
+    auto guard = MakeScopeGuard([buckets, this]() {
+        get_allocator().Free(buckets);
+    });
+    if (NULL == buckets) {
+        LOG(FATAL) << "Fail to new Buckets";
+        return {};
+    }
+
+    uint64_t* thumbnail = NULL;
+    if (_S) {
+        thumbnail = bit_array_malloc(new_nbucket);
+        if (NULL == thumbnail) {
+            LOG(FATAL) << "Fail to new thumbnail";
+            return {};
+        }
+    }
+
+    guard.dismiss();
+    init_buckets_and_thumbnail(buckets, thumbnail, new_nbucket);
+    return { buckets, thumbnail, new_nbucket };
+}
+
 inline std::ostream& operator<<(std::ostream& os, const BucketInfo& info) {
     return os << "{maxb=" << info.longest_length
               << " avgb=" << info.average_length << '}';
diff --git a/test/flat_map_unittest.cpp b/test/flat_map_unittest.cpp
index 5d9850d7..c3368d36 100644
--- a/test/flat_map_unittest.cpp
+++ b/test/flat_map_unittest.cpp
@@ -1437,6 +1437,10 @@ TEST_F(FlatMapTest, copy) {
 
     m3 = m2;
     ASSERT_TRUE(m3.initialized());
+    ASSERT_TRUE(m3.seek(1));
+    ASSERT_TRUE(m3.seek(2));
+    ASSERT_FALSE(m3.seek(3));
+
     m3.clear();
     ASSERT_TRUE(m3.initialized());
     ASSERT_TRUE(m3.empty());
@@ -1451,26 +1455,30 @@ TEST_F(FlatMapTest, multi) {
     g_foo_copy_ctor = 0;
     g_foo_assign = 0;
     butil::MultiFlatMap<int, Foo> map;
-    int bucket_count = 32;
+    size_t bucket_count = 32;
     ASSERT_EQ(0, map.init(bucket_count));
     ASSERT_EQ(0, g_foo_ctor);
     ASSERT_EQ(0, g_foo_copy_ctor);
     ASSERT_EQ(0, g_foo_assign);
     Foo& f1 = map[1];
+    ASSERT_EQ(1UL, map.size());
     ASSERT_EQ(1, g_foo_ctor);
     ASSERT_EQ(0, g_foo_copy_ctor);
     ASSERT_EQ(0, g_foo_assign);
     Foo& f2 = map[1];
+    ASSERT_EQ(2UL, map.size());
     ASSERT_EQ(2, g_foo_ctor);
     ASSERT_EQ(0, g_foo_copy_ctor);
     ASSERT_EQ(0, g_foo_assign);
     Foo f3;
     Foo& f4 = *map.insert(1, f3);
+    ASSERT_EQ(3UL, map.size());
     ASSERT_EQ(4, g_foo_ctor);
     ASSERT_EQ(0, g_foo_copy_ctor);
     ASSERT_EQ(1, g_foo_assign);
     ASSERT_EQ(&f1, map.seek(1));
     std::vector<Foo*> f_vec = map.seek_all(1);
+    ASSERT_EQ(3UL, f_vec.size());
     ASSERT_NE(f_vec.end(), std::find(f_vec.begin(), f_vec.end(), &f1));
     ASSERT_NE(f_vec.end(), std::find(f_vec.begin(), f_vec.end(), &f2));
     ASSERT_NE(f_vec.end(), std::find(f_vec.begin(), f_vec.end(), &f4));
@@ -1479,9 +1487,10 @@ TEST_F(FlatMapTest, multi) {
     int same_bucket_key = 1 + bucket_count;
     butil::DefaultHasher<int> hasher;
     ASSERT_EQ(butil::flatmap_mod(hasher(1), bucket_count),
-              butil::flatmap_mod(hasher(same_bucket_key), bucket_count));
+        butil::flatmap_mod(hasher(same_bucket_key), bucket_count));
     ASSERT_EQ(0, map.erase(same_bucket_key));
     Foo& f5 = map[same_bucket_key];
+    ASSERT_EQ(4UL, map.size());
     ASSERT_EQ(&f5, map.seek(same_bucket_key));
     ASSERT_EQ(1UL, map.seek_all(same_bucket_key).size());
     ASSERT_EQ(5, g_foo_ctor);
@@ -1489,15 +1498,31 @@ TEST_F(FlatMapTest, multi) {
     ASSERT_EQ(1, g_foo_assign);
     ASSERT_EQ(&f5, map.seek(same_bucket_key));
     ASSERT_EQ(3u, map.erase(1));
+    ASSERT_EQ(1UL, map.size());
     ASSERT_EQ(nullptr, map.seek(1));
     ASSERT_TRUE(map.seek_all(1).empty());
     // Value node of same_bucket_key is the last one in the bucket,
     // so it has been moved to the first node.
     ASSERT_EQ(&f1, map.seek(same_bucket_key));
     ASSERT_EQ(1UL, map.erase(same_bucket_key));
+    ASSERT_EQ(0UL, map.size());
     ASSERT_EQ(nullptr, map.seek(same_bucket_key));
     ASSERT_TRUE(map.seek_all(same_bucket_key).empty());
 
+    // Increase the bucket capacity when hash collisions occur and the map is crowded.
+    for (size_t i = 0; i < bucket_count + 1; ++i) {
+        map[i] = Foo();
+    }
+    ASSERT_EQ(bucket_count + 1, map.size());
+    ASSERT_EQ(butil::flatmap_round(bucket_count + 1), map.bucket_count());
+
+    // No need to increase the bucket capacity when the key is already in the map.
+    for (size_t i = 0; i < bucket_count + 1; ++i) {
+        map[1] = Foo();
+    }
+    ASSERT_EQ((bucket_count + 1) * 2, map.size());
+    ASSERT_EQ(butil::flatmap_round(bucket_count + 1), map.bucket_count());
+
     // Zeroize POD values.
     butil::MultiFlatMap<int, Bar> map2;
     ASSERT_EQ(0, map2.init(32));
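
The two loops added to the multi test pin down the scaling rule end to end:
inserting bucket_count + 1 distinct keys forces a collision in a crowded map,
growing the table to flatmap_round(bucket_count + 1) buckets, while inserting
the same number of duplicates of one key leaves bucket_count() untouched.
Condensed, assuming the API exactly as used in the test:

    butil::MultiFlatMap<int, int> m;
    m.init(32);
    for (size_t i = 0; i < 33; ++i) m[i] = 0;  // distinct keys: map resizes
    for (size_t i = 0; i < 33; ++i) m[1] = 0;  // duplicates: bucket count stays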

