libstdc++: [_Hashtable] Extend the small size optimization

    A number of methods were still not using the small size optimization which     is to prefer an O(N) research to a hash computation as long as N is small.

    libstdc++-v3/ChangeLog:

            * include/bits/hashtable.h: Move comment about all equivalent values
            being next to each other in the class documentation header.
            (_M_reinsert_node, _M_merge_unique): Implement small size optimization.
            (_M_find_tr, _M_count_tr, _M_equal_range_tr): Likewise.

Tested under Linux x64

Ok to commit ?

François
diff --git a/libstdc++-v3/include/bits/hashtable.h 
b/libstdc++-v3/include/bits/hashtable.h
index 771ed9968f7..aec77c34a58 100644
--- a/libstdc++-v3/include/bits/hashtable.h
+++ b/libstdc++-v3/include/bits/hashtable.h
@@ -152,6 +152,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
    *  _M_before_begin, if any, is updated to point to its new before
    *  begin node.
    *
+   *  Note that all equivalent values, if any, are next to each other, if
+   *  we find a non-equivalent value after an equivalent one it means that
+   *  we won't find any new equivalent value.
+   *
    *  On erase, the simple iterator design requires using the hash
    *  functor to get the index of the bucket to update. For this
    *  reason, when __cache_hash_code is set to false the hash functor must
@@ -1054,10 +1058,27 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
          {
            __glibcxx_assert(get_allocator() == __nh.get_allocator());
 
+           __node_ptr __n = nullptr;
            const key_type& __k = __nh._M_key();
-           __hash_code __code = this->_M_hash_code(__k);
-           size_type __bkt = _M_bucket_index(__code);
-           if (__node_ptr __n = _M_find_node(__bkt, __k, __code))
+           const size_type __size = size();
+           if (__size <= __small_size_threshold())
+             {
+               for (__n = _M_begin(); __n; __n = __n->_M_next())
+                 if (this->_M_key_equals(__k, *__n))
+                   break;
+             }
+
+           __hash_code __code;
+           size_type __bkt;
+           if (!__n)
+             {
+               __code = this->_M_hash_code(__k);
+               __bkt = _M_bucket_index(__code);
+               if (__size > __small_size_threshold())
+                 __n = _M_find_node(__bkt, __k, __code);
+             }
+
+           if (__n)
              {
                __ret.node = std::move(__nh);
                __ret.position = iterator(__n);
@@ -1161,11 +1182,31 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
          for (auto __i = __src.cbegin(), __end = __src.cend(); __i != __end;)
            {
              auto __pos = __i++;
+             const size_type __size = size();
              const key_type& __k = _ExtractKey{}(*__pos);
+             if (__size <= __small_size_threshold())
+               {
+                 bool __found = false;
+                 for (auto __n = _M_begin(); __n; __n = __n->_M_next())
+                   if (this->_M_key_equals(__k, *__n))
+                     {
+                       __found = true;
+                       break;
+                     }
+
+                 if (__found)
+                   {
+                     if (__n_elt != 1)
+                       --__n_elt;
+                     continue;
+                   }
+               }
+
              __hash_code __code
                = _M_src_hash_code(__src.hash_function(), __k, *__pos._M_cur);
              size_type __bkt = _M_bucket_index(__code);
-             if (_M_find_node(__bkt, __k, __code) == nullptr)
+             if (__size <= __small_size_threshold()
+                 || _M_find_node(__bkt, __k, __code) == nullptr)
                {
                  auto __nh = __src.extract(__pos);
                  _M_insert_unique_node(__bkt, __code, __nh._M_ptr, __n_elt);
@@ -1743,6 +1784,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _M_find_tr(const _Kt& __k)
       -> iterator
       {
+       if (size() <= __small_size_threshold())
+         {
+           for (auto __n = _M_begin(); __n; __n = __n->_M_next())
+             if (this->_M_key_equals_tr(__k, *__n))
+               return iterator(__n);
+           return end();
+         }
+
        __hash_code __code = this->_M_hash_code_tr(__k);
        std::size_t __bkt = _M_bucket_index(__code);
        return iterator(_M_find_node_tr(__bkt, __k, __code));
@@ -1759,6 +1808,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _M_find_tr(const _Kt& __k) const
       -> const_iterator
       {
+       if (size() <= __small_size_threshold())
+         {
+           for (auto __n = _M_begin(); __n; __n = __n->_M_next())
+             if (this->_M_key_equals_tr(__k, *__n))
+               return const_iterator(__n);
+           return end();
+         }
+
        __hash_code __code = this->_M_hash_code_tr(__k);
        std::size_t __bkt = _M_bucket_index(__code);
        return const_iterator(_M_find_node_tr(__bkt, __k, __code));
@@ -1782,9 +1839,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       if (__unique_keys::value)
        return 1;
 
-      // All equivalent values are next to each other, if we find a
-      // non-equivalent value after an equivalent one it means that we won't
-      // find any new equivalent value.
       size_type __result = 1;
       for (auto __ref = __it++;
           __it._M_cur && this->_M_node_equals(*__ref._M_cur, *__it._M_cur);
@@ -1806,15 +1860,30 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _M_count_tr(const _Kt& __k) const
       -> size_type
       {
+       if (size() <= __small_size_threshold())
+         {
+           size_type __result = 0;
+           for (auto __n = _M_begin(); __n; __n = __n->_M_next())
+             {
+               if (this->_M_key_equals_tr(__k, *__n))
+                 {
+                   ++__result;
+                   continue;
+                 }
+
+               if (__result)
+                 break;
+             }
+
+           return __result;
+         }
+
        __hash_code __code = this->_M_hash_code_tr(__k);
        std::size_t __bkt = _M_bucket_index(__code);
        auto __n = _M_find_node_tr(__bkt, __k, __code);
        if (!__n)
          return 0;
 
-       // All equivalent values are next to each other, if we find a
-       // non-equivalent value after an equivalent one it means that we won't
-       // find any new equivalent value.
        iterator __it(__n);
        size_type __result = 1;
        for (++__it;
@@ -1844,9 +1913,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       if (__unique_keys::value)
        return { __beg, __ite };
 
-      // All equivalent values are next to each other, if we find a
-      // non-equivalent value after an equivalent one it means that we won't
-      // find any new equivalent value.
       while (__ite._M_cur && this->_M_node_equals(*__beg._M_cur, 
*__ite._M_cur))
        ++__ite;
 
@@ -1871,9 +1937,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       if (__unique_keys::value)
        return { __beg, __ite };
 
-      // All equivalent values are next to each other, if we find a
-      // non-equivalent value after an equivalent one it means that we won't
-      // find any new equivalent value.
       while (__ite._M_cur && this->_M_node_equals(*__beg._M_cur, 
*__ite._M_cur))
        ++__ite;
 
@@ -1892,6 +1955,25 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _M_equal_range_tr(const _Kt& __k)
       -> pair<iterator, iterator>
       {
+       if (size() <= __small_size_threshold())
+         {
+           __node_ptr __n, __beg = nullptr;
+           for (__n = _M_begin(); __n; __n = __n->_M_next())
+             {
+               if (this->_M_key_equals_tr(__k, *__n))
+                 {
+                   if (!__beg)
+                     __beg = __n;
+                   continue;
+                 }
+
+               if (__beg)
+                 break;
+             }
+
+           return { iterator(__beg), iterator(__n) };
+         }
+
        __hash_code __code = this->_M_hash_code_tr(__k);
        std::size_t __bkt = _M_bucket_index(__code);
        auto __n = _M_find_node_tr(__bkt, __k, __code);
@@ -1899,9 +1981,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        if (!__n)
          return { __ite, __ite };
 
-       // All equivalent values are next to each other, if we find a
-       // non-equivalent value after an equivalent one it means that we won't
-       // find any new equivalent value.
        auto __beg = __ite++;
        while (__ite._M_cur && this->_M_equals_tr(__k, __code, *__ite._M_cur))
          ++__ite;
@@ -1920,6 +1999,25 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _M_equal_range_tr(const _Kt& __k) const
       -> pair<const_iterator, const_iterator>
       {
+       if (size() <= __small_size_threshold())
+         {
+           __node_ptr __n, __beg = nullptr;
+           for (__n = _M_begin(); __n; __n = __n->_M_next())
+             {
+               if (this->_M_key_equals_tr(__k, *__n))
+                 {
+                   if (!__beg)
+                     __beg = __n;
+                   continue;
+                 }
+
+               if (__beg)
+                 break;
+             }
+
+           return { const_iterator(__beg), const_iterator(__n) };
+         }
+
        __hash_code __code = this->_M_hash_code_tr(__k);
        std::size_t __bkt = _M_bucket_index(__code);
        auto __n = _M_find_node_tr(__bkt, __k, __code);
@@ -1927,9 +2025,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        if (!__n)
          return { __ite, __ite };
 
-       // All equivalent values are next to each other, if we find a
-       // non-equivalent value after an equivalent one it means that we won't
-       // find any new equivalent value.
        auto __beg = __ite++;
        while (__ite._M_cur && this->_M_equals_tr(__k, __code, *__ite._M_cur))
          ++__ite;
@@ -2058,7 +2153,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        // First build the node to get access to the hash code
        _Scoped_node __node { this, std::forward<_Args>(__args)...  };
        const key_type& __k = _ExtractKey{}(__node._M_node->_M_v());
-       if (size() <= __small_size_threshold())
+       const size_type __size = size();
+       if (__size <= __small_size_threshold())
          {
            for (auto __it = _M_begin(); __it; __it = __it->_M_next())
              if (this->_M_key_equals(__k, *__it))
@@ -2068,7 +2164,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
        __hash_code __code = this->_M_hash_code(__k);
        size_type __bkt = _M_bucket_index(__code);
-       if (size() > __small_size_threshold())
+       if (__size > __small_size_threshold())
          if (__node_ptr __p = _M_find_node(__bkt, __k, __code))
            // There is already an equivalent node, no insertion
            return { iterator(__p), false };
@@ -2231,7 +2327,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
                       _NodeGenerator& __node_gen)
       -> pair<iterator, bool>
       {
-       if (size() <= __small_size_threshold())
+       const size_type __size = size();
+       if (__size <= __small_size_threshold())
          for (auto __it = _M_begin(); __it; __it = __it->_M_next())
            if (this->_M_key_equals_tr(__k, *__it))
              return { iterator(__it), false };
@@ -2239,7 +2336,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        __hash_code __code = this->_M_hash_code_tr(__k);
        size_type __bkt = _M_bucket_index(__code);
 
-       if (size() > __small_size_threshold())
+       if (__size > __small_size_threshold())
          if (__node_ptr __node = _M_find_node_tr(__bkt, __k, __code))
            return { iterator(__node), false };
 

Reply via email to