Re: [PATCH][Hashtable 6/6] PR 68303 small size optimization

2022-01-05 Thread Jonathan Wakely via Gcc-patches
On Sat, 25 Dec 2021 at 21:39, François Dumont  wrote:

> On 23/12/21 2:03 pm, Jonathan Wakely wrote:
> > On 21/12/21 07:07 +0100, François Dumont wrote:
> >> Hi
> >>
> >> Is there a chance for this patch to be integrated for next gcc
> >> release ?
> >
> > Yes, I think this can still make it for GCC 12 (the patch was first
> > posted long ago in stage 1 and it's just me being slow to review it).
> >
> > But I have some questions and comments ...
> >
> >
> >> diff --git a/libstdc++-v3/include/bits/hashtable.h
> >> b/libstdc++-v3/include/bits/hashtable.h
> >> index 6e2d4c10cfe..6b2c6b99fef 100644
> >> --- a/libstdc++-v3/include/bits/hashtable.h
> >> +++ b/libstdc++-v3/include/bits/hashtable.h
> >> @@ -419,6 +419,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >>   _M_uses_single_bucket() const
> >>   { return _M_uses_single_bucket(_M_buckets); }
> >>
> >> +  static constexpr size_t
> >> +  __small_size_threshold()
> >> +  {
> >> +return
> >> + __detail::_Hashtable_hash_traits<_Hash>::__small_size_threshold();
> >> +  }
> >> +
> I've added the noexcept qualification as you asked.
> >>   __hashtable_alloc&
> >>   _M_base_alloc() { return *this; }
> >>
> >> @@ -788,6 +795,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >>   _M_bucket_index(__hash_code __c) const
> >>   { return __hash_code_base::_M_bucket_index(__c,
> >> _M_bucket_count); }
> >>
> >> +  __node_base_ptr
> >> +  _M_find_before_node(const key_type&);
> >> +
> >>   // Find and insert helper functions and types
> >>   // Find the node before the one matching the criteria.
> >>   __node_base_ptr
> >> @@ -831,6 +841,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >>   __node_base_ptr
> >>   _M_get_previous_node(size_type __bkt, __node_ptr __n);
> >>
> >> +  std::pair<const_iterator, __hash_code>
> >> +  _M_compute_hash_code(const_iterator __hint, const key_type& __k) const;
> >> +
> >>   // Insert node __n with hash code __code, in bucket __bkt if no
> >>   // rehash (assumes no element with same key already present).
> >>   // Takes ownership of __n if insertion succeeds, throws otherwise.
> >> @@ -1126,7 +1139,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >>   void _M_rehash(size_type __bkt_count, const __rehash_state&
> >> __state);
> >> };
> >>
> >> -
> >>   // Definitions of class template _Hashtable's out-of-line member
> >> functions.
> >>   template<typename _Key, typename _Value, typename _Alloc,
> >>typename _ExtractKey, typename _Equal,
> >>typename _Hash, typename _RangeHash, typename _Unused,
> >>typename _RehashPolicy, typename _Traits>
> >> @@ -1628,6 +1640,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >> find(const key_type& __k)
> >> -> iterator
> >> {
> >> +  if (size() <= __small_size_threshold())
> >> +{
> >> +  for (auto __it = begin(); __it != end(); ++__it)
> >> +if (this->_M_key_equals(__k, *__it._M_cur))
> >> +  return __it;
> >> +  return end();
> >> +}
> >
> > This loop is repeated a few times, would it be better factored out
> > into its own function? _M_find_loop or something? The return type is
> > different in some cases, so maybe it's OK like this.
> Yes, I also thought about that but there are often small changes from one
> loop to another, as you noticed.
> >
> >
> >
> >> +
> >>   __hash_code __code = this->_M_hash_code(__k);
> >>   std::size_t __bkt = _M_bucket_index(__code);
> >>   return iterator(_M_find_node(__bkt, __k, __code));
> >> @@ -1643,6 +1663,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >> find(const key_type& __k) const
> >> -> const_iterator
> >> {
> >> +  if (size() <= __small_size_threshold())
> >> +{
> >> +  for (auto __it = begin(); __it != end(); ++__it)
> >> +if (this->_M_key_equals(__k, *__it._M_cur))
> >> +  return __it;
> >> +  return end();
> >> +}
> >> +
> >>   __hash_code __code = this->_M_hash_code(__k);
> >>   std::size_t __bkt = _M_bucket_index(__code);
> >>   return const_iterator(_M_find_node(__bkt, __k, __code));
> >> @@ -1855,6 +1883,35 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >>   }
> >> #endif
> >>
> >> +  // Find the node before the one whose key compares equal to k.
> >> +  // Return nullptr if no node is found.
> >> +  template<typename _Key, typename _Value, typename _Alloc,
> >> +   typename _ExtractKey, typename _Equal,
> >> +   typename _Hash, typename _RangeHash, typename _Unused,
> >> +   typename _RehashPolicy, typename _Traits>
> >> +auto
> >> +_Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal,
> >> +   _Hash, _RangeHash, _Unused, _RehashPolicy, _Traits>::
> >> +_M_find_before_node(const key_type& __k)
> >> +-> __node_base_ptr
> >> +{
> >> +  __node_base_ptr __prev_p = &_M_before_begin;
> >
> > This is OK now, but would need to be std::__addressof(_M_before_begin)
> > if/when the _Hash_code_base type becomes dependent on the allocator's
> > pointer.
> Yes, maybe after gcc release we will talk about those fancy pointer
> types again.
> >
> >  __n_last = __n_last->_M_next();
> >> diff --git a/libstdc++-v3/include/bits/hashtable_policy.h
> >> 

Re: [PATCH][Hashtable 6/6] PR 68303 small size optimization

2021-12-25 Thread François Dumont via Gcc-patches

On 23/12/21 2:03 pm, Jonathan Wakely wrote:

On 21/12/21 07:07 +0100, François Dumont wrote:

Hi

Is there a chance for this patch to be integrated for next gcc
release ?


Yes, I think this can still make it for GCC 12 (the patch was first
posted long ago in stage 1 and it's just me being slow to review it).

But I have some questions and comments ...


diff --git a/libstdc++-v3/include/bits/hashtable.h 
b/libstdc++-v3/include/bits/hashtable.h

index 6e2d4c10cfe..6b2c6b99fef 100644
--- a/libstdc++-v3/include/bits/hashtable.h
+++ b/libstdc++-v3/include/bits/hashtable.h
@@ -419,6 +419,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  _M_uses_single_bucket() const
  { return _M_uses_single_bucket(_M_buckets); }

+  static constexpr size_t
+  __small_size_threshold()
+  {
+    return
+ __detail::_Hashtable_hash_traits<_Hash>::__small_size_threshold();
+  }
+

I've added the noexcept qualification as you asked.

  __hashtable_alloc&
  _M_base_alloc() { return *this; }

@@ -788,6 +795,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  _M_bucket_index(__hash_code __c) const
  { return __hash_code_base::_M_bucket_index(__c, 
_M_bucket_count); }


+  __node_base_ptr
+  _M_find_before_node(const key_type&);
+
  // Find and insert helper functions and types
  // Find the node before the one matching the criteria.
  __node_base_ptr
@@ -831,6 +841,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  __node_base_ptr
  _M_get_previous_node(size_type __bkt, __node_ptr __n);

+  std::pair<const_iterator, __hash_code>
+  _M_compute_hash_code(const_iterator __hint, const key_type& __k) const;
+
  // Insert node __n with hash code __code, in bucket __bkt if no
  // rehash (assumes no element with same key already present).
  // Takes ownership of __n if insertion succeeds, throws otherwise.
@@ -1126,7 +1139,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  void _M_rehash(size_type __bkt_count, const __rehash_state& 
__state);

    };

-
  // Definitions of class template _Hashtable's out-of-line member 
functions.

  template<typename _Key, typename _Value, typename _Alloc,
	   typename _ExtractKey, typename _Equal,
	   typename _Hash, typename _RangeHash, typename _Unused,
	   typename _RehashPolicy, typename _Traits>

@@ -1628,6 +1640,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
    find(const key_type& __k)
    -> iterator
    {
+  if (size() <= __small_size_threshold())
+    {
+  for (auto __it = begin(); __it != end(); ++__it)
+    if (this->_M_key_equals(__k, *__it._M_cur))
+  return __it;
+  return end();
+    }


This loop is repeated a few times, would it be better factored out
into its own function? _M_find_loop or something? The return type is
different in some cases, so maybe it's OK like this.
Yes, I also thought about that but there are often small changes from one
loop to another, as you noticed.
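
For illustration only, here is a free-standing sketch of what such a
shared lookup loop does (the helper name __find_linear is made up and is
not part of the patch): scan the elements linearly using the container's
key_equal, never invoking the hasher.

#include <unordered_set>

// Sketch: linear small-size lookup via key_equal; no hashing involved.
template<typename _Container, typename _Key>
  typename _Container::const_iterator
  __find_linear(const _Container& __c, const _Key& __k)
  {
    auto __eq = __c.key_eq();            // the container's key_equal
    for (auto __it = __c.begin(); __it != __c.end(); ++__it)
      if (__eq(*__it, __k))              // compare keys directly
        return __it;
    return __c.end();
  }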





+
  __hash_code __code = this->_M_hash_code(__k);
  std::size_t __bkt = _M_bucket_index(__code);
  return iterator(_M_find_node(__bkt, __k, __code));
@@ -1643,6 +1663,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
    find(const key_type& __k) const
    -> const_iterator
    {
+  if (size() <= __small_size_threshold())
+    {
+  for (auto __it = begin(); __it != end(); ++__it)
+    if (this->_M_key_equals(__k, *__it._M_cur))
+  return __it;
+  return end();
+    }
+
  __hash_code __code = this->_M_hash_code(__k);
  std::size_t __bkt = _M_bucket_index(__code);
  return const_iterator(_M_find_node(__bkt, __k, __code));
@@ -1855,6 +1883,35 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  }
#endif

+  // Find the node before the one whose key compares equal to k.
+  // Return nullptr if no node is found.
+  template<typename _Key, typename _Value, typename _Alloc,
+	   typename _ExtractKey, typename _Equal,
+	   typename _Hash, typename _RangeHash, typename _Unused,
+	   typename _RehashPolicy, typename _Traits>
+    auto
+    _Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal,
+   _Hash, _RangeHash, _Unused, _RehashPolicy, _Traits>::
+    _M_find_before_node(const key_type& __k)
+    -> __node_base_ptr
+    {
+  __node_base_ptr __prev_p = &_M_before_begin;


This is OK now, but would need to be std::__addressof(_M_before_begin)
if/when the _Hash_code_base type becomes dependent on the allocator's
pointer.
Yes, maybe after gcc release we will talk about those fancy pointer 
types again.
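
As a minimal illustration of the std::__addressof point (using the
public std::addressof and a made-up type name, purely for demonstration):
a type whose pointer-like machinery overloads unary operator& makes plain
&x return the wrong thing.

#include <memory>

struct _Fancy                            // hypothetical pointer-like type
{
  int* operator&() { return nullptr; }   // hostile operator& overload
};

int main()
{
  _Fancy __f;
  _Fancy* __real = std::addressof(__f);  // always the true address
  // Plain &__f would call the overload and yield nullptr here.
  return __real != nullptr ? 0 : 1;
}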


 __n_last = __n_last->_M_next();
diff --git a/libstdc++-v3/include/bits/hashtable_policy.h 
b/libstdc++-v3/include/bits/hashtable_policy.h

index 0b5443fc187..b4a8af081ce 100644
--- a/libstdc++-v3/include/bits/hashtable_policy.h
+++ b/libstdc++-v3/include/bits/hashtable_policy.h
@@ -246,6 +246,20 @@ namespace __detail
  using __unique_keys = __bool_constant<_Unique_keys>;
    };

+  /**
+   *  struct _Hashtable_hash_traits
+   *
+   *  Important traits for hash tables depending on associated hasher.
+   *
+   */
+  template<typename _Hash>
+    struct _Hashtable_hash_traits
+    {
+  static constexpr std::size_t
+  __small_size_threshold()
+  { return std::__is_fast_hash<_Hash>::value ? 0 : 20; }
+    };


Yet another trait that nobody is ever going to specialize makes me sad.
I don't have a better idea though.


Sure, but maybe I can document it ?

I also wonder why you did not propose to make it a constant rather than 
requesting to add the noexcept.
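
To make the documentation idea concrete, a specialization could look
roughly like this (a sketch only: it assumes the trait stays reachable as
std::__detail::_Hashtable_hash_traits on an unversioned-namespace build,
and slow_hash plus the threshold value 32 are made up for the example):

#include <string>
#include <unordered_set>

// A deliberately expensive user-provided hasher.
struct slow_hash
{
  std::size_t operator()(const std::string& __s) const
  { return std::hash<std::string>{}(__s); }
};

// Opt this hasher into a larger small-size threshold.
namespace std::__detail
{
  template<>
    struct _Hashtable_hash_traits<slow_hash>
    {
      static constexpr std::size_t
      __small_size_threshold() noexcept
      { return 32; }   // containers up to 32 elements use the linear scan
    };
}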




   {

@@ -1263,6 +1279,14 @@ namespace __detail
    const 

Re: [PATCH][Hashtable 6/6] PR 68303 small size optimization

2021-12-23 Thread Jonathan Wakely via Gcc-patches
On Tue, 21 Dec 2021 at 17:56, François Dumont via Libstdc++ <
libstd...@gcc.gnu.org> wrote:

> On 21/12/21 7:28 am, Daniel Krügler wrote:
> > Am Di., 21. Dez. 2021 um 07:08 Uhr schrieb François Dumont via
> > Libstdc++ :
> >> Hi
> >>
> >>   Is there a chance for this patch to be integrated for next gcc
> >> release ?
> >>
> >> François
> >>
> > No counterargument for the acceptance, but: Shouldn't
> > __small_size_threshold() be a noexcept function?
> >
> > - Daniel
>
> Could it enhance code generation ? I could make its noexcept depend on
> _Hashtable_hash_traits<>::__small_size_threshold()'s noexcept
> qualification if so. But I was hoping that the compiler would detect all
> that itself.
>
> Otherwise no, it does not have to be noexcept, as it is only used to avoid
> hasher invocation in some situations and the hasher is not noexcept
> constrained. At least I do not need to static_assert this.
>
>
But why not make it noexcept? It just returns a constant integer. It can be
noexcept.
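
Concretely, the qualification under discussion amounts to this (a sketch
of the member with noexcept added; it only forwards a compile-time
constant, so it cannot throw):

  static constexpr size_t
  __small_size_threshold() noexcept
  {
    return
      __detail::_Hashtable_hash_traits<_Hash>::__small_size_threshold();
  }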


Re: [PATCH][Hashtable 6/6] PR 68303 small size optimization

2021-12-21 Thread François Dumont via Gcc-patches

On 21/12/21 7:28 am, Daniel Krügler wrote:

Am Di., 21. Dez. 2021 um 07:08 Uhr schrieb François Dumont via
Libstdc++ :

Hi

  Is there a chance for this patch to be integrated for next gcc
release ?

François


No counterargument for the acceptance, but: Shouldn't
__small_size_threshold() be a noexcept function?

- Daniel


Could it enhance code generation ? I could make its noexcept depend on
_Hashtable_hash_traits<>::__small_size_threshold()'s noexcept
qualification if so. But I was hoping that the compiler would detect all
that itself.


Otherwise no, it does not have to be noexcept, as it is only used to avoid
hasher invocation in some situations and the hasher is not noexcept
constrained. At least I do not need to static_assert this.





Re: [PATCH][Hashtable 6/6] PR 68303 small size optimization

2021-12-20 Thread Daniel Krügler via Gcc-patches
Am Di., 21. Dez. 2021 um 07:08 Uhr schrieb François Dumont via
Libstdc++ :
>
> Hi
>
>  Is there a chance for this patch to be integrated for next gcc
> release ?
>
> François
>

No counterargument for the acceptance, but: Shouldn't
__small_size_threshold() be a noexcept function?

- Daniel


Re: [PATCH][Hashtable 6/6] PR 68303 small size optimization

2021-12-20 Thread François Dumont via Gcc-patches

Hi

    Is there a chance for this patch to be integrated for next gcc 
release ?


François


On 23/09/21 6:36 am, François Dumont wrote:

Ping ?

On 16/08/21 9:03 pm, François Dumont wrote:

On 17/07/20 2:58 pm, Jonathan Wakely wrote:

On 17/11/19 22:31 +0100, François Dumont wrote:

This is an implementation of PR 68303.

I try to use this idea as much as possible to avoid computation of 
hash codes.


Note that tests are not showing any gain. I guess hash computation 
must be quite bad to get a benefit from it. So I am only activating 
it when hash code is not cached and/or when computation is not fast.


If the tests don't show any benefit, why bother making the change?


I eventually managed to demonstrate this optimization through a 
performance test case.




Does it help the example in the PR?


No, the code attached to the PR just shows what the user has done to
put this optimization in place on his side.


What I needed was a hash code computation that is slow compared to the
equal operation. I realized that I had to use longer strings to achieve this.
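
A rough standalone sketch of that kind of measurement (not the actual
unordered_small_size.cc test; names and sizes here are made up): a small
set of long string keys, where hashing a whole key costs far more than
the few equality checks a linear scan needs.

#include <chrono>
#include <cstdio>
#include <string>
#include <unordered_set>

int main()
{
  const std::string base(1000, 'x');     // long keys -> slow hash
  std::unordered_set<std::string> s;
  for (int i = 0; i < 10; ++i)           // stays below the threshold of 20
    s.insert(base + std::to_string(i));

  const std::string key = base + "5";    // an existing key
  auto t0 = std::chrono::steady_clock::now();
  std::size_t hits = 0;
  for (int n = 0; n < 1000000; ++n)
    hits += s.count(key);                // lookup dominated by hashing cost
  auto t1 = std::chrono::steady_clock::now();

  auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(t1 - t0);
  std::printf("%zu hits in %lld ms\n", hits, (long long)ms.count());
}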


Moreover, making this optimization dependent on
_Hashtable_traits::__hash_cached was just wrong: we cannot use the
cached hash code here, as the input is a key instance, not a node.


I introduce _Hashtable_hash_traits<_Hash> to offer a single
customization point, as this optimization depends highly on the relative
cost of a hash code computation and a comparison. Maybe I should put
it at std namespace scope to ease partial specialization ?


Performance test results before the patch:

unordered_small_size.cc  std::unordered_set<int>: 1st insert        40r   32u    8s   264000112mem  0pf
unordered_small_size.cc  std::unordered_set<int>: find/erase        22r   22u    0s  -191999808mem  0pf
unordered_small_size.cc  std::unordered_set<int>: 2nd insert        36r   36u    0s   191999776mem  0pf
unordered_small_size.cc  std::unordered_set<int>: erase key         25r   25u    0s  -191999808mem  0pf
unordered_small_size.cc  std::unordered_set<string>: 1st insert    404r  244u  156s -1989936256mem  0pf
unordered_small_size.cc  std::unordered_set<string>: find/erase    315r  315u    0s  2061942272mem  0pf
unordered_small_size.cc  std::unordered_set<string>: 2nd insert    233r  233u    0s -2061942208mem  0pf
unordered_small_size.cc  std::unordered_set<string>: erase key     299r  298u    0s  2061942208mem  0pf


after the patch:

unordered_small_size.cc  std::unordered_set<int>: 1st insert        41r   33u    7s   264000112mem  0pf
unordered_small_size.cc  std::unordered_set<int>: find/erase        24r   25u    1s  -191999808mem  0pf
unordered_small_size.cc  std::unordered_set<int>: 2nd insert        34r   34u    0s   191999776mem  0pf
unordered_small_size.cc  std::unordered_set<int>: erase key         25r   25u    0s  -191999808mem  0pf
unordered_small_size.cc  std::unordered_set<string>: 1st insert    399r  232u  165s -1989936256mem  0pf
unordered_small_size.cc  std::unordered_set<string>: find/erase    196r  197u    0s  2061942272mem  0pf
unordered_small_size.cc  std::unordered_set<string>: 2nd insert    221r  222u    0s -2061942208mem  0pf
unordered_small_size.cc  std::unordered_set<string>: erase key     178r  178u    0s  2061942208mem  0pf


    libstdc++: Optimize operations on small size hashtable [PR 68303]

    When the hasher is identified as slow and the number of elements in the
    container is limited, use a brute-force loop over those elements to look
    for a given key using the key_equal functor. For the moment, the default
    threshold below which the container is considered small is 20.

    libstdc++-v3/ChangeLog:

    PR libstdc++/68303
    * include/bits/hashtable_policy.h
    (_Hashtable_hash_traits<_Hash>): New.
    (_Hash_code_base<>::_M_hash_code(const _Hash_node_value<>&)): New.
    (_Hashtable_base<>::_M_key_equals): New.
    (_Hashtable_base<>::_M_equals): Use latter.
    (_Hashtable_base<>::_M_key_equals_tr): New.
    (_Hashtable_base<>::_M_equals_tr): Use latter.
    * include/bits/hashtable.h
    (_Hashtable<>::__small_size_threshold()): New, use _Hashtable_hash_traits.
    (_Hashtable<>::find): Loop through elements to look for key if size is
    lower than __small_size_threshold().
    (_Hashtable<>::_M_emplace(true_type, _Args&&...)): Likewise.
    (_Hashtable<>::_M_insert_unique(_Kt&&, _Args&&, const _NodeGenerator&)):
    Likewise.
    (_Hashtable<>::_M_compute_hash_code(const_iterator, const key_type&)): New.
    (_Hashtable<>::_M_emplace(const_iterator, false_type, _Args&&...)): Use
    latter.
    (_Hashtable<>::_M_find_before_node(const key_type&)): New.
    (_Hashtable<>::_M_erase(true_type, const key_type&)): Use latter.
    (_Hashtable<>::_M_erase(false_type, const key_type&)): Likewise.
    * src/c++11/hashtable_c++0x.cc: Include .
    * testsuite/util/testsuite_performance.h

Re: [PATCH][Hashtable 6/6] PR 68303 small size optimization

2021-09-22 Thread François Dumont via Gcc-patches

Ping ?

On 16/08/21 9:03 pm, François Dumont wrote:

On 17/07/20 2:58 pm, Jonathan Wakely wrote:

On 17/11/19 22:31 +0100, François Dumont wrote:

This is an implementation of PR 68303.

I try to use this idea as much as possible to avoid computation of 
hash codes.


Note that tests are not showing any gain. I guess hash computation 
must be quite bad to get a benefit from it. So I am only activating 
it when hash code is not cached and/or when computation is not fast.


If the tests don't show any benefit, why bother making the change?


I eventually managed to demonstrate this optimization through a 
performance test case.




Does it help the example in the PR?


No, the code attached to the PR just shows what the user has done to
put this optimization in place on his side.


What I needed was a hash code computation that is slow compared to the
equal operation. I realized that I had to use longer strings to achieve this.


Moreover, making this optimization dependent on
_Hashtable_traits::__hash_cached was just wrong: we cannot use the
cached hash code here, as the input is a key instance, not a node.


I introduce _Hashtable_hash_traits<_Hash> to offer a single
customization point, as this optimization depends highly on the relative
cost of a hash code computation and a comparison. Maybe I should put
it at std namespace scope to ease partial specialization ?


Performance test results before the patch:

unordered_small_size.cc  std::unordered_set<int>: 1st insert        40r   32u    8s   264000112mem  0pf
unordered_small_size.cc  std::unordered_set<int>: find/erase        22r   22u    0s  -191999808mem  0pf
unordered_small_size.cc  std::unordered_set<int>: 2nd insert        36r   36u    0s   191999776mem  0pf
unordered_small_size.cc  std::unordered_set<int>: erase key         25r   25u    0s  -191999808mem  0pf
unordered_small_size.cc  std::unordered_set<string>: 1st insert    404r  244u  156s -1989936256mem  0pf
unordered_small_size.cc  std::unordered_set<string>: find/erase    315r  315u    0s  2061942272mem  0pf
unordered_small_size.cc  std::unordered_set<string>: 2nd insert    233r  233u    0s -2061942208mem  0pf
unordered_small_size.cc  std::unordered_set<string>: erase key     299r  298u    0s  2061942208mem  0pf


after the patch:

unordered_small_size.cc  std::unordered_set<int>: 1st insert        41r   33u    7s   264000112mem  0pf
unordered_small_size.cc  std::unordered_set<int>: find/erase        24r   25u    1s  -191999808mem  0pf
unordered_small_size.cc  std::unordered_set<int>: 2nd insert        34r   34u    0s   191999776mem  0pf
unordered_small_size.cc  std::unordered_set<int>: erase key         25r   25u    0s  -191999808mem  0pf
unordered_small_size.cc  std::unordered_set<string>: 1st insert    399r  232u  165s -1989936256mem  0pf
unordered_small_size.cc  std::unordered_set<string>: find/erase    196r  197u    0s  2061942272mem  0pf
unordered_small_size.cc  std::unordered_set<string>: 2nd insert    221r  222u    0s -2061942208mem  0pf
unordered_small_size.cc  std::unordered_set<string>: erase key     178r  178u    0s  2061942208mem  0pf


    libstdc++: Optimize operations on small size hashtable [PR 68303]

    When the hasher is identified as slow and the number of elements in the
    container is limited, use a brute-force loop over those elements to look
    for a given key using the key_equal functor. For the moment, the default
    threshold below which the container is considered small is 20.

    libstdc++-v3/ChangeLog:

    PR libstdc++/68303
    * include/bits/hashtable_policy.h
    (_Hashtable_hash_traits<_Hash>): New.
    (_Hash_code_base<>::_M_hash_code(const _Hash_node_value<>&)): New.
    (_Hashtable_base<>::_M_key_equals): New.
    (_Hashtable_base<>::_M_equals): Use latter.
    (_Hashtable_base<>::_M_key_equals_tr): New.
    (_Hashtable_base<>::_M_equals_tr): Use latter.
    * include/bits/hashtable.h
    (_Hashtable<>::__small_size_threshold()): New, use _Hashtable_hash_traits.
    (_Hashtable<>::find): Loop through elements to look for key if size is
    lower than __small_size_threshold().
    (_Hashtable<>::_M_emplace(true_type, _Args&&...)): Likewise.
    (_Hashtable<>::_M_insert_unique(_Kt&&, _Args&&, const _NodeGenerator&)):
    Likewise.
    (_Hashtable<>::_M_compute_hash_code(const_iterator, const key_type&)): New.
    (_Hashtable<>::_M_emplace(const_iterator, false_type, _Args&&...)): Use
    latter.
    (_Hashtable<>::_M_find_before_node(const key_type&)): New.
    (_Hashtable<>::_M_erase(true_type, const key_type&)): Use latter.
    (_Hashtable<>::_M_erase(false_type, const key_type&)): Likewise.
    * src/c++11/hashtable_c++0x.cc: Include .
    * testsuite/util/testsuite_performance.h
    (report_performance): Use 9 width to display memory.
    * 

Re: [PATCH][Hashtable 6/6] PR 68303 small size optimization

2021-08-16 Thread François Dumont via Gcc-patches

On 17/07/20 2:58 pm, Jonathan Wakely wrote:

On 17/11/19 22:31 +0100, François Dumont wrote:

This is an implementation of PR 68303.

I try to use this idea as much as possible to avoid computation of 
hash codes.


Note that tests are not showing any gain. I guess hash computation 
must be quite bad to get a benefit from it. So I am only activating 
it when hash code is not cached and/or when computation is not fast.


If the tests don't show any benefit, why bother making the change?


I eventually managed to demonstrate this optimization through a 
performance test case.




Does it help the example in the PR?


No, the code attached to the PR just shows what the user has done to put
this optimization in place on his side.


What I needed was a hash code computation that is slow compared to the
equal operation. I realized that I had to use longer strings to achieve this.


Moreover, making this optimization dependent on _Hashtable_traits::__hash_cached
was just wrong: we cannot use the cached hash code here, as the input
is a key instance, not a node.


I introduce _Hashtable_hash_traits<_Hash> to offer a single
customization point, as this optimization depends highly on the relative
cost of a hash code computation and a comparison. Maybe I should put it
at std namespace scope to ease partial specialization ?


Performance test results before the patch:

unordered_small_size.cc  std::unordered_set<int>: 1st insert        40r   32u    8s   264000112mem  0pf
unordered_small_size.cc  std::unordered_set<int>: find/erase        22r   22u    0s  -191999808mem  0pf
unordered_small_size.cc  std::unordered_set<int>: 2nd insert        36r   36u    0s   191999776mem  0pf
unordered_small_size.cc  std::unordered_set<int>: erase key         25r   25u    0s  -191999808mem  0pf
unordered_small_size.cc  std::unordered_set<string>: 1st insert    404r  244u  156s -1989936256mem  0pf
unordered_small_size.cc  std::unordered_set<string>: find/erase    315r  315u    0s  2061942272mem  0pf
unordered_small_size.cc  std::unordered_set<string>: 2nd insert    233r  233u    0s -2061942208mem  0pf
unordered_small_size.cc  std::unordered_set<string>: erase key     299r  298u    0s  2061942208mem  0pf


after the patch:

unordered_small_size.cc  std::unordered_set<int>: 1st insert        41r   33u    7s   264000112mem  0pf
unordered_small_size.cc  std::unordered_set<int>: find/erase        24r   25u    1s  -191999808mem  0pf
unordered_small_size.cc  std::unordered_set<int>: 2nd insert        34r   34u    0s   191999776mem  0pf
unordered_small_size.cc  std::unordered_set<int>: erase key         25r   25u    0s  -191999808mem  0pf
unordered_small_size.cc  std::unordered_set<string>: 1st insert    399r  232u  165s -1989936256mem  0pf
unordered_small_size.cc  std::unordered_set<string>: find/erase    196r  197u    0s  2061942272mem  0pf
unordered_small_size.cc  std::unordered_set<string>: 2nd insert    221r  222u    0s -2061942208mem  0pf
unordered_small_size.cc  std::unordered_set<string>: erase key     178r  178u    0s  2061942208mem  0pf


    libstdc++: Optimize operations on small size hashtable [PR 68303]

    When the hasher is identified as slow and the number of elements in the
    container is limited, use a brute-force loop over those elements to look
    for a given key using the key_equal functor. For the moment, the default
    threshold below which the container is considered small is 20.

    libstdc++-v3/ChangeLog:

    PR libstdc++/68303
    * include/bits/hashtable_policy.h
    (_Hashtable_hash_traits<_Hash>): New.
    (_Hash_code_base<>::_M_hash_code(const _Hash_node_value<>&)): New.
    (_Hashtable_base<>::_M_key_equals): New.
    (_Hashtable_base<>::_M_equals): Use latter.
    (_Hashtable_base<>::_M_key_equals_tr): New.
    (_Hashtable_base<>::_M_equals_tr): Use latter.
    * include/bits/hashtable.h
    (_Hashtable<>::__small_size_threshold()): New, use _Hashtable_hash_traits.
    (_Hashtable<>::find): Loop through elements to look for key if size is
    lower than __small_size_threshold().
    (_Hashtable<>::_M_emplace(true_type, _Args&&...)): Likewise.
    (_Hashtable<>::_M_insert_unique(_Kt&&, _Args&&, const _NodeGenerator&)):
    Likewise.
    (_Hashtable<>::_M_compute_hash_code(const_iterator, const key_type&)): New.
    (_Hashtable<>::_M_emplace(const_iterator, false_type, _Args&&...)): Use
    latter.
    (_Hashtable<>::_M_find_before_node(const key_type&)): New.
    (_Hashtable<>::_M_erase(true_type, const key_type&)): Use latter.
    (_Hashtable<>::_M_erase(false_type, const key_type&)): Likewise.
    * src/c++11/hashtable_c++0x.cc: Include .
    * testsuite/util/testsuite_performance.h
    (report_performance): Use 9 width to display memory.
    * testsuite/performance/23_containers/insert_erase/unordered_small_size.cc:
    New performance test case.

Tested 

Re: [PATCH][Hashtable 6/6] PR 68303 small size optimization

2020-07-17 Thread Jonathan Wakely via Gcc-patches

On 17/11/19 22:31 +0100, François Dumont wrote:

This is an implementation of PR 68303.

I try to use this idea as much as possible to avoid computation of 
hash codes.


Note that tests are not showing any gain. I guess hash computation 
must be quite bad to get a benefit from it. So I am only activating it 
when hash code is not cached and/or when computation is not fast.


If the tests don't show any benefit, why bother making the change?

Does it help the example in the PR?


    PR libstdc++/68303
    * include/bits/hashtable_policy.h
    (_Small_size_threshold<_Hash>): New.
    (_Hashtable_traits<>): Add _Small_size_threshold std::size_t template
    parameter, default to 0.
    (_Hashtable_traits<>::__small_size_threshold): New.
    (_Hash_code_base<>::_M_hash_code(const __node_type*)): New.
    (_Equal_helper<>::_S_node_equals): New.
    * include/bits/hashtable.h:
    (__small_size_threshold_default<>): New template alias.
    (_Hashtable<>::find): Add linear lookup when size is lower or equal to
    _Small_size_threshold.
    (_Hashtable<>::_M_emplace<_Args>(true_type, _Args&&...)): Add linear
    lookup when size is lower or equal to _Small_size_threshold.
    (_Hashtable<>::_M_insert<>(_Arg&&, const _NodeGenerator&, true_type,
    size_type)): Likewise.
    (_Hashtable<>::_M_compute_hash_code(const_iterator, const key_type&)):
    New.
    (_Hashtable<>::_M_emplace<_Args>(false_type, _Args&&...)): Use latter.
    (_Hashtable<>::_M_insert(const_iterator, _Arg&&, const _NodeGenerator&,
    false_type)): Likewise.
    (_Hashtable<>::_M_find_before_node(const key_type&)): New.
    (_Hashtable<>::_M_erase(true_type, const key_type&)): Use latter if size
    is lower or equal to _Small_size_threshold.
    (_Hashtable<>::_M_erase(false_type, const key_type&)): Likewise.
    * include/bits/unordered_map.h (__umaps_traits): Adapt using small size
    threshold set to 20.
    (__ummap_traits): Likewise.
    * include/bits/unordered_set.h (__uset_traits, __ummset_traits): Likewise.

    * src/c++11/hashtable_c++0x.cc: Add  include.


The implementation of this seems unnecessarily complicated, and it
changes the mangled name of the _Hashtable_traits type, which changes
the mangled name of _Hashtable too.

It seems to me that instead of __traits_type::__small_size_threshold
being a typedef inside the traits class, we could just use a constexpr
function, maybe as a member of the _Hashtable_traits class template:

--- a/libstdc++-v3/include/bits/hashtable_policy.h
+++ b/libstdc++-v3/include/bits/hashtable_policy.h
@@ -201,6 +201,14 @@ namespace __detail
   using __hash_cached = __bool_constant<_Cache_hash_code>;
   using __constant_iterators = __bool_constant<_Constant_iterators>;
   using __unique_keys = __bool_constant<_Unique_keys>;
+
+  template<typename _Hash>
+   static constexpr size_t
+   __small_size_threshold()
+   {
+ return _Cache_hash_code ? 0
+   : __detail::_Small_size_threshold<_Hash>::value;
+   }
 };
 
Regarding the new _Small_size_threshold class template, do we really
need yet another customization point? Can't we just use
__is_fast_hash? e.g.

 return _Cache_hash_code || __is_fast_hash<_Hash>::value ? 0 : 20;
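
For context, __is_fast_hash already exists in libstdc++ (a simplified
sketch of its shape; the library itself marks the string hashers and
hash<long double> as slow, everything else defaults to fast):

  // Default: assume a hasher is fast...
  template<typename _Hash>
    struct __is_fast_hash : public std::true_type
    { };

  // ...with library-provided opt-outs, e.g.:
  template<>
    struct __is_fast_hash<std::hash<long double>> : public std::false_type
    { };
  // <string> similarly marks __is_fast_hash<hash<string>> as false_type.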

As an aside, it seems like a terrible design that _Hashtable_traits
doesn't actually depend on the hash function, so any time we want to
extend it to detect some other property of the hash function we need
to add extra template parameters. Far too late to fix now, but we can
extend it by adding members, not by changing its type.
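
A toy illustration of that ABI point (all names here are made up): adding
a template parameter to a traits class yields a new type, so every class
templated on it gets a new mangled name, while adding a static member
function leaves the type, and hence the mangling, unchanged.

#include <cstddef>

template<bool _Cached>
  struct traits_v1 { };                  // original shape

template<bool _Cached, std::size_t _Thresh = 0>
  struct traits_v2 { };                  // extra parameter: distinct type

template<bool _Cached>
  struct traits_v3                       // v1's shape plus a new member
  {
    static constexpr std::size_t
    threshold() noexcept
    { return 20; }
  };

template<typename _Traits>
  struct table { };

// Adding a defaulted template parameter to an existing traits class (v1
// -> v2 shape) changes the traits type and every dependent mangled name;
// adding only a static member (v1 -> v3 shape) changes neither.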




[PATCH][Hashtable 6/6] PR 68303 small size optimization

2019-11-17 Thread François Dumont

This is an implementation of PR 68303.

I try to use this idea as much as possible to avoid computation of hash 
codes.


Note that tests are not showing any gain. I guess hash computation must 
be quite bad to get a benefit from it. So I am only activating it when 
hash code is not cached and/or when computation is not fast.


    PR libstdc++/68303
    * include/bits/hashtable_policy.h
    (_Small_size_threshold<_Hash>): New.
    (_Hashtable_traits<>): Add _Small_size_threshold std::size_t template
    parameter, default to 0.
    (_Hashtable_traits<>::__small_size_threshold): New.
    (_Hash_code_base<>::_M_hash_code(const __node_type*)): New.
    (_Equal_helper<>::_S_node_equals): New.
    * include/bits/hashtable.h:
    (__small_size_threshold_default<>): New template alias.
    (_Hashtable<>::find): Add linear lookup when size is lower or equal to
    _Small_size_threshold.
    (_Hashtable<>::_M_emplace<_Args>(true_type, _Args&&...)): Add linear
    lookup when size is lower or equal to _Small_size_threshold.
    (_Hashtable<>::_M_insert<>(_Arg&&, const _NodeGenerator&, true_type,
    size_type)): Likewise.
    (_Hashtable<>::_M_compute_hash_code(const_iterator, const key_type&)):
    New.
    (_Hashtable<>::_M_emplace<_Args>(false_type, _Args&&...)): Use latter.
    (_Hashtable<>::_M_insert(const_iterator, _Arg&&, const _NodeGenerator&,
    false_type)): Likewise.
    (_Hashtable<>::_M_find_before_node(const key_type&)): New.
    (_Hashtable<>::_M_erase(true_type, const key_type&)): Use latter if size
    is lower or equal to _Small_size_threshold.
    (_Hashtable<>::_M_erase(false_type, const key_type&)): Likewise.
    * include/bits/unordered_map.h (__umaps_traits): Adapt using small size
    threshold set to 20.
    (__ummap_traits): Likewise.
    * include/bits/unordered_set.h (__uset_traits, __ummset_traits): Likewise.

    * src/c++11/hashtable_c++0x.cc: Add  include.

Tested under Linux x86_64.

François

diff --git a/libstdc++-v3/include/bits/hashtable.h b/libstdc++-v3/include/bits/hashtable.h
index 9dadc62e328..460f25affe4 100644
--- a/libstdc++-v3/include/bits/hashtable.h
+++ b/libstdc++-v3/include/bits/hashtable.h
@@ -48,6 +48,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 		   // Mandatory to have erase not throwing.
 		   __is_nothrow_invocable>>;
 
+  template<typename _Hash, bool __cache>
+    using __small_size_threshold_default
+      = typename conditional<__cache,
+		// No small size optimization if hash code is cached...
+		integral_constant<size_t, 0>,
+		_Small_size_threshold<_Hash>>::type;
   /**
*  Primary class template _Hashtable.
*
@@ -743,6 +749,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   __node_base*
   _M_find_before_node(size_type, const key_type&, __hash_code) const;
 
+  __node_base*
+  _M_find_before_node(const key_type&);
+
   __node_type*
   _M_find_node(size_type __bkt, const key_type& __key,
 		   __hash_code __c) const
@@ -766,6 +775,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   __node_base*
   _M_get_previous_node(size_type __bkt, __node_base* __n);
 
+  std::pair<const_iterator, __hash_code>
+  _M_compute_hash_code(const_iterator __hint, const key_type& __k) const;
+
   // Insert node __n with hash code __code, in bucket __bkt if no
   // rehash (assumes no element with same key already present).
   // Takes ownership of __n if insertion succeeds, throws otherwise.
@@ -1490,6 +1502,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 find(const key_type& __k)
 -> iterator
 {
+  if (size() <= __traits_type::__small_size_threshold::value)
+	{
+	  for (auto __it = begin(); __it != end(); ++__it)
+	if (this->_M_key_equals(__k, __it._M_cur))
+	  return __it;
+	  return end();
+	}
+
   __hash_code __code = this->_M_hash_code(__k);
   std::size_t __bkt = _M_bucket_index(__code);
   return iterator(_M_find_node(__bkt, __k, __code));
@@ -1504,6 +1524,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 find(const key_type& __k) const
 -> const_iterator
 {
+  if (size() <= __traits_type::__small_size_threshold::value)
+	{
+	  for (auto __it = begin(); __it != end(); ++__it)
+	if (this->_M_key_equals(__k, __it._M_cur))
+	  return __it;
+	  return end();
+	}
+
   __hash_code __code = this->_M_hash_code(__k);
   std::size_t __bkt = _M_bucket_index(__code);
   return const_iterator(_M_find_node(__bkt, __k, __code));
@@ -1619,6 +1647,34 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   return nullptr;
 }
 
+  // Find the node before the one whose key compares equal to k.
+  // Return nullptr if no node is found.
+  template<typename _Key, typename _Value, typename _Alloc,
+	   typename _ExtractKey, typename _Equal,
+	   typename _Hash, typename _RehashPolicy, typename _Traits>
+auto
+_Hashtable<_Key, _Value, _Alloc, _ExtractKey, _Equal,
+	   _Hash, _RehashPolicy, _Traits>::
+_M_find_before_node(const key_type& __k)
+-> __node_base*
+{
+  __node_base* __prev_p = &_M_before_begin;
+  if (!__prev_p->_M_nxt)
+	return nullptr;
+
+  for (__node_type* __p = static_cast<__node_type*>(__prev_p->_M_nxt);
+	   __p != nullptr;
+	   __p = __p->_M_next())
+	{
+	  if