On Mon, 24 Feb 2025 at 20:52, François Dumont <[email protected]> wrote:
>
> All good remarks as usual, here is a new version.
>
> I took the time to leverage on the new method to replace a usage of
> _M_src_hash_code.
>
> libstdc++: [_Hashtable] Fix hash code cache usage when stateful
> hash functor
>
> It is wrong to reuse a cached hash code from another container when
> this code depends
> on the state of the container's Hash functor.
>
> Add checks that Hash functor is stateless before reusing the cached
> hash code.
>
> libstdc++-v3/ChangeLog:
>
> * include/bits/hashtable_policy.h
> (_Hash_code_base::_M_copy_code,
> _Hash_code_base::_M_stored_code): Remove.
"_M_store_code" not "_M_stored_code".
> * include/bits/hashtable.h (_M_hash_code_ext): New.
> (_M_copy_code): New.
> (_M_assign): Use latter.
> (_M_bucket_index_ex): New.
> (_M_equals): Use latter.
> (_M_store_code): New.
The changes to _M_src_hash_code and _M_merge_multi should be in the
ChangeLog too.
OK for trunk with those changes, thanks.
> * testsuite/23_containers/unordered_map/modifiers/merge.cc
> (test10): New
> test case.
>
> Tested under Linux x64.
>
> Ok to commit ?
>
> François
>
>
> On 19/02/2025 14:33, Jonathan Wakely wrote:
> > On 20/01/25 22:12 +0100, François Dumont wrote:
> >> Hi
> >>
> >> In my work on fancy pointer support I've decided to always cache the
> >> hash code.
> >>
> >> Doing so I spotted a bug in the management of this cache when hash
> >> functor is stateful.
> >>
> >> libstdc++: [_Hashtable] Fix hash code cache usage when hash
> >> functor is stateful
> >>
> >> It is wrong to reuse a cached hash code when this code depends
> >> then on the state
> >> of the Hash functor.
> >>
> >> Add checks that Hash functor is stateless before reusing the
> >> cached hash code.
> >>
> >> libstdc++-v3/ChangeLog:
> >>
> >> * include/bits/hashtable_policy.h
> >> (_Hash_code_base::_M_copy_code): Remove.
> >> * include/bits/hashtable.h (_M_copy_code): New.
> >
> > I like replacing the _M_copy_code overloads with one function, thanks.
> > We could do the same for _M_store_code too.
> >
> >> (_M_assign): Use latter.
> >> (_M_bucket_index_ex): New.
> >
> > I assume "ex" means "external"? That doesn't seem very clear to me
> > though. Maybe "ext" would be better.
> >
> >> (_M_equals): Use latter.
> >> *
> >> testsuite/23_containers/unordered_map/modifiers/merge.cc (test10): New
> >> test case.
> >>
> >> Tested under Linux x64, ok to commit ?
> >>
> >> François
> >
> >> diff --git a/libstdc++-v3/include/bits/hashtable.h
> >> b/libstdc++-v3/include/bits/hashtable.h
> >> index d6d76a743bb..b3c1d7aac24 100644
> >> --- a/libstdc++-v3/include/bits/hashtable.h
> >> +++ b/libstdc++-v3/include/bits/hashtable.h
> >> @@ -808,6 +808,36 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >> _M_bucket_index(__hash_code __c) const
> >> { return __hash_code_base::_M_bucket_index(__c,
> >> _M_bucket_count); }
> >>
> >> +#pragma GCC diagnostic push
> >> +#pragma GCC diagnostic ignored "-Wc++17-extensions" // if constexpr
> >> + // Like _M_bucket_index but when the node is coming from another
> >> + // container instance.
> >> + size_type
> >> + _M_bucket_index_ex(const __node_value_type& __n) const
> >> + {
> >> + if constexpr (__hash_cached::value)
> >> + if constexpr (std::is_empty<_Hash>::value)
> >> + return _RangeHash{}(__n._M_hash_code, _M_bucket_count);
> >> +
> >> + return _RangeHash{}
> >> + (this->_M_hash_code(_ExtractKey{}(__n._M_v())), _M_bucket_count);
> >> + }
> >> +
> >> + void
> >> + _M_copy_code(__node_value_type& __to,
> >> + const __node_value_type& __from) const
> >> + {
> >> + if constexpr (__hash_cached::value)
> >> + {
> >> + if constexpr (std::is_empty<_Hash>::value)
> >> + __to._M_hash_code = __from._M_hash_code;
> >> + else
> >> + __to._M_hash_code =
> >> + this->_M_hash_code(_ExtractKey{}(__from._M_v()));
> >> + }
> >> + }
> >
> >
> > These two functions are doing similar work, would it make sense to
> > factor out the common part to a new function:
> >
> > // Get hash code for a node that comes from another _Hashtable.
> > // Reuse a cached hash code if the hash function is stateless,
> > // otherwise recalculate it using our own hash function.
> > size_t
> > _M_hash_code_ext(const __node_value_type& __from) const
> > {
> > if constexpr (__and_<__hash_cached, is_empty<_Hash>>::value)
> > return __from._M_hash_code;
> > else
> > this->_M_hash_code(_ExtractKey{}(__from._M_v()));
> > }
> >
> > // Like _M_bucket_index but when the node is coming from another
> > // container instance.
> > size_type
> > _M_bucket_index_ext(const __node_value_type& __n) const
> > { return _M_bucket_index(_M_hash_code_ext(__n)); }
> >
> > void
> > _M_copy_code(__node_value_type& __to,
> > const __node_value_type& __from) const
> > {
> > if constexpr (__hash_cached::value)
> > __to._M_hash_code = _M_hash_code_ext(__n);
> > }
> >
> >
> >> +#pragma GCC diagnostic pop
> >> +
> >> // Find and insert helper functions and types
> >>
> >> // Find the node before the one matching the criteria.
> >> @@ -1587,7 +1617,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >> __node_ptr __ht_n = __ht._M_begin();
> >> __node_ptr __this_n
> >> = __node_gen(static_cast<_FromVal>(__ht_n->_M_v()));
> >> - this->_M_copy_code(*__this_n, *__ht_n);
> >> + _M_copy_code(*__this_n, *__ht_n);
> >> _M_update_bbegin(__this_n);
> >>
> >> // Then deal with other nodes.
> >> @@ -1596,7 +1626,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >> {
> >> __this_n = __node_gen(static_cast<_FromVal>(__ht_n->_M_v()));
> >> __prev_n->_M_nxt = __this_n;
> >> - this->_M_copy_code(*__this_n, *__ht_n);
> >> + _M_copy_code(*__this_n, *__ht_n);
> >> size_type __bkt = _M_bucket_index(*__this_n);
> >> if (!_M_buckets[__bkt])
> >> _M_buckets[__bkt] = __prev_n;
> >> @@ -2851,7 +2881,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >> if constexpr (__unique_keys::value)
> >> for (auto __x_n = _M_begin(); __x_n; __x_n = __x_n->_M_next())
> >> {
> >> - std::size_t __ybkt = __other._M_bucket_index(*__x_n);
> >> + std::size_t __ybkt = __other._M_bucket_index_ex(*__x_n);
> >> auto __prev_n = __other._M_buckets[__ybkt];
> >> if (!__prev_n)
> >> return false;
> >> @@ -2878,7 +2908,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> >> __x_n_end = __x_n_end->_M_next())
> >> ++__x_count;
> >>
> >> - std::size_t __ybkt = __other._M_bucket_index(*__x_n);
> >> + std::size_t __ybkt = __other._M_bucket_index_ex(*__x_n);
> >> auto __y_prev_n = __other._M_buckets[__ybkt];
> >> if (!__y_prev_n)
> >> return false;
> >> diff --git a/libstdc++-v3/include/bits/hashtable_policy.h
> >> b/libstdc++-v3/include/bits/hashtable_policy.h
> >> index 1fa8c01d5e8..61c57651c4a 100644
> >> --- a/libstdc++-v3/include/bits/hashtable_policy.h
> >> +++ b/libstdc++-v3/include/bits/hashtable_policy.h
> >> @@ -1108,19 +1108,9 @@ namespace __detail
> >> _M_store_code(_Hash_node_code_cache<false>&, __hash_code) const
> >> { }
> >>
> >> - void
> >> - _M_copy_code(_Hash_node_code_cache<false>&,
> >> - const _Hash_node_code_cache<false>&) const
> >> - { }
> >> -
> >> void
> >> _M_store_code(_Hash_node_code_cache<true>& __n, __hash_code
> >> __c) const
> >> { __n._M_hash_code = __c; }
> >> -
> >> - void
> >> - _M_copy_code(_Hash_node_code_cache<true>& __to,
> >> - const _Hash_node_code_cache<true>& __from) const
> >> - { __to._M_hash_code = __from._M_hash_code; }
> >> };
> >>
> >> /// Partial specialization used when nodes contain a cached hash code.
> >> diff --git
> >> a/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/merge.cc
> >> b/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/merge.cc
> >> index 10b61464243..010181f7038 100644
> >> ---
> >> a/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/merge.cc
> >> +++
> >> b/libstdc++-v3/testsuite/23_containers/unordered_map/modifiers/merge.cc
> >> @@ -417,6 +417,51 @@ test09()
> >> VERIFY( c2.size() == 3 );
> >> }
> >>
> >> +struct slow_stateful_hash
> >> +{
> >> + size_t seed = 0;
> >> +
> >> + auto operator()(const int& i) const noexcept
> >> + { return std::hash<int>()(i) + seed; }
> >> +};
> >> +
> >> +namespace std
> >> +{
> >> + template<>
> >> + struct __is_fast_hash<slow_stateful_hash> : public std::false_type
> >> + { };
> >> +}
> >> +
> >> +void
> >> +test10()
> >> +{
> >> + using map_type = std::unordered_map<int, int, slow_stateful_hash>;
> >> + map_type c1({ {1, 1}, {3, 3}, {5, 5} }, 0, slow_stateful_hash{1});
> >> + map_type c2({ {2, 2}, {4, 4}, {6, 6} }, 0, slow_stateful_hash{2});
> >> + const auto c3 = c2;
> >> +
> >> + c1.merge(c2);
> >> + VERIFY( c1.size() == 6 );
> >> + VERIFY( c2.empty() );
> >> +
> >> + c2 = c3;
> >> + c1.clear();
> >> + c1.merge(std::move(c2));
> >> + VERIFY( c1 == c3 );
> >> + VERIFY( c2.empty() );
> >> +
> >> + c2.merge(std::move(c1));
> >> + VERIFY( c1.empty() );
> >> + VERIFY( c2 == c3 );
> >> +
> >> + c2.merge(c1);
> >> + VERIFY( c1.empty() );
> >> + VERIFY( c2 == c3 );
> >> +
> >> + c2.merge(c2);
> >> + VERIFY( c2 == c3 );
> >> +}
> >> +
> >> int
> >> main()
> >> {
> >> @@ -429,4 +474,5 @@ main()
> >> test07();
> >> test08();
> >> test09();
> >> + test10();
> >> }
> >