From c01e2aedb0bc21574f4ec7689f8b63a4d853cec6 Mon Sep 17 00:00:00 2001
From: "Duncan P. N. Exon Smith" <dexonsmith@apple.com>
Date: Wed, 16 Dec 2015 13:02:40 -0800
Subject: [PATCH] unordered_map: Avoid unnecessary mallocs when no insert
 occurs

This is a follow-up to r239666: "Fix PR12999 - unordered_set::insert
calls operator new when no insert occurs".  That fix didn't apply to
unordered_map because unordered_map::value_type gets packed inside:

    union __value_type {
      pair<key_type, mapped_type> __nc;       // Only C++11 or higher.
      pair<const key_type, mapped_type> __cc; // Always.
      // Constructors...
    };

and the underlying __hash_table only knows about __value_type.

unordered_map is also complicated because, unlike unordered_set,
insert() takes anything convertible to value_type.

This patch should avoid unnecessary mallocs whenever the caller passes
in a pair<T, U> such that T is trivially convertible to key_type.

Since __hash_table doesn't know whether its value_type is actually a
pair, the static dispatch is all in unordered_map.  It's doing this:
  - If the argument isn't a pair<>, alloc.
  - If argument.first can be referenced as const key_type&, don't alloc.
  - If argument.first can be trivially converted to key_type, don't
    alloc.
  - Else alloc.

In the pre-C++11 world the caller has already converted to
unordered_map::value_type.  We can always avoid the alloc.

To support all of this:
  - In C++03, __unordered_map_equal and __unordered_map_hasher need to
    handle unordered_map::value_type.
  - In C++03, __hash_table::__insert_unique_value() now takes its
    argument by template.
  - In C++11, __hash_table::__insert_unique_value() is now a one-liner
    that forwards to __insert_unique_key_value() for the real work.
  - The versions of __hash_table::__construct_node() that take a
    pre-computed hash have been renamed to __construct_node_hash(), and
    these versions use perfect forwarding.
---
 include/__hash_table                               | 41 ++++++-----
 include/unordered_map                              | 78 +++++++++++++++++++--
 .../unord/unord.map/insert_dup_alloc.pass.cpp      | 79 ++++++++++++++++++++++
 3 files changed, 178 insertions(+), 20 deletions(-)
 create mode 100644 test/libcxx/containers/unord/unord.map/insert_dup_alloc.pass.cpp

diff --git a/include/__hash_table b/include/__hash_table
index cfa763a..c391079 100644
--- a/include/__hash_table
+++ b/include/__hash_table
@@ -902,10 +902,15 @@ public:
 #ifndef _LIBCPP_HAS_NO_RVALUE_REFERENCES
     template <class _ValueTp>
     _LIBCPP_INLINE_VISIBILITY
-    pair<iterator, bool> __insert_unique_value(_ValueTp&& __x);
+    pair<iterator, bool> __insert_unique_value(_ValueTp&& __x)
+        {return __insert_unique_key_value(__x, std::forward<_ValueTp>(__x));}
+    template <class _KeyTp, class _ValueTp>
+    _LIBCPP_INLINE_VISIBILITY
+    pair<iterator, bool> __insert_unique_key_value(const _KeyTp &__key, _ValueTp&& __x);
 #else
+    template <class _ValueTp>
     _LIBCPP_INLINE_VISIBILITY
-    pair<iterator, bool> __insert_unique_value(const value_type& __x);
+    pair<iterator, bool> __insert_unique_value(const _ValueTp& __x);
 #endif
 
     pair<iterator, bool> __insert_unique(const value_type& __x);
@@ -1089,11 +1094,12 @@ private:
     template <class ..._Args>
         __node_holder __construct_node(_Args&& ...__args);
 #endif  // _LIBCPP_HAS_NO_VARIADICS
-    __node_holder __construct_node(value_type&& __v, size_t __hash);
+    template <class _ValueTp>
+    __node_holder __construct_node_hash(_ValueTp&& __v, size_t __hash);
 #else  // _LIBCPP_HAS_NO_RVALUE_REFERENCES
     __node_holder __construct_node(const value_type& __v);
 #endif
-    __node_holder __construct_node(const value_type& __v, size_t __hash);
+    __node_holder __construct_node_hash(const value_type& __v, size_t __hash);
 
     _LIBCPP_INLINE_VISIBILITY
     void __copy_assign_alloc(const __hash_table& __u)
@@ -1737,21 +1743,23 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__insert_unique(const value_type& __x)
 
 #ifndef _LIBCPP_HAS_NO_RVALUE_REFERENCES
 template <class _Tp, class _Hash, class _Equal, class _Alloc>
-template <class _ValueTp>
+template <class _KeyTp, class _ValueTp>
 _LIBCPP_INLINE_VISIBILITY
 pair<typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator, bool>
-__hash_table<_Tp, _Hash, _Equal, _Alloc>::__insert_unique_value(_ValueTp&& __x)
+__hash_table<_Tp, _Hash, _Equal, _Alloc>::__insert_unique_key_value(const _KeyTp &__key, _ValueTp&& __x)
 #else
 template <class _Tp, class _Hash, class _Equal, class _Alloc>
+template <class _RawValueTp>
 _LIBCPP_INLINE_VISIBILITY
 pair<typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator, bool>
-__hash_table<_Tp, _Hash, _Equal, _Alloc>::__insert_unique_value(const value_type& __x)
+__hash_table<_Tp, _Hash, _Equal, _Alloc>::__insert_unique_value(const _RawValueTp& __x)
 #endif
 {
 #if defined(_LIBCPP_HAS_NO_RVALUE_REFERENCES)
-    typedef const value_type& _ValueTp;
+    typedef const _RawValueTp& _ValueTp;
+    const _RawValueTp& __key = __x;
 #endif
-    size_t __hash = hash_function()(__x);
+    size_t __hash = hash_function()(__key);
     size_type __bc = bucket_count();
     bool __inserted = false;
     __node_pointer __nd;
@@ -1766,13 +1774,13 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__insert_unique_value(const value_type
                                        __constrain_hash(__nd->__hash_, __bc) == __chash;
                                                            __nd = __nd->__next_)
             {
-                if (key_eq()(__nd->__value_, __x))
+                if (key_eq()(__nd->__value_, __key))
                     goto __done;
             }
         }
     }
     {
-        __node_holder __h = __construct_node(_VSTD::forward<_ValueTp>(__x), __hash);
+        __node_holder __h = __construct_node_hash(_VSTD::forward<_ValueTp>(__x), __hash);
         if (size()+1 > __bc * max_load_factor() || __bc == 0)
         {
             rehash(_VSTD::max<size_type>(2 * __bc + !__is_hash_power2(__bc),
@@ -2093,13 +2101,14 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__construct_node(_Args&& ...__args)
 #endif  // _LIBCPP_HAS_NO_VARIADICS
 
 template <class _Tp, class _Hash, class _Equal, class _Alloc>
+template <class _ValueTp>
 typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_holder
-__hash_table<_Tp, _Hash, _Equal, _Alloc>::__construct_node(value_type&& __v,
-                                                           size_t __hash)
+__hash_table<_Tp, _Hash, _Equal, _Alloc>::__construct_node_hash(_ValueTp&& __v,
+                                                                size_t __hash)
 {
     __node_allocator& __na = __node_alloc();
     __node_holder __h(__node_traits::allocate(__na, 1), _Dp(__na));
-    __node_traits::construct(__na, _VSTD::addressof(__h->__value_), _VSTD::move(__v));
+    __node_traits::construct(__na, _VSTD::addressof(__h->__value_), _VSTD::forward<_ValueTp>(__v));
     __h.get_deleter().__value_constructed = true;
     __h->__hash_ = __hash;
     __h->__next_ = nullptr;
@@ -2125,8 +2134,8 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__construct_node(const value_type& __v
 
 template <class _Tp, class _Hash, class _Equal, class _Alloc>
 typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_holder
-__hash_table<_Tp, _Hash, _Equal, _Alloc>::__construct_node(const value_type& __v,
-                                                           size_t __hash)
+__hash_table<_Tp, _Hash, _Equal, _Alloc>::__construct_node_hash(const value_type& __v,
+                                                                size_t __hash)
 {
     __node_allocator& __na = __node_alloc();
     __node_holder __h(__node_traits::allocate(__na, 1), _Dp(__na));
diff --git a/include/unordered_map b/include/unordered_map
index 3c774d4..329395b 100644
--- a/include/unordered_map
+++ b/include/unordered_map
@@ -398,6 +398,11 @@ public:
     _LIBCPP_INLINE_VISIBILITY
     size_t operator()(const _Cp& __x) const
         {return static_cast<const _Hash&>(*this)(__x.__cc.first);}
+#if defined(_LIBCPP_HAS_NO_RVALUE_REFERENCES)
+    _LIBCPP_INLINE_VISIBILITY
+    size_t operator()(const typename _Cp::value_type& __x) const
+        {return static_cast<const _Hash&>(*this)(__x.first);}
+#endif
     _LIBCPP_INLINE_VISIBILITY
     size_t operator()(const _Key& __x) const
         {return static_cast<const _Hash&>(*this)(__x);}
@@ -428,6 +433,11 @@ public:
     _LIBCPP_INLINE_VISIBILITY
     size_t operator()(const _Cp& __x) const
         {return __hash_(__x.__cc.first);}
+#if defined(_LIBCPP_HAS_NO_RVALUE_REFERENCES)
+    _LIBCPP_INLINE_VISIBILITY
+    size_t operator()(const typename _Cp::value_type& __x) const
+        {return __hash_(__x.first);}
+#endif
     _LIBCPP_INLINE_VISIBILITY
     size_t operator()(const _Key& __x) const
         {return __hash_(__x);}
@@ -475,6 +485,11 @@ public:
     _LIBCPP_INLINE_VISIBILITY
     bool operator()(const _Key& __x, const _Cp& __y) const
         {return static_cast<const _Pred&>(*this)(__x, __y.__cc.first);}
+#if defined(_LIBCPP_HAS_NO_RVALUE_REFERENCES)
+    _LIBCPP_INLINE_VISIBILITY
+    bool operator()(const _Cp& __x, const typename _Cp::value_type& __y) const
+        {return static_cast<const _Pred&>(*this)(__x.__cc.first, __y.first);}
+#endif
     void swap(__unordered_map_equal&__y)
         _NOEXCEPT_(__is_nothrow_swappable<_Pred>::value)
     {
@@ -508,6 +523,11 @@ public:
     _LIBCPP_INLINE_VISIBILITY
     bool operator()(const _Key& __x, const _Cp& __y) const
         {return __pred_(__x, __y.__cc.first);}
+#if defined(_LIBCPP_HAS_NO_RVALUE_REFERENCES)
+    _LIBCPP_INLINE_VISIBILITY
+    bool operator()(const _Cp& __x, const typename _Cp::value_type& __y) const
+        {return __pred_(__x.__cc.first, __y.first);}
+#endif
     void swap(__unordered_map_equal&__y)
         _NOEXCEPT_(__is_nothrow_swappable<_Pred>::value)
     {
@@ -954,13 +974,63 @@ public:
 #endif  // _LIBCPP_HAS_NO_RVALUE_REFERENCES
     _LIBCPP_INLINE_VISIBILITY
     pair<iterator, bool> insert(const value_type& __x)
-        {return __table_.__insert_unique(__x);}
+        {return __table_.__insert_unique_value(__x);}
 #ifndef _LIBCPP_HAS_NO_RVALUE_REFERENCES
     template <class _Pp,
               class = typename enable_if<is_constructible<value_type, _Pp>::value>::type>
-        _LIBCPP_INLINE_VISIBILITY
-        pair<iterator, bool> insert(_Pp&& __x)
-            {return __table_.__insert_unique(_VSTD::forward<_Pp>(__x));}
+    _LIBCPP_INLINE_VISIBILITY
+    pair<iterator, bool> insert(_Pp&& __x)
+        {return __insert_extract_key_if_pair(_VSTD::forward<_Pp>(__x));}
+
+private:
+    template <class _Pp>
+    _LIBCPP_INLINE_VISIBILITY
+    pair<iterator, bool> __insert_extract_key_if_pair(_Pp&& __x)
+        {return __table_.__insert_unique(std::forward<_Pp>(__x));}
+    template <class _FirstTp, class _SecondTp>
+    _LIBCPP_INLINE_VISIBILITY
+    pair<iterator, bool> __insert_extract_key_if_pair(pair<_FirstTp, _SecondTp>&& __x)
+        {return __insert_extract_key<_FirstTp>(_VSTD::move(__x));}
+    template <class _FirstTp, class _SecondTp>
+    _LIBCPP_INLINE_VISIBILITY
+    pair<iterator, bool> __insert_extract_key_if_pair(const pair<_FirstTp, _SecondTp>& __x)
+        {return __insert_extract_key<_FirstTp>(__x);}
+
+    template <class _FirstTp, class _Pp>
+    _LIBCPP_INLINE_VISIBILITY
+    pair<iterator, bool> __insert_extract_key(_Pp&& __x)
+        {
+            return __insert_extract_key_if_referenceable<_FirstTp>(
+                _VSTD::forward<_Pp>(__x),
+                is_same<typename decay<_FirstTp>::type, key_type>());
+        }
+
+    template <class _FirstTp, class _Pp>
+    _LIBCPP_INLINE_VISIBILITY
+    pair<iterator, bool> __insert_extract_key_if_referenceable(_Pp&& __x, false_type)
+        {
+            return __insert_extract_key_if_trivial<_FirstTp>(
+                _VSTD::forward<_Pp>(__x),
+                integral_constant<
+                    bool,
+                    is_trivially_destructible<key_type>::value &&
+                    is_trivially_constructible<key_type, typename decay<_FirstTp>::type>::value>());
+        }
+    template <class _FirstTp, class _Pp>
+    _LIBCPP_INLINE_VISIBILITY
+    pair<iterator, bool> __insert_extract_key_if_referenceable(_Pp&& __x, true_type)
+        {return __table_.__insert_unique_key_value(__x.first, _VSTD::forward<_Pp>(__x));}
+
+    template <class _FirstTp, class _Pp>
+    _LIBCPP_INLINE_VISIBILITY
+    pair<iterator, bool> __insert_extract_key_if_trivial(_Pp&& __x, false_type)
+        {return __table_.__insert_unique(_VSTD::forward<_Pp>(__x));}
+    template <class _FirstTp, class _Pp>
+    _LIBCPP_INLINE_VISIBILITY
+    pair<iterator, bool> __insert_extract_key_if_trivial(_Pp&& __x, true_type)
+        {return __table_.__insert_unique_key_value(key_type(__x.first), _VSTD::forward<_Pp>(__x));}
+
+public:
 #endif  // _LIBCPP_HAS_NO_RVALUE_REFERENCES
     _LIBCPP_INLINE_VISIBILITY
 #if _LIBCPP_DEBUG_LEVEL >= 2
diff --git a/test/libcxx/containers/unord/unord.map/insert_dup_alloc.pass.cpp b/test/libcxx/containers/unord/unord.map/insert_dup_alloc.pass.cpp
new file mode 100644
index 0000000..c221599
--- /dev/null
+++ b/test/libcxx/containers/unord/unord.map/insert_dup_alloc.pass.cpp
@@ -0,0 +1,79 @@
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Check that we don't allocate when trying to insert a duplicate value into a
+// unordered_map.
+
+#include <cassert>
+#include <unordered_map>
+#include "count_new.hpp"
+#include "MoveOnly.h"
+
+int main()
+{
+    {
+    std::unordered_map<int, int> s;
+    assert(globalMemCounter.checkNewCalledEq(0));
+
+    for(int i=0; i < 100; ++i) {
+        s.insert(std::make_pair(3, i));
+        s.insert(std::make_pair(3, unsigned(i)));
+        s.insert(std::make_pair(3u, i));
+        s.insert(std::make_pair(3u, unsigned(i)));
+        s.insert(std::make_pair<const int>(3, i));
+        {
+            const std::pair<int, int> P(3, i);
+            s.insert(P);
+        }
+        {
+            const std::pair<const int, int> P(3, i);
+            s.insert(P);
+        }
+    }
+
+    assert(s.size() == 1);
+    assert(s.count(3) == 1);
+    assert(s.at(3) == 0);
+    assert(globalMemCounter.checkNewCalledEq(2));
+    }
+    assert(globalMemCounter.checkOutstandingNewEq(0));
+    globalMemCounter.reset();
+#ifndef _LIBCPP_HAS_NO_RVALUE_REFERENCES
+    {
+    std::unordered_map<MoveOnly, MoveOnly> s;
+    assert(globalMemCounter.checkNewCalledEq(0));
+
+    for(int i=0; i<100; i++)
+        s.insert(std::make_pair(MoveOnly(3), MoveOnly(i)));
+
+    assert(s.size() == 1);
+    assert(s.count(MoveOnly(3)) == 1);
+    assert(s.at(MoveOnly(3)) == 0);
+    assert(globalMemCounter.checkNewCalledEq(2));
+    }
+    assert(globalMemCounter.checkOutstandingNewEq(0));
+    globalMemCounter.reset();
+    {
+    std::unordered_map<int, MoveOnly> s;
+    assert(globalMemCounter.checkNewCalledEq(0));
+
+    for(int i=0; i<100; i++) {
+        s.insert(std::make_pair(3, MoveOnly(i)));
+        s.insert(std::make_pair(3u, MoveOnly(i)));
+    }
+
+    assert(s.size() == 1);
+    assert(s.count(3) == 1);
+    assert(s.at(3) == 0);
+    assert(globalMemCounter.checkNewCalledEq(2));
+    }
+    assert(globalMemCounter.checkOutstandingNewEq(0));
+    globalMemCounter.reset();
+#endif
+}
-- 
2.5.4 (Apple Git-61)

