Title: [205842] trunk
Revision
205842
Author
sbar...@apple.com
Date
2016-09-12 17:14:59 -0700 (Mon, 12 Sep 2016)

Log Message

HashMapImpl should take into account m_deleteCount in its load factor and it should be able to rehash the table to be smaller
https://bugs.webkit.org/show_bug.cgi?id=161640

Reviewed by Geoffrey Garen.

JSTests:

* microbenchmarks/map-rehash.js: Added.
* stress/map-delete.js: Added.
(assert):
* stress/map-rehash-2.js: Added.
(assert):
* stress/map-rehash.js: Added.
(assert):

Source/_javascript_Core:

HashMapImpl now takes into account m_deleteCount in its load factor.
It now knows how to rehash to either decrease its capacity, stay at
the same capacity, or increase its capacity. The reason we can sometimes
stay at the same capacity is that we can reduce the load factor enough
by rehashing that growing isn't warranted. The reason for this is that
anytime we rehash, we remove all deleted sentinels from the buffer.
Therefore, staying at the same capacity, when there are deleted entries,
can still reduce the load factor because it removes all deleted sentinels.

* runtime/HashMapImpl.h:
(JSC::HashMapBuffer::create):
(JSC::HashMapBuffer::reset):
(JSC::HashMapImpl::HashMapImpl):
(JSC::HashMapImpl::add):
(JSC::HashMapImpl::remove):
(JSC::HashMapImpl::size):
(JSC::HashMapImpl::clear):
(JSC::HashMapImpl::approximateSize):
(JSC::HashMapImpl::shouldRehashAfterAdd):
(JSC::HashMapImpl::shouldShrink):
(JSC::HashMapImpl::rehash):
(JSC::HashMapImpl::checkConsistency):
(JSC::HashMapImpl::makeAndSetNewBuffer):
(JSC::HashMapImpl::assertBufferIsEmpty):

Modified Paths

Added Paths

Diff

Modified: trunk/JSTests/ChangeLog (205841 => 205842)


--- trunk/JSTests/ChangeLog	2016-09-12 23:51:57 UTC (rev 205841)
+++ trunk/JSTests/ChangeLog	2016-09-13 00:14:59 UTC (rev 205842)
@@ -1,3 +1,18 @@
+2016-09-12  Saam Barati  <sbar...@apple.com>
+
+        HashMapImpl should take into account m_deleteCount in its load factor and it should be able to rehash the table to be smaller
+        https://bugs.webkit.org/show_bug.cgi?id=161640
+
+        Reviewed by Geoffrey Garen.
+
+        * microbenchmarks/map-rehash.js: Added.
+        * stress/map-delete.js: Added.
+        (assert):
+        * stress/map-rehash-2.js: Added.
+        (assert):
+        * stress/map-rehash.js: Added.
+        (assert):
+
 2016-09-12  Yusuke Suzuki  <utatane....@gmail.com>
 
         Unreviewed, fix tests for different libm environments

Added: trunk/JSTests/microbenchmarks/map-rehash.js (0 => 205842)


--- trunk/JSTests/microbenchmarks/map-rehash.js	                        (rev 0)
+++ trunk/JSTests/microbenchmarks/map-rehash.js	2016-09-13 00:14:59 UTC (rev 205842)
@@ -0,0 +1,21 @@
+function test(bias) {
+    let set = new Set;
+    let counter = 0;
+    for (let i = 0; i < 50000; i++) {
+        ++counter;
+        if (!set.size || Math.random() > bias) {
+            let key = counter; 
+            set.add(key);
+        } else {
+            let keyToRemove = set[Symbol.iterator]().next().value;
+            set.delete(keyToRemove);
+        }
+    }
+}
+
+let start = Date.now();
+test(0.45);
+test(0.60);
+const verbose = false;
+if (verbose)
+    print(Date.now() - start);

Added: trunk/JSTests/stress/map-delete.js (0 => 205842)


--- trunk/JSTests/stress/map-delete.js	                        (rev 0)
+++ trunk/JSTests/stress/map-delete.js	2016-09-13 00:14:59 UTC (rev 205842)
@@ -0,0 +1,19 @@
+function assert(b) {
+    if (!b)
+        throw new Error("Bad!")
+}
+
+let set = new Set;
+for (let i = 0; i < 50000; i++) {
+    assert(set.size === i);
+    set.add(i);
+    assert(set.has(i));
+}
+
+for (let i = 0; i < 50000; i++) {
+    assert(set.size === 50000 - i);
+    set.delete(i);
+    assert(!set.has(i));
+}
+
+assert(!set.size);

Added: trunk/JSTests/stress/map-rehash-2.js (0 => 205842)


--- trunk/JSTests/stress/map-rehash-2.js	                        (rev 0)
+++ trunk/JSTests/stress/map-rehash-2.js	2016-09-13 00:14:59 UTC (rev 205842)
@@ -0,0 +1,13 @@
+function assert(b) {
+    if (!b)
+        throw new Error("Bad!")
+}
+
+let set = new Set;
+for (let i = 0; i < 64 + ((128 - 64)/2); i++) {
+    set.add(i);
+}
+
+for (let i = 0; i < 64 + ((128 - 64)/2); i++) {
+    set.delete(i);
+}

Added: trunk/JSTests/stress/map-rehash.js (0 => 205842)


--- trunk/JSTests/stress/map-rehash.js	                        (rev 0)
+++ trunk/JSTests/stress/map-rehash.js	2016-09-13 00:14:59 UTC (rev 205842)
@@ -0,0 +1,26 @@
+function assert(b) {
+    if (!b)
+        throw new Error("Bad!")
+}
+
+let set = new Set;
+let items = [];
+for (let i = 0; i < 3000; i++) {
+    items.push(i);
+    set.add(i);
+}
+
+let counter = 1000000;
+while (items.length) {
+    if (Math.random() < 0.85) {
+        let item = items.pop();
+        let removed = set.delete(item);
+        assert(removed);
+        assert(items.length === set.size); 
+    } else {
+        let newItem = ++counter;
+        items.push(newItem);
+        set.add(newItem);
+        assert(items.length === set.size); 
+    }
+}

Modified: trunk/Source/_javascript_Core/ChangeLog (205841 => 205842)


--- trunk/Source/_javascript_Core/ChangeLog	2016-09-12 23:51:57 UTC (rev 205841)
+++ trunk/Source/_javascript_Core/ChangeLog	2016-09-13 00:14:59 UTC (rev 205842)
@@ -1,3 +1,35 @@
+2016-09-12  Saam Barati  <sbar...@apple.com>
+
+        HashMapImpl should take into account m_deleteCount in its load factor and it should be able to rehash the table to be smaller
+        https://bugs.webkit.org/show_bug.cgi?id=161640
+
+        Reviewed by Geoffrey Garen.
+
+        HashMapImpl now takes into account m_deleteCount in its load factor.
+        It now knows how to rehash to either decrease its capacity, stay at
+        the same capacity, or increase its capacity. The reason we can sometimes
+        stay at the same capacity is that we can reduce the load factor enough
+        by rehashing that growing isn't warranted. The reason for this is that
+        anytime we rehash, we remove all deleted sentinels from the buffer.
+        Therefore, staying at the same capacity, when there are deleted entries,
+        can still reduce the load factor because it removes all deleted sentinels.
+
+        * runtime/HashMapImpl.h:
+        (JSC::HashMapBuffer::create):
+        (JSC::HashMapBuffer::reset):
+        (JSC::HashMapImpl::HashMapImpl):
+        (JSC::HashMapImpl::add):
+        (JSC::HashMapImpl::remove):
+        (JSC::HashMapImpl::size):
+        (JSC::HashMapImpl::clear):
+        (JSC::HashMapImpl::approximateSize):
+        (JSC::HashMapImpl::shouldRehashAfterAdd):
+        (JSC::HashMapImpl::shouldShrink):
+        (JSC::HashMapImpl::rehash):
+        (JSC::HashMapImpl::checkConsistency):
+        (JSC::HashMapImpl::makeAndSetNewBuffer):
+        (JSC::HashMapImpl::assertBufferIsEmpty):
+
 2016-09-12  Benjamin Poulain  <bpoul...@apple.com>
 
         [JSC] Use GetArrayLength for JSArray.length even when the array type is undecided

Modified: trunk/Source/_javascript_Core/runtime/HashMapImpl.h (205841 => 205842)


--- trunk/Source/_javascript_Core/runtime/HashMapImpl.h	2016-09-12 23:51:57 UTC (rev 205841)
+++ trunk/Source/_javascript_Core/runtime/HashMapImpl.h	2016-09-13 00:14:59 UTC (rev 205842)
@@ -158,9 +158,14 @@
         }
 
         HashMapBuffer* buffer = static_cast<HashMapBuffer*>(data);
-        memset(buffer, -1, allocationSize);
+        buffer->reset(capacity);
         return buffer;
     }
+
+    ALWAYS_INLINE void reset(uint32_t capacity)
+    {
+        memset(this, -1, allocationSize(capacity));
+    }
 };
 
 ALWAYS_INLINE static bool areKeysEqual(ExecState* exec, JSValue a, JSValue b)
@@ -280,7 +285,7 @@
 
     HashMapImpl(VM& vm, Structure* structure)
         : Base(vm, structure)
-        , m_size(0)
+        , m_keyCount(0)
         , m_deleteCount(0)
         , m_capacity(4)
     {
@@ -391,8 +396,10 @@
         newTail->setPrev(vm, newEntry);
         newTail->setDeleted(true);
         newEntry->setNext(vm, newTail);
-        uint32_t newSize = ++m_size;
-        if (2*newSize > m_capacity)
+
+        ++m_keyCount;
+
+        if (shouldRehashAfterAdd())
             rehash(exec);
     }
 
@@ -410,21 +417,25 @@
 
         *bucket = deletedValue();
 
-        m_deleteCount++;
+        ++m_deleteCount;
+        ASSERT(m_keyCount > 0);
+        --m_keyCount;
 
+        if (shouldShrink())
+            rehash(exec);
+
         return true;
     }
 
     ALWAYS_INLINE uint32_t size() const
     {
-        RELEASE_ASSERT(m_size >= m_deleteCount);
-        return m_size - m_deleteCount;
+        return m_keyCount;
     }
 
     ALWAYS_INLINE void clear(ExecState* exec)
     {
         VM& vm = exec->vm();
-        m_size = 0;
+        m_keyCount = 0;
         m_deleteCount = 0;
         HashMapBucketType* head = m_head.get();
         HashMapBucketType* bucket = m_head->next();
@@ -433,6 +444,7 @@
             HashMapBucketType* next = bucket->next();
             // We restart each iterator by pointing it to the head of the list.
             bucket->setNext(vm, head);
+            bucket->setDeleted(true);
             bucket = next;
         }
         m_head->setNext(vm, m_tail.get());
@@ -439,6 +451,7 @@
         m_tail->setPrev(vm, m_head.get());
         m_capacity = 4;
         makeAndSetNewBuffer(exec, vm);
+        checkConsistency();
     }
 
     ALWAYS_INLINE size_t bufferSizeInBytes() const
@@ -464,11 +477,21 @@
         size_t size = sizeof(HashMapImpl);
         size += bufferSizeInBytes();
         size += 2 * sizeof(HashMapBucketType); // Head and tail members.
-        size += (m_deleteCount + m_size) * sizeof(HashMapBucketType); // Number of members on the list.
+        size += m_keyCount * sizeof(HashMapBucketType); // Number of members that are on the list.
         return size;
     }
 
 private:
+    ALWAYS_INLINE uint32_t shouldRehashAfterAdd() const
+    {
+        return 2 * (m_keyCount + m_deleteCount) >= m_capacity;
+    }
+
+    ALWAYS_INLINE uint32_t shouldShrink() const
+    {
+        return 8 * m_keyCount <= m_capacity && m_capacity > 4;
+    }
+
     ALWAYS_INLINE HashMapBucketType** findBucketAlreadyHashedAndNormalized(ExecState* exec, JSValue key, uint32_t hash)
     {
         const uint32_t mask = m_capacity - 1;
@@ -490,11 +513,35 @@
         VM& vm = exec->vm();
         auto scope = DECLARE_THROW_SCOPE(vm);
 
-        m_capacity = m_capacity * 2;
-        makeAndSetNewBuffer(exec, vm);
-        if (UNLIKELY(scope.exception()))
-            return;
+        uint32_t oldCapacity = m_capacity;
+        if (shouldShrink()) {
+            m_capacity = m_capacity / 2;
+            ASSERT(m_capacity >= 4);
+        } else if (3 * m_keyCount <= m_capacity && m_capacity > 64) {
+            // We stay at the same size if rehashing would cause us to be no more than
+            // 1/3rd full. This comes up for programs like this:
+            // Say the hash table grew to a key count of 64, causing it to grow to a capacity of 256.
+            // Then, the table added 63 items. The load is now 127. Then, 63 items are deleted.
+            // The load is still 127. Then, another item is added. The load is now 128, and we
+            // decide that we need to rehash. The key count is 65, almost exactly what it was
+            // when we grew to a capacity of 256. We don't really need to grow to a capacity
+            // of 512 in this situation. Instead, we choose to rehash at the same size. This
+            // will bring the load down to 65. We rehash into the same size when we determine
+            // that the new load ratio will be under 1/3rd. (We also pick a minimum capacity
+            // at which this rule kicks in because otherwise we will be too sensitive to rehashing
+            // at the same capacity).
+        } else
+            m_capacity = (Checked<uint32_t>(m_capacity) * 2).unsafeGet();
 
+        if (m_capacity != oldCapacity) {
+            makeAndSetNewBuffer(exec, vm);
+            if (UNLIKELY(scope.exception()))
+                return;
+        } else {
+            m_buffer.get()->reset(m_capacity);
+            assertBufferIsEmpty();
+        }
+
         HashMapBucketType* iter = m_head->next();
         HashMapBucketType* end = m_tail.get();
         const uint32_t mask = m_capacity - 1;
@@ -513,8 +560,26 @@
             buffer[index] = iter;
             iter = iter->next();
         }
+
+        m_deleteCount = 0;
+
+        checkConsistency();
     }
 
+    ALWAYS_INLINE void checkConsistency() const
+    {
+        if (!ASSERT_DISABLED) {
+            HashMapBucketType* iter = m_head->next();
+            HashMapBucketType* end = m_tail.get();
+            uint32_t size = 0;
+            while (iter != end) {
+                ++size;
+                iter = iter->next();
+            }
+            ASSERT(size == m_keyCount);
+        }
+    }
+
     void makeAndSetNewBuffer(ExecState* exec, VM& vm)
     {
         ASSERT(!(m_capacity & (m_capacity - 1)));
@@ -524,9 +589,14 @@
             return;
 
         m_buffer.set(vm, this, buffer);
+        assertBufferIsEmpty();
+    }
+
+    ALWAYS_INLINE void assertBufferIsEmpty() const
+    {
         if (!ASSERT_DISABLED) {
             for (unsigned i = 0; i < m_capacity; i++)
-                ASSERT(isEmpty(this->buffer()[i]));
+                ASSERT(isEmpty(buffer()[i]));
         }
     }
 
@@ -533,7 +603,7 @@
     WriteBarrier<HashMapBucketType> m_head;
     WriteBarrier<HashMapBucketType> m_tail;
     AuxiliaryBarrier<HashMapBufferType*> m_buffer;
-    uint32_t m_size;
+    uint32_t m_keyCount;
     uint32_t m_deleteCount;
     uint32_t m_capacity;
 };
_______________________________________________
webkit-changes mailing list
webkit-changes@lists.webkit.org
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to