Author: Armin Rigo <[email protected]>
Branch: 
Changeset: r72330:0a347de43469
Date: 2014-07-03 17:52 +0200
http://bitbucket.org/pypy/pypy/changeset/0a347de43469/

Log:    For dict.update(), pre-scale the dictionary from RPython code in
        dictmultiobject.py.

diff --git a/pypy/objspace/std/dictmultiobject.py 
b/pypy/objspace/std/dictmultiobject.py
--- a/pypy/objspace/std/dictmultiobject.py
+++ b/pypy/objspace/std/dictmultiobject.py
@@ -1,8 +1,9 @@
 """The builtin dict implementation"""
 
-from rpython.rlib import jit, rerased
+from rpython.rlib import jit, rerased, objectmodel
 from rpython.rlib.debug import mark_dict_non_null
 from rpython.rlib.objectmodel import newlist_hint, r_dict, specialize
+from rpython.rlib.unroll import SpecTag
 from rpython.tool.sourcetools import func_renamer, func_with_new_name
 
 from pypy.interpreter.baseobjspace import W_Root
@@ -509,6 +510,9 @@
                 break
             w_updatedict.setitem(w_key, w_value)
 
+    def prepare_update(self, w_dict, num_extra):
+        pass
+
 
 class EmptyDictStrategy(DictStrategy):
     erase, unerase = rerased.new_erasing_pair("empty")
@@ -748,20 +752,32 @@
     @jit.look_inside_iff(lambda self, w_dict, w_updatedict:
                          w_dict_unrolling_heuristic(w_dict))
     def rev_update1_dict_dict(self, w_dict, w_updatedict):
+        # the logic is to call prepare_dict_update() after the first setitem():
+        # it gives the w_updatedict a chance to switch its strategy.
         if override_next_item is not None:
             # this is very similar to the general version, but the difference
             # is that it is specialized to call a specific next_item()
             iteritems = IterClassItems(self.space, self, w_dict)
+            spec = _SPEC1
             while True:
                 w_key, w_value = iteritems.next_item()
                 if w_key is None:
                     break
                 w_updatedict.setitem(w_key, w_value)
+                if spec is _SPEC1:
+                    spec = _SPEC2
+                    w_updatedict.strategy.prepare_update(w_updatedict,
+                                                         w_dict.length() - 1)
         else:
+            spec = _SPEC1
             for key, value in self.getiteritems(w_dict):
                 w_key = wrapkey(self.space, key)
                 w_value = wrapvalue(self.space, value)
                 w_updatedict.setitem(w_key, w_value)
+                if spec is _SPEC1:
+                    spec = _SPEC2
+                    w_updatedict.strategy.prepare_update(w_updatedict,
+                                                         w_dict.length() - 1)
 
     dictimpl.iterkeys = iterkeys
     dictimpl.itervalues = itervalues
@@ -770,6 +786,9 @@
 
 create_iterator_classes(EmptyDictStrategy)
 
+_SPEC1 = SpecTag()
+_SPEC2 = SpecTag()
+
 
 # concrete subclasses of the above
 
@@ -884,6 +903,10 @@
     def getiteritems(self, w_dict):
         return self.unerase(w_dict.dstorage).iteritems()
 
+    def prepare_update(self, w_dict, num_extra):
+        objectmodel.prepare_dict_update(self.unerase(w_dict.dstorage),
+                                        num_extra)
+
 
 class ObjectDictStrategy(AbstractTypedStrategy, DictStrategy):
     erase, unerase = rerased.new_erasing_pair("object")
diff --git a/rpython/annotator/unaryop.py b/rpython/annotator/unaryop.py
--- a/rpython/annotator/unaryop.py
+++ b/rpython/annotator/unaryop.py
@@ -388,6 +388,9 @@
             return SomeImpossibleValue()
         dct1.dictdef.union(dct2.dictdef)
 
+    def method__prepare_dict_update(dct, num):
+        pass
+
     def method_keys(self):
         return getbookkeeper().newlist(self.dictdef.read_key())
 
diff --git a/rpython/rlib/objectmodel.py b/rpython/rlib/objectmodel.py
--- a/rpython/rlib/objectmodel.py
+++ b/rpython/rlib/objectmodel.py
@@ -740,6 +740,14 @@
         return repr(self.key)
 
 
+def prepare_dict_update(dict, n_elements):
+    """RPython hint that the given dict (or r_dict) will soon be
+    enlarged by n_elements."""
+    if we_are_translated():
+        dict._prepare_dict_update(n_elements)
+        # ^^ call an extra method that doesn't exist before translation
+
+
 # ____________________________________________________________
 
 def import_from_mixin(M, special_methods=['__init__', '__del__']):
diff --git a/rpython/rlib/test/test_objectmodel.py 
b/rpython/rlib/test/test_objectmodel.py
--- a/rpython/rlib/test/test_objectmodel.py
+++ b/rpython/rlib/test/test_objectmodel.py
@@ -321,6 +321,14 @@
         res = self.interpret(g, [3])
         assert res == 77
 
+    def test_prepare_dict_update(self):
+        def g(n):
+            d = {}
+            prepare_dict_update(d, n)
+            return 42
+        res = self.interpret(g, [3])
+        assert res == 42     # "did not crash"
+
     def test_compute_hash(self):
         class Foo(object):
             pass
diff --git a/rpython/rtyper/lltypesystem/rdict.py 
b/rpython/rtyper/lltypesystem/rdict.py
--- a/rpython/rtyper/lltypesystem/rdict.py
+++ b/rpython/rtyper/lltypesystem/rdict.py
@@ -286,6 +286,11 @@
         hop.exception_cannot_occur()
         return hop.gendirectcall(ll_update, v_dic1, v_dic2)
 
+    def rtype_method__prepare_dict_update(self, hop):
+        v_dict, v_num = hop.inputargs(self, lltype.Signed)
+        hop.exception_cannot_occur()
+        hop.gendirectcall(ll_prepare_dict_update, v_dict, v_num)
+
     def _rtype_method_kvi(self, hop, ll_func):
         v_dic, = hop.inputargs(self)
         r_list = hop.r_result
@@ -543,13 +548,14 @@
     # make a 'new_size' estimate and shrink it if there are many
     # deleted entry markers.  See CPython for why it is a good idea to
     # quadruple the dictionary size as long as it's not too big.
-    num_items = d.num_items + 1
-    if num_items > 50000: new_estimate = num_items * 2
-    else:                 new_estimate = num_items * 4
-    _ll_dict_resize_to(d, new_estimate)
+    # (Quadrupling comes from '(d.num_items + d.num_items + 1) * 2'
+    # as long as num_items is not too large.)
+    num_extra = min(d.num_items + 1, 30000)
+    _ll_dict_resize_to(d, num_extra)
 ll_dict_resize.oopspec = 'dict.resize(d)'
 
-def _ll_dict_resize_to(d, new_estimate):
+def _ll_dict_resize_to(d, num_extra):
+    new_estimate = (d.num_items + num_extra) * 2
     new_size = DICT_INITSIZE
     while new_size <= new_estimate:
         new_size *= 2
@@ -818,16 +824,7 @@
 ll_clear.oopspec = 'dict.clear(d)'
 
 def ll_update(dic1, dic2):
-    # Prescale 'dic1', assuming that most items don't collide.
-    # If this assumption is false, 'dic1' becomes at most two times too large.
-    #  * dic2.num_items = upper bound on the number of items added
-    #  * (dic1.resize_counter - 1) // 3 = room left in dic1
-    #  so, if dic2 has 1 item, we need dic1.resize_counter > 3
-    #      if dic2 has 2 items we need dic1.resize_counter > 6  etc.
-    if not (dic1.resize_counter > dic2.num_items * 3):
-        new_estimate = (dic1.num_items + dic2.num_items) * 2
-        _ll_dict_resize_to(dic1, new_estimate)
-    #
+    ll_prepare_dict_update(dic1, dic2.num_items)
     entries = dic2.entries
     d2len = len(entries)
     i = 0
@@ -842,6 +839,16 @@
         i += 1
 ll_update.oopspec = 'dict.update(dic1, dic2)'
 
+def ll_prepare_dict_update(d, num_extra):
+    # Prescale 'd' for 'num_extra' items, assuming that most items don't
+    # collide.  If this assumption is false, 'd' becomes too large by at
+    # most 'num_extra'.  The logic is based on:
+    #      (d.resize_counter - 1) // 3 = room left in d
+    #  so, if num_extra == 1, we need d.resize_counter > 3
+    #      if num_extra == 2, we need d.resize_counter > 6  etc.
+    jit.conditional_call(d.resize_counter <= num_extra * 3,
+                         _ll_dict_resize_to, d, num_extra)
+
 # this is an implementation of keys(), values() and items()
 # in a single function.
 # note that by specialization on func, three different
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to