Author: Armin Rigo <[email protected]>
Branch: 
Changeset: r58529:a18b9bbc6530
Date: 2012-10-28 08:11 +0100
http://bitbucket.org/pypy/pypy/changeset/a18b9bbc6530/

Log:    A bug and fix in the dict insertion code. Interestingly, CPython
        has the same bug until changes in 3.3 fixed it.

diff --git a/pypy/rpython/lltypesystem/lltype.py 
b/pypy/rpython/lltypesystem/lltype.py
--- a/pypy/rpython/lltypesystem/lltype.py
+++ b/pypy/rpython/lltypesystem/lltype.py
@@ -1651,10 +1651,7 @@
         if n < 0:
             raise ValueError, "negative array length"
         _parentable.__init__(self, TYPE)
-        try:
-            myrange = range(n)
-        except OverflowError:
-            raise MemoryError("definitely too many items")
+        myrange = self._check_range(n)
         self.items = [TYPE.OF._allocate(initialization=initialization,
                                         parent=self, parentindex=j)
                       for j in myrange]
@@ -1664,6 +1661,14 @@
     def __repr__(self):
         return '<%s>' % (self,)
 
+    def _check_range(self, n):
+        # checks that it's ok to make an array of size 'n', and returns
+        # range(n).  Explicitly overridden by some tests.
+        try:
+            return range(n)
+        except OverflowError:
+            raise MemoryError("definitely too many items")
+
     def _str_item(self, item):
         if isinstance(item, _uninitialized):
             return '#'
diff --git a/pypy/rpython/lltypesystem/rdict.py 
b/pypy/rpython/lltypesystem/rdict.py
--- a/pypy/rpython/lltypesystem/rdict.py
+++ b/pypy/rpython/lltypesystem/rdict.py
@@ -4,6 +4,7 @@
      rtype_newdict)
 from pypy.rpython.lltypesystem import lltype
 from pypy.rlib import objectmodel, jit
+from pypy.rlib.debug import ll_assert
 from pypy.rlib.rarithmetic import r_uint, intmask, LONG_BIT
 from pypy.rpython import rmodel
 from pypy.rpython.error import TyperError
@@ -462,22 +463,30 @@
 def _ll_dict_setitem_lookup_done(d, key, value, hash, i):
     valid = (i & HIGHEST_BIT) == 0
     i = i & MASK
-    everused = d.entries.everused(i)
-    # set up the new entry
     ENTRY = lltype.typeOf(d.entries).TO.OF
     entry = d.entries[i]
-    entry.value = value
-    if valid:
-        return
+    if not d.entries.everused(i):
+        # a new entry that was never used before
+        ll_assert(not valid, "valid but not everused")
+        rc = d.resize_counter - 3
+        if rc <= 0:       # if needed, resize the dict -- before the insertion
+            ll_dict_resize(d)
+            i = ll_dict_lookup_clean(d, hash)  # then redo the lookup for 'key'
+            entry = d.entries[i]
+            rc = d.resize_counter - 3
+            ll_assert(rc > 0, "ll_dict_resize failed?")
+        d.resize_counter = rc
+        if hasattr(ENTRY, 'f_everused'): entry.f_everused = True
+        entry.value = value
+    else:
+        # override an existing or deleted entry
+        entry.value = value
+        if valid:
+            return
     entry.key = key
     if hasattr(ENTRY, 'f_hash'):  entry.f_hash = hash
     if hasattr(ENTRY, 'f_valid'): entry.f_valid = True
     d.num_items += 1
-    if not everused:
-        if hasattr(ENTRY, 'f_everused'): entry.f_everused = True
-        d.resize_counter -= 3
-        if d.resize_counter <= 0:
-            ll_dict_resize(d)
 
 def ll_dict_insertclean(d, key, value, hash):
     # Internal routine used by ll_dict_resize() to insert an item which is
@@ -534,8 +543,9 @@
     # make a 'new_size' estimate and shrink it if there are many
     # deleted entry markers.  See CPython for why it is a good idea to
     # quadruple the dictionary size as long as it's not too big.
-    if d.num_items > 50000: new_estimate = d.num_items * 2
-    else:                   new_estimate = d.num_items * 4
+    num_items = d.num_items + 1
+    if num_items > 50000: new_estimate = num_items * 2
+    else:                 new_estimate = num_items * 4
     new_size = DICT_INITSIZE
     while new_size <= new_estimate:
         new_size *= 2
diff --git a/pypy/rpython/test/test_rdict.py b/pypy/rpython/test/test_rdict.py
--- a/pypy/rpython/test/test_rdict.py
+++ b/pypy/rpython/test/test_rdict.py
@@ -970,6 +970,39 @@
         DICT = lltype.typeOf(llres.item1)
         assert sorted(DICT.TO.entries.TO.OF._flds) == ['f_hash', 'key', 
'value']
 
+    def test_memoryerror_should_not_insert(self):
+        # This shows a misbehaviour that also exists in CPython 2.7, but not
+        # any more in CPython 3.3.  The behaviour is that even if a dict
+        # insertion raises MemoryError, the new item is still inserted.
+        # If we catch the MemoryError, we can keep inserting new items until
+        # the dict table is completely full.  Then the next insertion loops
+        # forever.  This test only checks that after a MemoryError the
+        # new item was not inserted.
+        def _check_small_range(self, n):
+            if n >= 128:
+                raise MemoryError
+            return range(n)
+        original_check_range = lltype._array._check_range
+        try:
+            lltype._array._check_range = _check_small_range
+            #
+            def do_insert(d, i):
+                d[i] = i
+            def func():
+                d = {}
+                i = 0
+                while True:
+                    try:
+                        do_insert(d, i)
+                    except MemoryError:
+                        return (i in d)
+                    i += 1
+            res = self.interpret(func, [])
+            assert res == 0
+            #
+        finally:
+            lltype._array._check_range = original_check_range
+
     # ____________________________________________________________
 
 
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to