Author: Armin Rigo <[email protected]>
Branch:
Changeset: r58529:a18b9bbc6530
Date: 2012-10-28 08:11 +0100
http://bitbucket.org/pypy/pypy/changeset/a18b9bbc6530/
Log: A bug and fix in the dict insertion code. Interestingly, CPython
has the same bug until changes in 3.3 fixed it.
diff --git a/pypy/rpython/lltypesystem/lltype.py
b/pypy/rpython/lltypesystem/lltype.py
--- a/pypy/rpython/lltypesystem/lltype.py
+++ b/pypy/rpython/lltypesystem/lltype.py
@@ -1651,10 +1651,7 @@
if n < 0:
raise ValueError, "negative array length"
_parentable.__init__(self, TYPE)
- try:
- myrange = range(n)
- except OverflowError:
- raise MemoryError("definitely too many items")
+ myrange = self._check_range(n)
self.items = [TYPE.OF._allocate(initialization=initialization,
parent=self, parentindex=j)
for j in myrange]
@@ -1664,6 +1661,14 @@
def __repr__(self):
return '<%s>' % (self,)
+ def _check_range(self, n):
+ # checks that it's ok to make an array of size 'n', and returns
+ # range(n). Explicitly overridden by some tests.
+ try:
+ return range(n)
+ except OverflowError:
+ raise MemoryError("definitely too many items")
+
def _str_item(self, item):
if isinstance(item, _uninitialized):
return '#'
diff --git a/pypy/rpython/lltypesystem/rdict.py
b/pypy/rpython/lltypesystem/rdict.py
--- a/pypy/rpython/lltypesystem/rdict.py
+++ b/pypy/rpython/lltypesystem/rdict.py
@@ -4,6 +4,7 @@
rtype_newdict)
from pypy.rpython.lltypesystem import lltype
from pypy.rlib import objectmodel, jit
+from pypy.rlib.debug import ll_assert
from pypy.rlib.rarithmetic import r_uint, intmask, LONG_BIT
from pypy.rpython import rmodel
from pypy.rpython.error import TyperError
@@ -462,22 +463,30 @@
def _ll_dict_setitem_lookup_done(d, key, value, hash, i):
valid = (i & HIGHEST_BIT) == 0
i = i & MASK
- everused = d.entries.everused(i)
- # set up the new entry
ENTRY = lltype.typeOf(d.entries).TO.OF
entry = d.entries[i]
- entry.value = value
- if valid:
- return
+ if not d.entries.everused(i):
+ # a new entry that was never used before
+ ll_assert(not valid, "valid but not everused")
+ rc = d.resize_counter - 3
+ if rc <= 0: # if needed, resize the dict -- before the insertion
+ ll_dict_resize(d)
+ i = ll_dict_lookup_clean(d, hash) # then redo the lookup for 'key'
+ entry = d.entries[i]
+ rc = d.resize_counter - 3
+ ll_assert(rc > 0, "ll_dict_resize failed?")
+ d.resize_counter = rc
+ if hasattr(ENTRY, 'f_everused'): entry.f_everused = True
+ entry.value = value
+ else:
+ # override an existing or deleted entry
+ entry.value = value
+ if valid:
+ return
entry.key = key
if hasattr(ENTRY, 'f_hash'): entry.f_hash = hash
if hasattr(ENTRY, 'f_valid'): entry.f_valid = True
d.num_items += 1
- if not everused:
- if hasattr(ENTRY, 'f_everused'): entry.f_everused = True
- d.resize_counter -= 3
- if d.resize_counter <= 0:
- ll_dict_resize(d)
def ll_dict_insertclean(d, key, value, hash):
# Internal routine used by ll_dict_resize() to insert an item which is
@@ -534,8 +543,9 @@
# make a 'new_size' estimate and shrink it if there are many
# deleted entry markers. See CPython for why it is a good idea to
# quadruple the dictionary size as long as it's not too big.
- if d.num_items > 50000: new_estimate = d.num_items * 2
- else: new_estimate = d.num_items * 4
+ num_items = d.num_items + 1
+ if num_items > 50000: new_estimate = num_items * 2
+ else: new_estimate = num_items * 4
new_size = DICT_INITSIZE
while new_size <= new_estimate:
new_size *= 2
diff --git a/pypy/rpython/test/test_rdict.py b/pypy/rpython/test/test_rdict.py
--- a/pypy/rpython/test/test_rdict.py
+++ b/pypy/rpython/test/test_rdict.py
@@ -970,6 +970,39 @@
DICT = lltype.typeOf(llres.item1)
assert sorted(DICT.TO.entries.TO.OF._flds) == ['f_hash', 'key',
'value']
+ def test_memoryerror_should_not_insert(self):
+ # This shows a misbehaviour that also exists in CPython 2.7, but not
+ # any more in CPython 3.3. The behaviour is that even if a dict
+ # insertion raises MemoryError, the new item is still inserted.
+ # If we catch the MemoryError, we can keep inserting new items until
+ # the dict table is completely full. Then the next insertion loops
+ # forever. This test only checks that after a MemoryError the
+ # new item was not inserted.
+ def _check_small_range(self, n):
+ if n >= 128:
+ raise MemoryError
+ return range(n)
+ original_check_range = lltype._array._check_range
+ try:
+ lltype._array._check_range = _check_small_range
+ #
+ def do_insert(d, i):
+ d[i] = i
+ def func():
+ d = {}
+ i = 0
+ while True:
+ try:
+ do_insert(d, i)
+ except MemoryError:
+ return (i in d)
+ i += 1
+ res = self.interpret(func, [])
+ assert res == 0
+ #
+ finally:
+ lltype._array._check_range = original_check_range
+
# ____________________________________________________________
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit