[pypy-commit] pypy rdict-experiments-2: work some more

fijal Sun, 06 Jan 2013 18:03:04 -0800

Author: Maciej Fijalkowski <[email protected]>
Branch: rdict-experiments-2
Changeset: r59836:b1a3be05b6c4
Date: 2013-01-07 03:58 +0200
http://bitbucket.org/pypy/pypy/changeset/b1a3be05b6c4/


Log:    work some more

diff --git a/pypy/rpython/lltypesystem/rdict.py b/pypy/rpython/lltypesystem/rdict.py
--- a/pypy/rpython/lltypesystem/rdict.py
+++ b/pypy/rpython/lltypesystem/rdict.py
@@ -2,7 +2,7 @@
 from pypy.objspace.flow.model import Constant
 from pypy.rpython.rdict import (AbstractDictRepr, AbstractDictIteratorRepr,
                                 rtype_newdict)
-from pypy.rpython.lltypesystem import lltype
+from pypy.rpython.lltypesystem import lltype, llmemory, rffi
 from pypy.rpython.lltypesystem.lloperation import llop
 from pypy.rlib import objectmodel, jit, rgc
 from pypy.rlib.debug import ll_assert
@@ -31,6 +31,7 @@
 #
 #    struct dicttable {
 #        int num_items;
+#        int size;
 #        int resize_counter;
 #        int *indexes; # note that this can be different int
 #        Array *entries;
@@ -78,15 +79,12 @@
     DICTENTRY = lltype.Struct("dictentry", *entryfields)
     DICTENTRYARRAY = lltype.GcArray(DICTENTRY,
                                     adtmeths=entrymeths)
-    array_adtmeths = {'allocate': lltype.typeMethod(_ll_malloc_indexes),
-                      'getitem': ll_dict_index_getitem,
-                      'setitem': ll_dict_index_setitem}
     
     fields = [("num_items", lltype.Signed),
+              ("size", lltype.Signed),
               ("resize_counter", lltype.Signed),
               ("entries", lltype.Ptr(DICTENTRYARRAY)),
-              ("indexes", lltype.Ptr(lltype.GcArray(lltype.Signed,
-                                     adtmeths=array_adtmeths)))]
+              ("indexes", DICTINDEXOPAQUE)]
     if get_custom_eq_hash is not None:
         r_rdict_eqfn, r_rdict_hashfn = get_custom_eq_hash()
         fields.extend([ ("fnkeyeq", r_rdict_eqfn.lowleveltype),
@@ -340,6 +338,40 @@
 #  be direct_call'ed from rtyped flow graphs, which means that they will
 #  get flowed and annotated, mostly with SomePtr.
 
+
+# ------------------ indexes ----------------
+
+DICTINDEXOPAQUE = llmemory.GCREF
+DICTINDEX_SIGNED = lltype.Ptr(lltype.GcArray(lltype.Signed))
+DICTINDEX_INT = lltype.Ptr(lltype.GcArray(rffi.INT_real))
+
+def _ll_malloc_indexes(n):
+    res = lltype.malloc(DICTINDEX_SIGNED.TO, n)
+    res = lltype.cast_opaque_ptr(DICTINDEXOPAQUE, res)
+    i = 0
+    while i < n:
+        ll_index_setitem(n, res, i, FREE)
+        i += 1
+    return res
+
+def ll_index_getitem(size, indexes, i):
+    return lltype.cast_opaque_ptr(DICTINDEX_SIGNED, indexes)[i]
+
+def ll_index_setitem(size, indexes, i, v):
+    lltype.cast_opaque_ptr(DICTINDEX_SIGNED, indexes)[i] = v
+
+def ll_dict_copy_indexes(size, from_indexes, to_indexes):
+    i = 0
+    while i < size:
+        ll_index_setitem(size, to_indexes, i,
+                         ll_index_getitem(size, from_indexes, i))
+        i += 1
+
+def ll_get_value(d, i):
+    return d.entries[ll_index_getitem(d.size, d.indexes, i)].value
+
+# ---------------- hashes ------------------
+
 def ll_hash_from_cache(entries, i):
     return entries[i].f_hash
 
@@ -350,21 +382,6 @@
     ENTRIES = lltype.typeOf(entries).TO
     return ENTRIES.fasthashfn(entries[i].key)
 
-def ll_dict_index_getitem(indexes, i):
-    return indexes[i]
-
-def ll_dict_index_setitem(indexes, i, v):
-    indexes[i] = v
-
-def ll_dict_copy_indexes(from_indexes, to_indexes):
-    i = 0
-    while i < len(from_indexes):
-        to_indexes.setitem(i, from_indexes.getitem(i))
-        i += 1
-
-def ll_get_value(d, i):
-    return d.entries[d.indexes.getitem(i)].value
-
 def ll_keyhash_custom(d, key):
     DICT = lltype.typeOf(d).TO
     return objectmodel.hlinvoke(DICT.r_rdict_hashfn, d.fnkeyhash, key)
@@ -403,7 +420,7 @@
     valid = (i & HIGHEST_BIT) == 0
     i = i & MASK
     ENTRY = lltype.typeOf(d.entries).TO.OF
-    index = d.indexes.getitem(i)
+    index = ll_index_getitem(d.size, d.indexes, i)
     if index == FREE:
         index = d.num_items
         entry = d.entries[index]
@@ -419,12 +436,12 @@
             ll_assert(rc > 0, "ll_dict_resize failed?")
         ll_assert(index < len(d.entries), "invalid insert")
         d.resize_counter = rc
-        d.indexes.setitem(i, index)
+        ll_index_setitem(d.size, d.indexes, i, index)
         entry.value = value
     elif index == DELETED:
         index = d.num_items
         entry = d.entries[index]        
-        d.indexes.setitem(i, index)
+        ll_index_setitem(d.size, d.indexes, i, index)
         entry.value = value
     else:
         # override an existing or deleted entry
@@ -444,7 +461,7 @@
 
 @jit.look_inside_iff(lambda d, i: jit.isvirtual(d) and jit.isconstant(i))
 def _ll_dict_del(d, i):
-    index = d.indexes.getitem(i)
+    index = ll_index_getitem(d.size, d.indexes, i)
     ENTRIES = lltype.typeOf(d.entries).TO
     ENTRY = ENTRIES.OF
     if index != d.num_items - 1:
@@ -452,7 +469,7 @@
         key = old_entry.key
         to_insert_i = ll_dict_lookup(d, key, d.keyhash(key))
         ll_assert(not to_insert_i & HIGHEST_BIT, "invalid entry")
-        d.indexes.setitem(to_insert_i, index)
+        ll_index_setitem(d.size, d.indexes, to_insert_i, index)
         # copy the value
         new_entry = d.entries[index]
         new_entry.key = key
@@ -460,7 +477,7 @@
         if hasattr(ENTRY, 'f_hash'):
             new_entry.f_hash = old_entry.f_hash
     # clear the key and the value if they are GC pointers
-    d.indexes.setitem(i, DELETED)
+    ll_index_setitem(d.size, d.indexes, i, DELETED)
     d.num_items -= 1
     entry = d.entries[d.num_items]
     if ENTRIES.clear_key:
@@ -484,7 +501,7 @@
 def ll_dict_resize(d):
     old_entries = d.entries
     old_indexes = d.indexes
-    old_size = len(old_indexes)
+    old_size = d.size
     # make a 'new_size' estimate and shrink it if there are many
     # deleted entry markers.  See CPython for why it is a good idea to
     # quadruple the dictionary size as long as it's not too big.
@@ -496,15 +513,16 @@
         new_size *= 2
     #
     new_item_size = new_size // 3 * 2 + 1
-    d.indexes = lltype.typeOf(d).TO.indexes.TO.allocate(new_size)
+    d.indexes = _ll_malloc_indexes(new_size)
     d.resize_counter = new_item_size - d.num_items
+
     i = 0
     indexes = d.indexes
     while i < old_size:
-        index = old_indexes.getitem(i)
+        index = ll_index_getitem(old_size, old_indexes, i)
         if old_entries.valid(index):
             pos = ll_dict_lookup_clean(d, old_entries.hash(index))
-            indexes.setitem(pos, index)
+            ll_index_setitem(new_size, indexes, pos, index)
         i += 1
     if len(old_entries) != new_item_size:
         d.entries = lltype.typeOf(old_entries).TO.allocate(new_item_size)
@@ -513,6 +531,7 @@
     else:
         # we just removed deleted items, but we didn't do anything else special
         d.entries = old_entries
+    d.size = new_size
 ll_dict_resize.oopspec = 'dict.resize(d)'
 
 # ------- a port of CPython's dictobject.c's lookdict implementation -------
@@ -532,12 +551,12 @@
 def ll_dict_lookup(d, key, hash):
     entries = d.entries
     indexes = d.indexes
-    mask = len(indexes) - 1
+    mask = d.size - 1
     i = hash & mask
     # do the first try before any looping
     ENTRIES = lltype.typeOf(entries).TO
     direct_compare = not hasattr(ENTRIES, 'no_direct_compare')
-    index = indexes.getitem(i)
+    index = ll_index_getitem(d.size, indexes, i)
     if entries.valid(index):
         checkingkey = entries[index].key
         if direct_compare and checkingkey == key:
@@ -549,7 +568,7 @@
             #llop.debug_print(lltype.Void, "comparing keys", ll_debugrepr(checkingkey), ll_debugrepr(key), found)
             if d.paranoia:
                 if (entries != d.entries or
-                    not entries.valid(indexes.getitem(i))
+                    not entries.valid(ll_index_getitem(d.size, indexes, i))
                     or entries[index].key != checkingkey):
                     # the compare did major nasty stuff to the dict: start over
                     return ll_dict_lookup(d, key, hash)
@@ -569,7 +588,7 @@
         i = r_uint(i)
         i = (i << 2) + i + perturb + 1
         i = intmask(i) & mask
-        index = indexes.getitem(i)
+        index = ll_index_getitem(d.size, indexes, i)
         # keep 'i' as a signed number here, to consistently pass signed
         # arguments to the small helper methods.
         if index == FREE:
@@ -586,7 +605,7 @@
                 found = d.keyeq(checkingkey, key)
                 if d.paranoia:
                     if (entries != d.entries or
-                        not entries.valid(indexes.getitem(i)) or
+                        not entries.valid(ll_index_getitem(d.size, indexes, i)) or
                         entries[index].key != checkingkey):
                         # the compare did major nasty stuff to the dict:
                         # start over
@@ -602,10 +621,10 @@
     # key is new, and the dictionary doesn't contain deleted entries.
     # It only finds the next free slot for the given hash.
     indexes = d.indexes
-    mask = len(indexes) - 1
+    mask = d.size - 1
     i = hash & mask
     perturb = r_uint(hash)
-    while indexes.getitem(i) != FREE:
+    while ll_index_getitem(d.size, indexes, i) != FREE:
         i = r_uint(i)
         i = (i << 2) + i + perturb + 1
         i = intmask(i) & mask
@@ -619,15 +638,11 @@
 DICT_INITSIZE = 8
 DICT_ITEMS_INITSIZE = 5
 
-@jit.unroll_safe # we always unroll the small allocation
 def ll_newdict(DICT):
     d = DICT.allocate()
-    # XXX don't use _ll_items_allocate because of jit.unroll_safe,
-    #     should be *really* jit_unroll_iff
-    d.indexes = lltype.malloc(DICT.indexes.TO, DICT_INITSIZE)
+    d.indexes = _ll_malloc_indexes(DICT_INITSIZE)
     d.num_items = 0
-    for i in range(DICT_INITSIZE):
-        d.indexes.setitem(i, FREE)
+    d.size = DICT_INITSIZE
     d.entries = DICT.entries.TO.allocate(DICT_ITEMS_INITSIZE)    
     d.resize_counter = DICT_ITEMS_INITSIZE
     return d
@@ -640,7 +655,8 @@
     items_size = n // 3 * 2 + 1
     d = DICT.allocate()
     d.entries = DICT.entries.TO.allocate(items_size)
-    d.indexes = DICT.indexes.TO.allocate(n)
+    d.indexes = _ll_malloc_indexes(n)
+    d.size = n
     d.num_items = 0
     d.resize_counter = items_size
     return d
@@ -652,13 +668,6 @@
     return lltype.malloc(DICT)
 def _ll_malloc_entries(ENTRIES, n):
     return lltype.malloc(ENTRIES, n, zero=True)
-def _ll_malloc_indexes(ITEMS, n):
-    res = lltype.malloc(ITEMS, n)
-    i = 0
-    while i < n:
-        res[i] = FREE
-        i += 1
-    return res
 
 def rtype_r_dict(hop):
     r_dict = hop.r_result
@@ -684,6 +693,7 @@
 def get_ll_dictiter(DICT):
     return lltype.Ptr(lltype.GcStruct('dictiter',
                                       ('dict', DICT),
+                                      ('size', lltype.Signed),
                                       ('index', lltype.Signed)))
 
 class DictIteratorRepr(AbstractDictIteratorRepr):
@@ -700,6 +710,7 @@
     iter = lltype.malloc(ITERPTR.TO)
     iter.dict = d
     iter.index = 0
+    iter.size = d.size
     return iter
 
 def _make_ll_dictnext(kind):
@@ -711,6 +722,8 @@
     def ll_dictnext(RETURNTYPE, iter):
         # note that RETURNTYPE is None for keys and values
         dict = iter.dict
+        if dict.size != iter.size:
+            raise RuntimeError
         if not dict or iter.index >= dict.num_items:
             # clear the reference to the dict and prevent restarts
             iter.dict = lltype.nullptr(lltype.typeOf(iter).TO.dict.TO)
@@ -758,24 +771,25 @@
     dictsize = len(dict.entries)
     d = DICT.allocate()
     d.entries = lltype.malloc(DICT.entries.TO, dictsize)
-    d.indexes = DICT.indexes.TO.allocate(len(dict.indexes))
+    d.indexes = _ll_malloc_indexes(dict.size)
     d.num_items = dict.num_items
+    d.size = dict.size
     d.resize_counter = dict.resize_counter
     if hasattr(DICT, 'fnkeyeq'):   d.fnkeyeq   = dict.fnkeyeq
     if hasattr(DICT, 'fnkeyhash'): d.fnkeyhash = dict.fnkeyhash
-    ll_dict_copy_indexes(dict.indexes, d.indexes)
+    ll_dict_copy_indexes(dict.size, dict.indexes, d.indexes)
     #rgc.ll_arraycopy(dict.indexes, d.indexes, 0, 0, len(dict.indexes))
     rgc.ll_arraycopy(dict.entries, d.entries, 0, 0, len(dict.entries))
     return d
 ll_copy.oopspec = 'dict.copy(dict)'
 
 def ll_clear(d):
-    if (len(d.indexes) == DICT_INITSIZE and
+    if (d.size == DICT_INITSIZE and
         d.resize_counter == DICT_ITEMS_INITSIZE):
         return
     old_entries = d.entries
     d.entries = lltype.typeOf(old_entries).TO.allocate(DICT_ITEMS_INITSIZE)
-    d.indexes = lltype.typeOf(d).TO.indexes.TO.allocate(DICT_INITSIZE)
+    d.indexes = _ll_malloc_indexes(DICT_INITSIZE)
     d.num_items = 0
     d.resize_counter = DICT_ITEMS_INITSIZE
 ll_clear.oopspec = 'dict.clear(d)'
diff --git a/pypy/rpython/test/test_rdict.py b/pypy/rpython/test/test_rdict.py
--- a/pypy/rpython/test/test_rdict.py
+++ b/pypy/rpython/test/test_rdict.py
@@ -24,6 +24,12 @@
         yield x
 
 
+def count_items(ll_d, TP):
+    return len([i for i in
+                [rdict.ll_index_getitem(ll_d.size, ll_d.indexes, i)
+                 for i in range(ll_d.size)] if i == TP])
+    
+
 class TestRDictDirect(object):
     def _get_str_dict(self):
         # STR -> lltype.Signed
@@ -37,8 +43,7 @@
         DICT = self._get_str_dict()
         ll_d = rdict.ll_newdict(DICT)
         rdict.ll_dict_setitem(ll_d, llstr("abc"), 13)
-        assert (len([i for i in ll_d.indexes if i == rdict.FREE]) ==
-                rdict.DICT_INITSIZE - 1)
+        assert count_items(ll_d, rdict.FREE) == rdict.DICT_INITSIZE - 1
         assert rdict.ll_dict_getitem(ll_d, llstr("abc")) == 13
 
     def test_dict_del_lastitem(self):
@@ -48,9 +53,8 @@
         rdict.ll_dict_setitem(ll_d, llstr("abc"), 13)
         py.test.raises(KeyError, rdict.ll_dict_delitem, ll_d, llstr("def"))
         rdict.ll_dict_delitem(ll_d, llstr("abc"))
-        assert (len([i for i in ll_d.indexes if i == rdict.FREE]) ==
-                rdict.DICT_INITSIZE - 1)
-        assert (len([i for i in ll_d.indexes if i == rdict.DELETED]) == 1)
+        assert count_items(ll_d, rdict.FREE) == rdict.DICT_INITSIZE - 1
+        assert count_items(ll_d, rdict.DELETED) == 1
         py.test.raises(KeyError, rdict.ll_dict_getitem, ll_d, llstr("abc"))
 
     def test_dict_del_not_lastitem(self):
@@ -59,9 +63,8 @@
         rdict.ll_dict_setitem(ll_d, llstr("abc"), 13)
         rdict.ll_dict_setitem(ll_d, llstr("def"), 15)
         rdict.ll_dict_delitem(ll_d, llstr("abc"))
-        assert (len([i for i in ll_d.indexes if i == rdict.FREE]) ==
-                rdict.DICT_INITSIZE - 2)
-        assert (len([i for i in ll_d.indexes if i == rdict.DELETED]) == 1)
+        assert count_items(ll_d, rdict.FREE) == rdict.DICT_INITSIZE - 2
+        assert count_items(ll_d, rdict.DELETED) == 1
 
     def test_dict_resize(self):
         DICT = self._get_str_dict()
@@ -70,10 +73,10 @@
         rdict.ll_dict_setitem(ll_d, llstr("b"), 2)        
         rdict.ll_dict_setitem(ll_d, llstr("c"), 3)        
         rdict.ll_dict_setitem(ll_d, llstr("d"), 4)         
-        assert len(ll_d.indexes) == 8
+        assert ll_d.size == 8
         rdict.ll_dict_setitem(ll_d, llstr("e"), 5)
         rdict.ll_dict_setitem(ll_d, llstr("f"), 6)
-        assert len(ll_d.indexes) == 32
+        assert ll_d.size == 32
         for item in ['a', 'b', 'c', 'd', 'e', 'f']:
             assert rdict.ll_dict_getitem(ll_d, llstr(item)) == ord(item) - ord('a') + 1
 
@@ -115,8 +118,10 @@
         for num in numbers:
             rdict.ll_dict_setitem(ll_d, num, 1)
             rdict.ll_dict_delitem(ll_d, num)
-            for k in ll_d.indexes:
-                assert k < 0
+            tot = (count_items(ll_d, rdict.FREE) +
+                   count_items(ll_d, rdict.DELETED))
+            assert tot == ll_d.size
+
 
 class BaseTestRdict(BaseRtypingTest):
 
@@ -829,7 +834,11 @@
             return d
 
         res = self.interpret(func2, [ord(x), ord(y)])
-        assert len([i for i in res.indexes if i >= 0]) == 2
+        c = 0
+        for i in range(res.size):
+            if rdict.ll_index_getitem(res.size, res.indexes, i) >= 0:
+                c += 1
+        assert c == 2
 
         def func3(c0, c1, c2, c3, c4, c5, c6, c7):
             d = {}
@@ -847,11 +856,7 @@
             py.test.skip("make dict tests more indepdent from initsize")
         res = self.interpret(func3, [ord(char_by_hash[i][0])
                                    for i in range(rdict.DICT_INITSIZE)])
-        count_frees = 0
-        for i in res.indexes:
-            if i == -1:
-                count_frees += 1
-        assert count_frees >= 3
+        assert count_items(res, rdict.FREE) >= 3
 
     def test_dict_resize(self):
         # XXX we no longer automatically resize on 'del'.  We need to
@@ -870,11 +875,12 @@
                     del d[chr(ord('A') - i)]
             return d
         res = self.interpret(func, [0])
-        assert len(res.indexes) > rdict.DICT_INITSIZE
+        assert res.size > rdict.DICT_INITSIZE
         res = self.interpret(func, [1])
-        assert len(res.indexes) == rdict.DICT_INITSIZE
+        assert res.size == rdict.DICT_INITSIZE
 
     def test_dict_valid_resize(self):
+        py.test.skip("no longer valid, we're a bit too paranoid now")
         # see if we find our keys after resize
         def func():
             d = {}
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy rdict-experiments-2: work some more

Reply via email to