Author: Armin Rigo <[email protected]>
Branch:
Changeset: r58777:6a60ef6bac4e
Date: 2012-11-07 11:07 +0100
http://bitbucket.org/pypy/pypy/changeset/6a60ef6bac4e/
Log: Tweak the gc.get_*() functions to give them hopefully reasonable
performance, based on the new ability to set and clear a flag in the
object headers from RPython.
diff --git a/pypy/module/gc/referents.py b/pypy/module/gc/referents.py
--- a/pypy/module/gc/referents.py
+++ b/pypy/module/gc/referents.py
@@ -45,6 +45,109 @@
return OperationError(space.w_NotImplementedError,
space.wrap("operation not implemented by this GC"))
+# ____________________________________________________________
+
+def clear_gcflag_extra(fromlist):
+ pending = fromlist[:]
+ while pending:
+ gcref = pending.pop()
+ if rgc.get_gcflag_extra(gcref):
+ rgc.toggle_gcflag_extra(gcref)
+ pending.extend(rgc.get_rpy_referents(gcref))
+
+def do_get_objects():
+ roots = [gcref for gcref in rgc.get_rpy_roots() if gcref]
+ pending = roots[:]
+ result_w = []
+ while pending:
+ gcref = pending.pop()
+ if not rgc.get_gcflag_extra(gcref):
+ rgc.toggle_gcflag_extra(gcref)
+ w_obj = try_cast_gcref_to_w_root(gcref)
+ if w_obj is not None:
+ result_w.append(w_obj)
+ pending.extend(rgc.get_rpy_referents(gcref))
+ clear_gcflag_extra(roots)
+ return result_w
+
+# ____________________________________________________________
+
+class PathEntry(object):
+ # PathEntries are nodes of a complete tree of all objects, but
+ # built lazily (there is only one branch alive at any time).
+ # Each node has a 'gcref' and the list of referents from this gcref.
+ def __init__(self, prev, gcref, referents):
+ self.prev = prev
+ self.gcref = gcref
+ self.referents = referents
+ self.remaining = len(referents)
+
+ def get_most_recent_w_obj(self):
+ entry = self
+ while entry is not None:
+ if entry.gcref:
+ w_obj = try_cast_gcref_to_w_root(entry.gcref)
+ if w_obj is not None:
+ return w_obj
+ entry = entry.prev
+ return None
+
+def do_get_referrers(w_arg):
+ result_w = {}
+ gcarg = rgc.cast_instance_to_gcref(w_arg)
+ roots = [gcref for gcref in rgc.get_rpy_roots() if gcref]
+ head = PathEntry(None, rgc.NULL_GCREF, roots)
+ while True:
+ head.remaining -= 1
+ if head.remaining >= 0:
+ gcref = head.referents[head.remaining]
+ if not rgc.get_gcflag_extra(gcref):
+ # not visited so far
+ if gcref == gcarg:
+ w_obj = head.get_most_recent_w_obj()
+ if w_obj is not None:
+ result_w[w_obj] = None # found!
+ rgc.toggle_gcflag_extra(gcref)
+ head = PathEntry(head, gcref, rgc.get_rpy_referents(gcref))
+ else:
+ # no more referents to visit
+ head = head.prev
+ if head is None:
+ break
+ clear_gcflag_extra(roots)
+ return result_w.keys()
+
+# ____________________________________________________________
+
+def _list_w_obj_referents(gcref, result_w):
+ # Get all W_Root reachable directly from gcref, and add them to
+ # the list 'result_w'.
+ pending = [] # = list of all objects whose gcflag was toggled
+ i = 0
+ gcrefparent = gcref
+ while True:
+ for gcref in rgc.get_rpy_referents(gcrefparent):
+ if rgc.get_gcflag_extra(gcref):
+ continue
+ rgc.toggle_gcflag_extra(gcref)
+ pending.append(gcref)
+
+ while i < len(pending):
+ gcrefparent = pending[i]
+ i += 1
+ w_obj = try_cast_gcref_to_w_root(gcrefparent)
+ if w_obj is not None:
+ result_w.append(w_obj)
+ else:
+ break # jump back to the start of the outermost loop
+ else:
+ break # done
+
+ for gcref in pending:
+ rgc.toggle_gcflag_extra(gcref) # reset the gcflag_extra's
+
+# ____________________________________________________________
+
def get_rpy_roots(space):
lst = rgc.get_rpy_roots()
if lst is None:
@@ -79,93 +182,35 @@
raise missing_operation(space)
return space.wrap(index)
-def _list_w_obj_referents(gcref, result_w):
- # Get all W_Root reachable directly from gcref, and add them to
- # the list 'result_w'. The logic here is not robust against gc
- # moves, and may return the same object several times.
- seen = {} # map {current_addr: obj}
- pending = [gcref]
- i = 0
- while i < len(pending):
- gcrefparent = pending[i]
- i += 1
- for gcref in rgc.get_rpy_referents(gcrefparent):
- key = rgc.cast_gcref_to_int(gcref)
- if gcref == seen.get(key, rgc.NULL_GCREF):
- continue # already in 'seen'
- seen[key] = gcref
- w_obj = try_cast_gcref_to_w_root(gcref)
- if w_obj is not None:
- result_w.append(w_obj)
- else:
- pending.append(gcref)
-
-def _get_objects_from_rpy(list_of_gcrefs):
- # given a list of gcrefs that may or may not be W_Roots, build a list
- # of W_Roots obtained by following references from there.
- result_w = [] # <- list of W_Roots
- for gcref in list_of_gcrefs:
- if gcref:
- w_obj = try_cast_gcref_to_w_root(gcref)
- if w_obj is not None:
- result_w.append(w_obj)
- else:
- _list_w_obj_referents(gcref, result_w)
- return result_w
-
def get_objects(space):
"""Return a list of all app-level objects."""
- roots = rgc.get_rpy_roots()
- pending_w = _get_objects_from_rpy(roots)
- # continue by following every W_Root. Note that this will force a hash
- # on every W_Root, which is kind of bad, but not on every RPython object,
- # which is really good.
- result_w = {}
- while len(pending_w) > 0:
- previous_w = pending_w
- pending_w = []
- for w_obj in previous_w:
- if w_obj not in result_w:
- result_w[w_obj] = None
- gcref = rgc.cast_instance_to_gcref(w_obj)
- _list_w_obj_referents(gcref, pending_w)
- return space.newlist(result_w.keys())
+ if not rgc.has_gcflag_extra():
+ raise missing_operation(space)
+ result_w = do_get_objects()
+ rgc.assert_no_more_gcflags()
+ return space.newlist(result_w)
def get_referents(space, args_w):
"""Return a list of objects directly referred to by any of the arguments.
- Approximative: follow references recursively until it finds
- app-level objects. May return several times the same object, too."""
- result = []
+ """
+ if not rgc.has_gcflag_extra():
+ raise missing_operation(space)
+ result_w = []
for w_obj in args_w:
gcref = rgc.cast_instance_to_gcref(w_obj)
- _list_w_obj_referents(gcref, result)
- return space.newlist(result)
+ _list_w_obj_referents(gcref, result_w)
+ rgc.assert_no_more_gcflags()
+ return space.newlist(result_w)
def get_referrers(space, args_w):
"""Return the list of objects that directly refer to any of objs."""
- roots = rgc.get_rpy_roots()
- pending_w = _get_objects_from_rpy(roots)
- arguments_w = {}
- for w_obj in args_w:
- arguments_w[w_obj] = None
- # continue by following every W_Root. Same remark about hashes as
- # in get_objects().
- result_w = {}
- seen_w = {}
- while len(pending_w) > 0:
- previous_w = pending_w
- pending_w = []
- for w_obj in previous_w:
- if w_obj not in seen_w:
- seen_w[w_obj] = None
- gcref = rgc.cast_instance_to_gcref(w_obj)
- referents_w = []
- _list_w_obj_referents(gcref, referents_w)
- for w_subobj in referents_w:
- if w_subobj in arguments_w:
- result_w[w_obj] = None
- pending_w += referents_w
- return space.newlist(result_w.keys())
+ if not rgc.has_gcflag_extra():
+ raise missing_operation(space)
+ result_w = []
+ for w_arg in args_w:
+ result_w += do_get_referrers(w_arg)
+ rgc.assert_no_more_gcflags()
+ return space.newlist(result_w)
@unwrap_spec(fd=int)
def _dump_rpy_heap(space, fd):
diff --git a/pypy/rlib/rgc.py b/pypy/rlib/rgc.py
--- a/pypy/rlib/rgc.py
+++ b/pypy/rlib/rgc.py
@@ -308,6 +308,32 @@
"NOT_RPYTHON"
raise NotImplementedError
+def has_gcflag_extra():
+ "NOT_RPYTHON"
+ return True
+has_gcflag_extra._subopnum = 1
+
+_gcflag_extras = []
+
+def get_gcflag_extra(gcref):
+ "NOT_RPYTHON"
+ assert gcref # not NULL!
+ return gcref in _gcflag_extras # XXX slow
+get_gcflag_extra._subopnum = 2
+
+def toggle_gcflag_extra(gcref):
+ "NOT_RPYTHON"
+ assert gcref # not NULL!
+ try:
+ _gcflag_extras.remove(gcref) # XXX slow
+ except ValueError:
+ _gcflag_extras.append(gcref)
+toggle_gcflag_extra._subopnum = 3
+
+def assert_no_more_gcflags():
+ if not we_are_translated():
+ assert not _gcflag_extras
+
ARRAY_OF_CHAR = lltype.Array(lltype.Char)
NULL_GCREF = lltype.nullptr(llmemory.GCREF.TO)
@@ -476,5 +502,17 @@
hop.exception_is_here()
return hop.genop('gc_typeids_z', [], resulttype = hop.r_result)
+class Entry(ExtRegistryEntry):
+ _about_ = (has_gcflag_extra, get_gcflag_extra, toggle_gcflag_extra)
+ def compute_result_annotation(self, s_arg=None):
+ from pypy.annotation.model import s_Bool
+ return s_Bool
+ def specialize_call(self, hop):
+ subopnum = self.instance._subopnum
+ vlist = [hop.inputconst(lltype.Signed, subopnum)]
+ vlist += hop.inputargs(*hop.args_r)
+ hop.exception_cannot_occur()
+ return hop.genop('gc_gcflag_extra', vlist, resulttype = hop.r_result)
+
def lltype_is_gc(TP):
return getattr(getattr(TP, "TO", None), "_gckind", "?") == 'gc'
diff --git a/pypy/rpython/llinterp.py b/pypy/rpython/llinterp.py
--- a/pypy/rpython/llinterp.py
+++ b/pypy/rpython/llinterp.py
@@ -921,6 +921,9 @@
def op_gc_typeids_z(self):
raise NotImplementedError("gc_typeids_z")
+ def op_gc_gcflag_extra(self, subopnum, *args):
+ return self.heap.gcflag_extra(subopnum, *args)
+
def op_do_malloc_fixedsize_clear(self):
raise NotImplementedError("do_malloc_fixedsize_clear")
diff --git a/pypy/rpython/lltypesystem/lloperation.py
b/pypy/rpython/lltypesystem/lloperation.py
--- a/pypy/rpython/lltypesystem/lloperation.py
+++ b/pypy/rpython/lltypesystem/lloperation.py
@@ -497,6 +497,7 @@
'gc_is_rpy_instance' : LLOp(),
'gc_dump_rpy_heap' : LLOp(),
'gc_typeids_z' : LLOp(),
+ 'gc_gcflag_extra' : LLOp(),
'gc_add_memory_pressure': LLOp(),
# ------- JIT & GC interaction, only for some GCs ----------
diff --git a/pypy/rpython/memory/gc/minimark.py
b/pypy/rpython/memory/gc/minimark.py
--- a/pypy/rpython/memory/gc/minimark.py
+++ b/pypy/rpython/memory/gc/minimark.py
@@ -108,6 +108,9 @@
# collection. See pypy/doc/discussion/finalizer-order.txt
GCFLAG_FINALIZATION_ORDERING = first_gcflag << 4
+# This flag is reserved for RPython.
+GCFLAG_EXTRA = first_gcflag << 5
+
# The following flag is set on externally raw_malloc'ed arrays of pointers.
# They are allocated with some extra space in front of them for a bitfield,
# one bit per 'card_page_indices' indices.
@@ -116,7 +119,6 @@
# note that GCFLAG_CARDS_SET is the most significant bit of a byte:
# this is required for the JIT (x86)
-#GCFLAG_UNUSED = first_gcflag << 5 # this flag is free
TID_MASK = (first_gcflag << 8) - 1
@@ -133,7 +135,7 @@
needs_write_barrier = True
prebuilt_gc_objects_are_static_roots = False
malloc_zero_filled = True # xxx experiment with False
- gcflag_extra = GCFLAG_FINALIZATION_ORDERING
+ gcflag_extra = GCFLAG_EXTRA
# All objects start with a HDR, i.e. with a field 'tid' which contains
# a word. This word is divided in two halves: the lower half contains
diff --git a/pypy/rpython/memory/gcwrapper.py b/pypy/rpython/memory/gcwrapper.py
--- a/pypy/rpython/memory/gcwrapper.py
+++ b/pypy/rpython/memory/gcwrapper.py
@@ -153,6 +153,20 @@
else:
return True
+ def gcflag_extra(self, subopnum, *args):
+ if subopnum == 1: # has_gcflag_extra
+ assert len(args) == 0
+ return self.gc.gcflag_extra != 0
+ assert len(args) == 1
+ addr = llmemory.cast_ptr_to_adr(args[0])
+ hdr = self.gc.header(addr)
+ if subopnum == 3: # toggle_gcflag_extra
+ if hdr.tid & self.gc.gcflag_extra:
+ hdr.tid &= ~self.gc.gcflag_extra
+ else:
+ hdr.tid |= self.gc.gcflag_extra
+ return (hdr.tid & self.gc.gcflag_extra) != 0
+
# ____________________________________________________________
class LLInterpRootWalker:
diff --git a/pypy/rpython/memory/test/test_gc.py
b/pypy/rpython/memory/test/test_gc.py
--- a/pypy/rpython/memory/test/test_gc.py
+++ b/pypy/rpython/memory/test/test_gc.py
@@ -746,6 +746,30 @@
res = self.interpret(fn, [])
assert res == ord('y')
+ def test_gcflag_extra(self):
+ class A:
+ pass
+ a1 = A()
+ def fn():
+ a2 = A()
+ if not rgc.has_gcflag_extra():
+ return # cannot test it then
+ assert rgc.get_gcflag_extra(a1) == False
+ assert rgc.get_gcflag_extra(a2) == False
+ rgc.toggle_gcflag_extra(a1)
+ assert rgc.get_gcflag_extra(a1) == True
+ assert rgc.get_gcflag_extra(a2) == False
+ rgc.toggle_gcflag_extra(a2)
+ assert rgc.get_gcflag_extra(a1) == True
+ assert rgc.get_gcflag_extra(a2) == True
+ rgc.toggle_gcflag_extra(a1)
+ assert rgc.get_gcflag_extra(a1) == False
+ assert rgc.get_gcflag_extra(a2) == True
+ rgc.toggle_gcflag_extra(a2)
+ assert rgc.get_gcflag_extra(a1) == False
+ assert rgc.get_gcflag_extra(a2) == False
+ self.interpret(fn, [])
+
from pypy.rlib.objectmodel import UnboxedValue
class TaggedBase(object):
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit