Author: Armin Rigo <[email protected]>
Branch: stm-thread-2
Changeset: r61735:a1140a91bd3f
Date: 2013-02-24 18:41 +0100
http://bitbucket.org/pypy/pypy/changeset/a1140a91bd3f/
Log: Implement id() by using shadows, like minimark.py.
diff --git a/rpython/memory/gc/stmgc.py b/rpython/memory/gc/stmgc.py
--- a/rpython/memory/gc/stmgc.py
+++ b/rpython/memory/gc/stmgc.py
@@ -63,7 +63,18 @@
# the LOCAL COPY objects, but only on them.
#
# - GCFLAG_HASH_FIELD: the object contains an extra field added at the
-# end, with the hash value
+# end. If GCFLAG_WITH_HASH (usual case), then the field contains
+# both the hash and id value given to this object. Otherwise,
+# it's a prebuilt object; if GCFLAG_PREBUILT_ORIGINAL then the
+# field contains the hash result but the id is the address of the
+# object; otherwise the field contains the address of the original
+# prebuilt object, where the hash result can be indirectly found.
+#
+# - GCFLAG_WITH_HASH: the hash/id has been taken. On a nursery object,
+# means that it has an entry in 'nursery_objects_shadows'. Otherwise,
+# if GCFLAG_HASH_FIELD is set, that field stores the hash/id value.
+# Otherwise, means that the hash/id is equal to this exact object's
+# address.
#
# Invariant: between two transactions, all objects visible from the current
# thread are always GLOBAL. In particular:
@@ -115,9 +126,12 @@
GCFLAG_LOCAL_COPY = first_gcflag << 3 # keep in sync with et.h
GCFLAG_VISITED = first_gcflag << 4 # keep in sync with et.h
GCFLAG_HASH_FIELD = first_gcflag << 5
-GCFLAG_NEW_HASH = first_gcflag << 6
+GCFLAG_WITH_HASH = first_gcflag << 6
+GCFLAG_PREBUILT_ORIGINAL = first_gcflag << 7
-GCFLAG_PREBUILT = GCFLAG_GLOBAL | GCFLAG_NOT_WRITTEN
+GCFLAG_PREBUILT_FLAGS = (GCFLAG_GLOBAL |
+ GCFLAG_NOT_WRITTEN |
+ GCFLAG_PREBUILT_ORIGINAL)
REV_INITIAL = r_uint(1)
@@ -306,7 +320,7 @@
hdr.tid = self.combine(typeid16, flags)
def init_gc_object_immortal(self, addr, typeid16, flags=0):
- flags |= GCFLAG_PREBUILT
+ flags |= GCFLAG_PREBUILT_FLAGS
self.init_gc_object(addr, typeid16, flags)
hdr = llmemory.cast_adr_to_ptr(addr, lltype.Ptr(self.HDR))
hdr.revision = REV_INITIAL
@@ -329,7 +343,8 @@
return llmemory.NULL
#
hdr = self.header(localobj)
- hdr.tid &= ~(GCFLAG_GLOBAL | GCFLAG_POSSIBLY_OUTDATED)
+ hdr.tid &= ~(GCFLAG_GLOBAL | GCFLAG_POSSIBLY_OUTDATED |
+ GCFLAG_PREBUILT_ORIGINAL)
hdr.tid |= (GCFLAG_VISITED | GCFLAG_LOCAL_COPY)
return localobj
@@ -345,29 +360,88 @@
# ----------
# id() and identityhash() support
+ def id_or_identityhash(self, gcobj, is_hash):
+ """Implement the common logic of id() and identityhash()
+ of an object, given as a GCREF.
+ """
+ # First go to the most up-to-date version of gcobj. It can
+ # be the latest global version, or the local version if it was
+ # already modified during this transaction.
+ gcobj = llop.stm_read_barrier(lltype.typeOf(gcobj), gcobj)
+ obj = llmemory.cast_ptr_to_adr(gcobj)
+ #
+ flags = self.header(obj).tid & (GCFLAG_HASH_FIELD | GCFLAG_WITH_HASH)
+ #
+ if flags == GCFLAG_HASH_FIELD | GCFLAG_WITH_HASH:
+ # 'obj' already has an explicit hash/id field, and is not a
+ # prebuilt object at all. Return the content of that field.
+ return self._get_hash_field(obj)
+ #
+ elif flags == GCFLAG_HASH_FIELD | 0:
+ # 'obj' is a prebuilt object with a hash field, or a runtime
+ # copy of such an object.
+ if not (self.header(obj).tid & GCFLAG_PREBUILT_ORIGINAL):
+ # 'obj' is a runtime copy of an original prebuilt object.
+ # Fetch from the "hash field" the address of this original.
+ obj = self._get_hash_field(obj)
+ obj = llmemory.cast_int_to_adr(obj)
+ ll_assert((self.header(obj).tid & GCFLAG_PREBUILT_ORIGINAL)
+ != 0, "id/hash: expected a prebuilt_original")
+ #
+ if is_hash:
+ return self._get_hash_field(obj)
+ #
+ elif flags == 0 | GCFLAG_WITH_HASH:
+ # 'obj' doesn't have a hash/id field, but we already took its
+ # hash/id. If it is a nursery object, go to its shadow.
+ tls = self.get_tls()
+ if tls.is_in_nursery(obj):
+ obj = tls.nursery_objects_shadows.get(obj)
+ ll_assert(obj != NULL,
+ "GCFLAG_WITH_HASH on nursery obj but no shadow found")
+ #
+ else: # flags == 0, 'obj' has no hash/id at all so far.
+ # We are going to force one; this is a write operation.
+ # Note that we cannot get here twice for the same gcobj
+ # in the same transaction: after stm_write_barrier, the
+ # stm_read_barrier() above will return the local object
+ # with GCFLAG_WITH_HASH set.
+ localgcobj = llop.stm_write_barrier(lltype.typeOf(gcobj),gcobj)
+ obj = llmemory.cast_ptr_to_adr(localgcobj)
+ realobj = obj
+ #
+ # If 'obj' is a nursery object, we need to make a shadow
+ tls = self.get_tls()
+ if tls.is_in_nursery(obj):
+ size_gc_header = self.gcheaderbuilder.size_gc_header
+ size = self.get_size(obj)
+ shadowhdr = tls.sharedarea_tls.malloc_object(
+ size_gc_header + size)
+ # XXX must initialize the shadow enough to be considered
+ # a valid gc object by the next major collection
+ obj = shadowhdr + size_gc_header
+ tls.nursery_objects_shadows.setitem(realobj, obj)
+ #
+ self.header(realobj).tid |= GCFLAG_WITH_HASH
+ #
+ # Cases that fall through are cases where the answer is the
+ # mangled address of 'obj'.
+ return self._get_mangled_address(obj)
+
def id(self, gcobj):
- debug_print("XXX: id() not implemented")
- return self.identityhash(gcobj)
+ return self.id_or_identityhash(gcobj, False)
def identityhash(self, gcobj):
- gcobj = llop.stm_read_barrier(lltype.typeOf(gcobj), gcobj)
- obj = llmemory.cast_ptr_to_adr(gcobj)
- if not (self.header(obj).tid & (GCFLAG_HASH_FIELD | GCFLAG_NEW_HASH)):
- # 'obj' has no hash so far. Force one; this is a write operation.
- localgcobj = llop.stm_write_barrier(lltype.typeOf(gcobj), gcobj)
- obj = llmemory.cast_ptr_to_adr(localgcobj)
- self.header(obj).tid |= GCFLAG_NEW_HASH
- #
- return self._get_object_hash(obj)
+ return self.id_or_identityhash(gcobj, True)
- def _get_object_hash(self, obj):
- if self.header(obj).tid & GCFLAG_HASH_FIELD:
- objsize = self.get_size(obj)
- obj = llarena.getfakearenaaddress(obj)
- return (obj + objsize).signed[0]
- else:
- # XXX improve hash(nursery_object)
- return mangle_hash(llmemory.cast_adr_to_int(obj))
+ def _get_hash_field(self, obj):
+ objsize = self.get_size(obj)
+ obj = llarena.getfakearenaaddress(obj)
+ return (obj + objsize).signed[0]
+
+ def _get_mangled_address(self, obj):
+ i = llmemory.cast_adr_to_int(obj)
+ return mangle_hash(i)
def can_move(self, addr):
tls = self.get_tls()
diff --git a/rpython/memory/gc/stmtls.py b/rpython/memory/gc/stmtls.py
--- a/rpython/memory/gc/stmtls.py
+++ b/rpython/memory/gc/stmtls.py
@@ -14,7 +14,8 @@
from rpython.memory.gc.stmgc import GCFLAG_LOCAL_COPY
from rpython.memory.gc.stmgc import GCFLAG_POSSIBLY_OUTDATED
from rpython.memory.gc.stmgc import GCFLAG_NOT_WRITTEN
-from rpython.memory.gc.stmgc import GCFLAG_HASH_FIELD, GCFLAG_NEW_HASH
+from rpython.memory.gc.stmgc import GCFLAG_HASH_FIELD, GCFLAG_WITH_HASH
+from rpython.memory.gc.stmgc import GCFLAG_PREBUILT_ORIGINAL
from rpython.memory.gc.stmgc import hdr_revision, set_hdr_revision
SIZE_OF_SIGNED = llmemory.sizeof(lltype.Signed)
@@ -55,6 +56,11 @@
# in the appropriate place, like sharedarea_tls, if needed.
self.local_weakrefs = self.AddressStack()
#
+ # Support for id and identityhash: map nursery objects with
+ # GCFLAG_WITH_HASH to their future location after the next
+ # local collection.
+ self.nursery_objects_shadows = self.AddressDict()
+ #
self._register_with_C_code()
debug_stop("gc-init")
@@ -215,6 +221,10 @@
if self.local_weakrefs.non_empty():
self.update_local_weakrefs()
#
+ # Clear this mapping.
+ if self.nursery_objects_shadows.length() > 0:
+ self.nursery_objects_shadows.clear()
+ #
# Visit all previous OLD objects. Free the ones that have not been
# visited above, and reset GCFLAG_VISITED on the others.
self.mass_free_old_local(previous_sharedarea_tls)
@@ -439,7 +449,7 @@
# First visit to 'obj': we must move this YOUNG obj out of the
# nursery. This is the common case. Allocate a new location
# for it outside the nursery.
- newobj = self.duplicate_obj(obj, size)
+ newobj = self.duplicate_obj(obj, size, from_nursery=True)
#
# Note that references from 'obj' to other objects in the
# nursery are kept unchanged in this step: they are copied
@@ -463,18 +473,43 @@
# walk 'pending_list'.
self.pending.append(newobj)
- def duplicate_obj(self, obj, objsize):
+ def duplicate_obj(self, obj, objsize, from_nursery=False):
size_gc_header = self.gc.gcheaderbuilder.size_gc_header
totalsize_without_hash = size_gc_header + objsize
hdr = self.gc.header(obj)
- has_hash = (hdr.tid & (GCFLAG_HASH_FIELD | GCFLAG_NEW_HASH))
- if has_hash:
- newtotalsize = totalsize_without_hash + (
- llmemory.sizeof(lltype.Signed))
+ #
+ make_hash_field = False
+ has_shadow = False
+ if from_nursery:
+ # 'obj' is a nursery object: check if it has a shadow.
+ # Note that if it does, the shadow doesn't have an extra
+ # hash field either, but will simply have the same flag
+ # combination, i.e. GCFLAG_WITH_HASH set, GCFLAG_HASH_FIELD clear.
+ # So future reads of the hash/id on this new object will
+ # continue to return the mangled address of this new
+ # object (which was merely the shadow until now).
+ ll_assert((hdr.tid & GCFLAG_HASH_FIELD) == 0,
+ "nursery object with GCFLAG_HASH_FIELD")
+ if hdr.tid & GCFLAG_WITH_HASH:
+ has_shadow = True
else:
- newtotalsize = totalsize_without_hash
+ # From a non-nursery object: we need a hash field if
+ # any of the following two flags is already set on 'obj'
+ if hdr.tid & (GCFLAG_HASH_FIELD|GCFLAG_WITH_HASH):
+ make_hash_field = True
#
- newaddr = self.sharedarea_tls.malloc_object(newtotalsize)
+ if has_shadow:
+ newobj = self.nursery_objects_shadows.get(obj)
+ ll_assert(newobj != NULL,
+ "duplicate_obj: GCFLAG_WITH_HASH but no shadow found")
+ newaddr = newobj - size_gc_header
+ else:
+ if make_hash_field:
+ newtotalsize = totalsize_without_hash + (
+ llmemory.sizeof(lltype.Signed))
+ else:
+ newtotalsize = totalsize_without_hash
+ newaddr = self.sharedarea_tls.malloc_object(newtotalsize)
#
# Initialize the copy by doing a memcpy of the bytes.
# The object header of localobj will then be fixed by the C code.
@@ -484,12 +519,37 @@
totalsize_without_hash)
newobj = newaddr + size_gc_header
#
- if has_hash:
- hash = self.gc._get_object_hash(obj)
+ if make_hash_field:
+ # we have to write a value inside the new hash field
+ #
+ if hdr.tid & GCFLAG_HASH_FIELD:
+ #
+ if hdr.tid & GCFLAG_WITH_HASH:
+ # 'obj' already has an explicit hash/id field, and is not
+ # a prebuilt object at all. Just propagate the content
+ # of that field.
+ hash = self.gc._get_hash_field(obj)
+ #
+ elif hdr.tid & GCFLAG_PREBUILT_ORIGINAL:
+ # 'obj' is an original prebuilt object with a hash field.
+ # In the new hash field, store the original's address
+ hash = llmemory.cast_adr_to_int(obj)
+ else:
+ # 'obj' is already a modified copy of a prebuilt object.
+ # Propagate the content of the field.
+ hash = self.gc._get_hash_field(obj)
+ #
+ else:
+ # No previous field; store in the new field the old mangled
+ # address, and fix the new tid flags.
+ newhdr = self.gc.header(newobj)
+ ll_assert((newhdr.tid & GCFLAG_WITH_HASH) != 0, "gc bug!")
+ newhdr.tid |= GCFLAG_HASH_FIELD
+ hash = self.gc._get_mangled_address(obj)
+ #
hashaddr = llarena.getfakearenaaddress(newobj) + objsize
llarena.arena_reserve(hashaddr, SIZE_OF_SIGNED)
hashaddr.signed[0] = hash
- self.gc.header(newobj).tid |= GCFLAG_HASH_FIELD
#
return newobj
diff --git a/rpython/memory/support.py b/rpython/memory/support.py
--- a/rpython/memory/support.py
+++ b/rpython/memory/support.py
@@ -1,6 +1,6 @@
from rpython.rtyper.lltypesystem import lltype, llmemory
from rpython.rlib.objectmodel import free_non_gc_object, we_are_translated
-from rpython.rlib.rarithmetic import r_uint, LONG_BIT
+from rpython.rlib.rarithmetic import r_uint, intmask, LONG_BIT
from rpython.rlib.debug import ll_assert
from rpython.tool.identity_dict import identity_dict
@@ -9,7 +9,9 @@
# To hash pointers in dictionaries. Assumes that i shows some
# alignment (to 4, 8, maybe 16 bytes), so we use the following
# formula to avoid the trailing bits being always 0.
- return i ^ (i >> 4)
+ # This formula is reversible: two different values of 'i' will
+ # always give two different results.
+ return i ^ intmask(r_uint(i) >> 4)
# ____________________________________________________________
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit