Author: Armin Rigo <[email protected]>
Branch: 
Changeset: r83593:a07ab092b64a
Date: 2016-04-09 17:14 +0300
http://bitbucket.org/pypy/pypy/changeset/a07ab092b64a/

Log:    hg merge jit-constptr-2

        Remove the forced minor collection that occurs when rewriting the
        assembler at the start of the JIT backend. This is done by emitting
        the ConstPtrs in a separate table, and loading from the table.

        Gives improved warm-up time and memory usage. Also removes annoying
        special-purpose code for pinned pointers.

diff --git a/rpython/jit/backend/arm/assembler.py b/rpython/jit/backend/arm/assembler.py
--- a/rpython/jit/backend/arm/assembler.py
+++ b/rpython/jit/backend/arm/assembler.py
@@ -14,7 +14,7 @@
     CoreRegisterManager, check_imm_arg, VFPRegisterManager,
     operations as regalloc_operations)
 from rpython.jit.backend.llsupport import jitframe, rewrite
-from rpython.jit.backend.llsupport.assembler import DEBUG_COUNTER, debug_bridge, BaseAssembler
+from rpython.jit.backend.llsupport.assembler import DEBUG_COUNTER, BaseAssembler
 from rpython.jit.backend.llsupport.regalloc import get_scale, valid_addressing_size
 from rpython.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
 from rpython.jit.backend.model import CompiledLoopToken
@@ -481,8 +481,9 @@
 
     def generate_quick_failure(self, guardtok):
         startpos = self.mc.currpos()
-        fail_descr, target = self.store_info_on_descr(startpos, guardtok)
-        self.regalloc_push(imm(fail_descr))
+        faildescrindex, target = self.store_info_on_descr(startpos, guardtok)
+        self.load_from_gc_table(r.ip.value, faildescrindex)
+        self.regalloc_push(r.ip)
         self.push_gcmap(self.mc, gcmap=guardtok.gcmap, push=True)
         self.mc.BL(target)
         return startpos
@@ -556,7 +557,7 @@
         debug_stop('jit-backend-ops')
 
     def _call_header(self):
-        assert self.mc.currpos() == 0
+        # there is the gc table before this point
         self.gen_func_prolog()
 
     def _call_header_with_stack_check(self):
@@ -596,20 +597,22 @@
         frame_info = self.datablockwrapper.malloc_aligned(
             jitframe.JITFRAMEINFO_SIZE, alignment=WORD)
         clt.frame_info = rffi.cast(jitframe.JITFRAMEINFOPTR, frame_info)
-        clt.allgcrefs = []
         clt.frame_info.clear() # for now
 
         if log:
             operations = self._inject_debugging_code(looptoken, operations,
                                                      'e', looptoken.number)
 
+        regalloc = Regalloc(assembler=self)
+        allgcrefs = []
+        operations = regalloc.prepare_loop(inputargs, operations, looptoken,
+                                           allgcrefs)
+        self.reserve_gcref_table(allgcrefs)
+        functionpos = self.mc.get_relative_pos()
+
         self._call_header_with_stack_check()
         self._check_frame_depth_debug(self.mc)
 
-        regalloc = Regalloc(assembler=self)
-        operations = regalloc.prepare_loop(inputargs, operations, looptoken,
-                                           clt.allgcrefs)
-
         loop_head = self.mc.get_relative_pos()
         looptoken._ll_loop_code = loop_head
         #
@@ -620,9 +623,11 @@
 
         self.write_pending_failure_recoveries()
 
+        full_size = self.mc.get_relative_pos()
         rawstart = self.materialize_loop(looptoken)
-        looptoken._function_addr = looptoken._ll_function_addr = rawstart
+        looptoken._ll_function_addr = rawstart + functionpos
 
+        self.patch_gcref_table(looptoken, rawstart)
         self.process_pending_guards(rawstart)
         self.fixup_target_tokens(rawstart)
 
@@ -641,7 +646,13 @@
             looptoken.number, loopname,
             r_uint(rawstart + loop_head),
             r_uint(rawstart + size_excluding_failure_stuff),
-            r_uint(rawstart)))
+            r_uint(rawstart + functionpos)))
+        debug_print("       gc table: 0x%x" % r_uint(rawstart))
+        debug_print("       function: 0x%x" % r_uint(rawstart + functionpos))
+        debug_print("         resops: 0x%x" % r_uint(rawstart + loop_head))
+        debug_print("       failures: 0x%x" % r_uint(rawstart +
+                                                 size_excluding_failure_stuff))
+        debug_print("            end: 0x%x" % r_uint(rawstart + full_size))
         debug_stop("jit-backend-addr")
 
         return AsmInfo(ops_offset, rawstart + loop_head,
@@ -678,27 +689,43 @@
         arglocs = self.rebuild_faillocs_from_descr(faildescr, inputargs)
 
         regalloc = Regalloc(assembler=self)
-        startpos = self.mc.get_relative_pos()
+        allgcrefs = []
         operations = regalloc.prepare_bridge(inputargs, arglocs,
                                              operations,
-                                             self.current_clt.allgcrefs,
+                                             allgcrefs,
                                              self.current_clt.frame_info)
+        self.reserve_gcref_table(allgcrefs)
+        startpos = self.mc.get_relative_pos()
 
         self._check_frame_depth(self.mc, regalloc.get_gcmap())
 
+        bridgestartpos = self.mc.get_relative_pos()
         frame_depth_no_fixed_size = self._assemble(regalloc, inputargs, operations)
 
         codeendpos = self.mc.get_relative_pos()
 
         self.write_pending_failure_recoveries()
 
+        fullsize = self.mc.get_relative_pos()
         rawstart = self.materialize_loop(original_loop_token)
 
+        self.patch_gcref_table(original_loop_token, rawstart)
         self.process_pending_guards(rawstart)
 
+        debug_start("jit-backend-addr")
+        debug_print("bridge out of Guard 0x%x has address 0x%x to 0x%x" %
+                    (r_uint(descr_number), r_uint(rawstart + startpos),
+                        r_uint(rawstart + codeendpos)))
+        debug_print("       gc table: 0x%x" % r_uint(rawstart))
+        debug_print("    jump target: 0x%x" % r_uint(rawstart + startpos))
+        debug_print("         resops: 0x%x" % r_uint(rawstart + bridgestartpos))
+        debug_print("       failures: 0x%x" % r_uint(rawstart + codeendpos))
+        debug_print("            end: 0x%x" % r_uint(rawstart + fullsize))
+        debug_stop("jit-backend-addr")
+
         # patch the jump from original guard
         self.patch_trace(faildescr, original_loop_token,
-                                    rawstart, regalloc)
+                                    rawstart + startpos, regalloc)
 
         self.patch_stack_checks(frame_depth_no_fixed_size + JITFRAME_FIXED_SIZE,
                                 rawstart)
@@ -716,9 +743,53 @@
                               ops_offset=ops_offset)
         self.teardown()
 
-        debug_bridge(descr_number, rawstart, codeendpos)
+        return AsmInfo(ops_offset, startpos + rawstart, codeendpos - startpos)
 
-        return AsmInfo(ops_offset, startpos + rawstart, codeendpos - startpos)
+    def reserve_gcref_table(self, allgcrefs):
+        gcref_table_size = len(allgcrefs) * WORD
+        # align to a multiple of 16 and reserve space at the beginning
+        # of the machine code for the gc table.  This lets us write
+        # machine code with relative addressing (see load_from_gc_table())
+        gcref_table_size = (gcref_table_size + 15) & ~15
+        mc = self.mc
+        assert mc.get_relative_pos() == 0
+        for i in range(gcref_table_size):
+            mc.writechar('\x00')
+        self.setup_gcrefs_list(allgcrefs)
+
+    def patch_gcref_table(self, looptoken, rawstart):
+        # the gc table is at the start of the machine code.  Fill it now
+        tracer = self.cpu.gc_ll_descr.make_gcref_tracer(rawstart,
+                                                        self._allgcrefs)
+        gcreftracers = self.get_asmmemmgr_gcreftracers(looptoken)
+        gcreftracers.append(tracer)    # keepalive
+        self.teardown_gcrefs_list()
+
+    def load_from_gc_table(self, regnum, index):
+        """emits either:
+               LDR Rt, [PC, #offset]    if -4095 <= offset
+          or:
+               gen_load_int(Rt, offset)
+               LDR Rt, [PC, Rt]         for larger offsets
+        """
+        mc = self.mc
+        address_in_buffer = index * WORD   # at the start of the buffer
+        offset = address_in_buffer - (mc.get_relative_pos() + 8)   # negative
+        if offset >= -4095:
+            mc.LDR_ri(regnum, r.pc.value, offset)
+        else:
+            # The offset we're loading is negative: right now,
+            # gen_load_int() will always use exactly
+            # get_max_size_of_gen_load_int() instructions.  No point
+            # in optimizing in case we get less.  Just in case though,
+            # we check and pad with nops.
+            extra_bytes = mc.get_max_size_of_gen_load_int() * 2
+            offset -= extra_bytes
+            start = mc.get_relative_pos()
+            mc.gen_load_int(regnum, offset)
+            while mc.get_relative_pos() != start + extra_bytes:
+                mc.NOP()
+            mc.LDR_rr(regnum, r.pc.value, regnum)
 
     def new_stack_loc(self, i, tp):
         base_ofs = self.cpu.get_baseofs_of_frame_field()
@@ -929,6 +1000,12 @@
             clt.asmmemmgr_blocks = []
         return clt.asmmemmgr_blocks
 
+    def get_asmmemmgr_gcreftracers(self, looptoken):
+        clt = looptoken.compiled_loop_token
+        if clt.asmmemmgr_gcreftracers is None:
+            clt.asmmemmgr_gcreftracers = []
+        return clt.asmmemmgr_gcreftracers
+
     def _walk_operations(self, inputargs, operations, regalloc):
         fcond = c.AL
         self._regalloc = regalloc
diff --git a/rpython/jit/backend/arm/opassembler.py b/rpython/jit/backend/arm/opassembler.py
--- a/rpython/jit/backend/arm/opassembler.py
+++ b/rpython/jit/backend/arm/opassembler.py
@@ -35,9 +35,9 @@
 
 class ArmGuardToken(GuardToken):
     def __init__(self, cpu, gcmap, faildescr, failargs, fail_locs,
-                 offset, guard_opnum, frame_depth, fcond=c.AL):
+                 offset, guard_opnum, frame_depth, faildescrindex, fcond=c.AL):
         GuardToken.__init__(self, cpu, gcmap, faildescr, failargs, fail_locs,
-                            guard_opnum, frame_depth)
+                            guard_opnum, frame_depth, faildescrindex)
         self.fcond = fcond
         self.offset = offset
 
@@ -178,6 +178,7 @@
         assert isinstance(descr, AbstractFailDescr)
 
         gcmap = allocate_gcmap(self, frame_depth, JITFRAME_FIXED_SIZE)
+        faildescrindex = self.get_gcref_from_faildescr(descr)
         token = ArmGuardToken(self.cpu, gcmap,
                                     descr,
                                     failargs=op.getfailargs(),
@@ -185,6 +186,7 @@
                                     offset=offset,
                                     guard_opnum=op.getopnum(),
                                     frame_depth=frame_depth,
+                                    faildescrindex=faildescrindex,
                                     fcond=fcond)
         return token
 
@@ -398,14 +400,13 @@
 
     def emit_op_finish(self, op, arglocs, regalloc, fcond):
         base_ofs = self.cpu.get_baseofs_of_frame_field()
-        if len(arglocs) == 2:
-            [return_val, fail_descr_loc] = arglocs
+        if len(arglocs) > 0:
+            [return_val] = arglocs
             self.store_reg(self.mc, return_val, r.fp, base_ofs)
-        else:
-            [fail_descr_loc] = arglocs
         ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
 
-        self.mc.gen_load_int(r.ip.value, fail_descr_loc.value)
+        faildescrindex = self.get_gcref_from_faildescr(op.getdescr())
+        self.load_from_gc_table(r.ip.value, faildescrindex)
         # XXX self.mov(fail_descr_loc, RawStackLoc(ofs))
         self.store_reg(self.mc, r.ip, r.fp, ofs, helper=r.lr)
         if op.numargs() > 0 and op.getarg(0).type == REF:
@@ -1035,9 +1036,9 @@
         assert (guard_op.getopnum() == rop.GUARD_NOT_FORCED or
                 guard_op.getopnum() == rop.GUARD_NOT_FORCED_2)
         faildescr = guard_op.getdescr()
+        faildescrindex = self.get_gcref_from_faildescr(faildescr)
         ofs = self.cpu.get_ofs_of_frame_field('jf_force_descr')
-        value = rffi.cast(lltype.Signed, cast_instance_to_gcref(faildescr))
-        self.mc.gen_load_int(r.ip.value, value)
+        self.load_from_gc_table(r.ip.value, faildescrindex)
         self.store_reg(self.mc, r.ip, r.fp, ofs)
 
     def _find_nearby_operation(self, delta):
@@ -1250,3 +1251,9 @@
         self._load_from_mem(res_loc, res_loc, ofs_loc, imm(scale), signed,
                             fcond)
         return fcond
+
+    def emit_op_load_from_gc_table(self, op, arglocs, regalloc, fcond):
+        res_loc, = arglocs
+        index = op.getarg(0).getint()
+        self.load_from_gc_table(res_loc.value, index)
+        return fcond
diff --git a/rpython/jit/backend/arm/regalloc.py b/rpython/jit/backend/arm/regalloc.py
--- a/rpython/jit/backend/arm/regalloc.py
+++ b/rpython/jit/backend/arm/regalloc.py
@@ -1,5 +1,4 @@
 from rpython.rtyper.annlowlevel import cast_instance_to_gcref
-from rpython.rlib import rgc
 from rpython.rlib.debug import debug_print, debug_start, debug_stop
 from rpython.jit.backend.llsupport.regalloc import FrameManager, \
         RegisterManager, TempVar, compute_vars_longevity, BaseRegalloc, \
@@ -627,16 +626,11 @@
     def prepare_op_finish(self, op, fcond):
         # the frame is in fp, but we have to point where in the frame is
         # the potential argument to FINISH
-        descr = op.getdescr()
-        fail_descr = cast_instance_to_gcref(descr)
-        # we know it does not move, but well
-        rgc._make_sure_does_not_move(fail_descr)
-        fail_descr = rffi.cast(lltype.Signed, fail_descr)
         if op.numargs() == 1:
             loc = self.make_sure_var_in_reg(op.getarg(0))
-            locs = [loc, imm(fail_descr)]
+            locs = [loc]
         else:
-            locs = [imm(fail_descr)]
+            locs = []
         return locs
 
     def load_condition_into_cc(self, box):
@@ -892,6 +886,10 @@
     prepare_op_same_as_r = _prepare_op_same_as
     prepare_op_same_as_f = _prepare_op_same_as
 
+    def prepare_op_load_from_gc_table(self, op, fcond):
+        resloc = self.force_allocate_reg(op)
+        return [resloc]
+
     def prepare_op_call_malloc_nursery(self, op, fcond):
         size_box = op.getarg(0)
         assert isinstance(size_box, ConstInt)
diff --git a/rpython/jit/backend/llsupport/assembler.py b/rpython/jit/backend/llsupport/assembler.py
--- a/rpython/jit/backend/llsupport/assembler.py
+++ b/rpython/jit/backend/llsupport/assembler.py
@@ -23,10 +23,11 @@
 
 class GuardToken(object):
     def __init__(self, cpu, gcmap, faildescr, failargs, fail_locs,
-                 guard_opnum, frame_depth):
+                 guard_opnum, frame_depth, faildescrindex):
         assert isinstance(faildescr, AbstractFailDescr)
         self.cpu = cpu
         self.faildescr = faildescr
+        self.faildescrindex = faildescrindex
         self.failargs = failargs
         self.fail_locs = fail_locs
         self.gcmap = self.compute_gcmap(gcmap, failargs,
@@ -144,6 +145,22 @@
             self.codemap_builder = CodemapBuilder()
         self._finish_gcmap = lltype.nullptr(jitframe.GCMAP)
 
+    def setup_gcrefs_list(self, allgcrefs):
+        self._allgcrefs = allgcrefs
+        self._allgcrefs_faildescr_next = 0
+
+    def teardown_gcrefs_list(self):
+        self._allgcrefs = None
+
+    def get_gcref_from_faildescr(self, descr):
+        """This assumes that it is called in order for all faildescrs."""
+        search = cast_instance_to_gcref(descr)
+        while not _safe_eq(
+                self._allgcrefs[self._allgcrefs_faildescr_next], search):
+            self._allgcrefs_faildescr_next += 1
+            assert self._allgcrefs_faildescr_next < len(self._allgcrefs)
+        return self._allgcrefs_faildescr_next
+
     def set_debug(self, v):
         r = self._debug
         self._debug = v
@@ -186,8 +203,7 @@
                 break
         exc = guardtok.must_save_exception()
         target = self.failure_recovery_code[exc + 2 * withfloats]
-        fail_descr = cast_instance_to_gcref(guardtok.faildescr)
-        fail_descr = rffi.cast(lltype.Signed, fail_descr)
+        faildescrindex = guardtok.faildescrindex
         base_ofs = self.cpu.get_baseofs_of_frame_field()
         #
         # in practice, about 2/3rd of 'positions' lists that we build are
@@ -229,7 +245,7 @@
         self._previous_rd_locs = positions
         # write down the positions of locs
         guardtok.faildescr.rd_locs = positions
-        return fail_descr, target
+        return faildescrindex, target
 
     def enter_portal_frame(self, op):
         if self.cpu.HAS_CODEMAP:
@@ -288,7 +304,7 @@
 
         gcref = cast_instance_to_gcref(value)
         if gcref:
-            rgc._make_sure_does_not_move(gcref)
+            rgc._make_sure_does_not_move(gcref)    # but should be prebuilt
         value = rffi.cast(lltype.Signed, gcref)
         je_location = self._call_assembler_check_descr(value, tmploc)
         #
@@ -451,3 +467,8 @@
                     r_uint(rawstart + codeendpos)))
     debug_stop("jit-backend-addr")
 
+def _safe_eq(x, y):
+    try:
+        return x == y
+    except AttributeError:    # minor mess
+        return False
diff --git a/rpython/jit/backend/llsupport/gc.py b/rpython/jit/backend/llsupport/gc.py
--- a/rpython/jit/backend/llsupport/gc.py
+++ b/rpython/jit/backend/llsupport/gc.py
@@ -22,38 +22,6 @@
 from rpython.memory.gctransform import asmgcroot
 from rpython.jit.codewriter.effectinfo import EffectInfo
 
-class MovableObjectTracker(object):
-
-    ptr_array_type = lltype.GcArray(llmemory.GCREF)
-    ptr_array_gcref = lltype.nullptr(llmemory.GCREF.TO)
-
-    def __init__(self, cpu, const_pointers):
-        size = len(const_pointers)
-        # check that there are any moving object (i.e. chaning pointers).
-        # Otherwise there is no reason for an instance of this class.
-        assert size > 0
-        #
-        # prepare GC array to hold the pointers that may change
-        self.ptr_array = lltype.malloc(MovableObjectTracker.ptr_array_type, size)
-        self.ptr_array_descr = cpu.arraydescrof(MovableObjectTracker.ptr_array_type)
-        self.ptr_array_gcref = lltype.cast_opaque_ptr(llmemory.GCREF, self.ptr_array)
-        # use always the same ConstPtr to access the array
-        # (easer to read JIT trace)
-        self.const_ptr_gcref_array = ConstPtr(self.ptr_array_gcref)
-        #
-        # assign each pointer an index and put the pointer into the GC array.
-        # as pointers and addresses are not a good key to use before translation
-        # ConstPtrs are used as the key for the dict.
-        self._indexes = {}
-        for index in range(size):
-            ptr = const_pointers[index]
-            self._indexes[ptr] = index
-            self.ptr_array[index] = ptr.value
-
-    def get_array_index(self, const_ptr):
-        index = self._indexes[const_ptr]
-        assert const_ptr.value == self.ptr_array[index]
-        return index
 # ____________________________________________________________
 
 class GcLLDescription(GcCache):
@@ -129,96 +97,9 @@
     def gc_malloc_unicode(self, num_elem):
         return self._bh_malloc_array(num_elem, self.unicode_descr)
 
-    def _record_constptrs(self, op, gcrefs_output_list,
-                          ops_with_movable_const_ptr,
-                          changeable_const_pointers):
-        l = None
-        for i in range(op.numargs()):
-            v = op.getarg(i)
-            if isinstance(v, ConstPtr) and bool(v.value):
-                p = v.value
-                if rgc._make_sure_does_not_move(p):
-                    gcrefs_output_list.append(p)
-                else:
-                    if l is None:
-                        l = [i]
-                    else:
-                        l.append(i)
-                    if v not in changeable_const_pointers:
-                        changeable_const_pointers.append(v)
-        #
-        if op.is_guard() or op.getopnum() == rop.FINISH:
-            llref = cast_instance_to_gcref(op.getdescr())
-            assert rgc._make_sure_does_not_move(llref)
-            gcrefs_output_list.append(llref)
-        #
-        if l:
-            ops_with_movable_const_ptr[op] = l
-
-    def _rewrite_changeable_constptrs(self, op, ops_with_movable_const_ptr, moving_obj_tracker):
-        newops = []
-        for arg_i in ops_with_movable_const_ptr[op]:
-            v = op.getarg(arg_i)
-            # assert to make sure we got what we expected
-            assert isinstance(v, ConstPtr)
-            array_index = moving_obj_tracker.get_array_index(v)
-
-            size, offset, _ = unpack_arraydescr(moving_obj_tracker.ptr_array_descr)
-            array_index = array_index * size + offset
-            args = [moving_obj_tracker.const_ptr_gcref_array,
-                    ConstInt(array_index),
-                    ConstInt(size)]
-            load_op = ResOperation(rop.GC_LOAD_R, args)
-            newops.append(load_op)
-            op.setarg(arg_i, load_op)
-        #
-        newops.append(op)
-        return newops
-
     def rewrite_assembler(self, cpu, operations, gcrefs_output_list):
         rewriter = GcRewriterAssembler(self, cpu)
-        newops = rewriter.rewrite(operations)
-
-        # the key is an operation that contains a ConstPtr as an argument and
-        # this ConstPtrs pointer might change as it points to an object that
-        # can't be made non-moving (e.g. the object is pinned).
-        ops_with_movable_const_ptr = {}
-        #
-        # a list of such not really constant ConstPtrs.
-        changeable_const_pointers = []
-        for op in newops:
-            # record all GCREFs, because the GC (or Boehm) cannot see them and
-            # keep them alive if they end up as constants in the assembler.
-            # If such a GCREF can change and we can't make the object it points
-            # to non-movable, we have to handle it seperatly. Such GCREF's are
-            # returned as ConstPtrs in 'changeable_const_pointers' and the
-            # affected operation is returned in 'op_with_movable_const_ptr'.
-            # For this special case see 'rewrite_changeable_constptrs'.
-            self._record_constptrs(op, gcrefs_output_list,
-                    ops_with_movable_const_ptr, changeable_const_pointers)
-        #
-        # handle pointers that are not guaranteed to stay the same
-        if len(ops_with_movable_const_ptr) > 0:
-            moving_obj_tracker = MovableObjectTracker(cpu, changeable_const_pointers)
-            #
-            if not we_are_translated():
-                # used for testing
-                self.last_moving_obj_tracker = moving_obj_tracker
-            # make sure the array containing the pointers is not collected by
-            # the GC (or Boehm)
-            gcrefs_output_list.append(moving_obj_tracker.ptr_array_gcref)
-            rgc._make_sure_does_not_move(moving_obj_tracker.ptr_array_gcref)
-
-            ops = newops
-            newops = []
-            for op in ops:
-                if op in ops_with_movable_const_ptr:
-                    rewritten_ops = self._rewrite_changeable_constptrs(op,
-                            ops_with_movable_const_ptr, moving_obj_tracker)
-                    newops.extend(rewritten_ops)
-                else:
-                    newops.append(op)
-        #
+        newops = rewriter.rewrite(operations, gcrefs_output_list)
         return newops
 
     @specialize.memo()
@@ -244,6 +125,14 @@
         """
         return jitframe.JITFRAME.allocate(frame_info)
 
+    def make_gcref_tracer(self, array_base_addr, gcrefs):
+        # for tests, or for Boehm.  Overridden for framework GCs
+        from rpython.jit.backend.llsupport import gcreftracer
+        return gcreftracer.make_boehm_tracer(array_base_addr, gcrefs)
+
+    def clear_gcref_tracer(self, tracer):
+        pass    # nothing needed unless overridden
+
 class JitFrameDescrs:
     def _freeze_(self):
         return True
@@ -752,6 +641,13 @@
         p = rffi.cast(rffi.CCHARP, p)
         return (ord(p[0]) & IS_OBJECT_FLAG) != 0
 
+    def make_gcref_tracer(self, array_base_addr, gcrefs):
+        from rpython.jit.backend.llsupport import gcreftracer
+        return gcreftracer.make_framework_tracer(array_base_addr, gcrefs)
+
+    def clear_gcref_tracer(self, tracer):
+        tracer.array_length = 0
+
 # ____________________________________________________________
 
 def get_ll_description(gcdescr, translator=None, rtyper=None):
diff --git a/rpython/jit/backend/llsupport/gcreftracer.py b/rpython/jit/backend/llsupport/gcreftracer.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/backend/llsupport/gcreftracer.py
@@ -0,0 +1,49 @@
+from rpython.rlib import rgc
+from rpython.rtyper.lltypesystem import lltype, llmemory, rffi
+from rpython.rtyper.lltypesystem.lloperation import llop
+from rpython.jit.backend.llsupport.symbolic import WORD
+
+
+GCREFTRACER = lltype.GcStruct(
+    'GCREFTRACER',
+    ('array_base_addr', lltype.Signed),
+    ('array_length', lltype.Signed),
+    rtti=True)
+
+def gcrefs_trace(gc, obj_addr, callback, arg):
+    obj = llmemory.cast_adr_to_ptr(obj_addr, lltype.Ptr(GCREFTRACER))
+    i = 0
+    length = obj.array_length
+    addr = obj.array_base_addr
+    while i < length:
+        p = rffi.cast(llmemory.Address, addr + i * WORD)
+        gc._trace_callback(callback, arg, p)
+        i += 1
+lambda_gcrefs_trace = lambda: gcrefs_trace
+
+def make_framework_tracer(array_base_addr, gcrefs):
+    # careful about the order here: the allocation of the GCREFTRACER
+    # can trigger a GC.  So we must write the gcrefs into the raw
+    # array only afterwards...
+    rgc.register_custom_trace_hook(GCREFTRACER, lambda_gcrefs_trace)
+    length = len(gcrefs)
+    tr = lltype.malloc(GCREFTRACER)
+    # --no GC from here--
+    tr.array_base_addr = array_base_addr
+    tr.array_length = length
+    i = 0
+    while i < length:
+        p = rffi.cast(rffi.SIGNEDP, array_base_addr + i * WORD)
+        p[0] = rffi.cast(lltype.Signed, gcrefs[i])
+        i += 1
+    llop.gc_writebarrier(lltype.Void, tr)
+    # --no GC until here--
+    return tr
+
+def make_boehm_tracer(array_base_addr, gcrefs):
+    # copy the addresses, but return 'gcrefs' as the object that must be
+    # kept alive
+    for i in range(len(gcrefs)):
+        p = rffi.cast(rffi.SIGNEDP, array_base_addr + i * WORD)
+        p[0] = rffi.cast(lltype.Signed, gcrefs[i])
+    return gcrefs
diff --git a/rpython/jit/backend/llsupport/llmodel.py b/rpython/jit/backend/llsupport/llmodel.py
--- a/rpython/jit/backend/llsupport/llmodel.py
+++ b/rpython/jit/backend/llsupport/llmodel.py
@@ -246,6 +246,13 @@
 
     def free_loop_and_bridges(self, compiled_loop_token):
         AbstractCPU.free_loop_and_bridges(self, compiled_loop_token)
+        # turn off all gcreftracers
+        tracers = compiled_loop_token.asmmemmgr_gcreftracers
+        if tracers is not None:
+            compiled_loop_token.asmmemmgr_gcreftracers = None
+            for tracer in tracers:
+                self.gc_ll_descr.clear_gcref_tracer(tracer)
+        # then free all blocks of code and raw data
         blocks = compiled_loop_token.asmmemmgr_blocks
         if blocks is not None:
             compiled_loop_token.asmmemmgr_blocks = None
diff --git a/rpython/jit/backend/llsupport/rewrite.py b/rpython/jit/backend/llsupport/rewrite.py
--- a/rpython/jit/backend/llsupport/rewrite.py
+++ b/rpython/jit/backend/llsupport/rewrite.py
@@ -1,10 +1,12 @@
 from rpython.rlib import rgc
-from rpython.rlib.objectmodel import we_are_translated
+from rpython.rlib.objectmodel import we_are_translated, r_dict
 from rpython.rlib.rarithmetic import ovfcheck, highest_bit
 from rpython.rtyper.lltypesystem import llmemory, lltype, rstr
+from rpython.rtyper.annlowlevel import cast_instance_to_gcref
 from rpython.jit.metainterp import history
 from rpython.jit.metainterp.history import ConstInt, ConstPtr
 from rpython.jit.metainterp.resoperation import ResOperation, rop, OpHelpers
+from rpython.jit.metainterp.typesystem import rd_eq, rd_hash
 from rpython.jit.codewriter import heaptracker
 from rpython.jit.backend.llsupport.symbolic import (WORD,
         get_array_token)
@@ -94,21 +96,28 @@
         op = self.get_box_replacement(op)
         orig_op = op
         replaced = False
+        opnum = op.getopnum()
+        keep = (opnum == rop.JIT_DEBUG)
         for i in range(op.numargs()):
             orig_arg = op.getarg(i)
             arg = self.get_box_replacement(orig_arg)
+            if isinstance(arg, ConstPtr) and bool(arg.value) and not keep:
+                arg = self.remove_constptr(arg)
             if orig_arg is not arg:
                 if not replaced:
-                    op = op.copy_and_change(op.getopnum())
+                    op = op.copy_and_change(opnum)
                     orig_op.set_forwarded(op)
                     replaced = True
                 op.setarg(i, arg)
-        if rop.is_guard(op.opnum):
+        if rop.is_guard(opnum):
             if not replaced:
-                op = op.copy_and_change(op.getopnum())
+                op = op.copy_and_change(opnum)
                 orig_op.set_forwarded(op)
             op.setfailargs([self.get_box_replacement(a, True)
                             for a in op.getfailargs()])
+        if rop.is_guard(opnum) or opnum == rop.FINISH:
+            llref = cast_instance_to_gcref(op.getdescr())
+            self.gcrefs_output_list.append(llref)
         self._newops.append(op)
 
     def replace_op_with(self, op, newop):
@@ -304,13 +313,16 @@
         return False
 
 
-    def rewrite(self, operations):
+    def rewrite(self, operations, gcrefs_output_list):
         # we can only remember one malloc since the next malloc can possibly
         # collect; but we can try to collapse several known-size mallocs into
         # one, both for performance and to reduce the number of write
         # barriers.  We do this on each "basic block" of operations, which in
         # this case means between CALLs or unknown-size mallocs.
         #
+        self.gcrefs_output_list = gcrefs_output_list
+        self.gcrefs_map = None
+        self.gcrefs_recently_loaded = None
         operations = self.remove_bridge_exception(operations)
         self._changed_op = None
         for i in range(len(operations)):
@@ -333,8 +345,7 @@
             elif rop.can_malloc(op.opnum):
                 self.emitting_an_operation_that_can_collect()
             elif op.getopnum() == rop.LABEL:
-                self.emitting_an_operation_that_can_collect()
-                self._known_lengths.clear()
+                self.emit_label()
             # ---------- write barriers ----------
             if self.gc_ll_descr.write_barrier_descr is not None:
                 if op.getopnum() == rop.SETFIELD_GC:
@@ -940,3 +951,37 @@
                 operations[start+2].getopnum() == rop.RESTORE_EXCEPTION):
                 return operations[:start] + operations[start+3:]
         return operations
+
+    def emit_label(self):
+        self.emitting_an_operation_that_can_collect()
+        self._known_lengths.clear()
+        self.gcrefs_recently_loaded = None
+
+    def _gcref_index(self, gcref):
+        if self.gcrefs_map is None:
+            self.gcrefs_map = r_dict(rd_eq, rd_hash)
+        try:
+            return self.gcrefs_map[gcref]
+        except KeyError:
+            pass
+        index = len(self.gcrefs_output_list)
+        self.gcrefs_map[gcref] = index
+        self.gcrefs_output_list.append(gcref)
+        return index
+
+    def remove_constptr(self, c):
+        """Remove all ConstPtrs, and replace them with load_from_gc_table.
+        """
+        # Note: currently, gcrefs_recently_loaded is only cleared in
+        # LABELs.  We'd like something better, like "don't spill it",
+        # but that's the wrong level...
+        index = self._gcref_index(c.value)
+        if self.gcrefs_recently_loaded is None:
+            self.gcrefs_recently_loaded = {}
+        try:
+            load_op = self.gcrefs_recently_loaded[index]
+        except KeyError:
+            load_op = ResOperation(rop.LOAD_FROM_GC_TABLE, [ConstInt(index)])
+            self._newops.append(load_op)
+            self.gcrefs_recently_loaded[index] = load_op
+        return load_op
diff --git a/rpython/jit/backend/llsupport/test/test_gc.py b/rpython/jit/backend/llsupport/test/test_gc.py
--- a/rpython/jit/backend/llsupport/test/test_gc.py
+++ b/rpython/jit/backend/llsupport/test/test_gc.py
@@ -196,31 +196,6 @@
         assert is_valid_int(wbdescr.jit_wb_if_flag_byteofs)
         assert is_valid_int(wbdescr.jit_wb_if_flag_singlebyte)
 
-    def test_record_constptrs(self):
-        class MyFakeCPU(object):
-            def cast_adr_to_int(self, adr):
-                assert adr == "some fake address"
-                return 43
-        class MyFakeGCRefList(object):
-            def get_address_of_gcref(self, s_gcref1):
-                assert s_gcref1 == s_gcref
-                return "some fake address"
-        S = lltype.GcStruct('S')
-        s = lltype.malloc(S)
-        s_gcref = lltype.cast_opaque_ptr(llmemory.GCREF, s)
-        v_random_box = InputArgRef()
-        operations = [
-            ResOperation(rop.PTR_EQ, [v_random_box, ConstPtr(s_gcref)]),
-            ]
-        gc_ll_descr = self.gc_ll_descr
-        gc_ll_descr.gcrefs = MyFakeGCRefList()
-        gcrefs = []
-        operations = get_deep_immutable_oplist(operations)
-        operations2 = gc_ll_descr.rewrite_assembler(MyFakeCPU(), operations,
-                                                   gcrefs)
-        assert operations2 == operations
-        assert gcrefs == [s_gcref]
-
 
 class TestFrameworkMiniMark(TestFramework):
     gc = 'minimark'
diff --git a/rpython/jit/backend/llsupport/test/test_gcreftracer.py b/rpython/jit/backend/llsupport/test/test_gcreftracer.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/backend/llsupport/test/test_gcreftracer.py
@@ -0,0 +1,53 @@
+from rpython.rtyper.lltypesystem import lltype, llmemory, rffi
+from rpython.jit.backend.llsupport.gcreftracer import GCREFTRACER, gcrefs_trace
+from rpython.jit.backend.llsupport.gcreftracer import make_framework_tracer
+from rpython.jit.backend.llsupport.gcreftracer import make_boehm_tracer
+
+
+class FakeGC:
+    def __init__(self):
+        self.called = []
+    def _trace_callback(self, callback, arg, addr):
+        assert callback == "callback"
+        assert arg == "arg"
+        assert lltype.typeOf(addr) == llmemory.Address
+        self.called.append(addr)
+
+
+def test_gcreftracer():
+    a = lltype.malloc(rffi.CArray(lltype.Signed), 3, flavor='raw')
+    a[0] = 123
+    a[1] = 456
+    a[2] = 789
+    tr = lltype.malloc(GCREFTRACER)
+    tr.array_base_addr = base = rffi.cast(lltype.Signed, a)
+    tr.array_length = 3
+    gc = FakeGC()
+    gcrefs_trace(gc, llmemory.cast_ptr_to_adr(tr), "callback", "arg")
+    assert len(gc.called) == 3
+    WORD = rffi.sizeof(lltype.Signed)
+    for i in range(3):
+        assert gc.called[i] == rffi.cast(llmemory.Address, base + i * WORD)
+    lltype.free(a, flavor='raw')
+
+def test_make_framework_tracer():
+    a = lltype.malloc(rffi.CArray(lltype.Signed), 3, flavor='raw')
+    base = rffi.cast(lltype.Signed, a)
+    tr = make_framework_tracer(base, [123, 456, 789])
+    assert a[0] == 123
+    assert a[1] == 456
+    assert a[2] == 789
+    assert tr.array_base_addr == base
+    assert tr.array_length == 3
+    lltype.free(a, flavor='raw')
+
+def test_make_boehm_tracer():
+    a = lltype.malloc(rffi.CArray(lltype.Signed), 3, flavor='raw')
+    base = rffi.cast(lltype.Signed, a)
+    lst = [123, 456, 789]
+    tr = make_boehm_tracer(base, lst)
+    assert a[0] == 123
+    assert a[1] == 456
+    assert a[2] == 789
+    assert tr is lst
+    lltype.free(a, flavor='raw')
diff --git a/rpython/jit/backend/llsupport/test/test_pinned_object_rewrite.py b/rpython/jit/backend/llsupport/test/test_pinned_object_rewrite.py
deleted file mode 100644
--- a/rpython/jit/backend/llsupport/test/test_pinned_object_rewrite.py
+++ /dev/null
@@ -1,149 +0,0 @@
-from test_rewrite import get_size_descr, get_array_descr, get_description, BaseFakeCPU
-from rpython.jit.backend.llsupport.descr import get_size_descr,\
-     get_field_descr, get_array_descr, ArrayDescr, FieldDescr,\
-     SizeDescr, get_interiorfield_descr
-from rpython.jit.backend.llsupport.gc import GcLLDescr_boehm,\
-     GcLLDescr_framework, MovableObjectTracker
-from rpython.jit.backend.llsupport import jitframe, gc
-from rpython.jit.metainterp.gc import get_description
-from rpython.jit.tool.oparser import parse
-from rpython.jit.metainterp.optimizeopt.util import equaloplists
-from rpython.jit.metainterp.history import JitCellToken, FLOAT
-from rpython.rtyper.lltypesystem import lltype, rffi, lltype, llmemory
-from rpython.rtyper import rclass
-from rpython.jit.backend.x86.arch import WORD
-from rpython.rlib import rgc
-
-class Evaluator(object):
-    def __init__(self, scope):
-        self.scope = scope
-    def __getitem__(self, key):
-        return eval(key, self.scope)
-
-
-class FakeLoopToken(object):
-    pass
-
-# The following class is based on rpython.jit.backend.llsupport.test.test_rewrite.RewriteTests.
-# It's modified to be able to test the object pinning specific features.
-class RewriteTests(object):
-    def check_rewrite(self, frm_operations, to_operations, **namespace):
-        # objects to use inside the test
-        A = lltype.GcArray(lltype.Signed)
-        adescr = get_array_descr(self.gc_ll_descr, A)
-        adescr.tid = 4321
-        alendescr = adescr.lendescr
-        #
-        pinned_obj_type = lltype.GcStruct('PINNED_STRUCT', ('my_int', lltype.Signed))
-        pinned_obj_my_int_descr = get_field_descr(self.gc_ll_descr, pinned_obj_type, 'my_int')
-        pinned_obj_ptr = lltype.malloc(pinned_obj_type)
-        pinned_obj_gcref = lltype.cast_opaque_ptr(llmemory.GCREF, pinned_obj_ptr)
-        assert rgc.pin(pinned_obj_gcref)
-        #
-        notpinned_obj_type = lltype.GcStruct('NOT_PINNED_STRUCT', ('my_int', lltype.Signed))
-        notpinned_obj_my_int_descr = get_field_descr(self.gc_ll_descr, notpinned_obj_type, 'my_int')
-        notpinned_obj_ptr = lltype.malloc(notpinned_obj_type)
-        notpinned_obj_gcref = lltype.cast_opaque_ptr(llmemory.GCREF, notpinned_obj_ptr)
-        #
-        ptr_array_descr = self.cpu.arraydescrof(MovableObjectTracker.ptr_array_type)
-        #
-        vtable_descr = self.gc_ll_descr.fielddescr_vtable
-        O = lltype.GcStruct('O', ('parent', rclass.OBJECT),
-                                 ('x', lltype.Signed))
-        o_vtable = lltype.malloc(rclass.OBJECT_VTABLE, immortal=True)
-        #
-        tiddescr = self.gc_ll_descr.fielddescr_tid
-        wbdescr = self.gc_ll_descr.write_barrier_descr
-        WORD = globals()['WORD']
-        #
-        strdescr     = self.gc_ll_descr.str_descr
-        unicodedescr = self.gc_ll_descr.unicode_descr
-        strlendescr     = strdescr.lendescr
-        unicodelendescr = unicodedescr.lendescr
-
-        casmdescr = JitCellToken()
-        clt = FakeLoopToken()
-        clt._ll_initial_locs = [0, 8]
-        frame_info = lltype.malloc(jitframe.JITFRAMEINFO, flavor='raw')
-        clt.frame_info = frame_info
-        frame_info.jfi_frame_depth = 13
-        frame_info.jfi_frame_size = 255
-        framedescrs = self.gc_ll_descr.getframedescrs(self.cpu)
-        framelendescr = framedescrs.arraydescr.lendescr
-        jfi_frame_depth = framedescrs.jfi_frame_depth
-        jfi_frame_size = framedescrs.jfi_frame_size
-        jf_frame_info = framedescrs.jf_frame_info
-        signedframedescr = self.cpu.signedframedescr
-        floatframedescr = self.cpu.floatframedescr
-        casmdescr.compiled_loop_token = clt
-        tzdescr = None # noone cares
-        #
-        namespace.update(locals())
-        #
-        for funcname in self.gc_ll_descr._generated_functions:
-            namespace[funcname] = self.gc_ll_descr.get_malloc_fn(funcname)
-            namespace[funcname + '_descr'] = getattr(self.gc_ll_descr,
-                                                     '%s_descr' % funcname)
-        #
-        ops = parse(frm_operations, namespace=namespace)
-        operations = self.gc_ll_descr.rewrite_assembler(self.cpu,
-                                                        ops.operations,
-                                                        [])
-        # make the array containing the GCREF's accessible inside the tests.
-        # This must be done after we call 'rewrite_assembler'. Before that
-        # call 'last_moving_obj_tracker' is None or filled with some old
-        # value.
-        namespace['ptr_array_gcref'] = self.gc_ll_descr.last_moving_obj_tracker.ptr_array_gcref
-        expected = parse(to_operations % Evaluator(namespace),
-                         namespace=namespace)
-        equaloplists(operations, expected.operations)
-        lltype.free(frame_info, flavor='raw')
-
-class TestFramework(RewriteTests):
-    def setup_method(self, meth):
-        class config_(object):
-            class translation(object):
-                gc = 'minimark'
-                gcrootfinder = 'asmgcc'
-                gctransformer = 'framework'
-                gcremovetypeptr = False
-        gcdescr = get_description(config_)
-        self.gc_ll_descr = GcLLDescr_framework(gcdescr, None, None, None,
-                                               really_not_translated=True)
-        self.gc_ll_descr.write_barrier_descr.has_write_barrier_from_array = (
-            lambda cpu: True)
-        #
-        class FakeCPU(BaseFakeCPU):
-            def sizeof(self, STRUCT, is_object):
-                descr = SizeDescr(104)
-                descr.tid = 9315
-                descr.vtable = 12
-                return descr
-        self.cpu = FakeCPU()
-
-    def test_simple_getfield(self):
-        self.check_rewrite("""
-            []
-            i0 = getfield_gc_i(ConstPtr(pinned_obj_gcref), descr=pinned_obj_my_int_descr)
-            """, """
-            []
-            p1 = gc_load_r(ConstPtr(ptr_array_gcref), %(0 * ptr_array_descr.itemsize + 1)s, %(ptr_array_descr.itemsize)s)
-            i0 = gc_load_i(p1, 0, -%(pinned_obj_my_int_descr.field_size)s)
-            """)
-        assert len(self.gc_ll_descr.last_moving_obj_tracker._indexes) == 1
-
-    def test_simple_getfield_twice(self):
-        self.check_rewrite("""
-            []
-            i0 = getfield_gc_i(ConstPtr(pinned_obj_gcref), descr=pinned_obj_my_int_descr)
-            i1 = getfield_gc_i(ConstPtr(notpinned_obj_gcref), descr=notpinned_obj_my_int_descr)
-            i2 = getfield_gc_i(ConstPtr(pinned_obj_gcref), descr=pinned_obj_my_int_descr)
-            """, """
-            []
-            p1 = gc_load_r(ConstPtr(ptr_array_gcref), %(0 * ptr_array_descr.itemsize + 1)s, %(ptr_array_descr.itemsize)s)
-            i0 = gc_load_i(p1, 0, -%(pinned_obj_my_int_descr.field_size)s)
-            i1 = gc_load_i(ConstPtr(notpinned_obj_gcref), 0, -%(notpinned_obj_my_int_descr.field_size)s)
-            p2 = gc_load_r(ConstPtr(ptr_array_gcref), %(1 * ptr_array_descr.itemsize + 1)s, %(ptr_array_descr.itemsize)s)
-            i2 = gc_load_i(p2, 0, -%(pinned_obj_my_int_descr.field_size)s)
-            """)
-        assert len(self.gc_ll_descr.last_moving_obj_tracker._indexes) == 2
diff --git a/rpython/jit/backend/llsupport/test/test_rewrite.py b/rpython/jit/backend/llsupport/test/test_rewrite.py
--- a/rpython/jit/backend/llsupport/test/test_rewrite.py
+++ b/rpython/jit/backend/llsupport/test/test_rewrite.py
@@ -10,7 +10,7 @@
 from rpython.jit.metainterp.optimizeopt.util import equaloplists
 from rpython.jit.metainterp.history import JitCellToken, FLOAT
 from rpython.jit.metainterp.history import AbstractFailDescr
-from rpython.rtyper.lltypesystem import lltype, rffi
+from rpython.rtyper.lltypesystem import lltype, llmemory, rffi
 from rpython.rtyper import rclass
 from rpython.jit.backend.x86.arch import WORD
 from rpython.jit.backend.llsupport.symbolic import (WORD,
@@ -77,6 +77,9 @@
         tdescr = get_size_descr(self.gc_ll_descr, T)
         tdescr.tid = 5678
         tzdescr = get_field_descr(self.gc_ll_descr, T, 'z')
+        myT = lltype.cast_opaque_ptr(llmemory.GCREF,
+                                     lltype.malloc(T, zero=True))
+        self.myT = myT
         #
         A = lltype.GcArray(lltype.Signed)
         adescr = get_array_descr(self.gc_ll_descr, A)
@@ -112,6 +115,12 @@
         xdescr = get_field_descr(self.gc_ll_descr, R1, 'x')
         ydescr = get_field_descr(self.gc_ll_descr, R1, 'y')
         zdescr = get_field_descr(self.gc_ll_descr, R1, 'z')
+        myR1 = lltype.cast_opaque_ptr(llmemory.GCREF,
+                                      lltype.malloc(R1, zero=True))
+        myR1b = lltype.cast_opaque_ptr(llmemory.GCREF,
+                                       lltype.malloc(R1, zero=True))
+        self.myR1 = myR1
+        self.myR1b = myR1b
         #
         E = lltype.GcStruct('Empty')
         edescr = get_size_descr(self.gc_ll_descr, E)
@@ -174,9 +183,10 @@
         ops = parse(frm_operations, namespace=namespace)
         expected = parse(to_operations % Evaluator(namespace),
                          namespace=namespace)
+        self.gcrefs = []
         operations = self.gc_ll_descr.rewrite_assembler(self.cpu,
                                                         ops.operations,
-                                                        [])
+                                                        self.gcrefs)
         remap = {}
         for a, b in zip(ops.inputargs, expected.inputargs):
             remap[b] = a
@@ -1281,3 +1291,124 @@
                 {t}
                 jump()
             """.format(**locals()))
+
+    def test_load_from_gc_table_1i(self):
+        self.check_rewrite("""
+            [i1]
+            setfield_gc(ConstPtr(myR1), i1, descr=xdescr)
+            jump()
+        """, """
+            [i1]
+            p0 = load_from_gc_table(0)
+            gc_store(p0, %(xdescr.offset)s, i1, %(xdescr.field_size)s)
+            jump()
+        """)
+        assert self.gcrefs == [self.myR1]
+
+    def test_load_from_gc_table_1p(self):
+        self.check_rewrite("""
+            [p1]
+            setfield_gc(ConstPtr(myT), p1, descr=tzdescr)
+            jump()
+        """, """
+            [i1]
+            p0 = load_from_gc_table(0)
+            cond_call_gc_wb(p0, descr=wbdescr)
+            gc_store(p0, %(tzdescr.offset)s, i1, %(tzdescr.field_size)s)
+            jump()
+        """)
+        assert self.gcrefs == [self.myT]
+
+    def test_load_from_gc_table_2(self):
+        self.check_rewrite("""
+            [i1, f2]
+            setfield_gc(ConstPtr(myR1), i1, descr=xdescr)
+            setfield_gc(ConstPtr(myR1), f2, descr=ydescr)
+            jump()
+        """, """
+            [i1, f2]
+            p0 = load_from_gc_table(0)
+            gc_store(p0, %(xdescr.offset)s, i1, %(xdescr.field_size)s)
+            gc_store(p0, %(ydescr.offset)s, f2, %(ydescr.field_size)s)
+            jump()
+        """)
+        assert self.gcrefs == [self.myR1]
+
+    def test_load_from_gc_table_3(self):
+        self.check_rewrite("""
+            [i1, f2]
+            setfield_gc(ConstPtr(myR1), i1, descr=xdescr)
+            label(f2)
+            setfield_gc(ConstPtr(myR1), f2, descr=ydescr)
+            jump()
+        """, """
+            [i1, f2]
+            p0 = load_from_gc_table(0)
+            gc_store(p0, %(xdescr.offset)s, i1, %(xdescr.field_size)s)
+            label(f2)
+            p1 = load_from_gc_table(0)
+            gc_store(p1, %(ydescr.offset)s, f2, %(ydescr.field_size)s)
+            jump()
+        """)
+        assert self.gcrefs == [self.myR1]
+
+    def test_load_from_gc_table_4(self):
+        self.check_rewrite("""
+            [i1, f2]
+            setfield_gc(ConstPtr(myR1), i1, descr=xdescr)
+            setfield_gc(ConstPtr(myR1b), f2, descr=ydescr)
+            jump()
+        """, """
+            [i1, f2]
+            p0 = load_from_gc_table(0)
+            gc_store(p0, %(xdescr.offset)s, i1, %(xdescr.field_size)s)
+            p1 = load_from_gc_table(1)
+            gc_store(p1, %(ydescr.offset)s, f2, %(ydescr.field_size)s)
+            jump()
+        """)
+        assert self.gcrefs == [self.myR1, self.myR1b]
+
+    def test_pinned_simple_getfield(self):
+        # originally in test_pinned_object_rewrite; now should give the
+        # same result for pinned objects and for normal objects
+        self.check_rewrite("""
+            []
+            i0 = getfield_gc_i(ConstPtr(myR1), descr=xdescr)
+        """, """
+            []
+            p1 = load_from_gc_table(0)
+            i0 = gc_load_i(p1, %(xdescr.offset)s, -%(xdescr.field_size)s)
+        """)
+        assert self.gcrefs == [self.myR1]
+
+    def test_pinned_simple_getfield_twice(self):
+        # originally in test_pinned_object_rewrite; now should give the
+        # same result for pinned objects and for normal objects
+        self.check_rewrite("""
+            []
+            i0 = getfield_gc_i(ConstPtr(myR1), descr=xdescr)
+            i1 = getfield_gc_i(ConstPtr(myR1b), descr=xdescr)
+            i2 = getfield_gc_i(ConstPtr(myR1), descr=xdescr)
+        """, """
+            []
+            p1 = load_from_gc_table(0)
+            i0 = gc_load_i(p1, %(xdescr.offset)s, -%(xdescr.field_size)s)
+            p2 = load_from_gc_table(1)
+            i1 = gc_load_i(p2, %(xdescr.offset)s, -%(xdescr.field_size)s)
+            i2 = gc_load_i(p1, %(xdescr.offset)s, -%(xdescr.field_size)s)
+        """)
+        assert self.gcrefs == [self.myR1, self.myR1b]
+
+    def test_guard_in_gcref(self):
+        self.check_rewrite("""
+            [i1, i2]
+            guard_true(i1) []
+            guard_true(i2) []
+            jump()
+        """, """
+            [i1, i2]
+            guard_true(i1) []
+            guard_true(i2) []
+            jump()
+        """)
+        assert len(self.gcrefs) == 2
diff --git a/rpython/jit/backend/model.py b/rpython/jit/backend/model.py
--- a/rpython/jit/backend/model.py
+++ b/rpython/jit/backend/model.py
@@ -285,7 +285,7 @@
 
 class CompiledLoopToken(object):
     asmmemmgr_blocks = None
-    asmmemmgr_gcroots = 0
+    asmmemmgr_gcreftracers = None
 
     def __init__(self, cpu, number):
         cpu.tracker.total_compiled_loops += 1
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -4,7 +4,7 @@
 
 from rpython.jit.backend.llsupport import symbolic, jitframe, rewrite
 from rpython.jit.backend.llsupport.assembler import (GuardToken, BaseAssembler,
-                                                DEBUG_COUNTER, debug_bridge)
+                                                DEBUG_COUNTER)
 from rpython.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
 from rpython.jit.backend.llsupport.gcmap import allocate_gcmap
 from rpython.jit.metainterp.history import (Const, VOID, ConstInt)
@@ -489,7 +489,6 @@
         frame_info = self.datablockwrapper.malloc_aligned(
             jitframe.JITFRAMEINFO_SIZE, alignment=WORD)
         clt.frame_info = rffi.cast(jitframe.JITFRAMEINFOPTR, frame_info)
-        clt.allgcrefs = []
         clt.frame_info.clear() # for now
 
         if log:
@@ -498,10 +497,13 @@
 
         regalloc = RegAlloc(self, self.cpu.translate_support_code)
         #
+        allgcrefs = []
+        operations = regalloc.prepare_loop(inputargs, operations,
+                                           looptoken, allgcrefs)
+        self.reserve_gcref_table(allgcrefs)
+        functionpos = self.mc.get_relative_pos()
         self._call_header_with_stack_check()
         self._check_frame_depth_debug(self.mc)
-        operations = regalloc.prepare_loop(inputargs, operations,
-                                           looptoken, clt.allgcrefs)
         looppos = self.mc.get_relative_pos()
         frame_depth_no_fixed_size = self._assemble(regalloc, inputargs,
                                                    operations)
@@ -512,6 +514,7 @@
         full_size = self.mc.get_relative_pos()
         #
         rawstart = self.materialize_loop(looptoken)
+        self.patch_gcref_table(looptoken, rawstart)
         self.patch_stack_checks(frame_depth_no_fixed_size + JITFRAME_FIXED_SIZE,
                                 rawstart)
         looptoken._ll_loop_code = looppos + rawstart
@@ -520,7 +523,13 @@
             looptoken.number, loopname,
             r_uint(rawstart + looppos),
             r_uint(rawstart + size_excluding_failure_stuff),
-            r_uint(rawstart)))
+            r_uint(rawstart + functionpos)))
+        debug_print("       gc table: 0x%x" % r_uint(self.gc_table_addr))
+        debug_print("       function: 0x%x" % r_uint(rawstart + functionpos))
+        debug_print("         resops: 0x%x" % r_uint(rawstart + looppos))
+        debug_print("       failures: 0x%x" % r_uint(rawstart +
+                                                 size_excluding_failure_stuff))
+        debug_print("            end: 0x%x" % r_uint(rawstart + full_size))
         debug_stop("jit-backend-addr")
         self.patch_pending_failure_recoveries(rawstart)
         #
@@ -530,7 +539,7 @@
             looptoken._x86_rawstart = rawstart
             looptoken._x86_fullsize = full_size
             looptoken._x86_ops_offset = ops_offset
-        looptoken._ll_function_addr = rawstart
+        looptoken._ll_function_addr = rawstart + functionpos
         if logger:
             logger.log_loop(inputargs, operations, 0, "rewritten",
                             name=loopname, ops_offset=ops_offset)
@@ -563,11 +572,13 @@
                                                      'b', descr_number)
         arglocs = self.rebuild_faillocs_from_descr(faildescr, inputargs)
         regalloc = RegAlloc(self, self.cpu.translate_support_code)
-        startpos = self.mc.get_relative_pos()
+        allgcrefs = []
         operations = regalloc.prepare_bridge(inputargs, arglocs,
                                              operations,
-                                             self.current_clt.allgcrefs,
+                                             allgcrefs,
                                              self.current_clt.frame_info)
+        self.reserve_gcref_table(allgcrefs)
+        startpos = self.mc.get_relative_pos()
         self._check_frame_depth(self.mc, regalloc.get_gcmap())
         bridgestartpos = self.mc.get_relative_pos()
         self._update_at_exit(arglocs, inputargs, faildescr, regalloc)
@@ -577,12 +588,22 @@
         fullsize = self.mc.get_relative_pos()
         #
         rawstart = self.materialize_loop(original_loop_token)
+        self.patch_gcref_table(original_loop_token, rawstart)
         self.patch_stack_checks(frame_depth_no_fixed_size + JITFRAME_FIXED_SIZE,
                                 rawstart)
-        debug_bridge(descr_number, rawstart, codeendpos)
+        debug_start("jit-backend-addr")
+        debug_print("bridge out of Guard 0x%x has address 0x%x to 0x%x" %
+                    (r_uint(descr_number), r_uint(rawstart + startpos),
+                        r_uint(rawstart + codeendpos)))
+        debug_print("       gc table: 0x%x" % r_uint(self.gc_table_addr))
+        debug_print("    jump target: 0x%x" % r_uint(rawstart + startpos))
+        debug_print("         resops: 0x%x" % r_uint(rawstart + bridgestartpos))
+        debug_print("       failures: 0x%x" % r_uint(rawstart + codeendpos))
+        debug_print("            end: 0x%x" % r_uint(rawstart + fullsize))
+        debug_stop("jit-backend-addr")
         self.patch_pending_failure_recoveries(rawstart)
         # patch the jump from original guard
-        self.patch_jump_for_descr(faildescr, rawstart)
+        self.patch_jump_for_descr(faildescr, rawstart + startpos)
         ops_offset = self.mc.ops_offset
         frame_depth = max(self.current_clt.frame_info.jfi_frame_depth,
                           frame_depth_no_fixed_size + JITFRAME_FIXED_SIZE)
@@ -667,6 +688,39 @@
             mc.JMP_r(X86_64_SCRATCH_REG.value)
         mc.copy_to_raw_memory(adr_jump_offset)
 
+    def reserve_gcref_table(self, allgcrefs):
+        gcref_table_size = len(allgcrefs) * WORD
+        if IS_X86_64:
+            # align to a multiple of 16 and reserve space at the beginning
+            # of the machine code for the gc table.  This lets us write
+            # machine code with relative addressing (%rip - constant).
+            gcref_table_size = (gcref_table_size + 15) & ~15
+            mc = self.mc
+            assert mc.get_relative_pos() == 0
+            for i in range(gcref_table_size):
+                mc.writechar('\x00')
+        elif IS_X86_32:
+            # allocate the gc table right now.  This lets us write
+            # machine code with absolute 32-bit addressing.
+            self.gc_table_addr = self.datablockwrapper.malloc_aligned(
+                gcref_table_size, alignment=WORD)
+        #
+        self.setup_gcrefs_list(allgcrefs)
+
+    def patch_gcref_table(self, looptoken, rawstart):
+        if IS_X86_64:
+            # the gc table is at the start of the machine code
+            self.gc_table_addr = rawstart
+        elif IS_X86_32:
+            # the gc table was already allocated by reserve_gcref_table()
+            rawstart = self.gc_table_addr
+        #
+        tracer = self.cpu.gc_ll_descr.make_gcref_tracer(rawstart,
+                                                        self._allgcrefs)
+        gcreftracers = self.get_asmmemmgr_gcreftracers(looptoken)
+        gcreftracers.append(tracer)    # keepalive
+        self.teardown_gcrefs_list()
+
     def write_pending_failure_recoveries(self, regalloc):
         # for each pending guard, generate the code of the recovery stub
         # at the end of self.mc.
@@ -790,6 +844,12 @@
             clt.asmmemmgr_blocks = []
         return clt.asmmemmgr_blocks
 
+    def get_asmmemmgr_gcreftracers(self, looptoken):
+        clt = looptoken.compiled_loop_token
+        if clt.asmmemmgr_gcreftracers is None:
+            clt.asmmemmgr_gcreftracers = []
+        return clt.asmmemmgr_gcreftracers
+
     def materialize_loop(self, looptoken):
         self.datablockwrapper.done()      # finish using cpu.asmmemmgr
         self.datablockwrapper = None
@@ -1368,6 +1428,29 @@
     genop_cast_ptr_to_int = _genop_same_as
     genop_cast_int_to_ptr = _genop_same_as
 
+    def _patch_load_from_gc_table(self, index):
+        # must be called immediately after a "p"-mode instruction
+        # has been emitted.  64-bit mode only.
+        assert IS_X86_64
+        address_in_buffer = index * WORD   # at the start of the buffer
+        p_location = self.mc.get_relative_pos()
+        offset = address_in_buffer - p_location
+        self.mc.overwrite32(p_location-4, offset)
+
+    def _addr_from_gc_table(self, index):
+        # get the address of the gc table entry 'index'.  32-bit mode only.
+        assert IS_X86_32
+        return self.gc_table_addr + index * WORD
+
+    def genop_load_from_gc_table(self, op, arglocs, resloc):
+        index = op.getarg(0).getint()
+        assert isinstance(resloc, RegLoc)
+        if IS_X86_64:
+            self.mc.MOV_rp(resloc.value, 0)    # %rip-relative
+            self._patch_load_from_gc_table(index)
+        elif IS_X86_32:
+            self.mc.MOV_rj(resloc.value, self._addr_from_gc_table(index))
+
     def genop_int_force_ge_zero(self, op, arglocs, resloc):
         self.mc.TEST(arglocs[0], arglocs[0])
         self.mov(imm0, resloc)
@@ -1843,8 +1926,9 @@
     def implement_guard_recovery(self, guard_opnum, faildescr, failargs,
                                  fail_locs, frame_depth):
         gcmap = allocate_gcmap(self, frame_depth, JITFRAME_FIXED_SIZE)
+        faildescrindex = self.get_gcref_from_faildescr(faildescr)
         return GuardToken(self.cpu, gcmap, faildescr, failargs, fail_locs,
-                          guard_opnum, frame_depth)
+                          guard_opnum, frame_depth, faildescrindex)
 
     def generate_propagate_error_64(self):
         assert WORD == 8
@@ -1862,8 +1946,12 @@
         self._update_at_exit(guardtok.fail_locs, guardtok.failargs,
                              guardtok.faildescr, regalloc)
         #
-        fail_descr, target = self.store_info_on_descr(startpos, guardtok)
-        self.mc.PUSH(imm(fail_descr))
+        faildescrindex, target = self.store_info_on_descr(startpos, guardtok)
+        if IS_X86_64:
+            self.mc.PUSH_p(0)     # %rip-relative
+            self._patch_load_from_gc_table(faildescrindex)
+        elif IS_X86_32:
+            self.mc.PUSH_j(self._addr_from_gc_table(faildescrindex))
         self.push_gcmap(self.mc, guardtok.gcmap, push=True)
         self.mc.JMP(imm(target))
         return startpos
@@ -1967,17 +2055,24 @@
 
     def genop_finish(self, op, arglocs, result_loc):
         base_ofs = self.cpu.get_baseofs_of_frame_field()
-        if len(arglocs) == 2:
-            [return_val, fail_descr_loc] = arglocs
+        if len(arglocs) > 0:
+            [return_val] = arglocs
             if op.getarg(0).type == FLOAT and not IS_X86_64:
                 size = WORD * 2
             else:
                 size = WORD
             self.save_into_mem(raw_stack(base_ofs), return_val, imm(size))
-        else:
-            [fail_descr_loc] = arglocs
         ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
-        self.mov(fail_descr_loc, RawEbpLoc(ofs))
+        
+        descr = op.getdescr()
+        faildescrindex = self.get_gcref_from_faildescr(descr)
+        if IS_X86_64:
+            self.mc.MOV_rp(eax.value, 0)
+            self._patch_load_from_gc_table(faildescrindex)
+        elif IS_X86_32:
+            self.mc.MOV_rj(eax.value, self._addr_from_gc_table(faildescrindex))
+        self.mov(eax, RawEbpLoc(ofs))
+
         arglist = op.getarglist()
         if arglist and arglist[0].type == REF:
             if self._finish_gcmap:
@@ -2047,8 +2142,16 @@
                 guard_op.getopnum() == rop.GUARD_NOT_FORCED_2)
         faildescr = guard_op.getdescr()
         ofs = self.cpu.get_ofs_of_frame_field('jf_force_descr')
-        self.mc.MOV(raw_stack(ofs), imm(rffi.cast(lltype.Signed,
-                                 cast_instance_to_gcref(faildescr))))
+
+        faildescrindex = self.get_gcref_from_faildescr(faildescr)
+        if IS_X86_64:
+            self.mc.MOV_rp(X86_64_SCRATCH_REG.value, 0)
+            self._patch_load_from_gc_table(faildescrindex)
+            self.mc.MOV(raw_stack(ofs), X86_64_SCRATCH_REG)
+        elif IS_X86_32:
+            # XXX need a scratch reg here for efficiency; be more clever
+            self.mc.PUSH_j(self._addr_from_gc_table(faildescrindex))
+            self.mc.POP(raw_stack(ofs))
 
     def _find_nearby_operation(self, delta):
         regalloc = self._regalloc
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -423,16 +423,11 @@
     def consider_finish(self, op):
         # the frame is in ebp, but we have to point where in the frame is
         # the potential argument to FINISH
-        descr = op.getdescr()
-        fail_descr = cast_instance_to_gcref(descr)
-        # we know it does not move, but well
-        rgc._make_sure_does_not_move(fail_descr)
-        fail_descr = rffi.cast(lltype.Signed, fail_descr)
         if op.numargs() == 1:
             loc = self.make_sure_var_in_reg(op.getarg(0))
-            locs = [loc, imm(fail_descr)]
+            locs = [loc]
         else:
-            locs = [imm(fail_descr)]
+            locs = []
         self.perform(op, locs, None)
 
     def consider_guard_no_exception(self, op):
@@ -1141,6 +1136,10 @@
     consider_same_as_r = _consider_same_as
     consider_same_as_f = _consider_same_as
 
+    def consider_load_from_gc_table(self, op):
+        resloc = self.rm.force_allocate_reg(op)
+        self.perform(op, [], resloc)
+
     def consider_int_force_ge_zero(self, op):
         argloc = self.make_sure_var_in_reg(op.getarg(0))
         resloc = self.force_allocate_reg(op, [op.getarg(0)])
diff --git a/rpython/jit/backend/x86/rx86.py b/rpython/jit/backend/x86/rx86.py
--- a/rpython/jit/backend/x86/rx86.py
+++ b/rpython/jit/backend/x86/rx86.py
@@ -297,6 +297,20 @@
     return encode_abs, argnum, None, None
 
 # ____________________________________________________________
+# ***X86_64 only*** 
+# Emit a mod/rm referencing an address "RIP + immediate_offset".
+
+@specialize.arg(2)
+def encode_rip_offset(mc, immediate, _, orbyte):
+    assert mc.WORD == 8
+    mc.writechar(chr(0x05 | orbyte))
+    mc.writeimm32(immediate)
+    return 0
+
+def rip_offset(argnum):
+    return encode_rip_offset, argnum, None, None
+
+# ____________________________________________________________
 # For 64-bits mode: the REX.W, REX.R, REX.X, REG.B prefixes
 
 REX_W = 8
@@ -586,6 +600,8 @@
     PUS1_r = insn(rex_nw, register(1), '\x50')
     PUS1_b = insn(rex_nw, '\xFF', orbyte(6<<3), stack_bp(1))
     PUS1_m = insn(rex_nw, '\xFF', orbyte(6<<3), mem_reg_plus_const(1))
+    PUS1_j = insn(rex_nw, '\xFF', orbyte(6<<3), abs_(1))
+    PUS1_p = insn(rex_nw, '\xFF', orbyte(6<<3), rip_offset(1))
     PUS1_i8 = insn('\x6A', immediate(1, 'b'))
     PUS1_i32 = insn('\x68', immediate(1, 'i'))
 
@@ -608,6 +624,14 @@
             self.PUS1_i32(immed)
         self.stack_frame_size_delta(+self.WORD)
 
+    def PUSH_j(self, abs_addr):
+        self.PUS1_j(abs_addr)
+        self.stack_frame_size_delta(+self.WORD)
+
+    def PUSH_p(self, rip_offset):
+        self.PUS1_p(rip_offset)
+        self.stack_frame_size_delta(+self.WORD)
+
     PO1_r = insn(rex_nw, register(1), '\x58')
     PO1_b = insn(rex_nw, '\x8F', orbyte(0<<3), stack_bp(1))
 
@@ -914,6 +938,7 @@
     add_insn('m', mem_reg_plus_const(modrm_argnum))
     add_insn('a', mem_reg_plus_scaled_reg_plus_const(modrm_argnum))
     add_insn('j', abs_(modrm_argnum))
+    add_insn('p', rip_offset(modrm_argnum))
 
 # Define a regular MOV, and a variant MOV32 that only uses the low 4 bytes of a
 # register
diff --git a/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py b/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py
--- a/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py
+++ b/rpython/jit/backend/x86/test/test_rx86_32_auto_encoding.py
@@ -279,6 +279,8 @@
         if modes:
             tests = self.get_all_tests()
             m = modes[0]
+            if m == 'p' and self.WORD == 4:
+                return []
             lst = tests[m]()
             random.shuffle(lst)
             if methname == 'PSRAD_xi' and m == 'i':
diff --git a/rpython/jit/backend/x86/test/test_rx86_64_auto_encoding.py b/rpython/jit/backend/x86/test/test_rx86_64_auto_encoding.py
--- a/rpython/jit/backend/x86/test/test_rx86_64_auto_encoding.py
+++ b/rpython/jit/backend/x86/test/test_rx86_64_auto_encoding.py
@@ -51,3 +51,19 @@
     def test_extra_MOV_ri64(self):
         self.imm32_tests = self.imm64_tests      # patch on 'self'
         self.complete_test('MOV_ri')
+
+    def rip_relative_tests(self):
+        return [-0x80000000, 0x7FFFFFFF, 128, 256, -129, -255, 0, 127]
+
+    def get_all_tests(self):
+        d = super(TestRx86_64, self).get_all_tests()
+        d['p'] = self.rip_relative_tests
+        return d
+
+    def assembler_operand_rip_relative(self, value):
+        return '%d(%%rip)' % value
+
+    def get_all_assembler_operands(self):
+        d = super(TestRx86_64, self).get_all_assembler_operands()
+        d['p'] = self.assembler_operand_rip_relative
+        return d
diff --git a/rpython/jit/metainterp/executor.py b/rpython/jit/metainterp/executor.py
--- a/rpython/jit/metainterp/executor.py
+++ b/rpython/jit/metainterp/executor.py
@@ -408,6 +408,7 @@
                          rop.GC_LOAD_INDEXED_R,
                          rop.GC_STORE,
                          rop.GC_STORE_INDEXED,
+                         rop.LOAD_FROM_GC_TABLE,
                          ):      # list of opcodes never executed by pyjitpl
                 continue
             if rop._VEC_PURE_FIRST <= value <= rop._VEC_PURE_LAST:
diff --git a/rpython/jit/metainterp/resoperation.py b/rpython/jit/metainterp/resoperation.py
--- a/rpython/jit/metainterp/resoperation.py
+++ b/rpython/jit/metainterp/resoperation.py
@@ -1056,6 +1056,8 @@
     'UNICODELEN/1/i',
     'UNICODEGETITEM/2/i',
     #
+    'LOAD_FROM_GC_TABLE/1/r',    # only emitted by rewrite.py
+    #
     '_ALWAYS_PURE_LAST',  # ----- end of always_pure operations -----
 
     # parameters GC_LOAD
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to