Author: Richard Plangger <[email protected]>
Branch: new-jit-log
Changeset: r83613:03ed7b8b8776
Date: 2016-04-12 14:00 +0200
http://bitbucket.org/pypy/pypy/changeset/03ed7b8b8776/

Log:    catchup with default

diff too long, truncating to 2000 out of 3747 lines

diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -35,3 +35,20 @@
 
 .. branch: win32-lib-name
 
+.. branch: remove-frame-forcing-in-executioncontext
+
+.. branch: rposix-for-3
+
+Wrap more POSIX functions in `rpython.rlib.rposix`.
+
+.. branch: cleanup-history-rewriting
+
+A local clean-up in the JIT front-end.
+
+.. branch: jit-constptr-2
+
+Remove the forced minor collection that occurs when rewriting the
+assembler at the start of the JIT backend. This is done by emitting
+the ConstPtrs in a separate table, and loading from the table.  It
+gives improved warm-up time and memory usage, and also removes
+annoying special-purpose code for pinned pointers.
diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py
--- a/pypy/interpreter/error.py
+++ b/pypy/interpreter/error.py
@@ -277,9 +277,18 @@
         raise NotImplementedError
 
     def get_traceback(self):
-        """Get the PyTraceback object, for app-level Python code.
+        """Calling this marks the PyTraceback as escaped, i.e. it becomes
+        accessible and inspectable by app-level Python code.  For the JIT.
+        Note that this has no effect if there are already several traceback
+        frames recorded, because in this case they are already marked as
+        escaping by executioncontext.leave() being called with
+        got_exception=True.
         """
-        return self._application_traceback
+        from pypy.interpreter.pytraceback import PyTraceback
+        tb = self._application_traceback
+        if tb is not None and isinstance(tb, PyTraceback):
+            tb.frame.mark_as_escaped()
+        return tb
 
     def set_traceback(self, traceback):
         """Set the current traceback."""
diff --git a/pypy/interpreter/executioncontext.py b/pypy/interpreter/executioncontext.py
--- a/pypy/interpreter/executioncontext.py
+++ b/pypy/interpreter/executioncontext.py
@@ -74,6 +74,15 @@
         finally:
             frame_vref = self.topframeref
             self.topframeref = frame.f_backref
+            if frame.escaped or got_exception:
+                # if this frame escaped to applevel, we must ensure that also
+                # f_back does
+                f_back = frame.f_backref()
+                if f_back:
+                    f_back.mark_as_escaped()
+                # force the frame (from the JIT point of view), so that it can
+                # be accessed also later
+                frame_vref()
             jit.virtual_ref_finish(frame_vref, frame)
 
     # ________________________________________________________________
diff --git a/pypy/interpreter/pyframe.py b/pypy/interpreter/pyframe.py
--- a/pypy/interpreter/pyframe.py
+++ b/pypy/interpreter/pyframe.py
@@ -65,6 +65,7 @@
     last_exception           = None
     f_backref                = jit.vref_None
     
+    escaped                  = False  # see mark_as_escaped()
     debugdata                = None
 
     pycode = None # code object executed by that frame
@@ -151,6 +152,15 @@
         assert isinstance(cell, Cell)
         return cell
 
+    def mark_as_escaped(self):
+        """
+        Must be called on frames that are exposed to applevel, e.g. by
+        sys._getframe().  This ensures that the virtualref holding the frame
+        is properly forced by ec.leave(), and thus the frame will still be
+        accessible even after the corresponding C stack has died.
+        """
+        self.escaped = True
+
     def append_block(self, block):
         assert block.previous is self.lastblock
         self.lastblock = block
diff --git a/pypy/module/__pypy__/test/test_magic.py b/pypy/module/__pypy__/test/test_magic.py
--- a/pypy/module/__pypy__/test/test_magic.py
+++ b/pypy/module/__pypy__/test/test_magic.py
@@ -53,7 +53,7 @@
         assert _promote(1) == 1
         assert _promote(1.1) == 1.1
         assert _promote("abc") == "abc"
-        assert _promote(u"abc") == u"abc"
+        raises(TypeError, _promote, u"abc")
         l = []
         assert _promote(l) is l
         class A(object):
diff --git a/pypy/module/_file/test/test_file.py b/pypy/module/_file/test/test_file.py
--- a/pypy/module/_file/test/test_file.py
+++ b/pypy/module/_file/test/test_file.py
@@ -285,6 +285,8 @@
             from posix import openpty, fdopen, write, close
         except ImportError:
             skip('no openpty on this platform')
+        if 'gnukfreebsd' in sys.platform:
+            skip('close() hangs forever on kFreeBSD')
         read_fd, write_fd = openpty()
         write(write_fd, 'Abc\n')
         close(write_fd)
diff --git a/pypy/module/_socket/test/test_sock_app.py b/pypy/module/_socket/test/test_sock_app.py
--- a/pypy/module/_socket/test/test_sock_app.py
+++ b/pypy/module/_socket/test/test_sock_app.py
@@ -733,6 +733,7 @@
         try:
             while 1:
                 count += cli.send(b'foobar' * 70)
+                assert count < 100000
         except timeout:
             pass
         t.recv(count)
diff --git a/pypy/module/_vmprof/test/test__vmprof.py b/pypy/module/_vmprof/test/test__vmprof.py
--- a/pypy/module/_vmprof/test/test__vmprof.py
+++ b/pypy/module/_vmprof/test/test__vmprof.py
@@ -14,7 +14,7 @@
         tmpfile2 = open(self.tmpfilename2, 'wb')
         tmpfileno2 = tmpfile2.fileno()
 
-        import struct, sys
+        import struct, sys, gc
 
         WORD = struct.calcsize('l')
         
@@ -46,6 +46,8 @@
             return count
         
         import _vmprof
+        gc.collect()  # try to make the weakref list deterministic
+        gc.collect()  # by freeing all dead code objects
         _vmprof.enable(tmpfileno, 0.01)
         _vmprof.disable()
         s = open(self.tmpfilename, 'rb').read()
@@ -57,6 +59,8 @@
             pass
         """ in d
 
+        gc.collect()
+        gc.collect()
         _vmprof.enable(tmpfileno2, 0.01)
 
         exec """def foo2():
diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py
--- a/pypy/module/cpyext/api.py
+++ b/pypy/module/cpyext/api.py
@@ -1001,12 +1001,6 @@
     functions = []
     decls = {}
     pypy_decls = decls['pypy_decl.h'] = []
-    pypy_decls.append("#ifndef _PYPY_PYPY_DECL_H\n")
-    pypy_decls.append("#define _PYPY_PYPY_DECL_H\n")
-    pypy_decls.append("#ifndef PYPY_STANDALONE\n")
-    pypy_decls.append("#ifdef __cplusplus")
-    pypy_decls.append("extern \"C\" {")
-    pypy_decls.append("#endif\n")
     pypy_decls.append('#define Signed   long           /* xxx temporary fix */\n')
     pypy_decls.append('#define Unsigned unsigned long  /* xxx temporary fix */\n')
 
@@ -1047,11 +1041,6 @@
 
     pypy_decls.append('#undef Signed    /* xxx temporary fix */\n')
     pypy_decls.append('#undef Unsigned  /* xxx temporary fix */\n')
-    pypy_decls.append("#ifdef __cplusplus")
-    pypy_decls.append("}")
-    pypy_decls.append("#endif")
-    pypy_decls.append("#endif /*PYPY_STANDALONE*/\n")
-    pypy_decls.append("#endif /*_PYPY_PYPY_DECL_H*/\n")
 
     for header_name, header_decls in decls.iteritems():
         decl_h = udir.join(header_name)
diff --git a/pypy/module/cpyext/include/Python.h b/pypy/module/cpyext/include/Python.h
--- a/pypy/module/cpyext/include/Python.h
+++ b/pypy/module/cpyext/include/Python.h
@@ -132,7 +132,18 @@
 /* Missing definitions */
 #include "missing.h"
 
-#include <pypy_decl.h>
+/* The declarations of most API functions are generated in a separate file */
+/* Don't include them while building PyPy, RPython also generates signatures
+ * which are similar but not identical. */
+#ifndef PYPY_STANDALONE
+#ifdef __cplusplus
+extern "C" {
+#endif
+  #include <pypy_decl.h>
+#ifdef __cplusplus
+}
+#endif
+#endif  /* PYPY_STANDALONE */
 
 /* Define macros for inline documentation. */
 #define PyDoc_VAR(name) static char name[]
diff --git a/pypy/module/cpyext/include/structmember.h b/pypy/module/cpyext/include/structmember.h
--- a/pypy/module/cpyext/include/structmember.h
+++ b/pypy/module/cpyext/include/structmember.h
@@ -78,7 +78,11 @@
 
 
 /* API functions. */
+/* Don't include them while building PyPy, RPython also generates signatures
+ * which are similar but not identical. */
+#ifndef PYPY_STANDALONE
 #include "pypy_structmember_decl.h"
+#endif
 
 
 #ifdef __cplusplus
diff --git a/pypy/module/cpyext/structmember.py b/pypy/module/cpyext/structmember.py
--- a/pypy/module/cpyext/structmember.py
+++ b/pypy/module/cpyext/structmember.py
@@ -2,7 +2,7 @@
 from pypy.interpreter.typedef import TypeDef, GetSetProperty
 from rpython.rtyper.lltypesystem import rffi, lltype
 from pypy.module.cpyext.structmemberdefs import *
-from pypy.module.cpyext.api import ADDR, PyObjectP, cpython_api
+from pypy.module.cpyext.api import ADDR, PyObjectP, cpython_api, CONST_STRING
 from pypy.module.cpyext.intobject import PyInt_AsLong, PyInt_AsUnsignedLong
 from pypy.module.cpyext.pyerrors import PyErr_Occurred
 from pypy.module.cpyext.pyobject import PyObject, Py_DecRef, from_ref, make_ref
@@ -34,7 +34,7 @@
 _HEADER = 'pypy_structmember_decl.h'
 
 
-@cpython_api([PyObject, lltype.Ptr(PyMemberDef)], PyObject, header=_HEADER)
+@cpython_api([CONST_STRING, lltype.Ptr(PyMemberDef)], PyObject, header=_HEADER)
 def PyMember_GetOne(space, obj, w_member):
     addr = rffi.cast(ADDR, obj)
     addr += w_member.c_offset
@@ -85,7 +85,7 @@
     return w_result
 
 
-@cpython_api([PyObject, lltype.Ptr(PyMemberDef), PyObject], rffi.INT_real,
+@cpython_api([rffi.CCHARP, lltype.Ptr(PyMemberDef), PyObject], rffi.INT_real,
              error=-1, header=_HEADER)
 def PyMember_SetOne(space, obj, w_member, w_value):
     addr = rffi.cast(ADDR, obj)
diff --git a/pypy/module/cpyext/typeobject.py b/pypy/module/cpyext/typeobject.py
--- a/pypy/module/cpyext/typeobject.py
+++ b/pypy/module/cpyext/typeobject.py
@@ -271,17 +271,32 @@
     def member_getter(self, space, w_self):
         assert isinstance(self, W_MemberDescr)
         check_descr(space, w_self, self.w_type)
-        return PyMember_GetOne(space, w_self, self.member)
+        pyref = make_ref(space, w_self)
+        try:
+            return PyMember_GetOne(
+                space, rffi.cast(rffi.CCHARP, pyref), self.member)
+        finally:
+            Py_DecRef(space, pyref)
 
     def member_delete(self, space, w_self):
         assert isinstance(self, W_MemberDescr)
         check_descr(space, w_self, self.w_type)
-        PyMember_SetOne(space, w_self, self.member, None)
+        pyref = make_ref(space, w_self)
+        try:
+            PyMember_SetOne(
+                space, rffi.cast(rffi.CCHARP, pyref), self.member, None)
+        finally:
+            Py_DecRef(space, pyref)
 
     def member_setter(self, space, w_self, w_value):
         assert isinstance(self, W_MemberDescr)
         check_descr(space, w_self, self.w_type)
-        PyMember_SetOne(space, w_self, self.member, w_value)
+        pyref = make_ref(space, w_self)
+        try:
+            PyMember_SetOne(
+                space, rffi.cast(rffi.CCHARP, pyref), self.member, w_value)
+        finally:
+            Py_DecRef(space, pyref)
 
 class W_PyCTypeObject(W_TypeObject):
     @jit.dont_look_inside
diff --git a/pypy/module/sys/vm.py b/pypy/module/sys/vm.py
--- a/pypy/module/sys/vm.py
+++ b/pypy/module/sys/vm.py
@@ -37,6 +37,7 @@
             raise OperationError(space.w_ValueError,
                                  space.wrap("call stack is not deep enough"))
         if depth == 0:
+            f.mark_as_escaped()
             return space.wrap(f)
         depth -= 1
         f = ec.getnextframe_nohidden(f)
diff --git a/pypy/module/test_lib_pypy/pyrepl/infrastructure.py b/pypy/module/test_lib_pypy/pyrepl/infrastructure.py
--- a/pypy/module/test_lib_pypy/pyrepl/infrastructure.py
+++ b/pypy/module/test_lib_pypy/pyrepl/infrastructure.py
@@ -18,6 +18,9 @@
 # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 
 from __future__ import print_function
+from contextlib import contextmanager
+import os
+
 from pyrepl.reader import Reader
 from pyrepl.console import Console, Event
 
@@ -71,3 +74,14 @@
     con = TestConsole(test_spec, verbose=True)
     reader = reader_class(con)
     reader.readline()
+
+
+@contextmanager
+def sane_term():
+    """Ensure a TERM that supports clear"""
+    old_term, os.environ['TERM'] = os.environ.get('TERM'), 'xterm'
+    yield
+    if old_term is not None:
+        os.environ['TERM'] = old_term
+    else:
+        del os.environ['TERM']
diff --git a/pypy/module/test_lib_pypy/pyrepl/test_bugs.py b/pypy/module/test_lib_pypy/pyrepl/test_bugs.py
--- a/pypy/module/test_lib_pypy/pyrepl/test_bugs.py
+++ b/pypy/module/test_lib_pypy/pyrepl/test_bugs.py
@@ -18,7 +18,7 @@
 # CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 
 from pyrepl.historical_reader import HistoricalReader
-from .infrastructure import EA, BaseTestReader, read_spec
+from .infrastructure import EA, BaseTestReader, sane_term, read_spec
 
 # this test case should contain as-verbatim-as-possible versions of
 # (applicable) bug reports
@@ -46,7 +46,8 @@
     read_spec(spec, HistoricalTestReader)
 
 
-@pytest.mark.skipif("os.name != 'posix' or 'darwin' in sys.platform")
+@pytest.mark.skipif("os.name != 'posix' or 'darwin' in sys.platform or "
+                    "'kfreebsd' in sys.platform")
 def test_signal_failure(monkeypatch):
     import os
     import pty
@@ -61,13 +62,14 @@
 
     mfd, sfd = pty.openpty()
     try:
-        c = UnixConsole(sfd, sfd)
-        c.prepare()
-        c.restore()
-        monkeypatch.setattr(signal, 'signal', failing_signal)
-        c.prepare()
-        monkeypatch.setattr(signal, 'signal', really_failing_signal)
-        c.restore()
+        with sane_term():
+            c = UnixConsole(sfd, sfd)
+            c.prepare()
+            c.restore()
+            monkeypatch.setattr(signal, 'signal', failing_signal)
+            c.prepare()
+            monkeypatch.setattr(signal, 'signal', really_failing_signal)
+            c.restore()
     finally:
         os.close(mfd)
         os.close(sfd)
diff --git a/pypy/module/test_lib_pypy/pyrepl/test_readline.py b/pypy/module/test_lib_pypy/pyrepl/test_readline.py
--- a/pypy/module/test_lib_pypy/pyrepl/test_readline.py
+++ b/pypy/module/test_lib_pypy/pyrepl/test_readline.py
@@ -1,7 +1,10 @@
 import pytest
 
+from .infrastructure import sane_term
 
-@pytest.mark.skipif("os.name != 'posix' or 'darwin' in sys.platform")
+
+@pytest.mark.skipif("os.name != 'posix' or 'darwin' in sys.platform or "
+                    "'kfreebsd' in sys.platform")
 def test_raw_input():
     import os
     import pty
@@ -11,7 +14,8 @@
     readline_wrapper = _ReadlineWrapper(slave, slave)
     os.write(master, b'input\n')
 
-    result = readline_wrapper.get_reader().readline()
+    with sane_term():
+        result = readline_wrapper.get_reader().readline()
     #result = readline_wrapper.raw_input('prompt:')
     assert result == 'input'
     # A bytes string on python2, a unicode string on python3.
diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,3 @@
 # hypothesis is used for test generation on untranslated jit tests
 hypothesis
-enum>=0.4.6 # is a dependency, but old pip does not pick it up
 enum34>=1.1.2
diff --git a/rpython/flowspace/specialcase.py b/rpython/flowspace/specialcase.py
--- a/rpython/flowspace/specialcase.py
+++ b/rpython/flowspace/specialcase.py
@@ -77,6 +77,7 @@
     for c in s:
         buf.append(c)
     buf.append(' ')
+rpython_print_item._annenforceargs_ = (str,)
 
 def rpython_print_newline():
     buf = stdoutbuffer.linebuf
diff --git a/rpython/jit/backend/arm/assembler.py b/rpython/jit/backend/arm/assembler.py
--- a/rpython/jit/backend/arm/assembler.py
+++ b/rpython/jit/backend/arm/assembler.py
@@ -14,7 +14,7 @@
     CoreRegisterManager, check_imm_arg, VFPRegisterManager,
     operations as regalloc_operations)
 from rpython.jit.backend.llsupport import jitframe, rewrite
-from rpython.jit.backend.llsupport.assembler import DEBUG_COUNTER, debug_bridge, BaseAssembler
+from rpython.jit.backend.llsupport.assembler import DEBUG_COUNTER, BaseAssembler
 from rpython.jit.backend.llsupport.regalloc import get_scale, valid_addressing_size
 from rpython.jit.backend.llsupport.asmmemmgr import MachineDataBlockWrapper
 from rpython.jit.backend.model import CompiledLoopToken
@@ -481,8 +481,9 @@
 
     def generate_quick_failure(self, guardtok):
         startpos = self.mc.currpos()
-        fail_descr, target = self.store_info_on_descr(startpos, guardtok)
-        self.regalloc_push(imm(fail_descr))
+        faildescrindex, target = self.store_info_on_descr(startpos, guardtok)
+        self.load_from_gc_table(r.ip.value, faildescrindex)
+        self.regalloc_push(r.ip)
         self.push_gcmap(self.mc, gcmap=guardtok.gcmap, push=True)
         self.mc.BL(target)
         return startpos
@@ -556,7 +557,7 @@
         debug_stop('jit-backend-ops')
 
     def _call_header(self):
-        assert self.mc.currpos() == 0
+        # there is the gc table before this point
         self.gen_func_prolog()
 
     def _call_header_with_stack_check(self):
@@ -596,20 +597,22 @@
         frame_info = self.datablockwrapper.malloc_aligned(
             jitframe.JITFRAMEINFO_SIZE, alignment=WORD)
         clt.frame_info = rffi.cast(jitframe.JITFRAMEINFOPTR, frame_info)
-        clt.allgcrefs = []
         clt.frame_info.clear() # for now
 
         if log:
             operations = self._inject_debugging_code(looptoken, operations,
                                                      'e', looptoken.number)
 
+        regalloc = Regalloc(assembler=self)
+        allgcrefs = []
+        operations = regalloc.prepare_loop(inputargs, operations, looptoken,
+                                           allgcrefs)
+        self.reserve_gcref_table(allgcrefs)
+        functionpos = self.mc.get_relative_pos()
+
         self._call_header_with_stack_check()
         self._check_frame_depth_debug(self.mc)
 
-        regalloc = Regalloc(assembler=self)
-        operations = regalloc.prepare_loop(inputargs, operations, looptoken,
-                                           clt.allgcrefs)
-
         loop_head = self.mc.get_relative_pos()
         looptoken._ll_loop_code = loop_head
         #
@@ -620,9 +623,11 @@
 
         self.write_pending_failure_recoveries()
 
+        full_size = self.mc.get_relative_pos()
         rawstart = self.materialize_loop(looptoken)
-        looptoken._function_addr = looptoken._ll_function_addr = rawstart
+        looptoken._ll_function_addr = rawstart + functionpos
 
+        self.patch_gcref_table(looptoken, rawstart)
         self.process_pending_guards(rawstart)
         self.fixup_target_tokens(rawstart)
 
@@ -641,7 +646,13 @@
             looptoken.number, loopname,
             r_uint(rawstart + loop_head),
             r_uint(rawstart + size_excluding_failure_stuff),
-            r_uint(rawstart)))
+            r_uint(rawstart + functionpos)))
+        debug_print("       gc table: 0x%x" % r_uint(rawstart))
+        debug_print("       function: 0x%x" % r_uint(rawstart + functionpos))
+        debug_print("         resops: 0x%x" % r_uint(rawstart + loop_head))
+        debug_print("       failures: 0x%x" % r_uint(rawstart +
+                                                 size_excluding_failure_stuff))
+        debug_print("            end: 0x%x" % r_uint(rawstart + full_size))
         debug_stop("jit-backend-addr")
 
         return AsmInfo(ops_offset, rawstart + loop_head,
@@ -678,27 +689,43 @@
         arglocs = self.rebuild_faillocs_from_descr(faildescr, inputargs)
 
         regalloc = Regalloc(assembler=self)
-        startpos = self.mc.get_relative_pos()
+        allgcrefs = []
         operations = regalloc.prepare_bridge(inputargs, arglocs,
                                              operations,
-                                             self.current_clt.allgcrefs,
+                                             allgcrefs,
                                              self.current_clt.frame_info)
+        self.reserve_gcref_table(allgcrefs)
+        startpos = self.mc.get_relative_pos()
 
         self._check_frame_depth(self.mc, regalloc.get_gcmap())
 
+        bridgestartpos = self.mc.get_relative_pos()
         frame_depth_no_fixed_size = self._assemble(regalloc, inputargs, operations)
 
         codeendpos = self.mc.get_relative_pos()
 
         self.write_pending_failure_recoveries()
 
+        fullsize = self.mc.get_relative_pos()
         rawstart = self.materialize_loop(original_loop_token)
 
+        self.patch_gcref_table(original_loop_token, rawstart)
         self.process_pending_guards(rawstart)
 
+        debug_start("jit-backend-addr")
+        debug_print("bridge out of Guard 0x%x has address 0x%x to 0x%x" %
+                    (r_uint(descr_number), r_uint(rawstart + startpos),
+                        r_uint(rawstart + codeendpos)))
+        debug_print("       gc table: 0x%x" % r_uint(rawstart))
+        debug_print("    jump target: 0x%x" % r_uint(rawstart + startpos))
+        debug_print("         resops: 0x%x" % r_uint(rawstart + bridgestartpos))
+        debug_print("       failures: 0x%x" % r_uint(rawstart + codeendpos))
+        debug_print("            end: 0x%x" % r_uint(rawstart + fullsize))
+        debug_stop("jit-backend-addr")
+
         # patch the jump from original guard
         self.patch_trace(faildescr, original_loop_token,
-                                    rawstart, regalloc)
+                                    rawstart + startpos, regalloc)
 
         self.patch_stack_checks(frame_depth_no_fixed_size + JITFRAME_FIXED_SIZE,
                                 rawstart)
@@ -716,9 +743,53 @@
                               ops_offset=ops_offset)
         self.teardown()
 
-        debug_bridge(descr_number, rawstart, codeendpos)
+        return AsmInfo(ops_offset, startpos + rawstart, codeendpos - startpos)
 
-        return AsmInfo(ops_offset, startpos + rawstart, codeendpos - startpos)
+    def reserve_gcref_table(self, allgcrefs):
+        gcref_table_size = len(allgcrefs) * WORD
+        # align to a multiple of 16 and reserve space at the beginning
+        # of the machine code for the gc table.  This lets us write
+        # machine code with relative addressing (see load_from_gc_table())
+        gcref_table_size = (gcref_table_size + 15) & ~15
+        mc = self.mc
+        assert mc.get_relative_pos() == 0
+        for i in range(gcref_table_size):
+            mc.writechar('\x00')
+        self.setup_gcrefs_list(allgcrefs)
+
+    def patch_gcref_table(self, looptoken, rawstart):
+        # the gc table is at the start of the machine code.  Fill it now
+        tracer = self.cpu.gc_ll_descr.make_gcref_tracer(rawstart,
+                                                        self._allgcrefs)
+        gcreftracers = self.get_asmmemmgr_gcreftracers(looptoken)
+        gcreftracers.append(tracer)    # keepalive
+        self.teardown_gcrefs_list()
+
+    def load_from_gc_table(self, regnum, index):
+        """emits either:
+               LDR Rt, [PC, #offset]    if -4095 <= offset
+          or:
+               gen_load_int(Rt, offset)
+               LDR Rt, [PC, Rt]         for larger offsets
+        """
+        mc = self.mc
+        address_in_buffer = index * WORD   # at the start of the buffer
+        offset = address_in_buffer - (mc.get_relative_pos() + 8)   # negative
+        if offset >= -4095:
+            mc.LDR_ri(regnum, r.pc.value, offset)
+        else:
+            # The offset we're loading is negative: right now,
+            # gen_load_int() will always use exactly
+            # get_max_size_of_gen_load_int() instructions.  No point
+            # in optimizing in case we get less.  Just in case though,
+            # we check and pad with nops.
+            extra_bytes = mc.get_max_size_of_gen_load_int() * 2
+            offset -= extra_bytes
+            start = mc.get_relative_pos()
+            mc.gen_load_int(regnum, offset)
+            while mc.get_relative_pos() != start + extra_bytes:
+                mc.NOP()
+            mc.LDR_rr(regnum, r.pc.value, regnum)
 
     def new_stack_loc(self, i, tp):
         base_ofs = self.cpu.get_baseofs_of_frame_field()
@@ -929,6 +1000,12 @@
             clt.asmmemmgr_blocks = []
         return clt.asmmemmgr_blocks
 
+    def get_asmmemmgr_gcreftracers(self, looptoken):
+        clt = looptoken.compiled_loop_token
+        if clt.asmmemmgr_gcreftracers is None:
+            clt.asmmemmgr_gcreftracers = []
+        return clt.asmmemmgr_gcreftracers
+
     def _walk_operations(self, inputargs, operations, regalloc):
         fcond = c.AL
         self._regalloc = regalloc
diff --git a/rpython/jit/backend/arm/opassembler.py b/rpython/jit/backend/arm/opassembler.py
--- a/rpython/jit/backend/arm/opassembler.py
+++ b/rpython/jit/backend/arm/opassembler.py
@@ -35,9 +35,9 @@
 
 class ArmGuardToken(GuardToken):
     def __init__(self, cpu, gcmap, faildescr, failargs, fail_locs,
-                 offset, guard_opnum, frame_depth, fcond=c.AL):
+                 offset, guard_opnum, frame_depth, faildescrindex, fcond=c.AL):
         GuardToken.__init__(self, cpu, gcmap, faildescr, failargs, fail_locs,
-                            guard_opnum, frame_depth)
+                            guard_opnum, frame_depth, faildescrindex)
         self.fcond = fcond
         self.offset = offset
 
@@ -178,6 +178,7 @@
         assert isinstance(descr, AbstractFailDescr)
 
         gcmap = allocate_gcmap(self, frame_depth, JITFRAME_FIXED_SIZE)
+        faildescrindex = self.get_gcref_from_faildescr(descr)
         token = ArmGuardToken(self.cpu, gcmap,
                                     descr,
                                     failargs=op.getfailargs(),
@@ -185,6 +186,7 @@
                                     offset=offset,
                                     guard_opnum=op.getopnum(),
                                     frame_depth=frame_depth,
+                                    faildescrindex=faildescrindex,
                                     fcond=fcond)
         return token
 
@@ -398,14 +400,13 @@
 
     def emit_op_finish(self, op, arglocs, regalloc, fcond):
         base_ofs = self.cpu.get_baseofs_of_frame_field()
-        if len(arglocs) == 2:
-            [return_val, fail_descr_loc] = arglocs
+        if len(arglocs) > 0:
+            [return_val] = arglocs
             self.store_reg(self.mc, return_val, r.fp, base_ofs)
-        else:
-            [fail_descr_loc] = arglocs
         ofs = self.cpu.get_ofs_of_frame_field('jf_descr')
 
-        self.mc.gen_load_int(r.ip.value, fail_descr_loc.value)
+        faildescrindex = self.get_gcref_from_faildescr(op.getdescr())
+        self.load_from_gc_table(r.ip.value, faildescrindex)
         # XXX self.mov(fail_descr_loc, RawStackLoc(ofs))
         self.store_reg(self.mc, r.ip, r.fp, ofs, helper=r.lr)
         if op.numargs() > 0 and op.getarg(0).type == REF:
@@ -1035,9 +1036,9 @@
         assert (guard_op.getopnum() == rop.GUARD_NOT_FORCED or
                 guard_op.getopnum() == rop.GUARD_NOT_FORCED_2)
         faildescr = guard_op.getdescr()
+        faildescrindex = self.get_gcref_from_faildescr(faildescr)
         ofs = self.cpu.get_ofs_of_frame_field('jf_force_descr')
-        value = rffi.cast(lltype.Signed, cast_instance_to_gcref(faildescr))
-        self.mc.gen_load_int(r.ip.value, value)
+        self.load_from_gc_table(r.ip.value, faildescrindex)
         self.store_reg(self.mc, r.ip, r.fp, ofs)
 
     def _find_nearby_operation(self, delta):
@@ -1250,3 +1251,9 @@
         self._load_from_mem(res_loc, res_loc, ofs_loc, imm(scale), signed,
                             fcond)
         return fcond
+
+    def emit_op_load_from_gc_table(self, op, arglocs, regalloc, fcond):
+        res_loc, = arglocs
+        index = op.getarg(0).getint()
+        self.load_from_gc_table(res_loc.value, index)
+        return fcond
diff --git a/rpython/jit/backend/arm/regalloc.py b/rpython/jit/backend/arm/regalloc.py
--- a/rpython/jit/backend/arm/regalloc.py
+++ b/rpython/jit/backend/arm/regalloc.py
@@ -1,5 +1,4 @@
 from rpython.rtyper.annlowlevel import cast_instance_to_gcref
-from rpython.rlib import rgc
 from rpython.rlib.debug import debug_print, debug_start, debug_stop
 from rpython.jit.backend.llsupport.regalloc import FrameManager, \
         RegisterManager, TempVar, compute_vars_longevity, BaseRegalloc, \
@@ -627,16 +626,11 @@
     def prepare_op_finish(self, op, fcond):
         # the frame is in fp, but we have to point where in the frame is
         # the potential argument to FINISH
-        descr = op.getdescr()
-        fail_descr = cast_instance_to_gcref(descr)
-        # we know it does not move, but well
-        rgc._make_sure_does_not_move(fail_descr)
-        fail_descr = rffi.cast(lltype.Signed, fail_descr)
         if op.numargs() == 1:
             loc = self.make_sure_var_in_reg(op.getarg(0))
-            locs = [loc, imm(fail_descr)]
+            locs = [loc]
         else:
-            locs = [imm(fail_descr)]
+            locs = []
         return locs
 
     def load_condition_into_cc(self, box):
@@ -892,6 +886,10 @@
     prepare_op_same_as_r = _prepare_op_same_as
     prepare_op_same_as_f = _prepare_op_same_as
 
+    def prepare_op_load_from_gc_table(self, op, fcond):
+        resloc = self.force_allocate_reg(op)
+        return [resloc]
+
     def prepare_op_call_malloc_nursery(self, op, fcond):
         size_box = op.getarg(0)
         assert isinstance(size_box, ConstInt)
diff --git a/rpython/jit/backend/detect_cpu.py b/rpython/jit/backend/detect_cpu.py
--- a/rpython/jit/backend/detect_cpu.py
+++ b/rpython/jit/backend/detect_cpu.py
@@ -146,7 +146,7 @@
         MODEL_X86_64: ['floats', 'singlefloats'],
         MODEL_X86_64_SSE4: ['floats', 'singlefloats'],
         MODEL_ARM: ['floats', 'singlefloats', 'longlong'],
-        MODEL_PPC_64: [], # we don't even have PPC directory, so no
+        MODEL_PPC_64: ['floats'],
         MODEL_S390_64: ['floats'],
     }[backend_name]
 
diff --git a/rpython/jit/backend/llsupport/asmmemmgr.py b/rpython/jit/backend/llsupport/asmmemmgr.py
--- a/rpython/jit/backend/llsupport/asmmemmgr.py
+++ b/rpython/jit/backend/llsupport/asmmemmgr.py
@@ -216,9 +216,6 @@
 
     gcroot_markers = None
 
-    frame_positions = None
-    frame_assignments = None
-
     def __init__(self, translated=None):
         if translated is None:
             translated = we_are_translated()
@@ -335,12 +332,6 @@
             assert gcrootmap is not None
             for pos, mark in self.gcroot_markers:
                 gcrootmap.register_asm_addr(rawstart + pos, mark)
-        if cpu.HAS_CODEMAP:
-            cpu.codemap.register_frame_depth_map(rawstart, rawstart + size,
-                                                 self.frame_positions,
-                                                 self.frame_assignments)
-        self.frame_positions = None
-        self.frame_assignments = None
         return rawstart
 
     def _become_a_plain_block_builder(self):
diff --git a/rpython/jit/backend/llsupport/assembler.py b/rpython/jit/backend/llsupport/assembler.py
--- a/rpython/jit/backend/llsupport/assembler.py
+++ b/rpython/jit/backend/llsupport/assembler.py
@@ -23,10 +23,11 @@
 
 class GuardToken(object):
     def __init__(self, cpu, gcmap, faildescr, failargs, fail_locs,
-                 guard_opnum, frame_depth):
+                 guard_opnum, frame_depth, faildescrindex):
         assert isinstance(faildescr, AbstractFailDescr)
         self.cpu = cpu
         self.faildescr = faildescr
+        self.faildescrindex = faildescrindex
         self.failargs = failargs
         self.fail_locs = fail_locs
         self.gcmap = self.compute_gcmap(gcmap, failargs,
@@ -144,6 +145,22 @@
             self.codemap_builder = CodemapBuilder()
         self._finish_gcmap = lltype.nullptr(jitframe.GCMAP)
 
+    def setup_gcrefs_list(self, allgcrefs):
+        self._allgcrefs = allgcrefs
+        self._allgcrefs_faildescr_next = 0
+
+    def teardown_gcrefs_list(self):
+        self._allgcrefs = None
+
+    def get_gcref_from_faildescr(self, descr):
+        """This assumes that it is called in order for all faildescrs."""
+        search = cast_instance_to_gcref(descr)
+        while not _safe_eq(
+                self._allgcrefs[self._allgcrefs_faildescr_next], search):
+            self._allgcrefs_faildescr_next += 1
+            assert self._allgcrefs_faildescr_next < len(self._allgcrefs)
+        return self._allgcrefs_faildescr_next
+
     def set_debug(self, v):
         r = self._debug
         self._debug = v
@@ -186,8 +203,7 @@
                 break
         exc = guardtok.must_save_exception()
         target = self.failure_recovery_code[exc + 2 * withfloats]
-        fail_descr = cast_instance_to_gcref(guardtok.faildescr)
-        fail_descr = rffi.cast(lltype.Signed, fail_descr)
+        faildescrindex = guardtok.faildescrindex
         base_ofs = self.cpu.get_baseofs_of_frame_field()
         #
         # in practice, about 2/3rd of 'positions' lists that we build are
@@ -229,7 +245,7 @@
         self._previous_rd_locs = positions
         # write down the positions of locs
         guardtok.faildescr.rd_locs = positions
-        return fail_descr, target
+        return faildescrindex, target
 
     def enter_portal_frame(self, op):
         if self.cpu.HAS_CODEMAP:
@@ -288,7 +304,7 @@
 
         gcref = cast_instance_to_gcref(value)
         if gcref:
-            rgc._make_sure_does_not_move(gcref)
+            rgc._make_sure_does_not_move(gcref)    # but should be prebuilt
         value = rffi.cast(lltype.Signed, gcref)
         je_location = self._call_assembler_check_descr(value, tmploc)
         #
@@ -456,3 +472,8 @@
                     r_uint(rawstart + codeendpos)))
     debug_stop("jit-backend-addr")
 
+def _safe_eq(x, y):
+    try:
+        return x == y
+    except AttributeError:    # minor mess
+        return False
diff --git a/rpython/jit/backend/llsupport/codemap.py b/rpython/jit/backend/llsupport/codemap.py
--- a/rpython/jit/backend/llsupport/codemap.py
+++ b/rpython/jit/backend/llsupport/codemap.py
@@ -41,10 +41,6 @@
 RPY_EXTERN long pypy_yield_codemap_at_addr(void *codemap_raw, long addr,
                                            long *current_pos_addr);
 
-RPY_EXTERN long pypy_jit_depthmap_add(unsigned long addr, unsigned int size,
-                                      unsigned int stackdepth);
-RPY_EXTERN void pypy_jit_depthmap_clear(unsigned long addr, unsigned int size);
-
 """], separate_module_sources=[
     open(os.path.join(srcdir, 'skiplist.c'), 'r').read() +
     open(os.path.join(srcdir, 'codemap.c'), 'r').read()
@@ -64,15 +60,6 @@
 pypy_jit_codemap_firstkey = llexternal('pypy_jit_codemap_firstkey',
                                        [], lltype.Signed)
 
-pypy_jit_depthmap_add = llexternal('pypy_jit_depthmap_add',
-                                   [lltype.Signed, lltype.Signed,
-                                    lltype.Signed], lltype.Signed)
-pypy_jit_depthmap_clear = llexternal('pypy_jit_depthmap_clear',
-                                     [lltype.Signed, lltype.Signed],
-                                     lltype.Void)
-
-stack_depth_at_loc = llexternal('pypy_jit_stack_depth_at_loc',
-                                [lltype.Signed], lltype.Signed)
 find_codemap_at_addr = llexternal('pypy_find_codemap_at_addr',
                                   [lltype.Signed, rffi.CArrayPtr(lltype.Signed)],
                                  llmemory.Address)
@@ -102,20 +89,6 @@
         items = pypy_jit_codemap_del(start, stop - start)
         if items:
             lltype.free(items, flavor='raw', track_allocation=False)
-        pypy_jit_depthmap_clear(start, stop - start)
-
-    def register_frame_depth_map(self, rawstart, rawstop, frame_positions,
-                                 frame_assignments):
-        if not frame_positions:
-            return
-        assert len(frame_positions) == len(frame_assignments)
-        for i in range(len(frame_positions)-1, -1, -1):
-            pos = rawstart + frame_positions[i]
-            length = rawstop - pos
-            if length > 0:
-                #print "ADD:", pos, length, frame_assignments[i]
-                pypy_jit_depthmap_add(pos, length, frame_assignments[i])
-            rawstop = pos
 
     def register_codemap(self, (start, size, l)):
         items = lltype.malloc(INT_LIST_PTR.TO, len(l), flavor='raw',
diff --git a/rpython/jit/backend/llsupport/gc.py b/rpython/jit/backend/llsupport/gc.py
--- a/rpython/jit/backend/llsupport/gc.py
+++ b/rpython/jit/backend/llsupport/gc.py
@@ -22,38 +22,6 @@
 from rpython.memory.gctransform import asmgcroot
 from rpython.jit.codewriter.effectinfo import EffectInfo
 
-class MovableObjectTracker(object):
-
-    ptr_array_type = lltype.GcArray(llmemory.GCREF)
-    ptr_array_gcref = lltype.nullptr(llmemory.GCREF.TO)
-
-    def __init__(self, cpu, const_pointers):
-        size = len(const_pointers)
-        # check that there are any moving object (i.e. chaning pointers).
-        # Otherwise there is no reason for an instance of this class.
-        assert size > 0
-        #
-        # prepare GC array to hold the pointers that may change
-        self.ptr_array = lltype.malloc(MovableObjectTracker.ptr_array_type, size)
-        self.ptr_array_descr = cpu.arraydescrof(MovableObjectTracker.ptr_array_type)
-        self.ptr_array_gcref = lltype.cast_opaque_ptr(llmemory.GCREF, self.ptr_array)
-        # use always the same ConstPtr to access the array
-        # (easer to read JIT trace)
-        self.const_ptr_gcref_array = ConstPtr(self.ptr_array_gcref)
-        #
-        # assign each pointer an index and put the pointer into the GC array.
-        # as pointers and addresses are not a good key to use before translation
-        # ConstPtrs are used as the key for the dict.
-        self._indexes = {}
-        for index in range(size):
-            ptr = const_pointers[index]
-            self._indexes[ptr] = index
-            self.ptr_array[index] = ptr.value
-
-    def get_array_index(self, const_ptr):
-        index = self._indexes[const_ptr]
-        assert const_ptr.value == self.ptr_array[index]
-        return index
 # ____________________________________________________________
 
 class GcLLDescription(GcCache):
@@ -129,96 +97,9 @@
     def gc_malloc_unicode(self, num_elem):
         return self._bh_malloc_array(num_elem, self.unicode_descr)
 
-    def _record_constptrs(self, op, gcrefs_output_list,
-                          ops_with_movable_const_ptr,
-                          changeable_const_pointers):
-        l = None
-        for i in range(op.numargs()):
-            v = op.getarg(i)
-            if isinstance(v, ConstPtr) and bool(v.value):
-                p = v.value
-                if rgc._make_sure_does_not_move(p):
-                    gcrefs_output_list.append(p)
-                else:
-                    if l is None:
-                        l = [i]
-                    else:
-                        l.append(i)
-                    if v not in changeable_const_pointers:
-                        changeable_const_pointers.append(v)
-        #
-        if op.is_guard() or op.getopnum() == rop.FINISH:
-            llref = cast_instance_to_gcref(op.getdescr())
-            assert rgc._make_sure_does_not_move(llref)
-            gcrefs_output_list.append(llref)
-        #
-        if l:
-            ops_with_movable_const_ptr[op] = l
-
-    def _rewrite_changeable_constptrs(self, op, ops_with_movable_const_ptr, moving_obj_tracker):
-        newops = []
-        for arg_i in ops_with_movable_const_ptr[op]:
-            v = op.getarg(arg_i)
-            # assert to make sure we got what we expected
-            assert isinstance(v, ConstPtr)
-            array_index = moving_obj_tracker.get_array_index(v)
-
-            size, offset, _ = unpack_arraydescr(moving_obj_tracker.ptr_array_descr)
-            array_index = array_index * size + offset
-            args = [moving_obj_tracker.const_ptr_gcref_array,
-                    ConstInt(array_index),
-                    ConstInt(size)]
-            load_op = ResOperation(rop.GC_LOAD_R, args)
-            newops.append(load_op)
-            op.setarg(arg_i, load_op)
-        #
-        newops.append(op)
-        return newops
-
     def rewrite_assembler(self, cpu, operations, gcrefs_output_list):
         rewriter = GcRewriterAssembler(self, cpu)
-        newops = rewriter.rewrite(operations)
-
-        # the key is an operation that contains a ConstPtr as an argument and
-        # this ConstPtrs pointer might change as it points to an object that
-        # can't be made non-moving (e.g. the object is pinned).
-        ops_with_movable_const_ptr = {}
-        #
-        # a list of such not really constant ConstPtrs.
-        changeable_const_pointers = []
-        for op in newops:
-            # record all GCREFs, because the GC (or Boehm) cannot see them and
-            # keep them alive if they end up as constants in the assembler.
-            # If such a GCREF can change and we can't make the object it points
-            # to non-movable, we have to handle it seperatly. Such GCREF's are
-            # returned as ConstPtrs in 'changeable_const_pointers' and the
-            # affected operation is returned in 'op_with_movable_const_ptr'.
-            # For this special case see 'rewrite_changeable_constptrs'.
-            self._record_constptrs(op, gcrefs_output_list,
-                    ops_with_movable_const_ptr, changeable_const_pointers)
-        #
-        # handle pointers that are not guaranteed to stay the same
-        if len(ops_with_movable_const_ptr) > 0:
-            moving_obj_tracker = MovableObjectTracker(cpu, changeable_const_pointers)
-            #
-            if not we_are_translated():
-                # used for testing
-                self.last_moving_obj_tracker = moving_obj_tracker
-            # make sure the array containing the pointers is not collected by
-            # the GC (or Boehm)
-            gcrefs_output_list.append(moving_obj_tracker.ptr_array_gcref)
-            rgc._make_sure_does_not_move(moving_obj_tracker.ptr_array_gcref)
-
-            ops = newops
-            newops = []
-            for op in ops:
-                if op in ops_with_movable_const_ptr:
-                    rewritten_ops = self._rewrite_changeable_constptrs(op,
-                            ops_with_movable_const_ptr, moving_obj_tracker)
-                    newops.extend(rewritten_ops)
-                else:
-                    newops.append(op)
-        #
+        newops = rewriter.rewrite(operations, gcrefs_output_list)
         return newops
 
     @specialize.memo()
@@ -244,6 +125,14 @@
         """
         return jitframe.JITFRAME.allocate(frame_info)
 
+    def make_gcref_tracer(self, array_base_addr, gcrefs):
+        # for tests, or for Boehm.  Overridden for framework GCs
+        from rpython.jit.backend.llsupport import gcreftracer
+        return gcreftracer.make_boehm_tracer(array_base_addr, gcrefs)
+
+    def clear_gcref_tracer(self, tracer):
+        pass    # nothing needed unless overridden
+
 class JitFrameDescrs:
     def _freeze_(self):
         return True
@@ -752,6 +641,13 @@
         p = rffi.cast(rffi.CCHARP, p)
         return (ord(p[0]) & IS_OBJECT_FLAG) != 0
 
+    def make_gcref_tracer(self, array_base_addr, gcrefs):
+        from rpython.jit.backend.llsupport import gcreftracer
+        return gcreftracer.make_framework_tracer(array_base_addr, gcrefs)
+
+    def clear_gcref_tracer(self, tracer):
+        tracer.array_length = 0
+
 # ____________________________________________________________
 
 def get_ll_description(gcdescr, translator=None, rtyper=None):
diff --git a/rpython/jit/backend/llsupport/gcreftracer.py b/rpython/jit/backend/llsupport/gcreftracer.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/backend/llsupport/gcreftracer.py
@@ -0,0 +1,49 @@
+from rpython.rlib import rgc
+from rpython.rtyper.lltypesystem import lltype, llmemory, rffi
+from rpython.rtyper.lltypesystem.lloperation import llop
+from rpython.jit.backend.llsupport.symbolic import WORD
+
+
+GCREFTRACER = lltype.GcStruct(
+    'GCREFTRACER',
+    ('array_base_addr', lltype.Signed),
+    ('array_length', lltype.Signed),
+    rtti=True)
+
+def gcrefs_trace(gc, obj_addr, callback, arg):
+    obj = llmemory.cast_adr_to_ptr(obj_addr, lltype.Ptr(GCREFTRACER))
+    i = 0
+    length = obj.array_length
+    addr = obj.array_base_addr
+    while i < length:
+        p = rffi.cast(llmemory.Address, addr + i * WORD)
+        gc._trace_callback(callback, arg, p)
+        i += 1
+lambda_gcrefs_trace = lambda: gcrefs_trace
+
+def make_framework_tracer(array_base_addr, gcrefs):
+    # careful about the order here: the allocation of the GCREFTRACER
+    # can trigger a GC.  So we must write the gcrefs into the raw
+    # array only afterwards...
+    rgc.register_custom_trace_hook(GCREFTRACER, lambda_gcrefs_trace)
+    length = len(gcrefs)
+    tr = lltype.malloc(GCREFTRACER)
+    # --no GC from here--
+    tr.array_base_addr = array_base_addr
+    tr.array_length = length
+    i = 0
+    while i < length:
+        p = rffi.cast(rffi.SIGNEDP, array_base_addr + i * WORD)
+        p[0] = rffi.cast(lltype.Signed, gcrefs[i])
+        i += 1
+    llop.gc_writebarrier(lltype.Void, tr)
+    # --no GC until here--
+    return tr
+
+def make_boehm_tracer(array_base_addr, gcrefs):
+    # copy the addresses, but return 'gcrefs' as the object that must be
+    # kept alive
+    for i in range(len(gcrefs)):
+        p = rffi.cast(rffi.SIGNEDP, array_base_addr + i * WORD)
+        p[0] = rffi.cast(lltype.Signed, gcrefs[i])
+    return gcrefs
diff --git a/rpython/jit/backend/llsupport/llmodel.py b/rpython/jit/backend/llsupport/llmodel.py
--- a/rpython/jit/backend/llsupport/llmodel.py
+++ b/rpython/jit/backend/llsupport/llmodel.py
@@ -246,6 +246,13 @@
 
     def free_loop_and_bridges(self, compiled_loop_token):
         AbstractCPU.free_loop_and_bridges(self, compiled_loop_token)
+        # turn off all gcreftracers
+        tracers = compiled_loop_token.asmmemmgr_gcreftracers
+        if tracers is not None:
+            compiled_loop_token.asmmemmgr_gcreftracers = None
+            for tracer in tracers:
+                self.gc_ll_descr.clear_gcref_tracer(tracer)
+        # then free all blocks of code and raw data
         blocks = compiled_loop_token.asmmemmgr_blocks
         if blocks is not None:
             compiled_loop_token.asmmemmgr_blocks = None
diff --git a/rpython/jit/backend/llsupport/rewrite.py b/rpython/jit/backend/llsupport/rewrite.py
--- a/rpython/jit/backend/llsupport/rewrite.py
+++ b/rpython/jit/backend/llsupport/rewrite.py
@@ -1,10 +1,12 @@
 from rpython.rlib import rgc
-from rpython.rlib.objectmodel import we_are_translated
+from rpython.rlib.objectmodel import we_are_translated, r_dict
 from rpython.rlib.rarithmetic import ovfcheck, highest_bit
 from rpython.rtyper.lltypesystem import llmemory, lltype, rstr
+from rpython.rtyper.annlowlevel import cast_instance_to_gcref
 from rpython.jit.metainterp import history
 from rpython.jit.metainterp.history import ConstInt, ConstPtr
 from rpython.jit.metainterp.resoperation import ResOperation, rop, OpHelpers
+from rpython.jit.metainterp.typesystem import rd_eq, rd_hash
 from rpython.jit.codewriter import heaptracker
 from rpython.jit.backend.llsupport.symbolic import (WORD,
         get_array_token)
@@ -94,21 +96,28 @@
         op = self.get_box_replacement(op)
         orig_op = op
         replaced = False
+        opnum = op.getopnum()
+        keep = (opnum == rop.JIT_DEBUG)
         for i in range(op.numargs()):
             orig_arg = op.getarg(i)
             arg = self.get_box_replacement(orig_arg)
+            if isinstance(arg, ConstPtr) and bool(arg.value) and not keep:
+                arg = self.remove_constptr(arg)
             if orig_arg is not arg:
                 if not replaced:
-                    op = op.copy_and_change(op.getopnum())
+                    op = op.copy_and_change(opnum)
                     orig_op.set_forwarded(op)
                     replaced = True
                 op.setarg(i, arg)
-        if rop.is_guard(op.opnum):
+        if rop.is_guard(opnum):
             if not replaced:
-                op = op.copy_and_change(op.getopnum())
+                op = op.copy_and_change(opnum)
                 orig_op.set_forwarded(op)
             op.setfailargs([self.get_box_replacement(a, True)
                             for a in op.getfailargs()])
+        if rop.is_guard(opnum) or opnum == rop.FINISH:
+            llref = cast_instance_to_gcref(op.getdescr())
+            self.gcrefs_output_list.append(llref)
         self._newops.append(op)
 
     def replace_op_with(self, op, newop):
@@ -304,13 +313,16 @@
         return False
 
 
-    def rewrite(self, operations):
+    def rewrite(self, operations, gcrefs_output_list):
         # we can only remember one malloc since the next malloc can possibly
         # collect; but we can try to collapse several known-size mallocs into
         # one, both for performance and to reduce the number of write
         # barriers.  We do this on each "basic block" of operations, which in
         # this case means between CALLs or unknown-size mallocs.
         #
+        self.gcrefs_output_list = gcrefs_output_list
+        self.gcrefs_map = None
+        self.gcrefs_recently_loaded = None
         operations = self.remove_bridge_exception(operations)
         self._changed_op = None
         for i in range(len(operations)):
@@ -333,8 +345,7 @@
             elif rop.can_malloc(op.opnum):
                 self.emitting_an_operation_that_can_collect()
             elif op.getopnum() == rop.LABEL:
-                self.emitting_an_operation_that_can_collect()
-                self._known_lengths.clear()
+                self.emit_label()
             # ---------- write barriers ----------
             if self.gc_ll_descr.write_barrier_descr is not None:
                 if op.getopnum() == rop.SETFIELD_GC:
@@ -940,3 +951,37 @@
                 operations[start+2].getopnum() == rop.RESTORE_EXCEPTION):
                 return operations[:start] + operations[start+3:]
         return operations
+
+    def emit_label(self):
+        self.emitting_an_operation_that_can_collect()
+        self._known_lengths.clear()
+        self.gcrefs_recently_loaded = None
+
+    def _gcref_index(self, gcref):
+        if self.gcrefs_map is None:
+            self.gcrefs_map = r_dict(rd_eq, rd_hash)
+        try:
+            return self.gcrefs_map[gcref]
+        except KeyError:
+            pass
+        index = len(self.gcrefs_output_list)
+        self.gcrefs_map[gcref] = index
+        self.gcrefs_output_list.append(gcref)
+        return index
+
+    def remove_constptr(self, c):
+        """Remove all ConstPtrs, and replace them with load_from_gc_table.
+        """
+        # Note: currently, gcrefs_recently_loaded is only cleared in
+        # LABELs.  We'd like something better, like "don't spill it",
+        # but that's the wrong level...
+        index = self._gcref_index(c.value)
+        if self.gcrefs_recently_loaded is None:
+            self.gcrefs_recently_loaded = {}
+        try:
+            load_op = self.gcrefs_recently_loaded[index]
+        except KeyError:
+            load_op = ResOperation(rop.LOAD_FROM_GC_TABLE, [ConstInt(index)])
+            self._newops.append(load_op)
+            self.gcrefs_recently_loaded[index] = load_op
+        return load_op
diff --git a/rpython/jit/backend/llsupport/src/codemap.c b/rpython/jit/backend/llsupport/src/codemap.c
--- a/rpython/jit/backend/llsupport/src/codemap.c
+++ b/rpython/jit/backend/llsupport/src/codemap.c
@@ -139,78 +139,3 @@
         current_pos = data->bytecode_info[current_pos + 3];
     }
 }
-
-/************************************************************/
-/***  depthmap storage                                    ***/
-/************************************************************/
-
-typedef struct {
-    unsigned int block_size;
-    unsigned int stack_depth;
-} depthmap_data_t;
-
-static skipnode_t jit_depthmap_head;
-
-/*** interface used from codemap.py ***/
-
-RPY_EXTERN
-long pypy_jit_depthmap_add(unsigned long addr, unsigned int size,
-                           unsigned int stackdepth)
-{
-    skipnode_t *new = skiplist_malloc(sizeof(depthmap_data_t));
-    depthmap_data_t *data;
-    if (new == NULL)
-        return -1;   /* too bad */
-
-    new->key = addr;
-    data = (depthmap_data_t *)new->data;
-    data->block_size = size;
-    data->stack_depth = stackdepth;
-
-    pypy_codemap_invalid_set(1);
-    skiplist_insert(&jit_depthmap_head, new);
-    pypy_codemap_invalid_set(0);
-    return 0;
-}
-
-RPY_EXTERN
-void pypy_jit_depthmap_clear(unsigned long addr, unsigned int size)
-{
-    unsigned long search_key = addr + size - 1;
-    if (size == 0)
-        return;
-
-    pypy_codemap_invalid_set(1);
-    while (1) {
-        /* search for all nodes belonging to the range, and remove them */
-        skipnode_t *node = skiplist_search(&jit_depthmap_head, search_key);
-        if (node->key < addr)
-            break;   /* exhausted */
-        skiplist_remove(&jit_depthmap_head, node->key);
-        free(node);
-    }
-    pypy_codemap_invalid_set(0);
-}
-
-/*** interface used from pypy/module/_vmprof ***/
-
-RPY_EXTERN
-long pypy_jit_stack_depth_at_loc(long loc)
-{
-    skipnode_t *depthmap = skiplist_search(&jit_depthmap_head,
-                                           (unsigned long)loc);
-    depthmap_data_t *data;
-    unsigned long rel_addr;
-
-    if (depthmap == &jit_depthmap_head)
-        return -1;
-
-    rel_addr = (unsigned long)loc - depthmap->key;
-    data = (depthmap_data_t *)depthmap->data;
-    if (rel_addr >= data->block_size)
-        return -1;
-
-    return data->stack_depth;
-}
-
-/************************************************************/
diff --git a/rpython/jit/backend/llsupport/test/test_codemap.py b/rpython/jit/backend/llsupport/test/test_codemap.py
--- a/rpython/jit/backend/llsupport/test/test_codemap.py
+++ b/rpython/jit/backend/llsupport/test/test_codemap.py
@@ -1,6 +1,5 @@
 
 from rpython.rtyper.lltypesystem import rffi, lltype
-from rpython.jit.backend.llsupport.codemap import stack_depth_at_loc
 from rpython.jit.backend.llsupport.codemap import CodemapStorage, \
      CodemapBuilder, unpack_traceback, find_codemap_at_addr
 
@@ -27,34 +26,6 @@
     #
     codemap.free()
 
-def test_find_jit_frame_depth():
-    codemap = CodemapStorage()
-    codemap.setup()
-    codemap.register_frame_depth_map(11, 26, [0, 5, 10], [1, 2, 3])
-    codemap.register_frame_depth_map(30, 41, [0, 5, 10], [4, 5, 6])
-    codemap.register_frame_depth_map(0, 11, [0, 5, 10], [7, 8, 9])
-    assert stack_depth_at_loc(13) == 1
-    assert stack_depth_at_loc(-3) == -1
-    assert stack_depth_at_loc(40) == 6
-    assert stack_depth_at_loc(41) == -1
-    assert stack_depth_at_loc(5) == 8
-    assert stack_depth_at_loc(17) == 2
-    assert stack_depth_at_loc(38) == 5
-    assert stack_depth_at_loc(25) == 3
-    assert stack_depth_at_loc(26) == -1
-    assert stack_depth_at_loc(11) == 1
-    assert stack_depth_at_loc(10) == 9
-    codemap.free_asm_block(11, 26)
-    assert stack_depth_at_loc(11) == -1
-    assert stack_depth_at_loc(13) == -1
-    assert stack_depth_at_loc(-3) == -1
-    assert stack_depth_at_loc(40) == 6
-    assert stack_depth_at_loc(41) == -1
-    assert stack_depth_at_loc(5) == 8
-    assert stack_depth_at_loc(38) == 5
-    assert stack_depth_at_loc(10) == 9
-    codemap.free()
-
 def test_free_with_alignment():
     codemap = CodemapStorage()
     codemap.setup()
diff --git a/rpython/jit/backend/llsupport/test/test_gc.py b/rpython/jit/backend/llsupport/test/test_gc.py
--- a/rpython/jit/backend/llsupport/test/test_gc.py
+++ b/rpython/jit/backend/llsupport/test/test_gc.py
@@ -196,31 +196,6 @@
         assert is_valid_int(wbdescr.jit_wb_if_flag_byteofs)
         assert is_valid_int(wbdescr.jit_wb_if_flag_singlebyte)
 
-    def test_record_constptrs(self):
-        class MyFakeCPU(object):
-            def cast_adr_to_int(self, adr):
-                assert adr == "some fake address"
-                return 43
-        class MyFakeGCRefList(object):
-            def get_address_of_gcref(self, s_gcref1):
-                assert s_gcref1 == s_gcref
-                return "some fake address"
-        S = lltype.GcStruct('S')
-        s = lltype.malloc(S)
-        s_gcref = lltype.cast_opaque_ptr(llmemory.GCREF, s)
-        v_random_box = InputArgRef()
-        operations = [
-            ResOperation(rop.PTR_EQ, [v_random_box, ConstPtr(s_gcref)]),
-            ]
-        gc_ll_descr = self.gc_ll_descr
-        gc_ll_descr.gcrefs = MyFakeGCRefList()
-        gcrefs = []
-        operations = get_deep_immutable_oplist(operations)
-        operations2 = gc_ll_descr.rewrite_assembler(MyFakeCPU(), operations,
-                                                   gcrefs)
-        assert operations2 == operations
-        assert gcrefs == [s_gcref]
-
 
 class TestFrameworkMiniMark(TestFramework):
     gc = 'minimark'
diff --git a/rpython/jit/backend/llsupport/test/test_gcreftracer.py b/rpython/jit/backend/llsupport/test/test_gcreftracer.py
new file mode 100644
--- /dev/null
+++ b/rpython/jit/backend/llsupport/test/test_gcreftracer.py
@@ -0,0 +1,53 @@
+from rpython.rtyper.lltypesystem import lltype, llmemory, rffi
+from rpython.jit.backend.llsupport.gcreftracer import GCREFTRACER, gcrefs_trace
+from rpython.jit.backend.llsupport.gcreftracer import make_framework_tracer
+from rpython.jit.backend.llsupport.gcreftracer import make_boehm_tracer
+
+
+class FakeGC:
+    def __init__(self):
+        self.called = []
+    def _trace_callback(self, callback, arg, addr):
+        assert callback == "callback"
+        assert arg == "arg"
+        assert lltype.typeOf(addr) == llmemory.Address
+        self.called.append(addr)
+
+
+def test_gcreftracer():
+    a = lltype.malloc(rffi.CArray(lltype.Signed), 3, flavor='raw')
+    a[0] = 123
+    a[1] = 456
+    a[2] = 789
+    tr = lltype.malloc(GCREFTRACER)
+    tr.array_base_addr = base = rffi.cast(lltype.Signed, a)
+    tr.array_length = 3
+    gc = FakeGC()
+    gcrefs_trace(gc, llmemory.cast_ptr_to_adr(tr), "callback", "arg")
+    assert len(gc.called) == 3
+    WORD = rffi.sizeof(lltype.Signed)
+    for i in range(3):
+        assert gc.called[i] == rffi.cast(llmemory.Address, base + i * WORD)
+    lltype.free(a, flavor='raw')
+
+def test_make_framework_tracer():
+    a = lltype.malloc(rffi.CArray(lltype.Signed), 3, flavor='raw')
+    base = rffi.cast(lltype.Signed, a)
+    tr = make_framework_tracer(base, [123, 456, 789])
+    assert a[0] == 123
+    assert a[1] == 456
+    assert a[2] == 789
+    assert tr.array_base_addr == base
+    assert tr.array_length == 3
+    lltype.free(a, flavor='raw')
+
+def test_make_boehm_tracer():
+    a = lltype.malloc(rffi.CArray(lltype.Signed), 3, flavor='raw')
+    base = rffi.cast(lltype.Signed, a)
+    lst = [123, 456, 789]
+    tr = make_boehm_tracer(base, lst)
+    assert a[0] == 123
+    assert a[1] == 456
+    assert a[2] == 789
+    assert tr is lst
+    lltype.free(a, flavor='raw')
diff --git a/rpython/jit/backend/llsupport/test/test_pinned_object_rewrite.py b/rpython/jit/backend/llsupport/test/test_pinned_object_rewrite.py
deleted file mode 100644
--- a/rpython/jit/backend/llsupport/test/test_pinned_object_rewrite.py
+++ /dev/null
@@ -1,149 +0,0 @@
-from test_rewrite import get_size_descr, get_array_descr, get_description, BaseFakeCPU
-from rpython.jit.backend.llsupport.descr import get_size_descr,\
-     get_field_descr, get_array_descr, ArrayDescr, FieldDescr,\
-     SizeDescr, get_interiorfield_descr
-from rpython.jit.backend.llsupport.gc import GcLLDescr_boehm,\
-     GcLLDescr_framework, MovableObjectTracker
-from rpython.jit.backend.llsupport import jitframe, gc
-from rpython.jit.metainterp.gc import get_description
-from rpython.jit.tool.oparser import parse
-from rpython.jit.metainterp.optimizeopt.util import equaloplists
-from rpython.jit.metainterp.history import JitCellToken, FLOAT
-from rpython.rtyper.lltypesystem import lltype, rffi, lltype, llmemory
-from rpython.rtyper import rclass
-from rpython.jit.backend.x86.arch import WORD
-from rpython.rlib import rgc
-
-class Evaluator(object):
-    def __init__(self, scope):
-        self.scope = scope
-    def __getitem__(self, key):
-        return eval(key, self.scope)
-
-
-class FakeLoopToken(object):
-    pass
-
-# The following class is based on rpython.jit.backend.llsupport.test.test_rewrite.RewriteTests.
-# It's modified to be able to test the object pinning specific features.
-class RewriteTests(object):
-    def check_rewrite(self, frm_operations, to_operations, **namespace):
-        # objects to use inside the test
-        A = lltype.GcArray(lltype.Signed)
-        adescr = get_array_descr(self.gc_ll_descr, A)
-        adescr.tid = 4321
-        alendescr = adescr.lendescr
-        #
-        pinned_obj_type = lltype.GcStruct('PINNED_STRUCT', ('my_int', lltype.Signed))
-        pinned_obj_my_int_descr = get_field_descr(self.gc_ll_descr, pinned_obj_type, 'my_int')
-        pinned_obj_ptr = lltype.malloc(pinned_obj_type)
-        pinned_obj_gcref = lltype.cast_opaque_ptr(llmemory.GCREF, pinned_obj_ptr)
-        assert rgc.pin(pinned_obj_gcref)
-        #
-        notpinned_obj_type = lltype.GcStruct('NOT_PINNED_STRUCT', ('my_int', lltype.Signed))
-        notpinned_obj_my_int_descr = get_field_descr(self.gc_ll_descr, notpinned_obj_type, 'my_int')
-        notpinned_obj_ptr = lltype.malloc(notpinned_obj_type)
-        notpinned_obj_gcref = lltype.cast_opaque_ptr(llmemory.GCREF, notpinned_obj_ptr)
-        #
-        ptr_array_descr = self.cpu.arraydescrof(MovableObjectTracker.ptr_array_type)
-        #
-        vtable_descr = self.gc_ll_descr.fielddescr_vtable
-        O = lltype.GcStruct('O', ('parent', rclass.OBJECT),
-                                 ('x', lltype.Signed))
-        o_vtable = lltype.malloc(rclass.OBJECT_VTABLE, immortal=True)
-        #
-        tiddescr = self.gc_ll_descr.fielddescr_tid
-        wbdescr = self.gc_ll_descr.write_barrier_descr
-        WORD = globals()['WORD']
-        #
-        strdescr     = self.gc_ll_descr.str_descr
-        unicodedescr = self.gc_ll_descr.unicode_descr
-        strlendescr     = strdescr.lendescr
-        unicodelendescr = unicodedescr.lendescr
-
-        casmdescr = JitCellToken()
-        clt = FakeLoopToken()
-        clt._ll_initial_locs = [0, 8]
-        frame_info = lltype.malloc(jitframe.JITFRAMEINFO, flavor='raw')
-        clt.frame_info = frame_info
-        frame_info.jfi_frame_depth = 13
-        frame_info.jfi_frame_size = 255
-        framedescrs = self.gc_ll_descr.getframedescrs(self.cpu)
-        framelendescr = framedescrs.arraydescr.lendescr
-        jfi_frame_depth = framedescrs.jfi_frame_depth
-        jfi_frame_size = framedescrs.jfi_frame_size
-        jf_frame_info = framedescrs.jf_frame_info
-        signedframedescr = self.cpu.signedframedescr
-        floatframedescr = self.cpu.floatframedescr
-        casmdescr.compiled_loop_token = clt
-        tzdescr = None # noone cares
-        #
-        namespace.update(locals())
-        #
-        for funcname in self.gc_ll_descr._generated_functions:
-            namespace[funcname] = self.gc_ll_descr.get_malloc_fn(funcname)
-            namespace[funcname + '_descr'] = getattr(self.gc_ll_descr,
-                                                     '%s_descr' % funcname)
-        #
-        ops = parse(frm_operations, namespace=namespace)
-        operations = self.gc_ll_descr.rewrite_assembler(self.cpu,
-                                                        ops.operations,
-                                                        [])
-        # make the array containing the GCREF's accessible inside the tests.
-        # This must be done after we call 'rewrite_assembler'. Before that
-        # call 'last_moving_obj_tracker' is None or filled with some old
-        # value.
-        namespace['ptr_array_gcref'] = self.gc_ll_descr.last_moving_obj_tracker.ptr_array_gcref
-        expected = parse(to_operations % Evaluator(namespace),
-                         namespace=namespace)
-        equaloplists(operations, expected.operations)
-        lltype.free(frame_info, flavor='raw')
-
-class TestFramework(RewriteTests):
-    def setup_method(self, meth):
-        class config_(object):
-            class translation(object):
-                gc = 'minimark'
-                gcrootfinder = 'asmgcc'
-                gctransformer = 'framework'
-                gcremovetypeptr = False
-        gcdescr = get_description(config_)
-        self.gc_ll_descr = GcLLDescr_framework(gcdescr, None, None, None,
-                                               really_not_translated=True)
-        self.gc_ll_descr.write_barrier_descr.has_write_barrier_from_array = (
-            lambda cpu: True)
-        #
-        class FakeCPU(BaseFakeCPU):
-            def sizeof(self, STRUCT, is_object):
-                descr = SizeDescr(104)
-                descr.tid = 9315
-                descr.vtable = 12
-                return descr
-        self.cpu = FakeCPU()
-
-    def test_simple_getfield(self):
-        self.check_rewrite("""
-            []
-            i0 = getfield_gc_i(ConstPtr(pinned_obj_gcref), descr=pinned_obj_my_int_descr)
-            """, """
-            []
-            p1 = gc_load_r(ConstPtr(ptr_array_gcref), %(0 * ptr_array_descr.itemsize + 1)s, %(ptr_array_descr.itemsize)s)
-            i0 = gc_load_i(p1, 0, -%(pinned_obj_my_int_descr.field_size)s)
-            """)
-        assert len(self.gc_ll_descr.last_moving_obj_tracker._indexes) == 1
-
-    def test_simple_getfield_twice(self):
-        self.check_rewrite("""
-            []
-            i0 = getfield_gc_i(ConstPtr(pinned_obj_gcref), descr=pinned_obj_my_int_descr)
-            i1 = getfield_gc_i(ConstPtr(notpinned_obj_gcref), descr=notpinned_obj_my_int_descr)
-            i2 = getfield_gc_i(ConstPtr(pinned_obj_gcref), descr=pinned_obj_my_int_descr)
-            """, """
-            []
-            p1 = gc_load_r(ConstPtr(ptr_array_gcref), %(0 * ptr_array_descr.itemsize + 1)s, %(ptr_array_descr.itemsize)s)
-            i0 = gc_load_i(p1, 0, -%(pinned_obj_my_int_descr.field_size)s)
-            i1 = gc_load_i(ConstPtr(notpinned_obj_gcref), 0, -%(notpinned_obj_my_int_descr.field_size)s)
-            p2 = gc_load_r(ConstPtr(ptr_array_gcref), %(1 * ptr_array_descr.itemsize + 1)s, %(ptr_array_descr.itemsize)s)
-            i2 = gc_load_i(p2, 0, -%(pinned_obj_my_int_descr.field_size)s)
-            """)
-        assert len(self.gc_ll_descr.last_moving_obj_tracker._indexes) == 2
diff --git a/rpython/jit/backend/llsupport/test/test_rewrite.py 
b/rpython/jit/backend/llsupport/test/test_rewrite.py
--- a/rpython/jit/backend/llsupport/test/test_rewrite.py
+++ b/rpython/jit/backend/llsupport/test/test_rewrite.py
@@ -10,9 +10,8 @@
 from rpython.jit.metainterp.optimizeopt.util import equaloplists
 from rpython.jit.metainterp.history import JitCellToken, FLOAT
 from rpython.jit.metainterp.history import AbstractFailDescr
-from rpython.rtyper.lltypesystem import lltype, rffi
+from rpython.rtyper.lltypesystem import lltype, llmemory, rffi
 from rpython.rtyper import rclass
-from rpython.jit.backend.x86.arch import WORD
 from rpython.jit.backend.llsupport.symbolic import (WORD,
         get_array_token)
 
@@ -77,6 +76,9 @@
         tdescr = get_size_descr(self.gc_ll_descr, T)
         tdescr.tid = 5678
         tzdescr = get_field_descr(self.gc_ll_descr, T, 'z')
+        myT = lltype.cast_opaque_ptr(llmemory.GCREF,
+                                     lltype.malloc(T, zero=True))
+        self.myT = myT
         #
         A = lltype.GcArray(lltype.Signed)
         adescr = get_array_descr(self.gc_ll_descr, A)
@@ -112,6 +114,12 @@
         xdescr = get_field_descr(self.gc_ll_descr, R1, 'x')
         ydescr = get_field_descr(self.gc_ll_descr, R1, 'y')
         zdescr = get_field_descr(self.gc_ll_descr, R1, 'z')
+        myR1 = lltype.cast_opaque_ptr(llmemory.GCREF,
+                                      lltype.malloc(R1, zero=True))
+        myR1b = lltype.cast_opaque_ptr(llmemory.GCREF,
+                                       lltype.malloc(R1, zero=True))
+        self.myR1 = myR1
+        self.myR1b = myR1b
         #
         E = lltype.GcStruct('Empty')
         edescr = get_size_descr(self.gc_ll_descr, E)
@@ -174,9 +182,10 @@
         ops = parse(frm_operations, namespace=namespace)
         expected = parse(to_operations % Evaluator(namespace),
                          namespace=namespace)
+        self.gcrefs = []
         operations = self.gc_ll_descr.rewrite_assembler(self.cpu,
                                                         ops.operations,
-                                                        [])
+                                                        self.gcrefs)
         remap = {}
         for a, b in zip(ops.inputargs, expected.inputargs):
             remap[b] = a
@@ -1281,3 +1290,124 @@
                 {t}
                 jump()
             """.format(**locals()))
+
+    def test_load_from_gc_table_1i(self):
+        self.check_rewrite("""
+            [i1]
+            setfield_gc(ConstPtr(myR1), i1, descr=xdescr)
+            jump()
+        """, """
+            [i1]
+            p0 = load_from_gc_table(0)
+            gc_store(p0, %(xdescr.offset)s, i1, %(xdescr.field_size)s)
+            jump()
+        """)
+        assert self.gcrefs == [self.myR1]
+
+    def test_load_from_gc_table_1p(self):
+        self.check_rewrite("""
+            [p1]
+            setfield_gc(ConstPtr(myT), p1, descr=tzdescr)
+            jump()
+        """, """
+            [i1]
+            p0 = load_from_gc_table(0)
+            cond_call_gc_wb(p0, descr=wbdescr)
+            gc_store(p0, %(tzdescr.offset)s, i1, %(tzdescr.field_size)s)
+            jump()
+        """)
+        assert self.gcrefs == [self.myT]
+
+    def test_load_from_gc_table_2(self):
+        self.check_rewrite("""
+            [i1, f2]
+            setfield_gc(ConstPtr(myR1), i1, descr=xdescr)
+            setfield_gc(ConstPtr(myR1), f2, descr=ydescr)
+            jump()
+        """, """
+            [i1, f2]
+            p0 = load_from_gc_table(0)
+            gc_store(p0, %(xdescr.offset)s, i1, %(xdescr.field_size)s)
+            gc_store(p0, %(ydescr.offset)s, f2, %(ydescr.field_size)s)
+            jump()
+        """)
+        assert self.gcrefs == [self.myR1]
+
+    def test_load_from_gc_table_3(self):
+        self.check_rewrite("""
+            [i1, f2]
+            setfield_gc(ConstPtr(myR1), i1, descr=xdescr)
+            label(f2)
+            setfield_gc(ConstPtr(myR1), f2, descr=ydescr)
+            jump()
+        """, """
+            [i1, f2]
+            p0 = load_from_gc_table(0)
+            gc_store(p0, %(xdescr.offset)s, i1, %(xdescr.field_size)s)
+            label(f2)
+            p1 = load_from_gc_table(0)
+            gc_store(p1, %(ydescr.offset)s, f2, %(ydescr.field_size)s)
+            jump()
+        """)
+        assert self.gcrefs == [self.myR1]
+
+    def test_load_from_gc_table_4(self):
+        self.check_rewrite("""
+            [i1, f2]
+            setfield_gc(ConstPtr(myR1), i1, descr=xdescr)
+            setfield_gc(ConstPtr(myR1b), f2, descr=ydescr)
+            jump()
+        """, """
+            [i1, f2]
+            p0 = load_from_gc_table(0)
+            gc_store(p0, %(xdescr.offset)s, i1, %(xdescr.field_size)s)
+            p1 = load_from_gc_table(1)
+            gc_store(p1, %(ydescr.offset)s, f2, %(ydescr.field_size)s)
+            jump()
+        """)
+        assert self.gcrefs == [self.myR1, self.myR1b]
+
+    def test_pinned_simple_getfield(self):
+        # originally in test_pinned_object_rewrite; now should give the
+        # same result for pinned objects and for normal objects
+        self.check_rewrite("""
+            []
+            i0 = getfield_gc_i(ConstPtr(myR1), descr=xdescr)
+        """, """
+            []
+            p1 = load_from_gc_table(0)
+            i0 = gc_load_i(p1, %(xdescr.offset)s, -%(xdescr.field_size)s)
+        """)
+        assert self.gcrefs == [self.myR1]
+
+    def test_pinned_simple_getfield_twice(self):
+        # originally in test_pinned_object_rewrite; now should give the
+        # same result for pinned objects and for normal objects
+        self.check_rewrite("""
+            []
+            i0 = getfield_gc_i(ConstPtr(myR1), descr=xdescr)
+            i1 = getfield_gc_i(ConstPtr(myR1b), descr=xdescr)
+            i2 = getfield_gc_i(ConstPtr(myR1), descr=xdescr)
+        """, """
+            []
+            p1 = load_from_gc_table(0)
+            i0 = gc_load_i(p1, %(xdescr.offset)s, -%(xdescr.field_size)s)
+            p2 = load_from_gc_table(1)
+            i1 = gc_load_i(p2, %(xdescr.offset)s, -%(xdescr.field_size)s)
+            i2 = gc_load_i(p1, %(xdescr.offset)s, -%(xdescr.field_size)s)
+        """)
+        assert self.gcrefs == [self.myR1, self.myR1b]
+
+    def test_guard_in_gcref(self):
+        self.check_rewrite("""
+            [i1, i2]
+            guard_true(i1) []
+            guard_true(i2) []
+            jump()
+        """, """
+            [i1, i2]
+            guard_true(i1) []
+            guard_true(i2) []
+            jump()
+        """)
+        assert len(self.gcrefs) == 2
diff --git a/rpython/jit/backend/llsupport/test/zrpy_vmprof_test.py b/rpython/jit/backend/llsupport/test/zrpy_vmprof_test.py
--- a/rpython/jit/backend/llsupport/test/zrpy_vmprof_test.py
+++ b/rpython/jit/backend/llsupport/test/zrpy_vmprof_test.py
@@ -6,6 +6,7 @@
 from rpython.rlib import rthread
 from rpython.translator.translator import TranslationContext
 from rpython.jit.backend.detect_cpu import getcpuclass
+from rpython.rlib.rweaklist import RWeakListMixin
 
 class CompiledVmprofTest(CCompiledMixin):
     CPUClass = getcpuclass()
@@ -21,6 +22,7 @@
 
         class MyCode:
             _vmprof_unique_id = 0
+            _vmprof_weak_list = RWeakListMixin() ; _vmprof_weak_list.initialize()
             def __init__(self, name):
                 self.name = name
 
diff --git a/rpython/jit/backend/model.py b/rpython/jit/backend/model.py
--- a/rpython/jit/backend/model.py
+++ b/rpython/jit/backend/model.py
@@ -285,7 +285,7 @@
 
 class CompiledLoopToken(object):
     asmmemmgr_blocks = None
-    asmmemmgr_gcroots = 0
+    asmmemmgr_gcreftracers = None
 
     def __init__(self, cpu, number):
         cpu.tracker.total_compiled_loops += 1
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -499,10 +499,13 @@
 
         regalloc = RegAlloc(self, self.cpu.translate_support_code)
         #
+        allgcrefs = []
+        operations = regalloc.prepare_loop(inputargs, operations,
+                                           looptoken, allgcrefs)
+        self.reserve_gcref_table(allgcrefs)
+        functionpos = self.mc.get_relative_pos()
         self._call_header_with_stack_check()
         self._check_frame_depth_debug(self.mc)
-        operations = regalloc.prepare_loop(inputargs, operations,
-                                           looptoken, clt.allgcrefs)
         looppos = self.mc.get_relative_pos()
         frame_depth_no_fixed_size = self._assemble(regalloc, inputargs,
                                                    operations)
@@ -513,6 +516,7 @@
         full_size = self.mc.get_relative_pos()
         #
         rawstart = self.materialize_loop(looptoken)
+        self.patch_gcref_table(looptoken, rawstart)
         self.patch_stack_checks(frame_depth_no_fixed_size + JITFRAME_FIXED_SIZE,
                                 rawstart)
         looptoken._ll_loop_code = looppos + rawstart
@@ -521,7 +525,13 @@
             looptoken.number, loopname,
             r_uint(rawstart + looppos),
             r_uint(rawstart + size_excluding_failure_stuff),
-            r_uint(rawstart)))
+            r_uint(rawstart + functionpos)))
+        debug_print("       gc table: 0x%x" % r_uint(self.gc_table_addr))
+        debug_print("       function: 0x%x" % r_uint(rawstart + functionpos))
+        debug_print("         resops: 0x%x" % r_uint(rawstart + looppos))
+        debug_print("       failures: 0x%x" % r_uint(rawstart +
+                                                 size_excluding_failure_stuff))
+        debug_print("            end: 0x%x" % r_uint(rawstart + full_size))
         debug_stop("jit-backend-addr")
         self.patch_pending_failure_recoveries(rawstart)
         #
@@ -531,10 +541,11 @@
             looptoken._x86_rawstart = rawstart
             looptoken._x86_fullsize = full_size
             looptoken._x86_ops_offset = ops_offset
-        looptoken._ll_function_addr = rawstart
+        looptoken._ll_function_addr = rawstart + functionpos
         if logger:
             log = logger.log_trace(MARK_TRACE_ASM, None, self.mc)
             log.write(inputargs, operations, None, ops_offset=ops_offset, unique_id=rawstart)
+        
         self.fixup_target_tokens(rawstart)
         self.teardown()
         # oprofile support
@@ -563,11 +574,13 @@
                                                      'b', descr_number)
         arglocs = self.rebuild_faillocs_from_descr(faildescr, inputargs)
         regalloc = RegAlloc(self, self.cpu.translate_support_code)
-        startpos = self.mc.get_relative_pos()
+        allgcrefs = []
         operations = regalloc.prepare_bridge(inputargs, arglocs,
                                              operations,
-                                             self.current_clt.allgcrefs,
+                                             allgcrefs,
                                              self.current_clt.frame_info)
+        self.reserve_gcref_table(allgcrefs)
+        startpos = self.mc.get_relative_pos()
         self._check_frame_depth(self.mc, regalloc.get_gcmap())
         bridgestartpos = self.mc.get_relative_pos()
         self._update_at_exit(arglocs, inputargs, faildescr, regalloc)
@@ -577,14 +590,22 @@
         fullsize = self.mc.get_relative_pos()
         #
         rawstart = self.materialize_loop(original_loop_token)
+        self.patch_gcref_table(original_loop_token, rawstart)
         self.patch_stack_checks(frame_depth_no_fixed_size + JITFRAME_FIXED_SIZE,
                                 rawstart)
-        debug_bridge(descr_number, rawstart, codeendpos)
+        debug_start("jit-backend-addr")
+        debug_print("bridge out of Guard 0x%x has address 0x%x to 0x%x" %
+                    (r_uint(descr_number), r_uint(rawstart + startpos),
+                        r_uint(rawstart + codeendpos)))
+        debug_print("       gc table: 0x%x" % r_uint(self.gc_table_addr))
+        debug_print("    jump target: 0x%x" % r_uint(rawstart + startpos))
+        debug_print("         resops: 0x%x" % r_uint(rawstart + bridgestartpos))
+        debug_print("       failures: 0x%x" % r_uint(rawstart + codeendpos))
+        debug_print("            end: 0x%x" % r_uint(rawstart + fullsize))
+        debug_stop("jit-backend-addr")
         self.patch_pending_failure_recoveries(rawstart)
         # patch the jump from original guard
-        if logger:
-            logger.log_patch_guard(descr_number, rawstart)
-        self.patch_jump_for_descr(faildescr, rawstart)
+        self.patch_jump_for_descr(faildescr, rawstart + startpos)
         ops_offset = self.mc.ops_offset
         frame_depth = max(self.current_clt.frame_info.jfi_frame_depth,
                           frame_depth_no_fixed_size + JITFRAME_FIXED_SIZE)
@@ -669,6 +690,39 @@
             mc.JMP_r(X86_64_SCRATCH_REG.value)
         mc.copy_to_raw_memory(adr_jump_offset)
 
+    def reserve_gcref_table(self, allgcrefs):
+        gcref_table_size = len(allgcrefs) * WORD
+        if IS_X86_64:
+            # align to a multiple of 16 and reserve space at the beginning
+            # of the machine code for the gc table.  This lets us write
+            # machine code with relative addressing (%rip - constant).
+            gcref_table_size = (gcref_table_size + 15) & ~15
+            mc = self.mc
+            assert mc.get_relative_pos() == 0
+            for i in range(gcref_table_size):
+                mc.writechar('\x00')
+        elif IS_X86_32:
+            # allocate the gc table right now.  This lets us write
+            # machine code with absolute 32-bit addressing.
+            self.gc_table_addr = self.datablockwrapper.malloc_aligned(
+                gcref_table_size, alignment=WORD)
+        #
+        self.setup_gcrefs_list(allgcrefs)
+
+    def patch_gcref_table(self, looptoken, rawstart):
+        if IS_X86_64:
+            # the gc table is at the start of the machine code
+            self.gc_table_addr = rawstart
+        elif IS_X86_32:
+            # the gc table was already allocated by reserve_gcref_table()
+            rawstart = self.gc_table_addr
+        #
+        tracer = self.cpu.gc_ll_descr.make_gcref_tracer(rawstart,
+                                                        self._allgcrefs)
+        gcreftracers = self.get_asmmemmgr_gcreftracers(looptoken)
+        gcreftracers.append(tracer)    # keepalive
+        self.teardown_gcrefs_list()
+
     def write_pending_failure_recoveries(self, regalloc):
         # for each pending guard, generate the code of the recovery stub
         # at the end of self.mc.
@@ -792,6 +846,12 @@
             clt.asmmemmgr_blocks = []
         return clt.asmmemmgr_blocks
 
+    def get_asmmemmgr_gcreftracers(self, looptoken):
+        clt = looptoken.compiled_loop_token
+        if clt.asmmemmgr_gcreftracers is None:
+            clt.asmmemmgr_gcreftracers = []
+        return clt.asmmemmgr_gcreftracers
+
     def materialize_loop(self, looptoken):
         self.datablockwrapper.done()      # finish using cpu.asmmemmgr
         self.datablockwrapper = None
@@ -1370,6 +1430,29 @@
     genop_cast_ptr_to_int = _genop_same_as
     genop_cast_int_to_ptr = _genop_same_as
 
+    def _patch_load_from_gc_table(self, index):
+        # must be called immediately after a "p"-mode instruction
+        # has been emitted.  64-bit mode only.
+        assert IS_X86_64
+        address_in_buffer = index * WORD   # at the start of the buffer
+        p_location = self.mc.get_relative_pos()
+        offset = address_in_buffer - p_location
+        self.mc.overwrite32(p_location-4, offset)
+
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to