Author: Armin Rigo <ar...@tunes.org>
Branch: ppc-updated-backend
Changeset: r79926:7d9d9d7d1398
Date: 2015-10-02 10:35 +0200
http://bitbucket.org/pypy/pypy/changeset/7d9d9d7d1398/

Log:    PPC Backend #6 step 1

diff too long, truncating to 2000 out of 4756 lines

diff --git a/rpython/jit/backend/arm/assembler.py b/rpython/jit/backend/arm/assembler.py
--- a/rpython/jit/backend/arm/assembler.py
+++ b/rpython/jit/backend/arm/assembler.py
@@ -225,6 +225,10 @@
         if not for_frame:
             self._push_all_regs_to_jitframe(mc, [], withfloats, callee_only=True)
         else:
+            # NOTE: don't save registers on the jitframe here!  It might
+            # overwrite already-saved values that will be restored
+            # later...
+            #
             # we're possibly called from the slowpath of malloc
             # save the caller saved registers
             # assuming we do not collect here
diff --git a/rpython/jit/backend/arm/regalloc.py b/rpython/jit/backend/arm/regalloc.py
--- a/rpython/jit/backend/arm/regalloc.py
+++ b/rpython/jit/backend/arm/regalloc.py
@@ -1259,18 +1259,6 @@
         self.possibly_free_vars(guard_op.getfailargs())
         return locs + [resloc, tmploc]
 
-    def _prepare_args_for_new_op(self, new_args):
-        gc_ll_descr = self.cpu.gc_ll_descr
-        args = gc_ll_descr.args_for_new(new_args)
-        arglocs = []
-        for i in range(len(args)):
-            arg = args[i]
-            t = TempInt()
-            l = self.force_allocate_reg(t, selected_reg=r.all_regs[i])
-            self.assembler.load(l, imm(arg))
-            arglocs.append(t)
-        return arglocs
-
     prepare_op_float_add = prepare_float_op(name='prepare_op_float_add')
     prepare_op_float_sub = prepare_float_op(name='prepare_op_float_sub')
     prepare_op_float_mul = prepare_float_op(name='prepare_op_float_mul')
diff --git a/rpython/jit/backend/llsupport/test/ztranslation_test.py b/rpython/jit/backend/llsupport/test/ztranslation_test.py
--- a/rpython/jit/backend/llsupport/test/ztranslation_test.py
+++ b/rpython/jit/backend/llsupport/test/ztranslation_test.py
@@ -303,7 +303,7 @@
         for line in open(str(logfile)):
             if 'guard_class' in line:
                 guard_class += 1
-        # if we get many more guard_classes, it means that we generate
+        # if we get many more guard_classes (~93), it means that we generate
         # guards that always fail (the following assert's original purpose
         # is to catch the following case: each GUARD_CLASS is misgenerated
         # and always fails with "gcremovetypeptr")
diff --git a/rpython/jit/backend/ppc/callbuilder.py b/rpython/jit/backend/ppc/callbuilder.py
--- a/rpython/jit/backend/ppc/callbuilder.py
+++ b/rpython/jit/backend/ppc/callbuilder.py
@@ -126,8 +126,8 @@
             if gcrootmap.is_shadow_stack and self.is_call_release_gil:
                 # in this mode, 'ebx' happens to contain the shadowstack
                 # top at this point, so reuse it instead of loading it again
-                ssreg = ebx
-        self.asm._reload_frame_if_necessary(self.mc)
+                ssreg = self.RSHADOWPTR
+        self.asm._reload_frame_if_necessary(self.mc, shadowstack_reg=ssreg)
 
     def emit_raw_call(self):
         self.mc.raw_call()
@@ -151,9 +151,10 @@
         # Save this thread's shadowstack pointer into r29, for later comparison
         gcrootmap = self.asm.cpu.gc_ll_descr.gcrootmap
         if gcrootmap:
-            rst = gcrootmap.get_root_stack_top_addr()
-            self.mc.load_imm(RSHADOWPTR, rst)
-            self.mc.load(RSHADOWOLD.value, RSHADOWPTR.value, 0)
+            if gcrootmap.is_shadow_stack:
+                rst = gcrootmap.get_root_stack_top_addr()
+                self.mc.load_imm(RSHADOWPTR, rst)
+                self.mc.load(RSHADOWOLD.value, RSHADOWPTR.value, 0)
         #
         # change 'rpy_fastgil' to 0 (it should be non-zero right now)
         self.mc.load_imm(RFASTGILPTR, fastgil)
@@ -184,7 +185,8 @@
 
         self.mc.cmpdi(0, r.r10.value, 0)
         b1_location = self.mc.currpos()
-        self.mc.trap()       # patched with a BEQ: jump if r10 is zero
+        self.mc.trap()       # boehm: patched with a BEQ: jump if r10 is zero
+                             # shadowstack: patched with BNE instead
 
         if self.asm.cpu.gc_ll_descr.gcrootmap:
             # When doing a call_release_gil with shadowstack, there
@@ -192,20 +194,23 @@
             # current shadowstack can be the one of a different
             # thread.  So here we check if the shadowstack pointer
             # is still the same as before we released the GIL (saved
-            # in 'r7'), and if not, we fall back to 'reacqgil_addr'.
-            XXXXXXXXXXXXXXXXXXX
-            self.mc.LDR_ri(r.ip.value, r.r5.value, cond=c.EQ)
-            self.mc.CMP_rr(r.ip.value, r.r7.value, cond=c.EQ)
+            # in RSHADOWOLD), and if not, we fall back to 'reacqgil_addr'.
+            self.mc.load(r.r9.value, RSHADOWPTR.value, 0)
+            self.mc.cmpdi(0, r.r9.value, RSHADOWOLD.value)
+            bne_location = b1_location
             b1_location = self.mc.currpos()
-            self.mc.BKPT()                       # BEQ below
-            # there are two cases here: either EQ was false from
-            # the beginning, or EQ was true at first but the CMP
-            # made it false.  In the second case we need to
-            # release the fastgil here.  We know which case it is
-            # by checking again r3.
-            self.mc.CMP_ri(r.r3.value, 0)
-            self.mc.STR_ri(r.r3.value, r.r6.value, cond=c.EQ)
+            self.mc.trap()
+
+            # revert the rpy_fastgil acquired above, so that the
+            # general 'reacqgil_addr' below can acquire it again...
+            # (here, r10 is conveniently zero)
+            self.mc.std(r.r10.value, RFASTGILPTR.value, 0)
+
+            pmc = OverwritingBuilder(self.mc, bne_location, 1)
+            pmc.bne(self.mc.currpos() - bne_location)
+            pmc.overwrite()
         #
+        # Yes, we need to call the reacqgil() function.
         # save the result we just got
         RSAVEDRES = RFASTGILPTR     # can reuse this reg here
         reg = self.resloc
@@ -225,9 +230,8 @@
                             PARAM_SAVE_AREA_OFFSET + 7 * WORD)
 
         # replace b1_location with BEQ(here)
-        jmp_target = self.mc.currpos()
         pmc = OverwritingBuilder(self.mc, b1_location, 1)
-        pmc.beq(jmp_target - b1_location)
+        pmc.beq(self.mc.currpos() - b1_location)
         pmc.overwrite()
 
         if not we_are_translated():        # for testing: now we can access
diff --git a/rpython/jit/backend/ppc/codebuilder.py b/rpython/jit/backend/ppc/codebuilder.py
--- a/rpython/jit/backend/ppc/codebuilder.py
+++ b/rpython/jit/backend/ppc/codebuilder.py
@@ -17,11 +17,15 @@
 from rpython.jit.backend.ppc.rassemblermaker import make_rassembler
 
 
-# these are the *forbidden* encodings that don't accept register r0:
-#    addi rX, r0, immed
-#    subi rX, r0, immed
-#    addis rX, r0, immed
-#    subis rX, r0, immed
+# the following instructions can't accept "r0" as the second argument
+# (i.e. the base address): it is recognized as "0" instead, or is
+# even invalid (load-with-update, store-with-update).
+#
+#    any load or store instruction
+#    addi rD, r0, immed
+#    subi rD, r0, immed
+#    addis rD, r0, immed
+#    subis rD, r0, immed
 
 
 A = Form("frD", "frA", "frB", "XO3", "Rc")
@@ -1000,12 +1004,23 @@
             if word & 0xFFFF != 0:
                 self.ori(rD, rD, lo(word))
 
+    def load_imm_plus(self, dest_reg, word):
+        """Like load_imm(), but with one instruction less, and
+        leaves the loaded value off by some signed 16-bit difference.
+        Returns that difference."""
+        diff = rffi.cast(lltype.Signed, rffi.cast(rffi.SHORT, word))
+        word -= diff
+        assert word & 0xFFFF == 0
+        self.load_imm(dest_reg, word)
+        return diff
+
     def load_from_addr(self, rD, addr):
-        self.load_imm(rD, addr)
+        assert rD is not r.r0
+        diff = self.load_imm_plus(rD, addr)
         if IS_PPC_32:
-            self.lwzx(rD.value, 0, rD.value)
+            self.lwz(rD.value, rD.value, diff)
         else:
-            self.ldx(rD.value, 0, rD.value)
+            self.ld(rD.value, rD.value, diff)
 
     def b_offset(self, target):
         curpos = self.currpos()
@@ -1073,60 +1088,6 @@
         # Call the function
         self.bctrl()
 
-    ## def call(self, address):
-    ##     """ do a call to an absolute address
-    ##     """
-    ##     with scratch_reg(self):
-    ##         if IS_PPC_32:
-    ##             self.load_imm(r.SCRATCH, address)
-    ##         else:
-    ##             self.store(r.TOC.value, r.SP.value, 5 * WORD)
-    ##             self.load_imm(r.r11, address)
-    ##             self.load(r.SCRATCH.value, r.r11.value, 0)
-    ##             self.load(r.TOC.value, r.r11.value, WORD)
-    ##             self.load(r.r11.value, r.r11.value, 2 * WORD)
-    ##         self.mtctr(r.SCRATCH.value)
-    ##     self.bctrl()
-
-    ##     if IS_PPC_64:
-    ##         self.load(r.TOC.value, r.SP.value, 5 * WORD)
-
-    ## def call_register(self, call_reg):
-    ##     """ do a call to an address given in a register
-    ##     """
-    ##     assert isinstance(call_reg, RegisterLocation)
-    ##     with scratch_reg(self):
-    ##         if IS_PPC_32:
-    ##             self.mr(r.SCRATCH.value, call_reg.value)
-    ##         else:
-    ##             self.store(r.TOC.value, r.SP.value, 5 * WORD)
-    ##             self.mr(r.r11.value, call_reg.value)
-    ##             self.load(r.SCRATCH.value, r.r11.value, 0)
-    ##             self.load(r.TOC.value, r.r11.value, WORD)
-    ##             self.load(r.r11.value, r.r11.value, 2 * WORD)
-    ##         self.mtctr(r.SCRATCH.value)
-    ##     self.bctrl()
-
-    ##     if IS_PPC_64:
-    ##         self.load(r.TOC.value, r.SP.value, 5 * WORD)
-
-    ## def make_function_prologue(self, frame_size):
-    ##     """ Build a new stackframe of size frame_size 
-    ##         and store the LR in the previous frame.
-    ##     """
-    ##     with scratch_reg(self):
-    ##         self.store_update(r.SP.value, r.SP.value, -frame_size)
-    ##         self.mflr(r.SCRATCH.value)
-    ##         self.store(r.SCRATCH.value, r.SP.value, frame_size + LR_BC_OFFSET) 
-
-    def restore_LR_from_caller_frame(self, frame_size):
-        """ Restore the LR from the calling frame.
-            frame_size is the size of the current frame.
-        """
-        with scratch_reg(self):
-            lr_offset = frame_size + LR_BC_OFFSET
-            self.load(r.SCRATCH.value, r.SP.value, lr_offset)
-            self.mtlr(r.SCRATCH.value)
 
     def load(self, target_reg, base_reg, offset):
         if IS_PPC_32:
@@ -1266,6 +1227,22 @@
         #assert self.r0_in_use
         #self.r0_in_use = False
 
+    def get_assembler_function(self):
+        "NOT_RPYTHON: tests only"
+        from rpython.jit.backend.llsupport.asmmemmgr import AsmMemoryManager
+        class FakeCPU:
+            HAS_CODEMAP = False
+            asmmemmgr = AsmMemoryManager()
+        addr = self.materialize(FakeCPU(), [])
+        if IS_BIG_ENDIAN:
+            mc = PPCBuilder()
+            mc.write64(addr)     # the 3-words descriptor
+            mc.write64(0)
+            mc.write64(0)
+            addr = mc.materialize(FakeCPU(), [])
+        return rffi.cast(lltype.Ptr(lltype.FuncType([], lltype.Signed)), addr)
+
+
 class scratch_reg(object):
     def __init__(self, mc):
         self.mc = mc
diff --git a/rpython/jit/backend/ppc/opassembler.py b/rpython/jit/backend/ppc/opassembler.py
--- a/rpython/jit/backend/ppc/opassembler.py
+++ b/rpython/jit/backend/ppc/opassembler.py
@@ -59,23 +59,23 @@
         else:
             self.mc.mulld(res.value, l0.value, l1.value)
 
-    def do_emit_int_binary_ovf(self, op, arglocs, emit):
+    def do_emit_int_binary_ovf(self, op, arglocs):
         l0, l1, res = arglocs[0], arglocs[1], arglocs[2]
         self.mc.load_imm(r.SCRATCH, 0)
         self.mc.mtxer(r.SCRATCH.value)
-        emit(res.value, l0.value, l1.value)
+        return (res.value, l0.value, l1.value)
 
     def emit_int_add_ovf(self, op, arglocs, regalloc):
-        self.do_emit_int_binary_ovf(op, arglocs, self.mc.addox)
+        self.mc.addox(*self.do_emit_int_binary_ovf(op, arglocs))
 
     def emit_int_sub_ovf(self, op, arglocs, regalloc):
-        self.do_emit_int_binary_ovf(op, arglocs, self.mc.subox)
+        self.mc.subox(*self.do_emit_int_binary_ovf(op, arglocs))
 
     def emit_int_mul_ovf(self, op, arglocs, regalloc):
         if IS_PPC_32:
-            self.do_emit_int_binary_ovf(op, arglocs, self.mc.mullwox)
+            self.mc.mullwox(*self.do_emit_int_binary_ovf(op, arglocs))
         else:
-            self.do_emit_int_binary_ovf(op, arglocs, self.mc.mulldox)
+            self.mc.mulldox(*self.do_emit_int_binary_ovf(op, arglocs))
 
     def emit_int_floordiv(self, op, arglocs, regalloc):
         l0, l1, res = arglocs
@@ -343,12 +343,11 @@
             # this half-word is at offset 0 on a little-endian machine;
             # but it is at offset 2 (32 bit) or 4 (64 bit) on a
             # big-endian machine.
-            with scratch_reg(self.mc):
-                if IS_PPC_32:
-                    self.mc.lhz(r.SCRATCH.value, locs[0].value, 2)
-                else:
-                    self.mc.lwz(r.SCRATCH.value, locs[0].value, 4)
-                self.mc.cmp_op(0, r.SCRATCH.value, typeid.value, imm=typeid.is_imm())
+            if IS_PPC_32:
+                self.mc.lhz(r.SCRATCH.value, locs[0].value, 2 * IS_BIG_ENDIAN)
+            else:
+                self.mc.lwz(r.SCRATCH.value, locs[0].value, 4 * IS_BIG_ENDIAN)
+            self.mc.cmp_op(0, r.SCRATCH.value, typeid.value, imm=typeid.is_imm())
 
     def emit_guard_not_invalidated(self, op, arglocs, regalloc):
         self._emit_guard(op, arglocs, is_guard_not_invalidated=True)
@@ -461,23 +460,24 @@
             pmc.overwrite()
 
     def emit_guard_exception(self, op, arglocs, regalloc):
-        # XXX FIXME
-        # XXX pos_exc_value and pos_exception are 8 bytes apart, don't need both
-        loc, loc1, resloc, pos_exc_value, pos_exception = arglocs[:5]
-        failargs = arglocs[5:]
-        self.mc.load_imm(loc1, pos_exception.value)
-        self.mc.load(r.SCRATCH.value, loc1.value, 0)
-        self.mc.cmp_op(0, r.SCRATCH.value, loc.value)
+        loc, resloc = arglocs[:2]
+        failargs = arglocs[2:]
+
+        mc = self.mc
+        mc.load_imm(r.SCRATCH2, self.cpu.pos_exc_value())
+        diff = self.cpu.pos_exception() - self.cpu.pos_exc_value()
+        assert _check_imm_arg(diff)
+
+        mc.load(r.SCRATCH.value, r.SCRATCH2.value, diff)
+        mc.cmp_op(0, r.SCRATCH.value, loc.value)
         self.guard_success_cc = c.EQ
         self._emit_guard(op, failargs, save_exc=True)
-        self.mc.load_imm(loc, pos_exc_value.value)
 
         if resloc:
-            self.mc.load(resloc.value, loc.value, 0)
-
-        self.mc.load_imm(r.SCRATCH, 0)
-        self.mc.store(r.SCRATCH.value, loc.value, 0)
-        self.mc.store(r.SCRATCH.value, loc1.value, 0)
+            mc.load(resloc.value, r.SCRATCH2.value, 0)
+        mc.load_imm(r.SCRATCH, 0)
+        mc.store(r.SCRATCH.value, r.SCRATCH2.value, 0)
+        mc.store(r.SCRATCH.value, r.SCRATCH2.value, diff)
 
 
 class CallOpAssembler(object):
@@ -687,7 +687,7 @@
             if _check_imm_arg(multiply_by):
                 self.mc.mulli(scratch_loc.value, loc.value, multiply_by)
             else:
-                self.mc.load_imm(scratch_loc.value, multiply_by)
+                self.mc.load_imm(scratch_loc, multiply_by)
                 if IS_PPC_32:
                     self.mc.mullw(scratch_loc.value, loc.value,
                                   scratch_loc.value)
@@ -766,6 +766,23 @@
             self.mc.mr(r.SCRATCH2.value, loc.value)
         return r.SCRATCH2
 
+    # RPythonic workaround for emit_zero_array()
+    def eza_stXux(self, a, b, c, itemsize):
+        if itemsize & 1:                  self.mc.stbux(a, b, c)
+        elif itemsize & 2:                self.mc.sthux(a, b, c)
+        elif (itemsize & 4) or IS_PPC_32: self.mc.stwux(a, b, c)
+        else:                             self.mc.stdux(a, b, c)
+    def eza_stXu(self, a, b, c, itemsize):
+        if itemsize & 1:                  self.mc.stbu(a, b, c)
+        elif itemsize & 2:                self.mc.sthu(a, b, c)
+        elif (itemsize & 4) or IS_PPC_32: self.mc.stwu(a, b, c)
+        else:                             self.mc.stdu(a, b, c)
+    def eza_stX(self, a, b, c, itemsize):
+        if itemsize & 1:                  self.mc.stb(a, b, c)
+        elif itemsize & 2:                self.mc.sth(a, b, c)
+        elif (itemsize & 4) or IS_PPC_32: self.mc.stw(a, b, c)
+        else:                             self.mc.std(a, b, c)
+
     def emit_zero_array(self, op, arglocs, regalloc):
         base_loc, startindex_loc, length_loc, ofs_loc, itemsize_loc = arglocs
 
@@ -774,26 +791,10 @@
         # * if N % 4 == 0, then all items are aligned to a multiple of 4
         # * if N % 8 == 0, then all items are aligned to a multiple of 8
         itemsize = itemsize_loc.getint()
-        if itemsize & 1:
-            stepsize = 1
-            stXux = self.mc.stbux
-            stXu = self.mc.stbu
-            stX  = self.mc.stb
-        elif itemsize & 2:
-            stepsize = 2
-            stXux = self.mc.sthux
-            stXu = self.mc.sthu
-            stX  = self.mc.sth
-        elif (itemsize & 4) or IS_PPC_32:
-            stepsize = 4
-            stXux = self.mc.stwux
-            stXu = self.mc.stwu
-            stX  = self.mc.stw
-        else:
-            stepsize = WORD
-            stXux = self.mc.stdux
-            stXu = self.mc.stdu
-            stX  = self.mc.std
+        if itemsize & 1:                  stepsize = 1
+        elif itemsize & 2:                stepsize = 2
+        elif (itemsize & 4) or IS_PPC_32: stepsize = 4
+        else:                             stepsize = WORD
 
         repeat_factor = itemsize // stepsize
         if repeat_factor != 1:
@@ -816,9 +817,11 @@
         if unroll > 0:
             assert repeat_factor == 1
             self.mc.li(r.SCRATCH.value, 0)
-            stXux(r.SCRATCH.value, ofs_loc.value, base_loc.value)
+            self.eza_stXux(r.SCRATCH.value, ofs_loc.value, base_loc.value,
+                           itemsize)
             for i in range(1, unroll):
-                stX(r.SCRATCH.value, ofs_loc.value, i * stepsize)
+                self.eza_stX(r.SCRATCH.value, ofs_loc.value, i * stepsize,
+                             itemsize)
 
         else:
             if length_loc.is_imm():
@@ -836,12 +839,14 @@
             self.mc.mtctr(length_loc.value)
             self.mc.li(r.SCRATCH.value, 0)
 
-            stXux(r.SCRATCH.value, ofs_loc.value, base_loc.value)
+            self.eza_stXux(r.SCRATCH.value, ofs_loc.value, base_loc.value,
+                           itemsize)
             bdz_location = self.mc.currpos()
             self.mc.trap()
 
             loop_location = self.mc.currpos()
-            stXu(r.SCRATCH.value, ofs_loc.value, stepsize)
+            self.eza_stXu(r.SCRATCH.value, ofs_loc.value, stepsize,
+                          itemsize)
             self.mc.bdnz(loop_location - self.mc.currpos())
 
             pmc = OverwritingBuilder(self.mc, bdz_location, 1)
@@ -958,10 +963,13 @@
 
     def emit_call_malloc_nursery_varsize(self, op, arglocs, regalloc):
         # registers r.RES and r.RSZ are allocated for this call
+        gc_ll_descr = self.cpu.gc_ll_descr
+        if not hasattr(gc_ll_descr, 'max_size_of_young_obj'):
+            raise Exception("unreachable code")
+            # for boehm, this function should never be called
         [lengthloc] = arglocs
         arraydescr = op.getdescr()
         itemsize = op.getarg(1).getint()
-        gc_ll_descr = self.cpu.gc_ll_descr
         maxlength = (gc_ll_descr.max_size_of_young_obj - WORD * 2) / itemsize
         gcmap = regalloc.get_gcmap([r.RES, r.RSZ])
         self.malloc_cond_varsize(
@@ -976,6 +984,12 @@
     emit_jit_debug = emit_debug_merge_point
     emit_keepalive = emit_debug_merge_point
 
+    def emit_enter_portal_frame(self, op, arglocs, regalloc):
+        self.enter_portal_frame(op)
+
+    def emit_leave_portal_frame(self, op, arglocs, regalloc):
+        self.leave_portal_frame(op)
+
     def _write_barrier_fastpath(self, mc, descr, arglocs, regalloc, array=False,
                                 is_frame=False):
         # Write code equivalent to write_barrier() in the GC: it checks
@@ -1212,6 +1226,7 @@
                   StrOpAssembler, CallOpAssembler,
                   UnicodeOpAssembler, ForceOpAssembler,
                   AllocOpAssembler, FloatOpAssembler):
+    _mixin_ = True
 
     def nop(self):
         self.mc.ori(0, 0, 0)
diff --git a/rpython/jit/backend/ppc/ppc_assembler.py b/rpython/jit/backend/ppc/ppc_assembler.py
--- a/rpython/jit/backend/ppc/ppc_assembler.py
+++ b/rpython/jit/backend/ppc/ppc_assembler.py
@@ -127,20 +127,20 @@
     def _call_header_shadowstack(self, gcrootmap):
         # we need to put one word into the shadowstack: the jitframe (SPP)
         mc = self.mc
-        mc.load_imm(r.RCS1, gcrootmap.get_root_stack_top_addr())
-        mc.load(r.RCS2.value, r.RCS1.value, 0)    # ld RCS2, [rootstacktop]
+        diff = mc.load_imm_plus(r.RCS1, gcrootmap.get_root_stack_top_addr())
+        mc.load(r.RCS2.value, r.RCS1.value, diff) # ld RCS2, [rootstacktop]
         #
         mc.addi(r.RCS3.value, r.RCS2.value, WORD) # add RCS3, RCS2, WORD
         mc.store(r.SPP.value, r.RCS2.value, 0)    # std SPP, RCS2
         #
-        mc.store(r.RCS3.value, r.RCS1.value, 0)   # std RCS3, [rootstacktop]
+        mc.store(r.RCS3.value, r.RCS1.value, diff)# std RCS3, [rootstacktop]
 
     def _call_footer_shadowstack(self, gcrootmap):
         mc = self.mc
-        mc.load_imm(r.RCS1, gcrootmap.get_root_stack_top_addr())
-        mc.load(r.RCS2.value, r.RCS1.value, 0)     # ld RCS2, [rootstacktop]
-        mc.addi(r.RCS2.value, r.RCS2.value, WORD)  # sub RCS2, RCS2, WORD
-        mc.store(r.RCS2.value, r.RCS1.value, 0)    # std RCS2, [rootstacktop]
+        diff = mc.load_imm_plus(r.RCS1, gcrootmap.get_root_stack_top_addr())
+        mc.load(r.RCS2.value, r.RCS1.value, diff)  # ld RCS2, [rootstacktop]
+        mc.subi(r.RCS2.value, r.RCS2.value, WORD)  # sub RCS2, RCS2, WORD
+        mc.store(r.RCS2.value, r.RCS1.value, diff) # std RCS2, [rootstacktop]
 
     def new_stack_loc(self, i, tp):
         base_ofs = self.cpu.get_baseofs_of_frame_field()
@@ -248,8 +248,8 @@
 
         gcrootmap = self.cpu.gc_ll_descr.gcrootmap
         if gcrootmap and gcrootmap.is_shadow_stack:
-            mc.load_imm(r.r5, gcrootmap.get_root_stack_top_addr())
-            mc.load(r.r5.value, r.r5.value, 0)
+            diff = mc.load_imm_plus(r.r5, gcrootmap.get_root_stack_top_addr())
+            mc.load(r.r5.value, r.r5.value, diff)
             mc.store(r.r3.value, r.r5.value, -WORD)
 
         mc.mtlr(r.RCS1.value)     # restore LR
@@ -283,13 +283,16 @@
         mc.store(excvalloc.value, r.r2.value, 0)
         mc.store(exctploc.value, r.r2.value, diff)
 
-    def _reload_frame_if_necessary(self, mc):
+    def _reload_frame_if_necessary(self, mc, shadowstack_reg=None):
         gcrootmap = self.cpu.gc_ll_descr.gcrootmap
         if gcrootmap:
             if gcrootmap.is_shadow_stack:
-                mc.load_imm(r.SPP, gcrootmap.get_root_stack_top_addr())
-                mc.load(r.SPP.value, r.SPP.value, 0)
-                mc.load(r.SPP.value, r.SPP.value, -WORD)
+                if shadowstack_reg is None:
+                    diff = mc.load_imm_plus(r.SPP,
+                                            gcrootmap.get_root_stack_top_addr())
+                    mc.load(r.SPP.value, r.SPP.value, diff)
+                    shadowstack_reg = r.SPP
+                mc.load(r.SPP.value, shadowstack_reg.value, -WORD)
         wbdescr = self.cpu.gc_ll_descr.write_barrier_descr
         if gcrootmap and wbdescr:
             # frame never uses card marking, so we enforce this is not
@@ -430,100 +433,38 @@
         if slowpathaddr == 0 or not self.cpu.propagate_exception_descr:
             return      # no stack check (for tests, or non-translated)
         #
-        # make a "function" that is called immediately at the start of
-        # an assembler function.  In particular, the stack looks like:
+        # make a regular function that is called from a point near the start
+        # of an assembler function (after it adjusts the stack and saves
+        # registers).
+        mc = PPCBuilder()
         #
-        # |                             |
-        # |        OLD BACKCHAIN        |
-        # |                             |
-        # =============================== -
-        # |                             |  | 
-        # |          BACKCHAIN          |  | > MINI FRAME (BACHCHAIN SIZE * WORD)
-        # |                             |  |
-        # =============================== - 
-        # |                             |
-        # |       SAVED PARAM REGS      |
-        # |                             |
-        # -------------------------------
-        # |                             |
-        # |          BACKCHAIN          |
-        # |                             |
-        # =============================== <- SP
+        # Save away the LR inside r30
+        mc.mflr(r.RCS1.value)
         #
-        mc = PPCBuilder()
-        
-        # make small frame to store data (parameter regs + LR + SCRATCH) in
-        # there.  Allocate additional fixed save area for PPC64.
-        PARAM_AREA = len(r.PARAM_REGS)
-        FIXED_AREA = BACKCHAIN_SIZE
-        if IS_PPC_64:
-            FIXED_AREA += MAX_REG_PARAMS
-        frame_size = (FIXED_AREA + PARAM_AREA) * WORD
-
-        # align the SP
-        MINIFRAME_SIZE = BACKCHAIN_SIZE * WORD
-        while (frame_size + MINIFRAME_SIZE) % (4 * WORD) != 0:
-            frame_size += WORD
-
-        # write function descriptor
-        if IS_PPC_64 and IS_BIG_ENDIAN:
-            for _ in range(3):
-                mc.write64(0)
-
-        # build frame
-        mc.make_function_prologue(frame_size)
-
-        # save parameter registers
-        for i, reg in enumerate(r.PARAM_REGS):
-            mc.store(reg.value, r.SP.value, (i + FIXED_AREA) * WORD)
-
+        # Do the call
         # use SP as single parameter for the call
         mc.mr(r.r3.value, r.SP.value)
-
-        # stack still aligned
-        mc.call(slowpathaddr)
-
-        with scratch_reg(mc):
-            mc.load_imm(r.SCRATCH, self.cpu.pos_exception())
-            mc.loadx(r.SCRATCH.value, 0, r.SCRATCH.value)
-            # if this comparison is true, then everything is ok,
-            # else we have an exception
-            mc.cmp_op(0, r.SCRATCH.value, 0, imm=True)
-
-        jnz_location = mc.currpos()
-        mc.trap()
-
-        # restore parameter registers
-        for i, reg in enumerate(r.PARAM_REGS):
-            mc.load(reg.value, r.SP.value, (i + FIXED_AREA) * WORD)
-
-        # restore LR
-        mc.restore_LR_from_caller_frame(frame_size)
-
-        # reset SP
-        mc.addi(r.SP.value, r.SP.value, frame_size)
-        #mc.blr()
-        mc.b(self.propagate_exception_path)
-
-        pmc = OverwritingBuilder(mc, jnz_location, 1)
-        pmc.bne(mc.currpos() - jnz_location)
-        pmc.overwrite()
-
-        # restore link register out of preprevious frame
-        offset_LR = frame_size + MINIFRAME_SIZE + LR_BC_OFFSET
-
-        with scratch_reg(mc):
-            mc.load(r.SCRATCH.value, r.SP.value, offset_LR)
-            mc.mtlr(r.SCRATCH.value)
-
-        # remove this frame and the miniframe
-        both_framesizes = frame_size + MINIFRAME_SIZE
-        mc.addi(r.SP.value, r.SP.value, both_framesizes)
-        mc.blr()
-
+        mc.load_imm(mc.RAW_CALL_REG, slowpathaddr)
+        mc.raw_call()
+        #
+        # Restore LR
+        mc.mtlr(r.RCS1.value)
+        #
+        # Check if it raised StackOverflow
+        mc.load_imm(r.SCRATCH, self.cpu.pos_exception())
+        mc.loadx(r.SCRATCH.value, 0, r.SCRATCH.value)
+        # if this comparison is true, then everything is ok,
+        # else we have an exception
+        mc.cmp_op(0, r.SCRATCH.value, 0, imm=True)
+        #
+        # So we return via LR back to our caller, conditionally if "EQ"
+        mc.beqlr()
+        #
+        # Else, jump to propagate_exception_path
+        assert self.propagate_exception_path
+        mc.b_abs(self.propagate_exception_path)
+        #
         rawstart = mc.materialize(self.cpu, [])
-        if IS_PPC_64:
-            self.write_64_bit_func_descr(rawstart, rawstart+3*WORD)
         self.stack_check_slowpath = rawstart
 
     def _build_wb_slowpath(self, withcards, withfloats=False, for_frame=False):
@@ -553,6 +494,10 @@
         self.mc = mc
 
         if for_frame:
+            # NOTE: don't save registers on the jitframe here!  It might
+            # overwrite already-saved values that will be restored
+            # later...
+            #
             # This 'for_frame' version is called after a CALL.  It does not
             # need to save many registers: the registers that are anyway
             # destroyed by the call can be ignored (VOLATILES), and the
@@ -560,8 +505,19 @@
             # to save r.RCS1 (used below), r3 and f1 (possible results of
             # the call), and two more non-volatile registers (used to store
             # the RPython exception that occurred in the CALL, if any).
-            saved_regs = [r.r3, r.RCS1, r.RCS2, r.RCS3]
-            saved_fp_regs = [r.f1]
+            #
+            # We need to increase our stack frame size a bit to store them.
+            #
+            self.mc.load(r.SCRATCH.value, r.SP.value, 0)    # SP back chain
+            self.mc.store_update(r.SCRATCH.value, r.SP.value, -6 * WORD)
+            self.mc.std(r.RCS1.value, r.SP.value, 1 * WORD)
+            self.mc.std(r.RCS2.value, r.SP.value, 2 * WORD)
+            self.mc.std(r.RCS3.value, r.SP.value, 3 * WORD)
+            self.mc.std(r.r3.value, r.SP.value, 4 * WORD)
+            self.mc.stfd(r.f1.value, r.SP.value, 5 * WORD)
+            saved_regs = None
+            saved_fp_regs = None
+
         else:
             # push all volatile registers, push RCS1, and sometimes push RCS2
             if withcards:
@@ -573,8 +529,8 @@
             else:
                 saved_fp_regs = []
 
-        self._push_core_regs_to_jitframe(mc, saved_regs)
-        self._push_fp_regs_to_jitframe(mc, saved_fp_regs)
+            self._push_core_regs_to_jitframe(mc, saved_regs)
+            self._push_fp_regs_to_jitframe(mc, saved_fp_regs)
 
         if for_frame:
             # note that it's safe to store the exception in register,
@@ -608,8 +564,18 @@
             mc.lbz(r.RCS2.value, r.RCS2.value, descr.jit_wb_if_flag_byteofs)
             mc.andix(r.RCS2.value, r.RCS2.value, card_marking_mask & 0xFF)
 
-        self._pop_core_regs_from_jitframe(mc, saved_regs)
-        self._pop_fp_regs_from_jitframe(mc, saved_fp_regs)
+        if for_frame:
+            self.mc.ld(r.RCS1.value, r.SP.value, 1 * WORD)
+            self.mc.ld(r.RCS2.value, r.SP.value, 2 * WORD)
+            self.mc.ld(r.RCS3.value, r.SP.value, 3 * WORD)
+            self.mc.ld(r.r3.value, r.SP.value, 4 * WORD)
+            self.mc.lfd(r.f1.value, r.SP.value, 5 * WORD)
+            self.mc.addi(r.SP.value, r.SP.value, 6 * WORD)
+
+        else:
+            self._pop_core_regs_from_jitframe(mc, saved_regs)
+            self._pop_fp_regs_from_jitframe(mc, saved_fp_regs)
+
         mc.blr()
 
         self.mc = old_mc
@@ -675,54 +641,19 @@
         if self.stack_check_slowpath == 0:
             pass            # not translated
         else:
-            XXXX
-            # this is the size for the miniframe
-            frame_size = BACKCHAIN_SIZE * WORD
+            endaddr, lengthaddr, _ = self.cpu.insert_stack_check()
+            diff = lengthaddr - endaddr
+            assert _check_imm_arg(diff)
 
-            endaddr, lengthaddr, _ = self.cpu.insert_stack_check()
-
-            # save r16
-            self.mc.mtctr(r.r16.value)
-
-            with scratch_reg(self.mc):
-                self.mc.load_imm(r.SCRATCH, endaddr)        # load SCRATCH, [start]
-                self.mc.loadx(r.SCRATCH.value, 0, r.SCRATCH.value)
-                self.mc.subf(r.SCRATCH.value, r.SP.value, r.SCRATCH.value)
-                self.mc.load_imm(r.r16, lengthaddr)
-                self.mc.load(r.r16.value, r.r16.value, 0)
-                self.mc.cmp_op(0, r.SCRATCH.value, r.r16.value, signed=False)
-
-            # restore r16
-            self.mc.mfctr(r.r16.value)
-
-            patch_loc = self.mc.currpos()
-            self.mc.trap()
-
-            # make minimal frame which contains the LR
-            #
-            # |         OLD    FRAME       |
-            # ==============================
-            # |                            |
-            # |         BACKCHAIN          | > BACKCHAIN_SIZE * WORD
-            # |                            |
-            # ============================== <- SP
-
-            self.mc.make_function_prologue(frame_size)
-
-            # make check
-            self.mc.call(self.stack_check_slowpath)
-
-            # restore LR
-            self.mc.restore_LR_from_caller_frame(frame_size)
-
-            # remove minimal frame
-            self.mc.addi(r.SP.value, r.SP.value, frame_size)
-
-            offset = self.mc.currpos() - patch_loc
-            #
-            pmc = OverwritingBuilder(self.mc, patch_loc, 1)
-            pmc.ble(offset) # jump if SCRATCH <= r16, i. e. not(SCRATCH > r16)
-            pmc.overwrite()
+            mc = self.mc
+            mc.load_imm(r.SCRATCH, self.stack_check_slowpath)
+            mc.load_imm(r.SCRATCH2, endaddr)                 # li r2, endaddr
+            mc.mtctr(r.SCRATCH.value)
+            mc.load(r.SCRATCH.value, r.SCRATCH2.value, 0)    # ld r0, [end]
+            mc.load(r.SCRATCH2.value, r.SCRATCH2.value, diff)# ld r2, [length]
+            mc.subf(r.SCRATCH.value, r.SP.value, r.SCRATCH.value)  # sub r0, SP
+            mc.cmp_op(0, r.SCRATCH.value, r.SCRATCH2.value, signed=False)
+            mc.bgtctrl()
 
     def _call_footer(self):
         # the return value is the jitframe
@@ -1012,8 +943,7 @@
             addr = rawstart + tok.pos_jump_offset
             #
             # XXX see patch_jump_for_descr()
-            #tok.faildescr.adr_jump_offset = addr
-            tok.faildescr.adr_recovery_stub = rawstart + tok.pos_recovery_stub
+            tok.faildescr.adr_jump_offset = rawstart + tok.pos_recovery_stub
             #
             relative_target = tok.pos_recovery_stub - tok.pos_jump_offset
             #
@@ -1039,7 +969,9 @@
         # --- XXX for now we always use the second solution ---
         mc = PPCBuilder()
         mc.b_abs(adr_new_target)
-        mc.copy_to_raw_memory(faildescr.adr_recovery_stub)
+        mc.copy_to_raw_memory(faildescr.adr_jump_offset)
+        assert faildescr.adr_jump_offset != 0
+        faildescr.adr_jump_offset = 0    # means "patched"
 
     def get_asmmemmgr_blocks(self, looptoken):
         clt = looptoken.compiled_loop_token
@@ -1390,16 +1322,7 @@
         with scratch_reg(self.mc):
             self.mc.load_imm(r.SCRATCH, fail_index)
             self.mc.store(r.SCRATCH.value, r.SPP.value, FORCE_INDEX_OFS)
-            
-    def load(self, loc, value):
-        assert (loc.is_reg() and value.is_imm()
-                or loc.is_fp_reg() and value.is_imm_float())
-        if value.is_imm():
-            self.mc.load_imm(loc, value.getint())
-        elif value.is_imm_float():
-            with scratch_reg(self.mc):
-                self.mc.load_imm(r.SCRATCH, value.getint())
-                self.mc.lfdx(loc.value, 0, r.SCRATCH.value)
+
 
 def notimplemented_op(self, op, arglocs, regalloc):
     print "[PPC/asm] %s not implemented" % op.getopname()
diff --git a/rpython/jit/backend/ppc/regalloc.py b/rpython/jit/backend/ppc/regalloc.py
--- a/rpython/jit/backend/ppc/regalloc.py
+++ b/rpython/jit/backend/ppc/regalloc.py
@@ -61,11 +61,15 @@
     save_around_call_regs = r.VOLATILES_FLOAT
     assert set(save_around_call_regs).issubset(all_regs)
 
-    def convert_to_imm(self, c):
+    def convert_to_adr(self, c):
         assert isinstance(c, ConstFloat)
         adr = self.assembler.datablockwrapper.malloc_aligned(8, 8)
         x = c.getfloatstorage()
         rffi.cast(rffi.CArrayPtr(longlong.FLOATSTORAGE), adr)[0] = x
+        return adr
+
+    def convert_to_imm(self, c):
+        adr = self.convert_to_adr(c)
         return locations.ConstFloatLoc(adr)
 
     def __init__(self, longevity, frame_manager=None, assembler=None):
@@ -77,8 +81,10 @@
     def ensure_reg(self, box):
         if isinstance(box, Const):
             loc = self.get_scratch_reg()
-            immvalue = self.convert_to_imm(box)
-            self.assembler.load(loc, immvalue)
+            immadrvalue = self.convert_to_adr(box)
+            mc = self.assembler.mc
+            mc.load_imm(r.SCRATCH, immadrvalue)
+            mc.lfdx(loc.value, 0, r.SCRATCH.value)
         else:
             assert box in self.temp_boxes
             loc = self.make_sure_var_in_reg(box,
@@ -134,19 +140,22 @@
     def call_result_location(self, v):
         return r.r3
 
-    def convert_to_imm(self, c):
+    def convert_to_int(self, c):
         if isinstance(c, ConstInt):
-            val = rffi.cast(lltype.Signed, c.value)
-            return locations.ImmLocation(val)
+            return rffi.cast(lltype.Signed, c.value)
         else:
             assert isinstance(c, ConstPtr)
-            return locations.ImmLocation(rffi.cast(lltype.Signed, c.value))
+            return rffi.cast(lltype.Signed, c.value)
+
+    def convert_to_imm(self, c):
+        val = self.convert_to_int(c)
+        return locations.ImmLocation(val)
 
     def ensure_reg(self, box):
         if isinstance(box, Const):
             loc = self.get_scratch_reg()
-            immvalue = self.convert_to_imm(box)
-            self.assembler.load(loc, immvalue)
+            immvalue = self.convert_to_int(box)
+            self.assembler.mc.load_imm(loc, immvalue)
         else:
             assert box in self.temp_boxes
             loc = self.make_sure_var_in_reg(box,
@@ -593,15 +602,11 @@
 
     def prepare_guard_exception(self, op):
         loc = self.ensure_reg(op.getarg(0))
-        loc1 = r.SCRATCH2
         if op.result in self.longevity:
             resloc = self.force_allocate_reg(op.result)
         else:
             resloc = None
-        pos_exc_value = imm(self.cpu.pos_exc_value())
-        pos_exception = imm(self.cpu.pos_exception())
-        arglocs = self._prepare_guard(op,
-                    [loc, loc1, resloc, pos_exc_value, pos_exception])
+        arglocs = self._prepare_guard(op, [loc, resloc])
         return arglocs
 
     def prepare_guard_no_exception(self, op):
@@ -644,7 +649,7 @@
             #     offset in type_info_group
             #   - add 16/32 bytes, to go past the TYPE_INFO structure
             classptr = y_val
-            from pypy.rpython.memory.gctypelayout import GCData
+            from rpython.memory.gctypelayout import GCData
             sizeof_ti = rffi.sizeof(GCData.TYPE_INFO)
             type_info_group = llop.gc_get_type_info_group(llmemory.Address)
             type_info_group = rffi.cast(lltype.Signed, type_info_group)
@@ -962,10 +967,6 @@
         return [sizeloc]
 
     def prepare_call_malloc_nursery_varsize(self, op):
-        gc_ll_descr = self.assembler.cpu.gc_ll_descr
-        if not hasattr(gc_ll_descr, 'max_size_of_young_obj'):
-            raise Exception("unreachable code")
-            # for boehm, this function should never be called
         # the result will be in r.RES
         self.rm.force_allocate_reg(op.result, selected_reg=r.RES)
         self.rm.temp_boxes.append(op.result)
@@ -984,6 +985,8 @@
     prepare_debug_merge_point = void
     prepare_jit_debug = void
     prepare_keepalive = void
+    prepare_enter_portal_frame = void
+    prepare_leave_portal_frame = void
 
     def prepare_cond_call_gc_wb(self, op):
         arglocs = [self.ensure_reg(op.getarg(0))]
@@ -1019,9 +1022,8 @@
         #
         # we need to make sure that no variable is stored in spp (=r31)
         for arg in inputargs:
-            if self.loc(arg) is r.SPP:
-                loc2 = self.fm.loc(arg)
-                self.assembler.mc.store(r.SPP, loc2)
+            assert self.loc(arg) is not r.SPP, (
+                "variable stored in spp in prepare_label")
         self.rm.bindings_to_frame_reg.clear()
         #
         for i in range(len(inputargs)):
@@ -1062,18 +1064,6 @@
         resloc = self.after_call(op.result)
         return [resloc] + locs
 
-    def _prepare_args_for_new_op(self, new_args):
-        gc_ll_descr = self.cpu.gc_ll_descr
-        args = gc_ll_descr.args_for_new(new_args)
-        arglocs = []
-        for i in range(len(args)):
-            arg = args[i]
-            t = TempInt()
-            l = self.force_allocate_reg(t, selected_reg=r.MANAGED_REGS[i])
-            self.assembler.load(l, imm(arg))
-            arglocs.append(t)
-        return arglocs
-
     def prepare_force_spill(self, op):
         self.force_spill_var(op.getarg(0))
         return []
diff --git a/rpython/jit/backend/ppc/runner.py b/rpython/jit/backend/ppc/runner.py
--- a/rpython/jit/backend/ppc/runner.py
+++ b/rpython/jit/backend/ppc/runner.py
@@ -2,6 +2,7 @@
 from rpython.rtyper.lltypesystem import lltype, llmemory, rffi
 from rpython.rtyper.llinterp import LLInterpreter
 from rpython.rlib import rgc
+from rpython.rlib.jit_hooks import LOOP_RUN_CONTAINER
 from rpython.jit.backend.llsupport.llmodel import AbstractLLCPU
 from rpython.jit.backend.ppc.ppc_assembler import AssemblerPPC
 from rpython.jit.backend.ppc.arch import WORD
@@ -79,3 +80,7 @@
             mc.copy_to_raw_memory(jmp)
         # positions invalidated
         looptoken.compiled_loop_token.invalidate_positions = []
+
+    def get_all_loop_runs(self):
+        # not implemented
+        return lltype.malloc(LOOP_RUN_CONTAINER, 0)
diff --git a/rpython/jit/backend/ppc/test/autopath.py b/rpython/jit/backend/ppc/test/autopath.py
deleted file mode 100644
--- a/rpython/jit/backend/ppc/test/autopath.py
+++ /dev/null
@@ -1,114 +0,0 @@
-"""
-self cloning, automatic path configuration 
-
-copy this into any subdirectory of pypy from which scripts need 
-to be run, typically all of the test subdirs. 
-The idea is that any such script simply issues
-
-    import autopath
-
-and this will make sure that the parent directory containing "pypy"
-is in sys.path. 
-
-If you modify the master "autopath.py" version (in pypy/tool/autopath.py) 
-you can directly run it which will copy itself on all autopath.py files
-it finds under the pypy root directory. 
-
-This module always provides these attributes:
-
-    pypydir    pypy root directory path 
-    this_dir   directory where this autopath.py resides 
-
-"""
-
-
-def __dirinfo(part):
-    """ return (partdir, this_dir) and insert parent of partdir
-    into sys.path.  If the parent directories don't have the part
-    an EnvironmentError is raised."""
-
-    import sys, os
-    try:
-        head = this_dir = os.path.realpath(os.path.dirname(__file__))
-    except NameError:
-        head = this_dir = os.path.realpath(os.path.dirname(sys.argv[0]))
-
-    while head:
-        partdir = head
-        head, tail = os.path.split(head)
-        if tail == part:
-            break
-    else:
-        raise EnvironmentError, "'%s' missing in '%r'" % (partdir, this_dir)
-    
-    pypy_root = os.path.join(head, '')
-    try:
-        sys.path.remove(head)
-    except ValueError:
-        pass
-    sys.path.insert(0, head)
-
-    munged = {}
-    for name, mod in sys.modules.items():
-        if '.' in name:
-            continue
-        fn = getattr(mod, '__file__', None)
-        if not isinstance(fn, str):
-            continue
-        newname = os.path.splitext(os.path.basename(fn))[0]
-        if not newname.startswith(part + '.'):
-            continue
-        path = os.path.join(os.path.dirname(os.path.realpath(fn)), '')
-        if path.startswith(pypy_root) and newname != part:
-            modpaths = os.path.normpath(path[len(pypy_root):]).split(os.sep)
-            if newname != '__init__':
-                modpaths.append(newname)
-            modpath = '.'.join(modpaths)
-            if modpath not in sys.modules:
-                munged[modpath] = mod
-
-    for name, mod in munged.iteritems():
-        if name not in sys.modules:
-            sys.modules[name] = mod
-        if '.' in name:
-            prename = name[:name.rfind('.')]
-            postname = name[len(prename)+1:]
-            if prename not in sys.modules:
-                __import__(prename)
-                if not hasattr(sys.modules[prename], postname):
-                    setattr(sys.modules[prename], postname, mod)
-
-    return partdir, this_dir
-
-def __clone():
-    """ clone master version of autopath.py into all subdirs """
-    from os.path import join, walk
-    if not this_dir.endswith(join('pypy','tool')):
-        raise EnvironmentError("can only clone master version "
-                               "'%s'" % join(pypydir, 'tool',_myname))
-
-
-    def sync_walker(arg, dirname, fnames):
-        if _myname in fnames:
-            fn = join(dirname, _myname)
-            f = open(fn, 'rwb+')
-            try:
-                if f.read() == arg:
-                    print "checkok", fn
-                else:
-                    print "syncing", fn
-                    f = open(fn, 'w')
-                    f.write(arg)
-            finally:
-                f.close()
-    s = open(join(pypydir, 'tool', _myname), 'rb').read()
-    walk(pypydir, sync_walker, s)
-
-_myname = 'autopath.py'
-
-# set guaranteed attributes
-
-pypydir, this_dir = __dirinfo('pypy')
-
-if __name__ == '__main__':
-    __clone()
diff --git a/rpython/jit/backend/ppc/test/test_call_assembler.py b/rpython/jit/backend/ppc/test/test_call_assembler.py
deleted file mode 100644
--- a/rpython/jit/backend/ppc/test/test_call_assembler.py
+++ /dev/null
@@ -1,76 +0,0 @@
-import py
-from rpython.jit.metainterp.history import BoxInt, ConstInt
-from rpython.jit.metainterp.history import (BoxPtr, ConstPtr, BasicFailDescr,
-                                            BasicFinalDescr)
-from rpython.jit.metainterp.history import JitCellToken
-from rpython.jit.metainterp.resoperation import rop, ResOperation
-from rpython.jit.codewriter import heaptracker
-from rpython.jit.backend.llsupport.descr import GcCache
-from rpython.jit.backend.llsupport.gc import GcLLDescription
-from rpython.jit.backend.detect_cpu import getcpuclass
-from rpython.jit.tool.oparser import parse
-from rpython.rtyper.lltypesystem import lltype, llmemory, rffi
-from rpython.rtyper.annlowlevel import llhelper
-from rpython.rtyper.lltypesystem import rclass, rstr
-from rpython.jit.backend.llsupport.gc import GcLLDescr_framework
-
-from rpython.jit.codewriter.effectinfo import EffectInfo
-from rpython.jit.backend.ppc.runner import PPC_CPU
-from rpython.jit.backend.ppc.test.test_runner import FakeStats
-
-class TestAssembler(object):
-
-    type_system = 'lltype'
-
-    def setup_class(cls):
-        cls.cpu = PPC_CPU(rtyper=None, stats=FakeStats())
-        cls.cpu.setup_once()
-
-    def interpret_direct_entry_point(self, ops, args, namespace):
-        loop = self.parse(ops, namespace)
-        looptoken = JitCellToken()
-        self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken)
-        param_sign_list = []
-        for i, arg in enumerate(args):
-            if isinstance(arg, int):
-                param_sign_list.append(lltype.Signed)
-            elif isinstance(arg, float):
-                assert 0, "not implemented yet"
-            else:
-                assert 0, "not implemented yet"
-
-        signature = lltype.FuncType(param_sign_list, lltype.Signed)
-        fail_descr = self.cpu.execute_token(looptoken, *args)
-        return fail_descr
-
-    def parse(self, s, namespace, boxkinds=None):
-        return parse(s, self.cpu, namespace,
-                     type_system=self.type_system,
-                     boxkinds=boxkinds)
-
-    # XXX this test should also be used by the other backends
-    def test_call_assembler_vary_arguments(self):
-        namespace = {}
-        numargs = 20
-
-        for i in range(numargs + 1):
-            namespace["fdescr%d" % i] = BasicFailDescr(i)
-        namespace["finishdescr"] = BasicFinalDescr(numargs + 1)
-
-        for i in range(1, numargs + 1):
-            arglist = []
-            guardlist = []
-
-            for k in range(i):
-                name = "i%d" % k
-                arglist.append(name)
-                guardlist.append("guard_value(%s, %d, descr=fdescr%d) [%s]"
-                        % (name, k, k, name))
-
-            argstr = "".join(("[", ", ".join(arglist), "]\n"))
-            guardstr = "\n".join(guardlist) + "\n"
-            finish = "finish(descr=finishdescr)\n"
-
-            trace = "".join((argstr, guardstr, finish))
-            fail_descr = self.interpret_direct_entry_point(trace, range(i), namespace)
-            assert fail_descr.identifier == namespace["finishdescr"].identifier
diff --git a/rpython/jit/backend/ppc/test/test_calling_convention.py b/rpython/jit/backend/ppc/test/test_calling_convention.py
--- a/rpython/jit/backend/ppc/test/test_calling_convention.py
+++ b/rpython/jit/backend/ppc/test/test_calling_convention.py
@@ -1,5 +1,6 @@
 from rpython.jit.backend.test.calling_convention_test import CallingConvTests
 from rpython.jit.backend.ppc.codebuilder import PPCBuilder
+from rpython.rtyper.lltypesystem import lltype, rffi
 import rpython.jit.backend.ppc.register as r
 
 
@@ -10,7 +11,7 @@
         mc = PPCBuilder()
         mc.mr(r.r3.value, r.r1.value)
         mc.blr()
-        return mc.materialize(self.cpu, [])
+        return rffi.cast(lltype.Signed, mc.get_assembler_function())
 
     def get_alignment_requirements(self):
         return 16
diff --git a/rpython/jit/backend/ppc/test/test_field.py b/rpython/jit/backend/ppc/test/test_field.py
--- a/rpython/jit/backend/ppc/test/test_field.py
+++ b/rpython/jit/backend/ppc/test/test_field.py
@@ -1,5 +1,3 @@
-import autopath
-
 from rpython.jit.backend.ppc.field import Field
 from py.test import raises
 
diff --git a/rpython/jit/backend/ppc/test/test_form.py b/rpython/jit/backend/ppc/test/test_form.py
--- a/rpython/jit/backend/ppc/test/test_form.py
+++ b/rpython/jit/backend/ppc/test/test_form.py
@@ -1,11 +1,11 @@
-import autopath
 from rpython.jit.backend.ppc.codebuilder import b
 import random
 import sys
+from py.test import raises
 
 from rpython.jit.backend.ppc.form import Form, FormException
 from rpython.jit.backend.ppc.field import Field
-from rpython.jit.backend.ppc.assembler import Assembler
+from rpython.jit.backend.ppc.opassembler import OpAssembler as Assembler
 
 # 0                              31
 # +-------------------------------+
@@ -23,9 +23,9 @@
     'hh': Field('hh',  0,  7),
 }
 
-def p(w):
+def p(a):
     import struct
-    w = w.assemble()
+    w = a.insts[-1].assemble()
     return struct.pack('>i', w)
 
 class TestForm(Form):
@@ -43,26 +43,28 @@
             j = i(h=1)
             k = i(l=3)
             raises(FormException, k, l=0)
+            insts = []
         a = T()
         a.i(5, 6)
-        assert p(a.assemble0()[0]) == '\000\005\000\006'
+        assert p(a) == '\000\005\000\006'
         a = T()
         a.j(2)
-        assert p(a.assemble0()[0]) == '\000\001\000\002'
+        assert p(a) == '\000\001\000\002'
         a = T()
         a.k(4)
-        assert p(a.assemble0()[0]) == '\000\004\000\003'
+        assert p(a) == '\000\004\000\003'
 
     def test_defdesc(self):
         class T(Assembler):
             i = TestForm('hh', 'hl', 'lh', 'll')()
             i.default(hl=0).default(hh=1)
+            insts = []
         a = T()
         a.i(1, 2, 3, 4)
-        assert p(a.assemble0()[0]) == '\001\002\003\004'
+        assert p(a) == '\001\002\003\004'
         a = T()
         a.i(1, 3, 4)
-        assert p(a.assemble0()[0]) == '\001\000\003\004'
+        assert p(a) == '\001\000\003\004'
         a = T()
         a.i(3, 4)
-        assert p(a.assemble0()[0]) == '\001\000\003\004'
+        assert p(a) == '\001\000\003\004'
diff --git a/rpython/jit/backend/ppc/test/test_generated.py b/rpython/jit/backend/ppc/test/test_generated.py
deleted file mode 100644
--- a/rpython/jit/backend/ppc/test/test_generated.py
+++ /dev/null
@@ -1,525 +0,0 @@
-import py
-from rpython.jit.metainterp.history import (AbstractFailDescr,
-                                            AbstractDescr,
-                                            BasicFailDescr,
-                                            BoxInt, Box, BoxPtr,
-                                            ConstInt, ConstPtr,
-                                            BoxObj, Const,
-                                            ConstObj, BoxFloat, ConstFloat)
-from rpython.jit.metainterp.history import JitCellToken
-from rpython.jit.metainterp.resoperation import ResOperation, rop
-from rpython.rtyper.test.test_llinterp import interpret
-from rpython.jit.backend.detect_cpu import getcpuclass
-
-CPU = getcpuclass()
-class TestStuff(object):
-
-    def test0(self):
-        faildescr1 = BasicFailDescr(1)
-        faildescr2 = BasicFailDescr(2)
-        v1 = BoxInt()
-        v2 = BoxInt()
-        v3 = BoxInt()
-        v4 = BoxInt()
-        v5 = BoxInt()
-        v6 = BoxInt()
-        v7 = BoxInt()
-        v8 = BoxInt()
-        v9 = BoxInt()
-        v10 = BoxInt()
-        v11 = BoxInt()
-        v12 = BoxInt()
-        cpu = CPU(None, None)
-        cpu.setup_once()
-        inputargs = [v1, v2, v3, v4, v5, v6, v7, v8, v9, v10]
-        operations = [
-            ResOperation(rop.INT_SUB, [ConstInt(-1073741824), v7], v11),
-            ResOperation(rop.INT_GE, [v3, ConstInt(23)], v12),
-            ResOperation(rop.GUARD_TRUE, [v12], None, descr=faildescr1),
-            ResOperation(rop.FINISH, [v9, v6, v10, v2, v8, v5, v1, v4], None, descr=faildescr2),
-            ]
-        looptoken = JitCellToken()
-        operations[2].setfailargs([v12, v8, v3, v2, v1, v11])
-        cpu.compile_loop(inputargs, operations, looptoken)
-        args = [-12 , -26 , -19 , 7 , -5 , -24 , -37 , 62 , 9 , 12]
-        op = cpu.execute_token(looptoken, *args)
-        assert cpu.get_latest_value_int(0) == 0
-        assert cpu.get_latest_value_int(1) == 62
-        assert cpu.get_latest_value_int(2) == -19
-        assert cpu.get_latest_value_int(3) == -26
-        assert cpu.get_latest_value_int(4) == -12
-        assert cpu.get_latest_value_int(5) == -1073741787
-
-    def test_overflow(self):
-        faildescr1 = BasicFailDescr(1)
-        faildescr2 = BasicFailDescr(2)
-        faildescr3 = BasicFailDescr(3)
-        v1 = BoxInt()
-        v2 = BoxInt()
-        v3 = BoxInt()
-        v4 = BoxInt()
-        v5 = BoxInt()
-        v6 = BoxInt()
-        v7 = BoxInt()
-        v8 = BoxInt()
-        v9 = BoxInt()
-        v10 = BoxInt()
-        v11 = BoxInt()
-        v12 = BoxInt()
-        v13 = BoxInt()
-        v14 = BoxInt()
-        v15 = BoxInt()
-        v16 = BoxInt()
-        v17 = BoxInt()
-        v18 = BoxInt()
-        cpu = CPU(None, None)
-        cpu.setup_once()
-        inputargs = [v1, v2, v3, v4, v5, v6, v7, v8, v9, v10]
-        operations = [
-            ResOperation(rop.INT_SUB, [ConstInt(21), v5], v11),
-            ResOperation(rop.INT_MUL_OVF, [v8, v4], v12),
-            ResOperation(rop.GUARD_NO_OVERFLOW, [], None, descr=faildescr1),
-            ResOperation(rop.UINT_LT, [v10, v3], v13),
-            ResOperation(rop.INT_IS_TRUE, [v3], v14),
-            ResOperation(rop.INT_XOR, [v9, v8], v15),
-            ResOperation(rop.INT_LE, [v12, v6], v16),
-            ResOperation(rop.UINT_GT, [v15, v5], v17),
-            ResOperation(rop.UINT_LE, [ConstInt(-9), v13], v18),
-            ResOperation(rop.GUARD_FALSE, [v13], None, descr=faildescr2),
-            ResOperation(rop.FINISH, [v7, v1, v2], None, descr=faildescr3),
-            ]
-        operations[2].setfailargs([v10, v6])
-        operations[9].setfailargs([v15, v7, v10, v18, v4, v17, v1])
-        looptoken = JitCellToken()
-        cpu.compile_loop(inputargs, operations, looptoken)
-        args = [16 , 5 , 5 , 16 , 46 , 6 , 63 , 39 , 78 , 0]
-        op = cpu.execute_token(looptoken, *args)
-        assert cpu.get_latest_value_int(0) == 105
-        assert cpu.get_latest_value_int(1) == 63
-        assert cpu.get_latest_value_int(2) == 0
-        assert cpu.get_latest_value_int(3) == 0
-        assert cpu.get_latest_value_int(4) == 16
-        assert cpu.get_latest_value_int(5) == 1
-        assert cpu.get_latest_value_int(6) == 16
-
-    def test_sub_with_neg_const_first_arg(self):
-        faildescr1 = BasicFailDescr(1)
-        faildescr2 = BasicFailDescr(2)
-        faildescr3 = BasicFailDescr(3)
-        v1 = BoxInt()
-        v2 = BoxInt()
-        v3 = BoxInt()
-        v4 = BoxInt()
-        v5 = BoxInt()
-        v6 = BoxInt()
-        v7 = BoxInt()
-        v8 = BoxInt()
-        v9 = BoxInt()
-        v10 = BoxInt()
-        v11 = BoxInt()
-        v12 = BoxInt()
-        tmp13 = BoxInt()
-        cpu = CPU(None, None)
-        cpu.setup_once()
-        inputargs = [v1, v2, v3, v4, v5, v6, v7, v8, v9, v10]
-        operations = [
-            ResOperation(rop.INT_EQ, [ConstInt(17), v9], v11),
-            ResOperation(rop.INT_SUB_OVF, [ConstInt(-32), v7], v12),
-            ResOperation(rop.GUARD_NO_OVERFLOW, [], None, descr=faildescr1),
-            ResOperation(rop.INT_IS_ZERO, [v12], tmp13),
-            ResOperation(rop.GUARD_TRUE, [tmp13], None, descr=faildescr2),
-            ResOperation(rop.FINISH, [v5, v2, v1, v10, v3, v8, v4, v6], None, descr=faildescr3)
-            ]
-        operations[2].setfailargs([v8, v3])
-        operations[4].setfailargs([v2, v12, v1, v3, v4])
-        looptoken = JitCellToken()
-        cpu.compile_loop(inputargs, operations, looptoken)
-        args = [-5 , 24 , 46 , -15 , 13 , -8 , 0 , -6 , 6 , 6]
-        op = cpu.execute_token(looptoken, *args)
-        assert op.identifier == 2
-        assert cpu.get_latest_value_int(0) == 24
-        assert cpu.get_latest_value_int(1) == -32
-        assert cpu.get_latest_value_int(2) == -5
-        assert cpu.get_latest_value_int(3) == 46
-        assert cpu.get_latest_value_int(4) == -15
-
-    def test_tempbox_spilling_in_sub(self):
-        faildescr1 = BasicFailDescr(1)
-        faildescr2 = BasicFailDescr(2)
-        v1 = BoxInt()
-        v2 = BoxInt()
-        v3 = BoxInt()
-        v4 = BoxInt()
-        v5 = BoxInt()
-        v6 = BoxInt()
-        v7 = BoxInt()
-        v8 = BoxInt()
-        v9 = BoxInt()
-        v10 = BoxInt()
-        v11 = BoxInt()
-        v12 = BoxInt()
-        v13 = BoxInt()
-        v14 = BoxInt()
-        v15 = BoxInt()
-        cpu = CPU(None, None)
-        cpu.setup_once()
-        inputargs = [v1, v2, v3, v4, v5, v6, v7, v8, v9, v10]
-        operations = [
-            ResOperation(rop.INT_LT, [v9, v9], v11),
-            ResOperation(rop.INT_ADD, [ConstInt(715827882), v4], v12),
-            ResOperation(rop.INT_NEG, [v11], v13),
-            ResOperation(rop.INT_IS_TRUE, [v3], v14),
-            ResOperation(rop.INT_SUB_OVF, [v3, ConstInt(-95)], v15),
-            ResOperation(rop.GUARD_NO_OVERFLOW, [], None, descr=faildescr1),
-            ResOperation(rop.FINISH, [v8, v2, v6, v5, v7, v1, v10], None, descr=faildescr2),
-            ]
-        operations[5].setfailargs([])
-        looptoken = JitCellToken()
-        cpu.compile_loop(inputargs, operations, looptoken)
-        args = [19 , -3 , -58 , -7 , 12 , 22 , -54 , -29 , -19 , -64]
-        op = cpu.execute_token(looptoken, *args)
-        assert cpu.get_latest_value_int(0) == -29
-        assert cpu.get_latest_value_int(1) == -3
-        assert cpu.get_latest_value_int(2) == 22
-        assert cpu.get_latest_value_int(3) == 12
-        assert cpu.get_latest_value_int(4) == -54
-        assert cpu.get_latest_value_int(5) == 19
-        assert cpu.get_latest_value_int(6) == -64
-
-    def test_tempbox2(self):
-        faildescr1 = BasicFailDescr(1)
-        faildescr2 = BasicFailDescr(2)
-        v1 = BoxInt()
-        v2 = BoxInt()
-        v3 = BoxInt()
-        v4 = BoxInt()
-        v5 = BoxInt()
-        v6 = BoxInt()
-        v7 = BoxInt()
-        v8 = BoxInt()
-        v9 = BoxInt()
-        v10 = BoxInt()
-        v11 = BoxInt()
-        v12 = BoxInt()
-        v13 = BoxInt()
-        v14 = BoxInt()
-        v15 = BoxInt()
-        cpu = CPU(None, None)
-        cpu.setup_once()
-        inputargs = [v1, v2, v3, v4, v5, v6, v7, v8, v9, v10]
-        operations = [
-            ResOperation(rop.INT_LT, [v5, ConstInt(-67)], v11),
-            ResOperation(rop.INT_INVERT, [v2], v12),
-            ResOperation(rop.INT_SUB, [ConstInt(-45), v2], v13),
-            ResOperation(rop.INT_SUB, [ConstInt(99), v6], v14),
-            ResOperation(rop.INT_MUL_OVF, [v6, v9], v15),
-            ResOperation(rop.GUARD_NO_OVERFLOW, [], None, descr=faildescr1),
-            ResOperation(rop.FINISH, [v1, v4, v10, v8, v7, v3], None, descr=faildescr2),
-            ]
-        looptoken = JitCellToken()
-        operations[5].setfailargs([])
-        cpu.compile_loop(inputargs, operations, looptoken)
-        args = [1073741824 , 95 , -16 , 5 , 92 , 12 , 32 , 17 , 37 , -63]
-        op = cpu.execute_token(looptoken, *args)
-        assert cpu.get_latest_value_int(0) == 1073741824
-        assert cpu.get_latest_value_int(1) == 5
-        assert cpu.get_latest_value_int(2) == -63
-        assert cpu.get_latest_value_int(3) == 17
-        assert cpu.get_latest_value_int(4) == 32
-        assert cpu.get_latest_value_int(5) == -16
-
-    def test_wrong_guard(self):
-        # generated by:
-        # ../test/ test/test_zll_random.py -l -k arm -s --block-length=10 --random-seed=4338
-
-        faildescr1 = BasicFailDescr(1)
-        faildescr2 = BasicFailDescr(2)
-        faildescr3 = BasicFailDescr(3)
-        faildescr4 = BasicFailDescr(4)
-        v1 = BoxInt(32)
-        v2 = BoxInt(41)
-        v3 = BoxInt(-9)
-        v4 = BoxInt(12)
-        v5 = BoxInt(-18)
-        v6 = BoxInt(46)
-        v7 = BoxInt(15)
-        v8 = BoxInt(17)
-        v9 = BoxInt(10)
-        v10 = BoxInt(12)
-        v11 = BoxInt()
-        v12 = BoxInt()
-        v13 = BoxInt()
-        v14 = BoxInt()
-        tmp15 = BoxInt()
-        tmp16 = BoxInt()
-        tmp17 = BoxInt()
-        cpu = CPU(None, None)
-        cpu.setup_once()
-        inputargs = [v1, v2, v3, v4, v5, v6, v7, v8, v9, v10]
-        operations = [
-            ResOperation(rop.INT_IS_TRUE, [v1], tmp15),
-            ResOperation(rop.GUARD_TRUE, [tmp15], None, descr=faildescr1),
-            ResOperation(rop.INT_GT, [v4, v5], v11),
-            ResOperation(rop.INT_XOR, [ConstInt(-4), v7], v12),
-            ResOperation(rop.INT_MUL, [ConstInt(23), v11], v13),
-            ResOperation(rop.UINT_GE, [ConstInt(1), v13], v14),
-            ResOperation(rop.INT_IS_ZERO, [v14], tmp16),
-            ResOperation(rop.GUARD_TRUE, [tmp16], None, descr=faildescr2),
-            ResOperation(rop.INT_IS_TRUE, [v12], tmp17),
-            ResOperation(rop.GUARD_FALSE, [tmp17], None, descr=faildescr3),
-            ResOperation(rop.FINISH, [v8, v10, v6, v3, v2, v9], None, descr=faildescr4),
-            ]
-        looptoken = JitCellToken()
-        operations[1].setfailargs([v8, v6, v1])
-        operations[7].setfailargs([v4])
-        operations[9].setfailargs([v10, v13])
-        args = [32 , 41 , -9 , 12 , -18 , 46 , 15 , 17 , 10 , 12]
-        cpu.compile_loop(inputargs, operations, looptoken)
-        op = cpu.execute_token(looptoken, *args)
-        assert op.identifier == 3
-        assert cpu.get_latest_value_int(0) == 12
-        assert cpu.get_latest_value_int(1) == 23
-
-    def test_wrong_guard2(self):
-        # random seed: 8029
-        # block length: 10
-        faildescr1 = BasicFailDescr(1)
-        faildescr2 = BasicFailDescr(2)
-        faildescr3 = BasicFailDescr(3)
-        v1 = BoxInt()
-        v2 = BoxInt()
-        v3 = BoxInt()
-        v4 = BoxInt()
-        v5 = BoxInt()
-        v6 = BoxInt()
-        v7 = BoxInt()
-        v8 = BoxInt()
-        v9 = BoxInt()
-        v10 = BoxInt()
-        v11 = BoxInt()
-        v12 = BoxInt()
-        v13 = BoxInt()
-        v14 = BoxInt()
-        v15 = BoxInt()
-        v16 = BoxInt()
-        tmp17 = BoxInt()
-        cpu = CPU(None, None)
-        cpu.setup_once()
-        inputargs = [v1, v2, v3, v4, v5, v6, v7, v8, v9, v10]
-        operations = [
-            ResOperation(rop.INT_ADD_OVF, [v8, ConstInt(-30)], v11),
-            ResOperation(rop.GUARD_NO_OVERFLOW, [], None, descr=faildescr1),
-            ResOperation(rop.UINT_LE, [v11, v1], v12),
-            ResOperation(rop.INT_AND, [v11, ConstInt(31)], tmp17),
-            ResOperation(rop.UINT_RSHIFT, [v12, tmp17], v13),
-            ResOperation(rop.INT_NE, [v3, v2], v14),
-            ResOperation(rop.INT_NE, [ConstInt(1), v11], v15),
-            ResOperation(rop.INT_NE, [ConstInt(23), v15], v16),
-            ResOperation(rop.GUARD_FALSE, [v15], None, descr=faildescr2),
-            ResOperation(rop.FINISH, [v4, v10, v6, v5, v9, v7], None, descr=faildescr3),
-            ]
-        operations[1].setfailargs([v6, v8, v1, v4])
-        operations[8].setfailargs([v5, v9])
-        looptoken = JitCellToken()
-        cpu.compile_loop(inputargs, operations, looptoken)
-        args = [-8 , 0 , 62 , 35 , 16 , 9 , 30 , 581610154 , -1 , 738197503]
-        op = cpu.execute_token(looptoken, *args)
-        assert op.identifier == 2
-        assert cpu.get_latest_value_int(0) == 16
-        assert cpu.get_latest_value_int(1) == -1
-
-    def test_wrong_guard3(self):
-        # random seed: 8029
-        # block length: 10
-        faildescr1 = BasicFailDescr(1)
-        faildescr2 = BasicFailDescr(2)
-        faildescr3 = BasicFailDescr(3)
-        faildescr4 = BasicFailDescr(4)
-        v1 = BoxInt()
-        v2 = BoxInt()
-        v3 = BoxInt()
-        v4 = BoxInt()
-        v5 = BoxInt()
-        v6 = BoxInt()
-        v7 = BoxInt()
-        v8 = BoxInt()
-        v9 = BoxInt()
-        v10 = BoxInt()
-        v11 = BoxInt()
-        v12 = BoxInt()
-        v13 = BoxInt()
-        v14 = BoxInt()
-        v15 = BoxInt()
-        v16 = BoxInt()
-        cpu = CPU(None, None)
-        cpu.setup_once()
-        inputargs = [v1, v2, v3, v4, v5, v6, v7, v8, v9, v10]
-        operations = [
-            ResOperation(rop.UINT_LT, [ConstInt(-11), v7], v11),
-            ResOperation(rop.INT_GE, [v3, v5], v12),
-            ResOperation(rop.INT_INVERT, [v9], v13),
-            ResOperation(rop.GUARD_VALUE, [v13, ConstInt(14)], None, descr=faildescr3),
-            ResOperation(rop.INT_IS_ZERO, [v12], v14),
-            ResOperation(rop.INT_SUB, [v2, v13], v15),
-            ResOperation(rop.GUARD_VALUE, [v15, ConstInt(-32)], None, descr=faildescr4),
-            ResOperation(rop.INT_FLOORDIV, [v3, ConstInt(805306366)], v16),
-            ResOperation(rop.GUARD_VALUE, [v15, ConstInt(0)], None, descr=faildescr1),
-            ResOperation(rop.FINISH, [v10, v8, v1, v6, v4], None, descr=faildescr2),
-            ]
-        operations[3].setfailargs([])
-        operations[-4].setfailargs([v15])
-        operations[-2].setfailargs([v9, v4, v10, v11, v14])
-        looptoken = JitCellToken()
-        cpu.compile_loop(inputargs, operations, looptoken)
-        args = [-39 , -18 , 1588243114 , -9 , -4 , 1252698794 , 0 , 715827882 , -15 , 536870912]
-        op = cpu.execute_token(looptoken, *args)
-        assert op.identifier == 1
-        assert cpu.get_latest_value_int(0) == -15
-        assert cpu.get_latest_value_int(1) == -9
-        assert cpu.get_latest_value_int(2) == 536870912
-        assert cpu.get_latest_value_int(3) == 0
-        assert cpu.get_latest_value_int(4) == 0
-
-    def test_wrong_result(self):
-        # generated by:
-        # ../test/ test/test_zll_random.py -l -k arm -s --block-length=10 --random-seed=7389
-        faildescr1 = BasicFailDescr(1)
-        faildescr2 = BasicFailDescr(2)
-        faildescr3 = BasicFailDescr(3)
-        faildescr4 = BasicFailDescr(4)
-        v1 = BoxInt()
-        v2 = BoxInt()
-        v3 = BoxInt()
-        v4 = BoxInt()
-        v5 = BoxInt()
-        v6 = BoxInt()
-        v7 = BoxInt()
-        v8 = BoxInt()
-        v9 = BoxInt()
-        v10 = BoxInt()
-        v11 = BoxInt()
-        v12 = BoxInt()
-        v13 = BoxInt()
-        v14 = BoxInt()
-        v15 = BoxInt()
-        tmp16 = BoxInt()
-        tmp17 = BoxInt()
-        cpu = CPU(None, None)
-        cpu.setup_once()
-        inputargs = [v1, v2, v3, v4, v5, v6, v7, v8, v9, v10]
-        operations = [
-            ResOperation(rop.INT_IS_TRUE, [v3], tmp16),
-            ResOperation(rop.GUARD_TRUE, [tmp16], None, descr=faildescr1),
-            ResOperation(rop.INT_AND, [v7, ConstInt(31)], tmp17),
-            ResOperation(rop.INT_RSHIFT, [v5, tmp17], v11),
-            ResOperation(rop.INT_OR, [v6, v8], v12),
-            ResOperation(rop.GUARD_VALUE, [v11, ConstInt(-2)], None, descr=faildescr2),
-            ResOperation(rop.INT_LE, [ConstInt(1789569706), v10], v13),
-            ResOperation(rop.INT_IS_TRUE, [v4], v14),
-            ResOperation(rop.INT_XOR, [v14, v3], v15),
-            ResOperation(rop.GUARD_VALUE, [v8, ConstInt(-8)], None, descr=faildescr3),
-            ResOperation(rop.FINISH, [v1, v2, v9], None, descr=faildescr4),
-            ]
-        operations[1].setfailargs([v9, v1])
-        operations[5].setfailargs([v10, v2, v11, v3])
-        operations[9].setfailargs([v5, v7, v12, v14, v2, v13, v8])
-        looptoken = JitCellToken()
-        cpu.compile_loop(inputargs, operations, looptoken)
-        args = [0 , -2 , 24 , 1 , -4 , 13 , -95 , 33 , 2 , -44]
-        op = cpu.execute_token(looptoken, *args)
-        assert op.identifier == 3
-        assert cpu.get_latest_value_int(0) == -4
-        assert cpu.get_latest_value_int(1) == -95
-        assert cpu.get_latest_value_int(2) == 45
-        assert cpu.get_latest_value_int(3) == 1
-        assert cpu.get_latest_value_int(4) == -2
-        assert cpu.get_latest_value_int(5) == 0
-        assert cpu.get_latest_value_int(6) == 33
-
-    def test_int_add(self):
-        # random seed: 1202
-        # block length: 4
-        # AssertionError: Got 1431655764, expected 357913940 for value #3
-        faildescr1 = BasicFailDescr(1)
-        faildescr2 = BasicFailDescr(2)
-        v1 = BoxInt()
-        v2 = BoxInt()
-        v3 = BoxInt()
-        v4 = BoxInt()
-        v5 = BoxInt()
-        v6 = BoxInt()
-        v7 = BoxInt()
-        v8 = BoxInt()
-        v9 = BoxInt()
-        v10 = BoxInt()
-        v11 = BoxInt()
-        tmp12 = BoxInt()
-        cpu = CPU(None, None)
-        cpu.setup_once()
-        inputargs = [v1, v2, v3, v4, v5, v6, v7, v8, v9, v10]
-        operations = [
-            ResOperation(rop.INT_ADD, [ConstInt(-1073741825), v3], v11),
-            ResOperation(rop.INT_IS_TRUE, [v1], tmp12),
-            ResOperation(rop.GUARD_FALSE, [tmp12], None, descr=faildescr1),
-            ResOperation(rop.FINISH, [v8, v2, v10, v6, v7, v9, v5, v4], None, descr=faildescr2),
-            ]
-        operations[2].setfailargs([v10, v3, v6, v11, v9, v2])
-        looptoken = JitCellToken()
-        cpu.compile_loop(inputargs, operations, looptoken)
-        args = [3 , -5 , 1431655765 , 47 , 12 , 1789569706 , 15 , 939524096 , 16 , -43]
-        op = cpu.execute_token(looptoken, *args)
-        assert op.identifier == 1
-        assert cpu.get_latest_value_int(0) == -43
-        assert cpu.get_latest_value_int(1) == 1431655765
-        assert cpu.get_latest_value_int(2) == 1789569706
-        assert cpu.get_latest_value_int(3) == 357913940
-        assert cpu.get_latest_value_int(4) == 16
-        assert cpu.get_latest_value_int(5) == -5
-
-    def test_wrong_result2(self):
-        # block length 10
-        # random seed 1
-        f1 = BasicFailDescr(1)
-        f2 = BasicFailDescr(2)
-        f3 = BasicFailDescr(3)
-        v1 = BoxInt()
-        v2 = BoxInt()
-        v3 = BoxInt()
-        v4 = BoxInt()
-        v5 = BoxInt()
-        v6 = BoxInt()
-        v7 = BoxInt()
-        v8 = BoxInt()
-        v9 = BoxInt()
-        v10 = BoxInt()
-        v11 = BoxInt()
-        v12 = BoxInt()
-        v13 = BoxInt()
-        v14 = BoxInt()
-        v15 = BoxInt()
-        cpu = CPU(None, None)
-        cpu.setup_once()
-        inputargs = [v1, v2, v3, v4, v5, v6, v7, v8, v9, v10]
-        operations = [
-            ResOperation(rop.INT_LE, [v6, v1], v11),
-            ResOperation(rop.SAME_AS, [ConstInt(-14)], v12),
-            ResOperation(rop.INT_ADD, [ConstInt(24), v4], v13),
-            ResOperation(rop.UINT_RSHIFT, [v6, ConstInt(0)], v14),
-            ResOperation(rop.GUARD_VALUE, [v14, ConstInt(1)], None, descr=f3),
-            ResOperation(rop.INT_MUL, [v13, ConstInt(12)], v15),
-            ResOperation(rop.GUARD_FALSE, [v11], None, descr=f1),
-            ResOperation(rop.FINISH, [v2, v3, v5, v7, v10, v8, v9], None, descr=f2),
-            ]
-        operations[-2].setfailargs([v4, v10, v3, v9, v14, v2])
-        operations[4].setfailargs([v14])
-        looptoken = JitCellToken()
-        cpu.compile_loop(inputargs, operations, looptoken)
-        args = [14 , -20 , 18 , -2058005163 , 6 , 1 , -16 , 11 , 0 , 19]
-        op = cpu.execute_token(looptoken, *args)
-        assert op.identifier == 1
-        assert cpu.get_latest_value_int(0) == -2058005163
-        assert cpu.get_latest_value_int(1) == 19
-        assert cpu.get_latest_value_int(2) == 18
-        assert cpu.get_latest_value_int(3) == 0
-        assert cpu.get_latest_value_int(4) == 1
-        assert cpu.get_latest_value_int(5) == -20
diff --git a/rpython/jit/backend/ppc/test/test_ppc.py b/rpython/jit/backend/ppc/test/test_ppc.py
--- a/rpython/jit/backend/ppc/test/test_ppc.py
+++ b/rpython/jit/backend/ppc/test/test_ppc.py
@@ -6,7 +6,8 @@
 from rpython.jit.backend.ppc.register import *
 from rpython.jit.backend.ppc import form
 from rpython.jit.backend import detect_cpu
-from rpython.jit.backend.ppc.arch import IS_PPC_32, IS_PPC_64, WORD
+from rpython.jit.backend.ppc.arch import IS_PPC_32, IS_PPC_64, IS_BIG_ENDIAN
+from rpython.jit.backend.ppc.arch import WORD
 
 from rpython.rtyper.lltypesystem import lltype, rffi
 from rpython.rtyper.annlowlevel import llhelper
@@ -15,7 +16,8 @@
 
 class TestDisassemble(object):
     def test_match(self):
-        A = BasicPPCAssembler
+        class A(BasicPPCAssembler):
+            insts = []
         a = A()
         a.add(1, 2, 3)
         inst = a.insts[-1]
@@ -29,12 +31,11 @@
 - Create a function and call it
 - Compare the return value with the expected result
 """
-def asmtest(expected=-1):
+def asmtest(expected):
     def testmaker(test):
         def newtest(self):
             a = PPCBuilder()
             test(self, a)
-            #f = a.assemble()
             f = a.get_assembler_function()
             assert f() == expected
         return newtest
@@ -196,10 +197,16 @@
         a.li(3, 50)
         if IS_PPC_32:
             a.load_imm(r10, call_addr)
-        else:
+        elif IS_BIG_ENDIAN:
+            # load the 3-words descriptor
             a.load_from_addr(r10, call_addr)
             a.load_from_addr(r2, call_addr+WORD)
             a.load_from_addr(r11, call_addr+2*WORD)
+        else:
+            # no descriptor on little-endian, but the ABI says r12 must
+            # contain the function pointer
+            a.load_imm(r10, call_addr)
+            a.mr(12, 10)
         a.mtctr(10)
         a.bctr()
         a.blr()
@@ -306,21 +313,6 @@
         lltype.free(p, flavor="raw")
 
 
-class AsmCode(object):
-    def __init__(self, size):
-        self.code = MachineCodeBlockWrapper()
-
-    def emit(self, insn):
-        bytes = struct.pack("i", insn)
-        for byte in bytes:
-            self.code.writechar(byte)
-
-    def get_function(self):
-        i = self.code.materialize(AsmMemoryManager(), [])
-        t = lltype.FuncType([], lltype.Signed)
-        return rffi.cast(lltype.Ptr(t), i)
-
-
 def func(arg):
     return arg + 15
 
diff --git a/rpython/jit/backend/ppc/test/test_rassemblermaker.py b/rpython/jit/backend/ppc/test/test_rassemblermaker.py
deleted file mode 100644
--- a/rpython/jit/backend/ppc/test/test_rassemblermaker.py
+++ /dev/null
@@ -1,39 +0,0 @@
-from rpython.jit.backend.ppc.rassemblermaker import make_rassembler
-from rpython.jit.backend.ppc.codebuilder import PPCAssembler
-
-RPPCAssembler = make_rassembler(PPCAssembler)
-
-_a = PPCAssembler()
-_a.add(3, 3, 4)
-add_r3_r3_r4 = _a.insts[0]
-
-def test_simple():
-    ra = RPPCAssembler()
-    ra.add(3, 3, 4)
-    assert ra.insts == [add_r3_r3_r4]
-
-def test_rtyped():
-    from rpython.rtyper.test.test_llinterp import interpret
-    def f():
-        ra = RPPCAssembler()
-        ra.add(3, 3, 4)
-        ra.lwz(1, 1, 1)  # ensure that high bit doesn't produce long but r_uint
-        return ra.insts[0]
-    res = interpret(f, [])
-    assert res == add_r3_r3_r4
-
-def test_mnemonic():
-    mrs = []
-    for A in PPCAssembler, RPPCAssembler:
-        a = A()
-        a.mr(3, 4)
-        mrs.append(a.insts[0])
-    assert mrs[0] == mrs[1]
-
-def test_spr_coding():
-    mrs = []
-    for A in PPCAssembler, RPPCAssembler:
-        a = A()
-        a.mtctr(3)
-        mrs.append(a.insts[0])
-    assert mrs[0] == mrs[1]
diff --git a/rpython/jit/backend/ppc/test/test_regalloc.py b/rpython/jit/backend/ppc/test/test_regalloc.py
--- a/rpython/jit/backend/ppc/test/test_regalloc.py
+++ b/rpython/jit/backend/ppc/test/test_regalloc.py
@@ -1,5 +1,6 @@
 from rpython.rtyper.lltypesystem import lltype, llmemory
-from rpython.rtyper.lltypesystem import rclass, rstr
+from rpython.rtyper.lltypesystem import rstr
+from rpython.rtyper import rclass
 from rpython.rtyper.annlowlevel import llhelper
 from rpython.rlib.objectmodel import instantiate
 from rpython.jit.backend.ppc.locations import (imm, RegisterLocation,
@@ -8,8 +9,6 @@
 from rpython.jit.backend.ppc.codebuilder import hi, lo
 from rpython.jit.backend.ppc.ppc_assembler import AssemblerPPC
 from rpython.jit.backend.ppc.arch import WORD
-from rpython.jit.backend.ppc.locations import get_spp_offset
-from rpython.jit.backend.detect_cpu import getcpuclass
 from rpython.jit.codewriter.effectinfo import EffectInfo
 from rpython.jit.codewriter import longlong
 from rpython.jit.metainterp.history import BasicFailDescr, \
@@ -118,8 +117,8 @@
     def test_mem_to_reg(self):
         self.asm.regalloc_mov(stack(5), reg(10))
         self.asm.regalloc_mov(stack(0), reg(0))
-        exp_instrs = [MI("load", r10.value, SPP.value, -(5 * WORD + WORD)),
-                      MI("load", r0.value, SPP.value, -(WORD))]
+        exp_instrs = [MI("load", r10.value, SPP.value, get_spp_offset(5)),
+                      MI("load", r0.value, SPP.value, get_spp_offset(0))]
         assert self.asm.mc.instrs == exp_instrs
 
     def test_mem_to_mem(self):
@@ -141,143 +140,15 @@
     def test_reg_to_mem(self):
         self.asm.regalloc_mov(reg(5), stack(10))
         self.asm.regalloc_mov(reg(0), stack(2))
-        exp_instrs = [MI("store", r5.value, SPP.value, -(10 * WORD + WORD)),
-                      MI("store", r0.value, SPP.value, -(2 * WORD + WORD))]
+        exp_instrs = [MI("store", r5.value, SPP.value, get_spp_offset(10)),
+                      MI("store", r0.value, SPP.value, get_spp_offset(2))]
         assert self.asm.mc.instrs == exp_instrs
 
 def reg(i):
     return RegisterLocation(i)
 
 def stack(i):
-    return StackLocation(i)
+    return StackLocation(i, get_spp_offset(i))
 
-CPU = getcpuclass()
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to