Author: Armin Rigo <ar...@tunes.org> Branch: ppc-updated-backend Changeset: r79681:72dfc868373f Date: 2015-09-14 10:45 +0200 http://bitbucket.org/pypy/pypy/changeset/72dfc868373f/
Log: PPC Backend #4: get test_runner fully passing. Fix many details, remove old code, etc. diff --git a/rpython/jit/backend/detect_cpu.py b/rpython/jit/backend/detect_cpu.py --- a/rpython/jit/backend/detect_cpu.py +++ b/rpython/jit/backend/detect_cpu.py @@ -59,6 +59,7 @@ 'x86': MODEL_X86, # Apple 'Power Macintosh': MODEL_PPC_64, 'ppc64': MODEL_PPC_64, + 'ppc64le': MODEL_PPC_64, 'x86_64': MODEL_X86, 'amd64': MODEL_X86, # freebsd 'AMD64': MODEL_X86, # win64 diff --git a/rpython/jit/backend/llsupport/assembler.py b/rpython/jit/backend/llsupport/assembler.py --- a/rpython/jit/backend/llsupport/assembler.py +++ b/rpython/jit/backend/llsupport/assembler.py @@ -213,6 +213,23 @@ self.mc.get_relative_pos()) def call_assembler(self, op, argloc, vloc, result_loc, tmploc): + """ + * argloc: location of the frame argument that we're passing to + the called assembler (this is the first return value + of locs_for_call_assembler()) + + * vloc: location of the virtualizable (not in a register; + this is the optional second return value of + locs_for_call_assembler(), or imm(0) if none returned) + + * result_loc: location of op.result (which is not be + confused with the next one) + + * tmploc: location where the actual call to the other piece + of assembler will return its jitframe result + (which is always a REF), before the helper may be + called + """ descr = op.getdescr() assert isinstance(descr, JitCellToken) # diff --git a/rpython/jit/backend/ppc/_flush_icache.c b/rpython/jit/backend/ppc/_flush_icache.c deleted file mode 100644 --- a/rpython/jit/backend/ppc/_flush_icache.c +++ /dev/null @@ -1,26 +0,0 @@ -#include <Python.h> -#include "../../../translator/c/src/asm_ppc.h" - -static PyObject* -_flush_icache(PyObject *self, PyObject *args) -{ - long base, size; - - if (!PyArg_ParseTuple(args, "ii:_flush_icache", &base, &size)) - return NULL; - - LL_flush_icache(base, size); - Py_INCREF(Py_None); - return Py_None; -} - -PyMethodDef _flush_icache_methods[] = { - {"_flush_icache", _flush_icache, METH_VARARGS, ""}, - {0, 0} -}; - -PyMODINIT_FUNC -init_flush_icache(void) -{ - Py_InitModule("_flush_icache", _flush_icache_methods); -} diff --git a/rpython/jit/backend/ppc/_ppcgen.c b/rpython/jit/backend/ppc/_ppcgen.c deleted file mode 100644 --- a/rpython/jit/backend/ppc/_ppcgen.c +++ /dev/null @@ -1,154 +0,0 @@ -#include <Python.h> -#include <sys/mman.h> - -#define __dcbf(base, index) \ - __asm__ ("dcbf %0, %1" : /*no result*/ : "b%" (index), "r" (base) : "memory") - - -static PyTypeObject* mmap_type; - -#if defined(__APPLE__) - -#include <mach-o/dyld.h> - -static PyObject* -_ppy_NSLookupAndBindSymbol(PyObject* self, PyObject* args) -{ - char *s; - NSSymbol sym; - - if (!PyArg_ParseTuple(args, "s", &s)) - return NULL; - - if (!NSIsSymbolNameDefined(s)) { - return PyErr_Format(PyExc_ValueError, - "symbol '%s' not found", s); - } - - sym = NSLookupAndBindSymbol(s); - - return PyInt_FromLong((long)NSAddressOfSymbol(sym)); -} - - -#elif defined(linux) - -#include <dlfcn.h> - -static PyObject* -_ppy_dlsym(PyObject* self, PyObject* args) -{ - char *s; - void *handle; - void *sym; - - if (!PyArg_ParseTuple(args, "s", &s)) - return NULL; - - handle = dlopen(RTLD_DEFAULT, RTLD_LAZY); - sym = dlsym(handle, s); - if (sym == NULL) { - return PyErr_Format(PyExc_ValueError, - "symbol '%s' not found", s); - } - return PyInt_FromLong((long)sym); -} - -#else - -#error "OS not supported" - -#endif - - -static PyObject* -_ppy_mmap_exec(PyObject* self, PyObject* args) -{ - PyObject* code_args; - PyObject* r; - PyObject* mmap_obj; - char* code; - size_t size; - - if (!PyArg_ParseTuple(args, "O!O!:mmap_exec", - mmap_type, &mmap_obj, - &PyTuple_Type, &code_args)) - return NULL; - - code = *((char**)mmap_obj + 2); - size = *((size_t*)mmap_obj + 3); - - r = ((PyCFunction)code)(NULL, code_args); - - Py_DECREF(args); - - return r; -} - -static PyObject* -_ppy_mmap_flush(PyObject* self, PyObject* arg) -{ - char* code; - size_t size; - int i = 0; - - if (!PyObject_TypeCheck(arg, mmap_type)) { - PyErr_SetString(PyExc_TypeError, - "mmap_flush: single argument must be mmap object"); - } - - code = *((char**)arg + 2); - size = *((size_t*)arg + 3); - - for (; i < size; i += 32){ - __dcbf(code, i); - } - - Py_INCREF(Py_None); - return Py_None; -} - - -PyMethodDef _ppy_methods[] = { -#if defined(__APPLE__) - {"NSLookupAndBindSymbol", _ppy_NSLookupAndBindSymbol, - METH_VARARGS, ""}, -#elif defined(linux) - {"dlsym", _ppy_dlsym, METH_VARARGS, ""}, -#endif - {"mmap_exec", _ppy_mmap_exec, METH_VARARGS, ""}, - {"mmap_flush", _ppy_mmap_flush, METH_O, ""}, - {0, 0} -}; - -#if !defined(MAP_ANON) && defined(__APPLE__) -#define MAP_ANON 0x1000 -#endif - -PyMODINIT_FUNC -init_ppcgen(void) -{ - PyObject* m; - PyObject* mmap_module; - PyObject* mmap_func; - PyObject* mmap_obj; - - m = Py_InitModule("_ppcgen", _ppy_methods); - - /* argh */ - /* time to campaign for a C API for the mmap module! */ - mmap_module = PyImport_ImportModule("mmap"); - if (!mmap_module) - return; - mmap_func = PyObject_GetAttrString(mmap_module, "mmap"); - if (!mmap_func) - return; - mmap_obj = PyEval_CallFunction(mmap_func, "iii", -1, 0, MAP_ANON); - if (!mmap_obj) - return; - mmap_type = mmap_obj->ob_type; - Py_INCREF(mmap_type); - Py_DECREF(mmap_obj); - Py_DECREF(mmap_func); - Py_DECREF(mmap_module); -} diff --git a/rpython/jit/backend/ppc/callbuilder.py b/rpython/jit/backend/ppc/callbuilder.py --- a/rpython/jit/backend/ppc/callbuilder.py +++ b/rpython/jit/backend/ppc/callbuilder.py @@ -214,7 +214,7 @@ # replace b1_location with BEQ(here) jmp_target = self.mc.currpos() pmc = OverwritingBuilder(self.mc, b1_location, 1) - pmc.bc(12, 2, jmp_target - b1_location) # "beq" + pmc.beq(jmp_target - b1_location) pmc.overwrite() if not we_are_translated(): # for testing: now we can access diff --git a/rpython/jit/backend/ppc/codebuilder.py b/rpython/jit/backend/ppc/codebuilder.py --- a/rpython/jit/backend/ppc/codebuilder.py +++ b/rpython/jit/backend/ppc/codebuilder.py @@ -16,6 +16,14 @@ from rpython.translator.tool.cbuild import ExternalCompilationInfo from rpython.jit.backend.ppc.rassemblermaker import make_rassembler + +# these are the *forbidden* encodings that don't accept register r0: +# addi rX, r0, immed +# subi rX, r0, immed +# addis rX, r0, immed +# subis rX, r0, immed + + A = Form("frD", "frA", "frB", "XO3", "Rc") A1 = Form("frD", "frB", "XO3", "Rc") A2 = Form("frD", "frA", "frC", "XO3", "Rc") @@ -910,30 +918,27 @@ def high(w): return (w >> 16) & 0x0000FFFF -# XXX check this -if we_are_translated(): - eci = ExternalCompilationInfo(includes = ['asm_ppc.h']) +_eci = ExternalCompilationInfo(post_include_bits=[ + '#define rpython_flush_icache() asm("isync":::"memory")\n' + ]) +flush_icache = rffi.llexternal( + "rpython_flush_icache", + [], + lltype.Void, + compilation_info=_eci, + _nowrapper=True, + sandboxsafe=True) - flush_icache = rffi.llexternal( - "LL_flush_icache", - [lltype.Signed, lltype.Signed], - lltype.Void, - compilation_info=eci, - _nowrapper=True, - sandboxsafe=True) -else: - def flush_icache(x, y): pass class PPCGuardToken(GuardToken): def __init__(self, cpu, gcmap, descr, failargs, faillocs, exc, frame_depth, is_guard_not_invalidated=False, is_guard_not_forced=False, fcond=c.cond_none): - assert fcond != c.cond_none GuardToken.__init__(self, cpu, gcmap, descr, failargs, faillocs, exc, frame_depth, is_guard_not_invalidated, is_guard_not_forced) self.fcond = fcond - #self.offset = offset + class OverwritingBuilder(PPCAssembler): def __init__(self, mc, start, num_insts=0): @@ -1205,14 +1210,10 @@ def currpos(self): return self.get_relative_pos() - def flush_cache(self, addr): - startaddr = rffi.cast(lltype.Signed, addr) - size = rffi.cast(lltype.Signed, self.get_relative_pos()) - flush_icache(startaddr, size) - def copy_to_raw_memory(self, addr): self._copy_to_raw_memory(addr) - self.flush_cache(addr) + if we_are_translated(): + flush_icache() self._dump(addr, "jit-backend-dump", 'ppc') def cmp_op(self, block, a, b, imm=False, signed=True, fp=False): diff --git a/rpython/jit/backend/ppc/opassembler.py b/rpython/jit/backend/ppc/opassembler.py --- a/rpython/jit/backend/ppc/opassembler.py +++ b/rpython/jit/backend/ppc/opassembler.py @@ -6,7 +6,9 @@ from rpython.jit.backend.ppc.locations import imm as make_imm_loc from rpython.jit.backend.ppc.arch import (IS_PPC_32, IS_PPC_64, WORD, MAX_REG_PARAMS, MAX_FREG_PARAMS, - PARAM_SAVE_AREA_OFFSET) + PARAM_SAVE_AREA_OFFSET, + THREADLOCAL_ADDR_OFFSET, + IS_BIG_ENDIAN) from rpython.jit.metainterp.history import (JitCellToken, TargetToken, Box, AbstractFailDescr, FLOAT, INT, REF) @@ -22,6 +24,7 @@ from rpython.rtyper.lltypesystem import rstr, rffi, lltype from rpython.rtyper.annlowlevel import cast_instance_to_gcref from rpython.jit.metainterp.resoperation import rop +from rpython.jit.codewriter.effectinfo import EffectInfo from rpython.jit.backend.ppc import callbuilder class IntOpAssembler(object): @@ -209,7 +212,7 @@ l0, res = arglocs self.mc.fabs(res.value, l0.value) - def emit_math_sqrt(self, op, arglocs, regalloc): + def _emit_math_sqrt(self, op, arglocs, regalloc): l0, res = arglocs self.mc.fsqrt(res.value, l0.value) @@ -320,7 +323,7 @@ self.mc.trap() self._cmp_guard_class(op, arglocs, regalloc) pmc = OverwritingBuilder(self.mc, patch_pos, 1) - pmc.bc(12, 0, self.mc.currpos() - patch_pos) # LT + pmc.blt(self.mc.currpos() - patch_pos) pmc.overwrite() self.guard_success_cc = c.EQ self._emit_guard(op, arglocs[3:]) @@ -355,6 +358,13 @@ self.guard_success_cc = c.EQ self._emit_guard(op, arglocs) + def emit_guard_not_forced_2(self, op, arglocs, regalloc): + guard_token = self.build_guard_token(op, arglocs[0].value, arglocs[1:], + c.cond_none, save_exc=False) + self._finish_gcmap = guard_token.gcmap + self._store_force_index(op) + self.store_info_on_descr(0, guard_token) + class MiscOpAssembler(object): @@ -448,6 +458,8 @@ pmc.overwrite() def emit_guard_exception(self, op, arglocs, regalloc): + # XXX FIXME + # XXX pos_exc_value and pos_exception are 8 bytes apart, don't need both loc, loc1, resloc, pos_exc_value, pos_exception = arglocs[:5] failargs = arglocs[5:] self.mc.load_imm(loc1, pos_exception.value) @@ -490,6 +502,9 @@ cb.emit() def emit_call(self, op, arglocs, regalloc): + oopspecindex = regalloc.get_oopspecindex(op) + if oopspecindex == EffectInfo.OS_MATH_SQRT: + return self._emit_math_sqrt(op, arglocs, regalloc) self._emit_call(op, arglocs) def emit_call_may_force(self, op, arglocs, regalloc): @@ -832,7 +847,7 @@ if jz_location != -1: pmc = OverwritingBuilder(self.mc, jz_location, 1) - pmc.bc(4, 1, self.mc.currpos() - jz_location) # !GT + pmc.ble(self.mc.currpos() - jz_location) # !GT pmc.overwrite() class StrOpAssembler(object): @@ -843,118 +858,61 @@ emit_strgetitem = FieldOpAssembler.emit_getarrayitem_gc emit_strsetitem = FieldOpAssembler.emit_setarrayitem_gc - #from ../x86/regalloc.py:928 ff. def emit_copystrcontent(self, op, arglocs, regalloc): - assert len(arglocs) == 0 - self._emit_copystrcontent(op, regalloc, is_unicode=False) + self._emit_copycontent(arglocs, is_unicode=False) def emit_copyunicodecontent(self, op, arglocs, regalloc): - assert len(arglocs) == 0 - self._emit_copystrcontent(op, regalloc, is_unicode=True) + self._emit_copycontent(arglocs, is_unicode=True) - def _emit_copystrcontent(self, op, regalloc, is_unicode): - # compute the source address - args = op.getarglist() - base_loc = regalloc._ensure_value_is_boxed(args[0], args) - ofs_loc = regalloc._ensure_value_is_boxed(args[2], args) - assert args[0] is not args[1] # forbidden case of aliasing - regalloc.possibly_free_var(args[0]) - if args[3] is not args[2] is not args[4]: # MESS MESS MESS: don't free - regalloc.possibly_free_var(args[2]) # it if ==args[3] or args[4] - srcaddr_box = TempPtr() - forbidden_vars = [args[1], args[3], args[4], srcaddr_box] - srcaddr_loc = regalloc.force_allocate_reg(srcaddr_box) - self._gen_address_inside_string(base_loc, ofs_loc, srcaddr_loc, - is_unicode=is_unicode) + def _emit_load_for_copycontent(self, dst, src_ptr, src_ofs, scale): + if src_ofs.is_imm(): + value = src_ofs.value << scale + if value < 32768: + self.mc.addi(dst.value, src_ptr.value, value) + else: + self.mc.load_imm(dst, value) + self.mc.add(dst.value, src_ptr.value, dst.value) + elif scale == 0: + self.mc.add(dst.value, src_ptr.value, src_ofs.value) + else: + self.mc.sldi(dst.value, src_ofs.value, scale) + self.mc.add(dst.value, src_ptr.value, dst.value) - # compute the destination address - forbidden_vars = [args[4], args[3], srcaddr_box] - dstaddr_box = TempPtr() - dstaddr_loc = regalloc.force_allocate_reg(dstaddr_box) - forbidden_vars.append(dstaddr_box) - base_loc = regalloc._ensure_value_is_boxed(args[1], forbidden_vars) - ofs_loc = regalloc._ensure_value_is_boxed(args[3], forbidden_vars) - assert base_loc.is_reg() - assert ofs_loc.is_reg() - regalloc.possibly_free_var(args[1]) - if args[3] is not args[4]: # more of the MESS described above - regalloc.possibly_free_var(args[3]) - regalloc.free_temp_vars() - self._gen_address_inside_string(base_loc, ofs_loc, dstaddr_loc, - is_unicode=is_unicode) + def _emit_copycontent(self, arglocs, is_unicode): + [src_ptr_loc, dst_ptr_loc, + src_ofs_loc, dst_ofs_loc, length_loc] = arglocs - # compute the length in bytes - forbidden_vars = [srcaddr_box, dstaddr_box] - if isinstance(args[4], Box): - length_box = args[4] - length_loc = regalloc.make_sure_var_in_reg(args[4], forbidden_vars) + if is_unicode: + basesize, itemsize, _ = symbolic.get_array_token(rstr.UNICODE, + self.cpu.translate_support_code) + if itemsize == 2: scale = 1 + elif itemsize == 4: scale = 2 + else: raise AssertionError else: - length_box = TempInt() - length_loc = regalloc.force_allocate_reg(length_box, forbidden_vars) - xxxxxxxxxxxxxxxxxxxxxxxx - imm = regalloc.convert_to_imm(args[4]) - self.load(length_loc, imm) - if is_unicode: - bytes_box = TempPtr() - bytes_loc = regalloc.force_allocate_reg(bytes_box, forbidden_vars) - scale = self._get_unicode_item_scale() - assert length_loc.is_reg() - with scratch_reg(self.mc): - self.mc.load_imm(r.SCRATCH, 1 << scale) - if IS_PPC_32: - self.mc.mullw(bytes_loc.value, r.SCRATCH.value, length_loc.value) - else: - self.mc.mulld(bytes_loc.value, r.SCRATCH.value, length_loc.value) - length_box = bytes_box - length_loc = bytes_loc - # call memcpy() - regalloc.before_call() - imm_addr = make_imm_loc(self.memcpy_addr) - self._emit_call(imm_addr, - [dstaddr_loc, srcaddr_loc, length_loc]) - - regalloc.possibly_free_var(length_box) - regalloc.possibly_free_var(dstaddr_box) - regalloc.possibly_free_var(srcaddr_box) - - def _gen_address_inside_string(self, baseloc, ofsloc, resloc, is_unicode): - if is_unicode: - ofs_items, _, _ = symbolic.get_array_token(rstr.UNICODE, - self.cpu.translate_support_code) - scale = self._get_unicode_item_scale() - else: - ofs_items, itemsize, _ = symbolic.get_array_token(rstr.STR, - self.cpu.translate_support_code) + basesize, itemsize, _ = symbolic.get_array_token(rstr.STR, + self.cpu.translate_support_code) assert itemsize == 1 scale = 0 - self._gen_address(ofsloc, ofs_items, scale, resloc, baseloc) - def _gen_address(self, sizereg, baseofs, scale, result, baseloc=None): - assert sizereg.is_reg() - if scale > 0: - scaled_loc = r.r0 - if IS_PPC_32: - self.mc.slwi(scaled_loc.value, sizereg.value, scale) - else: - self.mc.sldi(scaled_loc.value, sizereg.value, scale) + self._emit_load_for_copycontent(r.r0, src_ptr_loc, src_ofs_loc, scale) + self._emit_load_for_copycontent(r.r2, dst_ptr_loc, dst_ofs_loc, scale) + + if length_loc.is_imm(): + length = length_loc.getint() + self.mc.load_imm(r.r5, length << scale) else: - scaled_loc = sizereg - if baseloc is not None: - assert baseloc.is_reg() - self.mc.add(result.value, baseloc.value, scaled_loc.value) - self.mc.addi(result.value, result.value, baseofs) - else: - self.mc.addi(result.value, scaled_loc.value, baseofs) + if scale > 0: + self.mc.sldi(r.r5.value, length_loc.value, scale) + elif length_loc is not r.r5: + self.mc.mr(r.r5.value, length_loc.value) - def _get_unicode_item_scale(self): - _, itemsize, _ = symbolic.get_array_token(rstr.UNICODE, - self.cpu.translate_support_code) - if itemsize == 4: - return 2 - elif itemsize == 2: - return 1 - else: - raise AssertionError("bad unicode item size") + self.mc.mr(r.r4.value, r.r0.value) + self.mc.addi(r.r4.value, r.r4.value, basesize) + self.mc.addi(r.r3.value, r.r2.value, basesize) + + cb = callbuilder.CallBuilder(self, imm(self.memcpy_addr), + [r.r3, r.r4, r.r5], None) + cb.emit() class UnicodeOpAssembler(object): @@ -991,135 +949,142 @@ emit_jit_debug = emit_debug_merge_point emit_keepalive = emit_debug_merge_point - def emit_cond_call_gc_wb(self, op, arglocs, regalloc): + def _write_barrier_fastpath(self, mc, descr, arglocs, regalloc, array=False, + is_frame=False, align_stack=False): # Write code equivalent to write_barrier() in the GC: it checks - # a flag in the object at arglocs[0], and if set, it calls the - # function remember_young_pointer() from the GC. The two arguments - # to the call are in arglocs[:2]. The latter saves registers as needed - # and call the function jit_remember_young_pointer() from the GC. - descr = op.getdescr() + # a flag in the object at arglocs[0], and if set, it calls a + # helper piece of assembler. The latter saves registers as needed + # and call the function remember_young_pointer() from the GC. if we_are_translated(): cls = self.cpu.gc_ll_descr.has_write_barrier_class() assert cls is not None and isinstance(descr, cls) # - opnum = op.getopnum() - card_marking = False + card_marking_mask = 0 mask = descr.jit_wb_if_flag_singlebyte - if opnum == rop.COND_CALL_GC_WB_ARRAY and descr.jit_wb_cards_set != 0: + if array and descr.jit_wb_cards_set != 0: # assumptions the rest of the function depends on: assert (descr.jit_wb_cards_set_byteofs == descr.jit_wb_if_flag_byteofs) - assert descr.jit_wb_cards_set_singlebyte == -0x80 - card_marking = True - mask = descr.jit_wb_if_flag_singlebyte | -0x80 + card_marking_mask = descr.jit_wb_cards_set_singlebyte # loc_base = arglocs[0] + assert loc_base.is_reg() + if is_frame: + assert loc_base is r.SPP assert _check_imm_arg(descr.jit_wb_if_flag_byteofs) - with scratch_reg(self.mc): - self.mc.lbz(r.SCRATCH.value, loc_base.value, - descr.jit_wb_if_flag_byteofs) - # test whether this bit is set - mask &= 0xFF - self.mc.andix(r.SCRATCH.value, r.SCRATCH.value, mask) + mc.lbz(r.SCRATCH2.value, loc_base.value, descr.jit_wb_if_flag_byteofs) + mc.andix(r.SCRATCH.value, r.SCRATCH2.value, mask & 0xFF) - jz_location = self.mc.currpos() - self.mc.nop() + jz_location = mc.get_relative_pos() + mc.trap() # patched later with 'beq' # for cond_call_gc_wb_array, also add another fast path: # if GCFLAG_CARDS_SET, then we can just set one bit and be done - if card_marking: - with scratch_reg(self.mc): - self.mc.lbz(r.SCRATCH.value, loc_base.value, - descr.jit_wb_if_flag_byteofs) - self.mc.extsb(r.SCRATCH.value, r.SCRATCH.value) - - # test whether this bit is set - self.mc.cmpwi(0, r.SCRATCH.value, 0) - - js_location = self.mc.currpos() - self.mc.nop() + if card_marking_mask: + # GCFLAG_CARDS_SET is in the same byte, loaded in r2 already + mc.andix(r.SCRATCH.value, r.SCRATCH2.value, + card_marking_mask & 0xFF) + js_location = mc.get_relative_pos() + mc.trap() # patched later with 'bne' else: js_location = 0 # Write only a CALL to the helper prepared in advance, passing it as # argument the address of the structure we are writing into # (the first argument to COND_CALL_GC_WB). - helper_num = card_marking - - if self._regalloc.fprm.reg_bindings: + helper_num = (card_marking_mask != 0) + if is_frame: + helper_num = 4 + elif regalloc.fprm.reg_bindings: helper_num += 2 if self.wb_slowpath[helper_num] == 0: # tests only assert not we_are_translated() self.cpu.gc_ll_descr.write_barrier_descr = descr - self._build_wb_slowpath(card_marking, - bool(self._regalloc.fprm.reg_bindings)) + self._build_wb_slowpath(card_marking_mask != 0, + bool(regalloc.fprm.reg_bindings)) assert self.wb_slowpath[helper_num] != 0 # - if loc_base is not r.r3: - self.mc.store(r.r3.value, r.SP.value, 24) - remap_frame_layout(self, [loc_base], [r.r3], r.SCRATCH) - addr = self.wb_slowpath[helper_num] - func = rffi.cast(lltype.Signed, addr) - self.mc.bl_abs(func) - if loc_base is not r.r3: - self.mc.load(r.r3.value, r.SP.value, 24) + if not is_frame: + mc.mr(r.r0.value, loc_base.value) # unusual argument location + if is_frame and align_stack: + XXXX + mc.SUB_ri(esp.value, 16 - WORD) # erase the return address + mc.load_imm(r.SCRATCH2, self.wb_slowpath[helper_num]) + mc.mtctr(r.SCRATCH2.value) + mc.bctrl() + if is_frame and align_stack: + XXXX + mc.ADD_ri(esp.value, 16 - WORD) # erase the return address - # if GCFLAG_CARDS_SET, then we can do the whole thing that would - # be done in the CALL above with just four instructions, so here - # is an inline copy of them - if card_marking: - with scratch_reg(self.mc): - jns_location = self.mc.currpos() - self.mc.nop() # jump to the exit, patched later - # patch the JS above - offset = self.mc.currpos() - pmc = OverwritingBuilder(self.mc, js_location, 1) - # Jump if JS comparison is less than (bit set) - pmc.bc(12, 0, offset - js_location) - pmc.overwrite() - # - # case GCFLAG_CARDS_SET: emit a few instructions to do - # directly the card flag setting - loc_index = arglocs[1] - assert loc_index.is_reg() - tmp1 = arglocs[-1] - tmp2 = arglocs[-2] - tmp3 = arglocs[-3] - #byteofs - s = 3 + descr.jit_wb_card_page_shift + if card_marking_mask: + # The helper ends again with a check of the flag in the object. + # So here, we can simply write again a beq, which will be + # taken if GCFLAG_CARDS_SET is still not set. + jns_location = mc.get_relative_pos() + mc.trap() + # + # patch the 'bne' above + currpos = mc.currpos() + pmc = OverwritingBuilder(mc, js_location, 1) + pmc.bne(currpos - js_location) + pmc.overwrite() + # + # case GCFLAG_CARDS_SET: emit a few instructions to do + # directly the card flag setting + loc_index = arglocs[1] + if loc_index.is_reg(): - self.mc.srli_op(tmp3.value, loc_index.value, s) - self.mc.not_(tmp3.value, tmp3.value) + tmp_loc = arglocs[2] + n = descr.jit_wb_card_page_shift - # byte_index - self.mc.li(r.SCRATCH.value, 7) - self.mc.srli_op(loc_index.value, loc_index.value, - descr.jit_wb_card_page_shift) - self.mc.and_(tmp1.value, r.SCRATCH.value, loc_index.value) + # compute in tmp_loc the byte offset: + # ~(index >> (card_page_shift + 3)) ('~' is 'not_' below) + mc.srli_op(tmp_loc.value, loc_index.value, n + 3) - # set the bit - self.mc.li(tmp2.value, 1) - self.mc.lbzx(r.SCRATCH.value, loc_base.value, tmp3.value) - self.mc.sl_op(tmp2.value, tmp2.value, tmp1.value) - self.mc.or_(r.SCRATCH.value, r.SCRATCH.value, tmp2.value) - self.mc.stbx(r.SCRATCH.value, loc_base.value, tmp3.value) + # compute in r2 the index of the bit inside the byte: + # (index >> card_page_shift) & 7 + mc.rldicl(r.SCRATCH2.value, loc_index.value, 64 - n, 61) + mc.li(r.SCRATCH.value, 1) + mc.not_(tmp_loc.value, tmp_loc.value) + + # set r2 to 1 << r2 + mc.sl_op(r.SCRATCH2.value, r.SCRATCH.value, r.SCRATCH2.value) + + # set this bit inside the byte of interest + mc.lbzx(r.SCRATCH.value, loc_base.value, tmp_loc.value) + mc.or_(r.SCRATCH.value, r.SCRATCH.value, r.SCRATCH2.value) + mc.stbx(r.SCRATCH.value, loc_base.value, tmp_loc.value) # done - # patch the JNS above - offset = self.mc.currpos() - pmc = OverwritingBuilder(self.mc, jns_location, 1) - # Jump if JNS comparison is not less than (bit not set) - pmc.bc(4, 0, offset - jns_location) - pmc.overwrite() + else: + byte_index = loc_index.value >> descr.jit_wb_card_page_shift + byte_ofs = ~(byte_index >> 3) + byte_val = 1 << (byte_index & 7) + assert _check_imm_arg(byte_ofs) + + mc.lbz(r.SCRATCH.value, loc_base.value, byte_ofs) + mc.ori(r.SCRATCH.value, r.SCRATCH.value, byte_val) + mc.stb(r.SCRATCH.value, loc_base.value, byte_ofs) + # + # patch the beq just above + currpos = mc.currpos() + pmc = OverwritingBuilder(mc, jns_location, 1) + pmc.beq(currpos - jns_location) + pmc.overwrite() # patch the JZ above - offset = self.mc.currpos() - pmc = OverwritingBuilder(self.mc, jz_location, 1) - # Jump if JZ comparison is zero (CMP 0 is equal) - pmc.bc(12, 2, offset - jz_location) + currpos = mc.currpos() + pmc = OverwritingBuilder(mc, jz_location, 1) + pmc.beq(currpos - jz_location) pmc.overwrite() - emit_cond_call_gc_wb_array = emit_cond_call_gc_wb + def emit_cond_call_gc_wb(self, op, arglocs, regalloc): + self._write_barrier_fastpath(self.mc, op.getdescr(), arglocs, regalloc) + + def emit_cond_call_gc_wb_array(self, op, arglocs, regalloc): + self._write_barrier_fastpath(self.mc, op.getdescr(), arglocs, regalloc, + array=True) + class ForceOpAssembler(object): @@ -1129,215 +1094,95 @@ res_loc = arglocs[0] self.mc.mr(res_loc.value, r.SPP.value) - # self._emit_guard(guard_op, regalloc._prepare_guard(guard_op), c.LT) - # from: ../x86/assembler.py:1668 - # XXX Split into some helper methods - def emit_guard_call_assembler(self, op, guard_op, arglocs, regalloc): - tmploc = arglocs[1] - resloc = arglocs[2] - callargs = arglocs[3:] + def emit_call_assembler(self, op, arglocs, regalloc): + if len(arglocs) == 3: + [result_loc, argloc, vloc] = arglocs + else: + [result_loc, argloc] = arglocs + vloc = imm(0) + self._store_force_index(self._find_nearby_operation(regalloc, +1)) + # 'result_loc' is either r3 or f1 + self.call_assembler(op, argloc, vloc, result_loc, r.r3) - faildescr = guard_op.getdescr() - fail_index = self.cpu.get_fail_descr_number(faildescr) - self._write_fail_index(fail_index) - descr = op.getdescr() - assert isinstance(descr, JitCellToken) - # check value - assert tmploc is r.RES - xxxxxxxxxxxx - self._emit_call(fail_index, imm(descr._ppc_func_addr), - callargs, result=tmploc) - if op.result is None: - value = self.cpu.done_with_this_frame_void_v + imm = staticmethod(imm) # for call_assembler() + + def _call_assembler_emit_call(self, addr, argloc, _): + self.regalloc_mov(argloc, r.r3) + self.mc.ld(r.r4.value, r.SP.value, THREADLOCAL_ADDR_OFFSET) + + cb = callbuilder.CallBuilder(self, addr, [r.r3, r.r4], r.r3) + cb.emit() + + def _call_assembler_emit_helper_call(self, addr, arglocs, result_loc): + cb = callbuilder.CallBuilder(self, addr, arglocs, result_loc) + cb.emit() + + def _call_assembler_check_descr(self, value, tmploc): + ofs = self.cpu.get_ofs_of_frame_field('jf_descr') + self.mc.ld(r.r5.value, r.r3.value, ofs) + if _check_imm_arg(value): + self.mc.cmp_op(0, r.r5.value, value, imm=True) else: + self.mc.load_imm(r.r4, value) + self.mc.cmp_op(0, r.r5.value, r.r4.value, imm=False) + jump_if_eq = self.mc.currpos() + self.mc.nop() # patched later + return jump_if_eq + + def _call_assembler_patch_je(self, result_loc, je_location): + jump_to_done = self.mc.currpos() + self.mc.nop() # patched later + # + currpos = self.mc.currpos() + pmc = OverwritingBuilder(self.mc, je_location, 1) + pmc.beq(currpos - je_location) + pmc.overwrite() + # + return jump_to_done + + def _call_assembler_load_result(self, op, result_loc): + if op.result is not None: + # load the return value from the dead frame's value index 0 kind = op.result.type - if kind == INT: - value = self.cpu.done_with_this_frame_int_v - elif kind == REF: - value = self.cpu.done_with_this_frame_ref_v - elif kind == FLOAT: - value = self.cpu.done_with_this_frame_float_v + descr = self.cpu.getarraydescr_for_frame(kind) + ofs = self.cpu.unpack_arraydescr(descr) + if kind == FLOAT: + assert result_loc is r.f1 + self.mc.lfd(r.f1.value, r.r3.value, ofs) else: - raise AssertionError(kind) + assert result_loc is r.r3 + self.mc.ld(r.r3.value, r.r3.value, ofs) - # take fast path on equality - # => jump on inequality - with scratch_reg(self.mc): - self.mc.load_imm(r.SCRATCH, value) - self.mc.cmp_op(0, tmploc.value, r.SCRATCH.value) - - #if values are equal we take the fast path - # Slow path, calling helper - # jump to merge point - - jd = descr.outermost_jitdriver_sd - assert jd is not None - - # Path A: load return value and reset token - # Fast Path using result boxes - - fast_jump_pos = self.mc.currpos() - self.mc.nop() - - # Reset the vable token --- XXX really too much special logic here:-( - if jd.index_of_virtualizable >= 0: - from pypy.jit.backend.llsupport.descr import FieldDescr - fielddescr = jd.vable_token_descr - assert isinstance(fielddescr, FieldDescr) - ofs = fielddescr.offset - tmploc = regalloc.get_scratch_reg(INT) - with scratch_reg(self.mc): - self.mov_loc_loc(arglocs[0], r.SCRATCH) - self.mc.li(tmploc.value, 0) - self.mc.storex(tmploc.value, 0, r.SCRATCH.value) - - if op.result is not None: - # load the return value from fail_boxes_xxx[0] - kind = op.result.type - if kind == INT: - adr = self.fail_boxes_int.get_addr_for_num(0) - elif kind == REF: - adr = self.fail_boxes_ptr.get_addr_for_num(0) - elif kind == FLOAT: - adr = self.fail_boxes_float.get_addr_for_num(0) - else: - raise AssertionError(kind) - with scratch_reg(self.mc): - self.mc.load_imm(r.SCRATCH, adr) - if op.result.type == FLOAT: - self.mc.lfdx(resloc.value, 0, r.SCRATCH.value) - else: - self.mc.loadx(resloc.value, 0, r.SCRATCH.value) - - # jump to merge point, patched later - fast_path_to_end_jump_pos = self.mc.currpos() - self.mc.nop() - - jmp_pos = self.mc.currpos() - pmc = OverwritingBuilder(self.mc, fast_jump_pos, 1) - pmc.bc(4, 2, jmp_pos - fast_jump_pos) + def _call_assembler_patch_jmp(self, jmp_location): + currpos = self.mc.currpos() + pmc = OverwritingBuilder(self.mc, jmp_location, 1) + pmc.b(currpos - jmp_location) pmc.overwrite() - # Path B: use assembler helper - asm_helper_adr = self.cpu.cast_adr_to_int(jd.assembler_helper_adr) - if self.cpu.supports_floats: - floats = r.VOLATILES_FLOAT - else: - floats = [] - - with Saved_Volatiles(self.mc, save_RES=False): - # result of previous call is in r3 - self.mov_loc_loc(arglocs[0], r.r4) - self.mc.call(asm_helper_adr) - - # merge point - currpos = self.mc.currpos() - pmc = OverwritingBuilder(self.mc, fast_path_to_end_jump_pos, 1) - pmc.b(currpos - fast_path_to_end_jump_pos) - pmc.overwrite() - - with scratch_reg(self.mc): - self.mc.load(r.SCRATCH.value, r.SPP.value, FORCE_INDEX_OFS) - self.mc.cmp_op(0, r.SCRATCH.value, 0, imm=True) - - self._emit_guard(guard_op, regalloc._prepare_guard(guard_op), - xxxxxxxxxxxxxxxxx+c.LT, save_exc=True) - - # ../x86/assembler.py:668 def redirect_call_assembler(self, oldlooptoken, newlooptoken): # some minimal sanity checking old_nbargs = oldlooptoken.compiled_loop_token._debug_nbargs new_nbargs = newlooptoken.compiled_loop_token._debug_nbargs assert old_nbargs == new_nbargs - oldadr = oldlooptoken._ppc_func_addr - target = newlooptoken._ppc_func_addr - if IS_PPC_32: - # we overwrite the instructions at the old _ppc_func_addr - # to start with a JMP to the new _ppc_func_addr. + oldadr = oldlooptoken._ll_function_addr + target = newlooptoken._ll_function_addr + if IS_PPC_32 or not IS_BIG_ENDIAN: + # we overwrite the instructions at the old _ll_function_addr + # to start with a JMP to the new _ll_function_addr. # Ideally we should rather patch all existing CALLs, but well. mc = PPCBuilder() mc.b_abs(target) mc.copy_to_raw_memory(oldadr) else: - # PPC64 trampolines are data so overwrite the code address - # in the function descriptor at the old address - # (TOC and static chain pointer are the same). + # PPC64 big-endian trampolines are data so overwrite the code + # address in the function descriptor at the old address. + # Copy the whole 3-word trampoline, even though the other + # words are always zero so far. odata = rffi.cast(rffi.CArrayPtr(lltype.Signed), oldadr) tdata = rffi.cast(rffi.CArrayPtr(lltype.Signed), target) odata[0] = tdata[0] - - def emit_guard_call_may_force(self, op, guard_op, arglocs, regalloc): - faildescr = guard_op.getdescr() - fail_index = self.cpu.get_fail_descr_number(faildescr) - self._write_fail_index(fail_index) - numargs = op.numargs() - callargs = arglocs[2:numargs + 1] # extract the arguments to the call - adr = arglocs[1] - resloc = arglocs[0] - # - descr = op.getdescr() - size = descr.get_result_size() - signed = descr.is_result_signed() - # - xxxxxxxxxxxxxx - self._emit_call(fail_index, adr, callargs, resloc, (size, signed)) - - with scratch_reg(self.mc): - self.mc.load(r.SCRATCH.value, r.SPP.value, FORCE_INDEX_OFS) - self.mc.cmp_op(0, r.SCRATCH.value, 0, imm=True) - - self._emit_guard(guard_op, arglocs[1 + numargs:], - xxxxxxxxxxxxxx+c.LT, save_exc=True) - - def emit_guard_call_release_gil(self, op, guard_op, arglocs, regalloc): - - # first, close the stack in the sense of the asmgcc GC root tracker - gcrootmap = self.cpu.gc_ll_descr.gcrootmap - numargs = op.numargs() - callargs = arglocs[2:numargs + 1] # extract the arguments to the call - adr = arglocs[1] - resloc = arglocs[0] - - if gcrootmap: - self.call_release_gil(gcrootmap, arglocs) - # do the call - faildescr = guard_op.getdescr() - fail_index = self.cpu.get_fail_descr_number(faildescr) - self._write_fail_index(fail_index) - # - descr = op.getdescr() - size = descr.get_result_size() - signed = descr.is_result_signed() - # - xxxxxxxxxxxxxxx - self._emit_call(fail_index, adr, callargs, resloc, (size, signed)) - # then reopen the stack - if gcrootmap: - self.call_reacquire_gil(gcrootmap, resloc) - - with scratch_reg(self.mc): - self.mc.load(r.SCRATCH.value, r.SPP.value, 0) - self.mc.cmp_op(0, r.SCRATCH.value, 0, imm=True) - - self._emit_guard(guard_op, arglocs[1 + numargs:], - xxxxxxxxxxxxxxxxxx+c.LT, save_exc=True) - - def call_release_gil(self, gcrootmap, save_registers): - # XXX don't know whether this is correct - # XXX use save_registers here - assert gcrootmap.is_shadow_stack - with Saved_Volatiles(self.mc): - #self._emit_call(NO_FORCE_INDEX, self.releasegil_addr, - # [], self._regalloc) - self._emit_call(imm(self.releasegil_addr), []) - - def call_reacquire_gil(self, gcrootmap, save_loc): - # save the previous result into the stack temporarily. - # XXX like with call_release_gil(), we assume that we don't need - # to save vfp regs in this case. Besides the result location - assert gcrootmap.is_shadow_stack - with Saved_Volatiles(self.mc): - self._emit_call(imm(self.reacqgil_addr), []) + odata[1] = tdata[1] + odata[2] = tdata[2] class OpAssembler(IntOpAssembler, GuardOpAssembler, diff --git a/rpython/jit/backend/ppc/ppc_assembler.py b/rpython/jit/backend/ppc/ppc_assembler.py --- a/rpython/jit/backend/ppc/ppc_assembler.py +++ b/rpython/jit/backend/ppc/ppc_assembler.py @@ -28,7 +28,7 @@ from rpython.rlib.debug import (debug_print, debug_start, debug_stop, have_debug_prints) from rpython.rlib import rgc -from rpython.rtyper.annlowlevel import llhelper +from rpython.rtyper.annlowlevel import llhelper, cast_instance_to_gcref from rpython.rlib.objectmodel import we_are_translated, specialize from rpython.rtyper.lltypesystem.lloperation import llop from rpython.jit.backend.ppc.locations import StackLocation, get_fp_offset, imm @@ -92,8 +92,10 @@ def __init__(self, cpu, translate_support_code=False): BaseAssembler.__init__(self, cpu, translate_support_code) self.loop_run_counters = [] + self.wb_slowpath = [0, 0, 0, 0, 0] self.setup_failure_recovery() self.stack_check_slowpath = 0 + self.propagate_exception_path = 0 self.teardown() def set_debug(self, v): @@ -122,33 +124,6 @@ mc.lfd(reg.value, spp_reg.value, self.OFFSET_SPP_TO_FPR_SAVE_AREA + WORD * i) - # The code generated here allocates a new stackframe - # and is the first machine code to be executed. - def _make_frame(self, frame_depth): - XXX - self.mc.make_function_prologue(frame_depth) - - # save SPP at the bottom of the stack frame - self.mc.store(r.SPP.value, r.SP.value, WORD) - - # compute spilling pointer (SPP) - self.mc.addi(r.SPP.value, r.SP.value, - frame_depth - self.OFFSET_SPP_TO_OLD_BACKCHAIN) - - # save nonvolatile registers - self._save_nonvolatiles() - - # save r31, use r30 as scratch register - # this is safe because r30 has been saved already - assert NONVOLATILES[-1] == r.SPP - ofs_to_r31 = (self.OFFSET_SPP_TO_GPR_SAVE_AREA + - WORD * (len(NONVOLATILES)-1)) - self.mc.load(r.r30.value, r.SP.value, WORD) - self.mc.store(r.r30.value, r.SPP.value, ofs_to_r31) - gcrootmap = self.cpu.gc_ll_descr.gcrootmap - if gcrootmap and gcrootmap.is_shadow_stack: - self.gen_shadowstack_header(gcrootmap) - def gen_shadowstack_header(self, gcrootmap): # we need to put two words into the shadowstack: the MARKER_FRAME # and the address of the frame (fp, actually) @@ -296,7 +271,7 @@ self._frame_realloc_slowpath = mc.materialize(self.cpu, []) self.mc = None - def _store_and_reset_exception(self, mc, excvalloc, exctploc): + def _store_and_reset_exception(self, mc, excvalloc, exctploc=None): """Reset the exception, after fetching it inside the two regs. """ mc.load_imm(r.r2, self.cpu.pos_exc_value()) @@ -304,7 +279,8 @@ assert _check_imm_arg(diff) # Load the exception fields into the two registers mc.load(excvalloc.value, r.r2.value, 0) - mc.load(exctploc.value, r.r2.value, diff) + if exctploc is not None: + mc.load(exctploc.value, r.r2.value, diff) # Zero out the exception fields mc.li(r.r0.value, 0) mc.store(r.r0.value, r.r2.value, 0) @@ -359,6 +335,7 @@ return mc.materialize(self.cpu, []) def _build_malloc_slowpath(self): + xxxxxxx mc = PPCBuilder() frame_size = (len(r.MANAGED_FP_REGS) * WORD + (BACKCHAIN_SIZE + MAX_REG_PARAMS) * WORD) @@ -405,7 +382,7 @@ # if r3 == 0 we skip the return above and jump to the exception path offset = mc.currpos() - jmp_pos pmc = OverwritingBuilder(mc, jmp_pos, 1) - pmc.bc(12, 2, offset) + pmc.beq(offset) pmc.overwrite() # restore the frame before leaving with scratch_reg(mc): @@ -500,7 +477,7 @@ mc.b(self.propagate_exception_path) pmc = OverwritingBuilder(mc, jnz_location, 1) - pmc.bc(4, 2, mc.currpos() - jnz_location) + pmc.bne(mc.currpos() - jnz_location) pmc.overwrite() # restore link register out of preprevious frame @@ -520,7 +497,6 @@ self.write_64_bit_func_descr(rawstart, rawstart+3*WORD) self.stack_check_slowpath = rawstart - # TODO: see what need to be done when for_frame is True def _build_wb_slowpath(self, withcards, withfloats=False, for_frame=False): descr = self.cpu.gc_ll_descr.write_barrier_descr if descr is None: @@ -536,56 +512,108 @@ # # This builds a helper function called from the slow path of # write barriers. It must save all registers, and optionally - # all fp registers. + # all fp registers. It takes its single argument in r0. mc = PPCBuilder() + old_mc = self.mc + self.mc = mc # - frame_size = ((len(r.VOLATILES) + len(r.VOLATILES_FLOAT) - + BACKCHAIN_SIZE + MAX_REG_PARAMS) * WORD) - mc.make_function_prologue(frame_size) - for i in range(len(r.VOLATILES)): - mc.store(r.VOLATILES[i].value, r.SP.value, - (BACKCHAIN_SIZE + MAX_REG_PARAMS + i) * WORD) - if self.cpu.supports_floats: - for i in range(len(r.VOLATILES_FLOAT)): - mc.stfd(r.VOLATILES_FLOAT[i].value, r.SP.value, - (len(r.VOLATILES) + BACKCHAIN_SIZE + MAX_REG_PARAMS + i) * WORD) + ignored_regs = [reg for reg in r.MANAGED_REGS if not ( + # 'reg' will be pushed if the following is true: + reg in r.VOLATILES or + reg is r.RCS1 or + (withcards and reg is r.RCS2))] + if not for_frame: + # push all volatile registers, push RCS1, and sometimes push RCS2 + self._push_all_regs_to_jitframe(mc, ignored_regs, withfloats) + else: + return #XXXXX + # we have one word to align + mc.SUB_ri(esp.value, 7 * WORD) # align and reserve some space + mc.MOV_sr(WORD, eax.value) # save for later + if self.cpu.supports_floats: + mc.MOVSD_sx(2 * WORD, xmm0.value) # 32-bit: also 3 * WORD + if IS_X86_32: + mc.MOV_sr(4 * WORD, edx.value) + mc.MOV_sr(0, ebp.value) + exc0, exc1 = esi, edi + else: + mc.MOV_rr(edi.value, ebp.value) + exc0, exc1 = ebx, r12 + mc.MOV(RawEspLoc(WORD * 5, REF), exc0) + mc.MOV(RawEspLoc(WORD * 6, INT), exc1) + # note that it's save to store the exception in register, + # since the call to write barrier can't collect + # (and this is assumed a bit left and right here, like lack + # of _reload_frame_if_necessary) + self._store_and_reset_exception(mc, exc0, exc1) - mc.call(rffi.cast(lltype.Signed, func)) - if self.cpu.supports_floats: - for i in range(len(r.VOLATILES_FLOAT)): - mc.lfd(r.VOLATILES_FLOAT[i].value, r.SP.value, - (len(r.VOLATILES) + BACKCHAIN_SIZE + MAX_REG_PARAMS + i) * WORD) - for i in range(len(r.VOLATILES)): - mc.load(r.VOLATILES[i].value, r.SP.value, - (BACKCHAIN_SIZE + MAX_REG_PARAMS + i) * WORD) - mc.restore_LR_from_caller_frame(frame_size) + if withcards: + mc.mr(r.RCS2.value, r.r0.value) + # + # Save the lr into r.RCS1 + mc.mflr(r.RCS1.value) + # + func = rffi.cast(lltype.Signed, func) + cb = callbuilder.CallBuilder(self, imm(func), [r.r0], None) + cb.emit() + # + # Restore lr + mc.mtlr(r.RCS1.value) # if withcards: - # A final compare before the RET, for the caller. Careful to + # A final andix before the blr, for the caller. Careful to # not follow this instruction with another one that changes - # the status of the CPU flags! - mc.lbz(r.SCRATCH.value, r.r3.value, - descr.jit_wb_if_flag_byteofs) - mc.extsb(r.SCRATCH.value, r.SCRATCH.value) - mc.cmpwi(0, r.SCRATCH.value, 0) + # the status of cr0! + card_marking_mask = descr.jit_wb_cards_set_singlebyte + mc.lbz(r.RCS2.value, r.RCS2.value, descr.jit_wb_if_flag_byteofs) + mc.andix(r.RCS2.value, r.RCS2.value, card_marking_mask & 0xFF) # - mc.addi(r.SP.value, r.SP.value, frame_size) - mc.blr() - # + + if not for_frame: + self._pop_all_regs_from_jitframe(mc, ignored_regs, withfloats) + mc.blr() + else: + XXXXXXX + if IS_X86_32: + mc.MOV_rs(edx.value, 4 * WORD) + if self.cpu.supports_floats: + mc.MOVSD_xs(xmm0.value, 2 * WORD) + mc.MOV_rs(eax.value, WORD) # restore + self._restore_exception(mc, exc0, exc1) + mc.MOV(exc0, RawEspLoc(WORD * 5, REF)) + mc.MOV(exc1, RawEspLoc(WORD * 6, INT)) + mc.LEA_rs(esp.value, 7 * WORD) + mc.RET() + + self.mc = old_mc rawstart = mc.materialize(self.cpu, []) - self.wb_slowpath[withcards + 2 * withfloats] = rawstart + if for_frame: + self.wb_slowpath[4] = rawstart + else: + self.wb_slowpath[withcards + 2 * withfloats] = rawstart def _build_propagate_exception_path(self): if not self.cpu.propagate_exception_descr: return - mc = PPCBuilder() - # the following call may be needed in the future: - # self._store_and_reset_exception() + self.mc = PPCBuilder() + # + # read and reset the current exception - mc.load_imm(r.RES, self.cpu.propagate_exception_descr) - self._gen_epilogue(mc) - self.propagate_exception_path = mc.materialize(self.cpu, []) + propagate_exception_descr = rffi.cast(lltype.Signed, + cast_instance_to_gcref(self.cpu.propagate_exception_descr)) + ofs3 = self.cpu.get_ofs_of_frame_field('jf_guard_exc') + ofs4 = self.cpu.get_ofs_of_frame_field('jf_descr') + + self._store_and_reset_exception(self.mc, r.r3) + self.mc.load_imm(r.r4, propagate_exception_descr) + self.mc.std(r.r3.value, r.SPP.value, ofs3) + self.mc.std(r.r4.value, r.SPP.value, ofs4) + # + self._call_footer() + rawstart = self.mc.materialize(self.cpu, []) + self.propagate_exception_path = rawstart + self.mc = None # The code generated here serves as an exit stub from # the executed machine code. @@ -617,28 +645,6 @@ return mc.materialize(self.cpu, [], self.cpu.gc_ll_descr.gcrootmap) - def _gen_epilogue(self, mc): - XXX - gcrootmap = self.cpu.gc_ll_descr.gcrootmap - if gcrootmap and gcrootmap.is_shadow_stack: - self.gen_footer_shadowstack(gcrootmap, mc) - - # save SPP back in r3 - mc.mr(r.r5.value, r.SPP.value) - self._restore_nonvolatiles(mc, r.r5) - # load old backchain into r4 - if IS_PPC_32: - ofs = WORD - else: - ofs = WORD * 2 - mc.load(r.r4.value, r.r5.value, self.OFFSET_SPP_TO_OLD_BACKCHAIN + ofs) - mc.mtlr(r.r4.value) # restore LR - # From SPP, we have a constant offset to the old backchain. We use the - # SPP to re-establish the old backchain because this exit stub is - # generated before we know how much space the entire frame will need. - mc.addi(r.SP.value, r.r5.value, self.OFFSET_SPP_TO_OLD_BACKCHAIN) # restore old SP - mc.blr() - def _save_managed_regs(self, mc): """ store managed registers in ENCODING AREA """ @@ -735,7 +741,7 @@ offset = self.mc.currpos() - patch_loc # pmc = OverwritingBuilder(self.mc, patch_loc, 1) - pmc.bc(4, 1, offset) # jump if SCRATCH <= r16, i. e. not(SCRATCH > r16) + pmc.ble(offset) # jump if SCRATCH <= r16, i. e. not(SCRATCH > r16) pmc.overwrite() def _call_footer(self): @@ -944,97 +950,11 @@ self.teardown() return AsmInfo(ops_offset, startpos + rawstart, codeendpos - startpos) - DESCR_REF = 0x00 - DESCR_INT = 0x01 - DESCR_FLOAT = 0x02 - DESCR_SPECIAL = 0x03 - CODE_FROMSTACK = 128 - CODE_STOP = 0 | DESCR_SPECIAL - CODE_HOLE = 4 | DESCR_SPECIAL - CODE_INPUTARG = 8 | DESCR_SPECIAL - - def gen_descr_encoding(self, descr, failargs, locs): - assert self.mc is not None - buf = [] - for i in range(len(failargs)): - arg = failargs[i] - if arg is not None: - if arg.type == REF: - kind = self.DESCR_REF - elif arg.type == INT: - kind = self.DESCR_INT - elif arg.type == FLOAT: - kind = self.DESCR_FLOAT - else: - raise AssertionError("bogus kind") - loc = locs[i] - if loc.is_stack(): - pos = loc.position - if pos < 0: - buf.append(self.CODE_INPUTARG) - pos = ~pos - n = self.CODE_FROMSTACK // 4 + pos - else: - assert loc.is_reg() or loc.is_fp_reg() - n = loc.value - n = kind + 4 * n - while n > 0x7F: - buf.append((n & 0x7F) | 0x80) - n >>= 7 - else: - n = self.CODE_HOLE - buf.append(n) - buf.append(self.CODE_STOP) - - fdescr = self.cpu.get_fail_descr_number(descr) - - buf.append((fdescr >> 24) & 0xFF) - buf.append((fdescr >> 16) & 0xFF) - buf.append((fdescr >> 8) & 0xFF) - buf.append( fdescr & 0xFF) - - lenbuf = len(buf) - # XXX fix memory leaks - enc_arr = lltype.malloc(rffi.CArray(rffi.CHAR), lenbuf, - flavor='raw', track_allocation=False) - enc_ptr = rffi.cast(lltype.Signed, enc_arr) - for i, byte in enumerate(buf): - enc_arr[i] = chr(byte) - # assert that the fail_boxes lists are big enough - assert len(failargs) <= self.fail_boxes_int.SIZE - return enc_ptr - - def align(self, size): - while size % 8 != 0: - size += 1 - return size - def teardown(self): self.pending_guard_tokens = None self.mc = None self.current_clt = None - def compute_frame_depth(self, spilling_area, param_depth): - PARAMETER_AREA = param_depth * WORD - if IS_PPC_64: - PARAMETER_AREA += MAX_REG_PARAMS * WORD - SPILLING_AREA = spilling_area * WORD - - frame_depth = ( GPR_SAVE_AREA - + FPR_SAVE_AREA - + FLOAT_INT_CONVERSION - + FORCE_INDEX - + self.ENCODING_AREA - + SPILLING_AREA - + PARAMETER_AREA - + BACKCHAIN_SIZE * WORD) - - # align stack pointer - while frame_depth % (4 * WORD) != 0: - frame_depth += WORD - - return frame_depth - def _find_failure_recovery_bytecode(self, faildescr): return faildescr._failure_recovery_code_adr @@ -1207,7 +1127,8 @@ with scratch_reg(self.mc): offset = loc.value self.mc.load_imm(r.SCRATCH, value) - self.mc.store(r.SCRATCH.value, r.SPP.value, offset) + self.mc.lfdx(r.FP_SCRATCH.value, 0, r.SCRATCH.value) + self.mc.stfd(r.FP_SCRATCH.value, r.SPP.value, offset) return assert 0, "not supported location" elif prev_loc.is_fp_reg(): @@ -1258,13 +1179,13 @@ self.mc.lfd(loc.value, r.SP.value, index) else: self.mc.lfd(r.FP_SCRATCH.value, r.SP.value, index) - self.regalloc_mov(r.FP_SCRATCH.value, loc) + self.regalloc_mov(r.FP_SCRATCH, loc) else: if loc.is_core_reg(): self.mc.ld(loc.value, r.SP.value, index) else: self.mc.ld(r.SCRATCH.value, r.SP.value, index) - self.regalloc_mov(r.SCRATCH.value, loc) + self.regalloc_mov(r.SCRATCH, loc) def malloc_cond(self, nursery_free_adr, nursery_top_adr, size): assert size & (WORD-1) == 0 # must be correctly aligned @@ -1301,7 +1222,7 @@ offset = self.mc.currpos() - fast_jmp_pos pmc = OverwritingBuilder(self.mc, fast_jmp_pos, 1) - pmc.bc(4, 1, offset) # jump if LE (not GT) + pmc.ble(offset) # jump if LE (not GT) pmc.overwrite() with scratch_reg(self.mc): @@ -1318,8 +1239,10 @@ gcrootmap.write_callshape(mark, force_index) def propagate_memoryerror_if_r3_is_null(self): - return # XXXXXXXXX - self.mc.cmp_op(0, r.RES.value, 0, imm=True) + # if self.propagate_exception_path == 0 (tests), this may jump to 0 + # and segfaults. too bad. the alternative is to continue anyway + # with r3==0, but that will segfault too. + self.mc.cmp_op(0, r.r3.value, 0, imm=True) self.mc.b_cond_abs(self.propagate_exception_path, c.EQ) def write_new_force_index(self): diff --git a/rpython/jit/backend/ppc/regalloc.py b/rpython/jit/backend/ppc/regalloc.py --- a/rpython/jit/backend/ppc/regalloc.py +++ b/rpython/jit/backend/ppc/regalloc.py @@ -490,7 +490,7 @@ prepare_int_force_ge_zero = helper.prepare_unary_op - def prepare_math_sqrt(self, op): + def _prepare_math_sqrt(self, op): loc = self.ensure_reg(op.getarg(1)) self.free_op_vars() res = self.fprm.force_allocate_reg(op.result) @@ -839,8 +839,17 @@ return [base_loc, index_loc, value_loc, ofs_loc, imm_size, imm_size] - #prepare_copystrcontent = void - #prepare_copyunicodecontent = void + def prepare_copystrcontent(self, op): + src_ptr_loc = self.ensure_reg(op.getarg(0)) + dst_ptr_loc = self.ensure_reg(op.getarg(1)) + src_ofs_loc = self.ensure_reg_or_any_imm(op.getarg(2)) + dst_ofs_loc = self.ensure_reg_or_any_imm(op.getarg(3)) + length_loc = self.ensure_reg_or_any_imm(op.getarg(4)) + self._spill_before_call(save_all_regs=False) + return [src_ptr_loc, dst_ptr_loc, + src_ofs_loc, dst_ofs_loc, length_loc] + + prepare_copyunicodecontent = prepare_copystrcontent def prepare_unicodelen(self, op): basesize, itemsize, ofs_length = symbolic.get_array_token(rstr.UNICODE, @@ -877,22 +886,21 @@ prepare_cast_ptr_to_int = prepare_same_as prepare_cast_int_to_ptr = prepare_same_as + def get_oopspecindex(self, op): + descr = op.getdescr() + assert descr is not None + effectinfo = descr.get_extra_info() + if effectinfo is not None: + return effectinfo.oopspecindex + return EffectInfo.OS_NONE + def prepare_call(self, op): - effectinfo = op.getdescr().get_extra_info() - if effectinfo is not None: - oopspecindex = effectinfo.oopspecindex - if oopspecindex == EffectInfo.OS_MATH_SQRT: - xxxxxxxxx - args = self.prepare_math_sqrt(op) - self.assembler.emit_math_sqrt(op, args, self) - return + oopspecindex = self.get_oopspecindex(op) + if oopspecindex == EffectInfo.OS_MATH_SQRT: + return self._prepare_math_sqrt(op) return self._prepare_call(op) - def _prepare_call(self, op, save_all_regs=False): - args = [] - args.append(None) - for i in range(op.numargs()): - args.append(self.loc(op.getarg(i))) + def _spill_before_call(self, save_all_regs=False): # spill variables that need to be saved around calls self.fprm.before_call(save_all_regs=save_all_regs) if not save_all_regs: @@ -900,10 +908,16 @@ if gcrootmap and gcrootmap.is_shadow_stack: save_all_regs = 2 self.rm.before_call(save_all_regs=save_all_regs) + + def _prepare_call(self, op, save_all_regs=False): + args = [] + args.append(None) + for i in range(op.numargs()): + args.append(self.loc(op.getarg(i))) + self._spill_before_call(save_all_regs) if op.result: resloc = self.after_call(op.result) args[0] = resloc - self.before_call_called = True return args def prepare_call_malloc_nursery(self, op): @@ -943,31 +957,16 @@ prepare_keepalive = void def prepare_cond_call_gc_wb(self, op): - assert op.result is None - # we force all arguments in a reg because it will be needed anyway by - # the following setfield_gc or setarrayitem_gc. It avoids loading it - # twice from the memory. - N = op.numargs() - args = op.getarglist() - arglocs = [self._ensure_value_is_boxed(op.getarg(i), args) - for i in range(N)] - card_marking = False - if op.getopnum() == rop.COND_CALL_GC_WB_ARRAY: - descr = op.getdescr() - if we_are_translated(): - cls = self.cpu.gc_ll_descr.has_write_barrier_class() - assert cls is not None and isinstance(descr, cls) - card_marking = descr.jit_wb_cards_set != 0 - if card_marking: # allocate scratch registers - tmp1 = self.get_scratch_reg(INT) - tmp2 = self.get_scratch_reg(INT) - tmp3 = self.get_scratch_reg(INT) - arglocs.append(tmp1) - arglocs.append(tmp2) - arglocs.append(tmp3) + arglocs = [self.ensure_reg(op.getarg(0))] return arglocs - prepare_cond_call_gc_wb_array = prepare_cond_call_gc_wb + def prepare_cond_call_gc_wb_array(self, op): + arglocs = [self.ensure_reg(op.getarg(0)), + self.ensure_reg_or_16bit_imm(op.getarg(1)), + None] + if arglocs[1].is_reg(): + arglocs[2] = self.get_scratch_reg(INT) + return arglocs def prepare_force_token(self, op): res_loc = self.force_allocate_reg(op.result) @@ -1028,21 +1027,11 @@ prepare_call_release_gil = prepare_call_may_force - def prepare_guard_call_assembler(self, op, guard_op): - descr = op.getdescr() - assert isinstance(descr, JitCellToken) - jd = descr.outermost_jitdriver_sd - assert jd is not None - vable_index = jd.index_of_virtualizable - if vable_index >= 0: - self._sync_var(op.getarg(vable_index)) - vable = self.frame_manager.loc(op.getarg(vable_index)) - else: - vable = imm(0) - # make sure the call result location is free - tmploc = self.get_scratch_reg(INT, selected_reg=r.RES) - self.possibly_free_vars(guard_op.getfailargs()) - return [vable, tmploc] + self._prepare_call(op, save_all_regs=True) + def prepare_call_assembler(self, op): + locs = self.locs_for_call_assembler(op) + self._spill_before_call(save_all_regs=True) + resloc = self.after_call(op.result) + return [resloc] + locs def _prepare_args_for_new_op(self, new_args): gc_ll_descr = self.cpu.gc_ll_descr @@ -1060,6 +1049,11 @@ self.force_spill_var(op.getarg(0)) return [] + def prepare_guard_not_forced_2(self, op): + self.rm.before_call(op.getfailargs(), save_all_regs=True) + arglocs = self._prepare_guard(op) + return arglocs + def prepare_zero_ptr_field(self, op): base_loc = self.ensure_reg(op.getarg(0)) ofs_loc = self.ensure_reg_or_16bit_imm(op.getarg(1)) diff --git a/rpython/jit/backend/ppc/runner.py b/rpython/jit/backend/ppc/runner.py --- a/rpython/jit/backend/ppc/runner.py +++ b/rpython/jit/backend/ppc/runner.py @@ -2,7 +2,6 @@ from rpython.rtyper.lltypesystem import lltype, llmemory, rffi from rpython.rtyper.llinterp import LLInterpreter from rpython.rlib import rgc -#from rpython.jit.backend.ppc.arch import FORCE_INDEX_OFS from rpython.jit.backend.llsupport.llmodel import AbstractLLCPU from rpython.jit.backend.ppc.ppc_assembler import AssemblerPPC from rpython.jit.backend.ppc.arch import WORD @@ -33,11 +32,6 @@ def __init__(self, rtyper, stats, opts=None, translate_support_code=False, gcdescr=None): - if gcdescr is not None: - gcdescr.force_index_ofs = FORCE_INDEX_OFS - # XXX for now the ppc backend does not support the gcremovetypeptr - # translation option - # assert gcdescr.config.translation.gcremovetypeptr is False AbstractLLCPU.__init__(self, rtyper, stats, opts, translate_support_code, gcdescr) @@ -80,8 +74,7 @@ for jmp, tgt in looptoken.compiled_loop_token.invalidate_positions: mc = PPCBuilder() - mc.b_offset(tgt) - mc.prepare_insts_blocks() + mc.b_offset(tgt) # a single instruction mc.copy_to_raw_memory(jmp) # positions invalidated looptoken.compiled_loop_token.invalidate_positions = [] diff --git a/rpython/jit/backend/ppc/symbol_lookup.py b/rpython/jit/backend/ppc/symbol_lookup.py deleted file mode 100644 --- a/rpython/jit/backend/ppc/symbol_lookup.py +++ /dev/null @@ -1,15 +0,0 @@ - -def lookup(sym): - global lookup - import py - - _ppcgen = py.magic.autopath().dirpath().join('_ppcgen.c')._getpymodule() - - try: - from _ppcgen import NSLookupAndBindSymbol - - def lookup(sym): - return NSLookupAndBindSymbol('_' + sym) - except ImportError: - from _ppcgen import dlsym as lookup - return lookup(sym) diff --git a/rpython/jit/backend/ppc/test/test_ppc.py b/rpython/jit/backend/ppc/test/test_ppc.py --- a/rpython/jit/backend/ppc/test/test_ppc.py +++ b/rpython/jit/backend/ppc/test/test_ppc.py @@ -2,7 +2,6 @@ import random, sys, os from rpython.jit.backend.ppc.codebuilder import BasicPPCAssembler, PPCBuilder -from rpython.jit.backend.ppc.symbol_lookup import lookup from rpython.jit.backend.ppc.regname import * from rpython.jit.backend.ppc.register import * from rpython.jit.backend.ppc import form diff --git a/rpython/jit/backend/ppc/test/test_runner.py b/rpython/jit/backend/ppc/test/test_runner.py --- a/rpython/jit/backend/ppc/test/test_runner.py +++ b/rpython/jit/backend/ppc/test/test_runner.py @@ -23,16 +23,22 @@ # ====> ../../test/runner_test.py if IS_PPC_32: - add_loop_instructions = ["mr", "add", "cmpwi", "beq", "b"] + add_loop_instructions = ["ld", "add", "cmpwi", "beq", "b"] else: - add_loop_instructions = ["mr", "add", "cmpdi", "beq", "b"] - bridge_loop_instructions_short = ["lis", "ori", "mtctr", "bctr"] - bridge_loop_instructions_long = ["lis", "ori", "rldicr", "oris", "ori", - "mtctr", "bctr"] - - def setup_method(self, meth): - self.cpu = PPC_CPU(rtyper=None, stats=FakeStats()) - self.cpu.setup_once() + add_loop_instructions = ["ld", "add", "cmpdi", "beq", "b"] + bridge_loop_instructions = [ + "ld", "cmpdi", "bge+", + "li", "lis", "ori", "mtctr", "bctrl", + "lis", "ori", "mtctr", "bctr"] + bridge_loop_instructions_alternative = [ + "ld", "cmpdi", "bge+", + "li", "li", "rldicr", "oris", "ori", "mtctr", "bctrl", + "li", "rldicr", "oris", "ori", "mtctr", "bctr"] + + def get_cpu(self): + cpu = PPC_CPU(rtyper=None, stats=FakeStats()) + cpu.setup_once() + return cpu def test_compile_loop_many_int_args(self): for numargs in range(2, 16): diff --git a/rpython/jit/backend/test/runner_test.py b/rpython/jit/backend/test/runner_test.py --- a/rpython/jit/backend/test/runner_test.py +++ b/rpython/jit/backend/test/runner_test.py @@ -1113,12 +1113,12 @@ r_box = self.alloc_string("!???????!") if r_box_is_const: r_box = r_box.constbox() - self.execute_operation(rop.COPYSTRCONTENT, - [s_box, r_box, - srcstart_box, - dststart_box, - length_box], 'void') - assert self.look_string(r_box) == "!??cdef?!" + self.execute_operation(rop.COPYSTRCONTENT, + [s_box, r_box, + srcstart_box, + dststart_box, + length_box], 'void') + assert self.look_string(r_box) == "!??cdef?!" def test_copyunicodecontent(self): s_box = self.alloc_unicode(u"abcdef") @@ -1130,12 +1130,12 @@ r_box = self.alloc_unicode(u"!???????!") if r_box_is_const: r_box = r_box.constbox() - self.execute_operation(rop.COPYUNICODECONTENT, - [s_box, r_box, - srcstart_box, - dststart_box, - length_box], 'void') - assert self.look_unicode(r_box) == u"!??cdef?!" + self.execute_operation(rop.COPYUNICODECONTENT, + [s_box, r_box, + srcstart_box, + dststart_box, + length_box], 'void') + assert self.look_unicode(r_box) == u"!??cdef?!" def test_do_unicode_basic(self): u = self.cpu.bh_newunicode(5) @@ -2178,7 +2178,7 @@ funcbox = self.get_funcbox(self.cpu, func_ptr) class WriteBarrierDescr(AbstractDescr): jit_wb_if_flag = 4096 - jit_wb_if_flag_byteofs = struct.pack("i", 4096).index('\x10') + jit_wb_if_flag_byteofs = struct.pack("l", 4096).index('\x10') jit_wb_if_flag_singlebyte = 0x10 def get_write_barrier_fn(self, cpu): return funcbox.getint() @@ -2212,7 +2212,7 @@ funcbox = self.get_funcbox(self.cpu, func_ptr) class WriteBarrierDescr(AbstractDescr): jit_wb_if_flag = 4096 - jit_wb_if_flag_byteofs = struct.pack("i", 4096).index('\x10') + jit_wb_if_flag_byteofs = struct.pack("l", 4096).index('\x10') jit_wb_if_flag_singlebyte = 0x10 jit_wb_cards_set = 0 # <= without card marking def get_write_barrier_fn(self, cpu): @@ -2259,10 +2259,10 @@ funcbox = self.get_funcbox(self.cpu, func_ptr) class WriteBarrierDescr(AbstractDescr): jit_wb_if_flag = 4096 - jit_wb_if_flag_byteofs = struct.pack("i", 4096).index('\x10') + jit_wb_if_flag_byteofs = struct.pack("l", 4096).index('\x10') jit_wb_if_flag_singlebyte = 0x10 jit_wb_cards_set = 32768 - jit_wb_cards_set_byteofs = struct.pack("i", 32768).index('\x80') + jit_wb_cards_set_byteofs = struct.pack("l", 32768).index('\x80') jit_wb_cards_set_singlebyte = -0x80 jit_wb_card_page_shift = 7 def get_write_barrier_from_array_fn(self, cpu): @@ -3674,6 +3674,7 @@ assert not called def test_assembler_call_propagate_exc(self): + # WARNING: this test depends on test_memoryerror first passing if not isinstance(self.cpu, AbstractLLCPU): py.test.skip("llgraph can't fake exceptions well enough, give up") @@ -4985,3 +4986,35 @@ assert a[i].a == a[i].b == val else: assert a[i] == rffi.cast(OF, val) + + def test_jump_float_constant(self): + f0 = BoxFloat() + f1 = BoxFloat() + i2 = BoxInt() + f3 = BoxFloat() + i4 = BoxInt() + looptoken = JitCellToken() + targettoken = TargetToken() + operations = [ + ResOperation(rop.LABEL, [f0, f1], None, descr=targettoken), + ResOperation(rop.CAST_FLOAT_TO_INT, [f1], i2), + ResOperation(rop.GUARD_VALUE, [i2, ConstInt(123456)], None, + descr=BasicFailDescr(6)), + ResOperation(rop.FLOAT_ADD, [f0, ConstFloat(-0.5)], f3), + ResOperation(rop.FLOAT_GT, [f3, ConstFloat(9.12)], i4), + ResOperation(rop.GUARD_TRUE, [i4], None, descr=BasicFailDescr(2)), + ResOperation(rop.JUMP, [f3, ConstFloat(123456.78912)], None, + descr=targettoken), + ] + inputargs = [f0, f1] + operations[2].setfailargs([]) + operations[-2].setfailargs([f1, f3]) + + self.cpu.compile_loop(inputargs, operations, looptoken) + deadframe = self.cpu.execute_token(looptoken, 12.25, 123456.01) + fail = self.cpu.get_latest_descr(deadframe) + assert fail.identifier == 2 + res = longlong.getrealfloat(self.cpu.get_float_value(deadframe, 0)) + assert res == 123456.78912 + res = longlong.getrealfloat(self.cpu.get_float_value(deadframe, 1)) + assert res == 8.75 diff --git a/rpython/jit/backend/tool/viewcode.py b/rpython/jit/backend/tool/viewcode.py --- a/rpython/jit/backend/tool/viewcode.py +++ b/rpython/jit/backend/tool/viewcode.py @@ -49,10 +49,12 @@ 'arm': 'arm', 'arm_32': 'arm', 'ppc' : 'powerpc:common64', + 'ppc-64' : 'powerpc:common64', } machine_endianness = { # default value: 'little' 'ppc' : sys.byteorder, # i.e. same as the running machine... + 'ppc-64' : sys.byteorder, # i.e. same as the running machine... } cmd = find_objdump() objdump = ('%(command)s -b binary -m %(machine)s ' _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit