Author: Richard Plangger <planri...@gmail.com> Branch: s390x-backend Changeset: r80471:3a2cb683d03e Date: 2015-10-28 10:11 +0100 http://bitbucket.org/pypy/pypy/changeset/3a2cb683d03e/
Log: adding resoperations to regalloc/assembler (label,int_(lt,eq,...), guards) diff --git a/rpython/jit/backend/zarch/assembler.py b/rpython/jit/backend/zarch/assembler.py --- a/rpython/jit/backend/zarch/assembler.py +++ b/rpython/jit/backend/zarch/assembler.py @@ -11,7 +11,7 @@ STD_FRAME_SIZE_IN_BYTES, GPR_STACK_SAVE_IN_BYTES, THREADLOCAL_ADDR_OFFSET) from rpython.jit.backend.zarch.opassembler import (IntOpAssembler, - FloatOpAssembler) + FloatOpAssembler, GuardOpAssembler) from rpython.jit.backend.zarch.regalloc import Regalloc from rpython.jit.metainterp.resoperation import rop from rpython.rlib.debug import (debug_print, debug_start, debug_stop, @@ -105,7 +105,8 @@ self.places = [] class AssemblerZARCH(BaseAssembler, - IntOpAssembler, FloatOpAssembler): + IntOpAssembler, FloatOpAssembler, + GuardOpAssembler): def __init__(self, cpu, translate_support_code=False): BaseAssembler.__init__(self, cpu, translate_support_code) @@ -145,6 +146,9 @@ self.mc = None self.pending_guards = None + def target_arglocs(self, looptoken): + return looptoken._zarch_arglocs + def get_asmmemmgr_blocks(self, looptoken): clt = looptoken.compiled_loop_token if clt.asmmemmgr_blocks is None: @@ -333,7 +337,7 @@ if prev_loc.is_imm(): value = prev_loc.getint() # move immediate value to register - if loc.is_core_reg(): + if loc.is_reg(): self.mc.load_imm(loc, value) return # move immediate value to memory @@ -347,7 +351,7 @@ elif prev_loc.is_stack(): offset = prev_loc.value # move from memory to register - if loc.is_core_reg(): + if loc.is_reg(): self.mc.load(loc, r.SPP, offset) return # move in memory @@ -363,17 +367,15 @@ self.mc.LDY(loc, l.addr(offset, r.SPP)) return assert 0, "not supported location" - elif prev_loc.is_core_reg(): - reg = prev_loc.value + elif prev_loc.is_reg(): # move to another register - if loc.is_core_reg(): - other_reg = loc.value - self.mc.mr(other_reg, reg) + if loc.is_reg(): + self.mc.LGR(loc, prev_loc) return # move to memory elif loc.is_stack(): offset = 
loc.value - self.mc.store(reg, r.SPP, offset) + self.mc.store(prev_loc, r.SPP, offset) return assert 0, "not supported location" elif prev_loc.is_imm_float(): @@ -517,6 +519,9 @@ def emit_increment_debug_counter(self, op, arglocs, regalloc): pass # TODO + def emit_label(self, op, arglocs, regalloc): + pass + def emit_finish(self, op, arglocs, regalloc): base_ofs = self.cpu.get_baseofs_of_frame_field() if len(arglocs) > 1: diff --git a/rpython/jit/backend/zarch/codebuilder.py b/rpython/jit/backend/zarch/codebuilder.py --- a/rpython/jit/backend/zarch/codebuilder.py +++ b/rpython/jit/backend/zarch/codebuilder.py @@ -1,8 +1,9 @@ -from rpython.jit.backend.zarch import conditions as cond -from rpython.jit.backend.zarch import registers as reg -from rpython.jit.backend.zarch import locations as loc +from rpython.jit.backend.zarch import conditions as c +from rpython.jit.backend.zarch import registers as r +from rpython.jit.backend.zarch import locations as l from rpython.jit.backend.zarch.instruction_builder import build_instr_codes from rpython.jit.backend.llsupport.asmmemmgr import BlockBuilderMixin +from rpython.jit.backend.llsupport.assembler import GuardToken from rpython.rlib.objectmodel import we_are_translated from rpython.rlib.unroll import unrolling_iterable from rpython.rtyper.lltypesystem import lltype, rffi, llmemory @@ -19,15 +20,19 @@ def binary_helper_call(name): function = getattr(support, 'arm_%s' % name) - def f(self, c=cond.AL): + def f(self, c=c.AL): """Generates a call to a helper function, takes its arguments in r0 and r1, result is placed in r0""" addr = rffi.cast(lltype.Signed, function) self.BL(addr, c) return f -class Operand(object): - pass +class ZARCHGuardToken(GuardToken): + def __init__(self, cpu, gcmap, descr, failargs, faillocs, + guard_opnum, frame_depth, fcond=c.cond_none): + GuardToken.__init__(self, cpu, gcmap, descr, failargs, faillocs, + guard_opnum, frame_depth) + self.fcond = fcond class AbstractZARCHBuilder(object): def 
write_i32(self, word): @@ -85,11 +90,32 @@ self._dump(addr, "jit-backend-dump", "s390x") def load(self, treg, sreg, offset): - self.LG(treg, loc.addr(offset, sreg)) + self.LG(treg, l.addr(offset, sreg)) def currpos(self): return self.get_relative_pos() + def cmp_op(self, a, b, pool=False, signed=True, fp=False): + if fp == True: + xxx + self.fcmpu(a, b) + else: + if signed: + if pool: + # 64 bit immediate signed + self.CG(a, b) + else: + # 64 bit signed + self.CGR(a, b) + else: + if pool: + # 64 bit immediate unsigned + self.CLG(a, b) + else: + # 64 bit unsigned + self.CLGR(a, b) + + _classes = (AbstractZARCHBuilder,) # Used to build the MachineCodeBlockWrapper diff --git a/rpython/jit/backend/zarch/conditions.py b/rpython/jit/backend/zarch/conditions.py --- a/rpython/jit/backend/zarch/conditions.py +++ b/rpython/jit/backend/zarch/conditions.py @@ -6,6 +6,10 @@ GT = loc.imm(0x2) LE = loc.imm(EQ.value | LT.value) GE = loc.imm(EQ.value | GT.value) +NE = loc.imm(LT.value | GT.value) OVERFLOW = loc.imm(0x1) cond_none = loc.imm(0x0) + +def negate(cond): + return cond diff --git a/rpython/jit/backend/zarch/helper/assembler.py b/rpython/jit/backend/zarch/helper/assembler.py --- a/rpython/jit/backend/zarch/helper/assembler.py +++ b/rpython/jit/backend/zarch/helper/assembler.py @@ -0,0 +1,69 @@ +import rpython.jit.backend.zarch.conditions as c +import rpython.jit.backend.zarch.registers as r +from rpython.rlib.rarithmetic import intmask +from rpython.jit.backend.zarch.arch import WORD +from rpython.jit.metainterp.history import FLOAT +from rpython.jit.metainterp.resoperation import rop +from rpython.rtyper.lltypesystem import rffi, lltype + +def flush_cc(asm, condition, result_loc): + # After emitting an instruction that leaves a boolean result in + # a condition code (cc), call this. In the common case, result_loc + # will be set to SPP by the regalloc, which in this case means + # "propagate it between this operation and the next guard by keeping + # it in the cc". 
In the uncommon case, result_loc is another + # register, and we emit a load from the cc into this register. + assert asm.guard_success_cc == c.cond_none + if result_loc is r.SPP: + asm.guard_success_cc = condition + else: + # Possibly invert the bit in the CR + bit, invert = c.encoding[condition] + assert 0 <= bit <= 3 + if invert == 12: + pass + elif invert == 4: + asm.mc.crnor(bit, bit, bit) + else: + assert 0 + + resval = result_loc.value + # move the content of the CR to resval + asm.mc.mfcr(resval) + # zero out everything except of the result + asm.mc.rlwinm(resval, resval, 1 + bit, 31, 31) + + +def do_emit_cmp_op(self, arglocs, condition, signed, fp): + l0 = arglocs[0] + l1 = arglocs[1] + assert not l0.is_imm() + # do the comparison + self.mc.cmp_op(l0, l1, pool=l1.is_in_pool(), signed=signed, fp=fp) + + # CR bits: + # 0: LT + # 1: GT + # 2: EQ + # 3: UNordered + + if fp: + # Support for NaNs: with LE or GE, if one of the operands is a + # NaN, we get CR=1,0,0,0 (unordered bit only). We're about to + # check "not GT" or "not LT", but in case of NaN we want to + # get the answer False. 
+ #if condition == c.LE: + # self.mc.crnor(1, 1, 3) + # condition = c.GT + #elif condition == c.GE: + # self.mc.crnor(0, 0, 3) + # condition = c.LT + pass + + flush_cc(self, condition, r.SPP) + + +def gen_emit_cmp_op(condition, signed=True, fp=False): + def f(self, op, arglocs, regalloc): + do_emit_cmp_op(self, arglocs, condition, signed, fp) + return f diff --git a/rpython/jit/backend/zarch/helper/regalloc.py b/rpython/jit/backend/zarch/helper/regalloc.py --- a/rpython/jit/backend/zarch/helper/regalloc.py +++ b/rpython/jit/backend/zarch/helper/regalloc.py @@ -7,7 +7,7 @@ return lower_bound <= i <= upper_bound return False -def _prepare_int_binary_arith(self, op): +def prepare_int_add_or_mul(self, op): a0 = op.getarg(0) a1 = op.getarg(1) if check_imm(a0): @@ -21,7 +21,32 @@ self.force_result_in_reg(op, a0) return [l0, l1] -def _prepare_float_binary_arith(self, op): +def prepare_int_sub(self, op): + a0 = op.getarg(0) + a1 = op.getarg(1) + if isinstance(a0, ConstInt): + a0, a1 = a1, a0 + l0 = self.ensure_reg(a0) + l1 = self.ensure_reg(a1) + self.free_op_vars() + self.force_result_in_reg(op, a0) + return [l0, l1] + +def prepare_cmp_op(self, op): + a0 = op.getarg(0) + a1 = op.getarg(1) + if check_imm(a0): + a0, a1 = a1, a0 + l0 = self.ensure_reg(a0) + if check_imm(a1): + l1 = imm(a1.getint()) + else: + l1 = self.ensure_reg(a1) + self.free_op_vars() + self.force_result_in_reg(op, a0) + return [l0, l1] + +def prepare_binary_op(self, op): + a0 = op.getarg(0) + a1 = op.getarg(1) + l0 = self.ensure_reg(a0) diff --git a/rpython/jit/backend/zarch/instructions.py b/rpython/jit/backend/zarch/instructions.py --- a/rpython/jit/backend/zarch/instructions.py +++ b/rpython/jit/backend/zarch/instructions.py @@ -21,6 +21,13 @@ 'AGF': ('rxy', ['\xE3','\x18']), 'AHI': ('ri', ['\xA7','\x0A']), 'AGHI': ('ri', ['\xA7','\x0B']), + + + # comparison + 'CGR': ('rre', ['\xB9','\x20']), + 'CG': ('rxy', ['\xE3','\x20']), + 'CLGR': ('rre', ['\xB9','\x21']), + 'CLG': ('rxy', ['\xE3','\x21']), } 
logic_mnemonic_codes = { diff --git a/rpython/jit/backend/zarch/locations.py b/rpython/jit/backend/zarch/locations.py --- a/rpython/jit/backend/zarch/locations.py +++ b/rpython/jit/backend/zarch/locations.py @@ -14,7 +14,7 @@ def is_raw_sp(self): return False - def is_core_reg(self): + def is_reg(self): return False def is_fp_reg(self): @@ -45,7 +45,7 @@ def __repr__(self): return 'r%d' % self.value - def is_core_reg(self): + def is_reg(self): return True def as_key(self): # 0 <= as_key <= 15 @@ -60,7 +60,7 @@ def __repr__(self): return 'f%d' % self.value - def is_core_reg(self): + def is_reg(self): return False def is_fp_reg(self): diff --git a/rpython/jit/backend/zarch/opassembler.py b/rpython/jit/backend/zarch/opassembler.py --- a/rpython/jit/backend/zarch/opassembler.py +++ b/rpython/jit/backend/zarch/opassembler.py @@ -1,15 +1,28 @@ +from rpython.jit.backend.zarch.helper.assembler import gen_emit_cmp_op +from rpython.jit.backend.zarch.codebuilder import ZARCHGuardToken +import rpython.jit.backend.zarch.conditions as c +import rpython.jit.backend.zarch.registers as r +from rpython.jit.backend.llsupport.gcmap import allocate_gcmap class IntOpAssembler(object): _mixin_ = True def emit_int_add(self, op, arglocs, regalloc): l0, l1 = arglocs - assert not l0.is_imm() if l1.is_imm(): self.mc.AGHI(l0, l1) + elif l1.is_in_pool(): + self.mc.AG(l0, l1) else: self.mc.AGR(l0, l1) + emit_int_le = gen_emit_cmp_op(c.LE) + emit_int_lt = gen_emit_cmp_op(c.LT) + emit_int_gt = gen_emit_cmp_op(c.GT) + emit_int_ge = gen_emit_cmp_op(c.GE) + emit_int_eq = gen_emit_cmp_op(c.EQ) + emit_int_ne = gen_emit_cmp_op(c.NE) + class FloatOpAssembler(object): _mixin_ = True @@ -40,3 +53,192 @@ self.mc.DDB(l0, l1) else: self.mc.DDBR(l0, l1) + +class GuardOpAssembler(object): + _mixin_ = True + + def _emit_guard(self, op, arglocs, is_guard_not_invalidated=False): + if is_guard_not_invalidated: + fcond = c.cond_none + else: + fcond = self.guard_success_cc + self.guard_success_cc = c.cond_none + 
assert fcond != c.cond_none + fcond = c.negate(fcond) + token = self.build_guard_token(op, arglocs[0].value, arglocs[1:], fcond) + token.pos_jump_offset = self.mc.currpos() + assert token.guard_not_invalidated() == is_guard_not_invalidated + if not is_guard_not_invalidated: + self.mc.trap() # has to be patched later on + self.pending_guard_tokens.append(token) + + def build_guard_token(self, op, frame_depth, arglocs, fcond): + descr = op.getdescr() + gcmap = allocate_gcmap(self, frame_depth, r.JITFRAME_FIXED_SIZE) + token = ZARCHGuardToken(self.cpu, gcmap, descr, op.getfailargs(), + arglocs, op.getopnum(), frame_depth, + fcond) + return token + + def emit_guard_true(self, op, arglocs, regalloc): + self._emit_guard(op, arglocs) + + def emit_guard_false(self, op, arglocs, regalloc): + self.guard_success_cc = c.negate(self.guard_success_cc) + self._emit_guard(op, arglocs) + + def emit_guard_overflow(self, op, arglocs, regalloc): + self.guard_success_cc = c.SO + self._emit_guard(op, arglocs) + + def emit_guard_no_overflow(self, op, arglocs, regalloc): + self.guard_success_cc = c.NS + self._emit_guard(op, arglocs) + + def emit_guard_value(self, op, arglocs, regalloc): + l0 = arglocs[0] + l1 = arglocs[1] + failargs = arglocs[2:] + + if l0.is_reg(): + if l1.is_imm(): + self.mc.cmp_op(0, l0.value, l1.getint(), imm=True) + else: + self.mc.cmp_op(0, l0.value, l1.value) + elif l0.is_fp_reg(): + assert l1.is_fp_reg() + self.mc.cmp_op(0, l0.value, l1.value, fp=True) + self.guard_success_cc = c.EQ + self._emit_guard(op, failargs) + + emit_guard_nonnull = emit_guard_true + emit_guard_isnull = emit_guard_false + + def emit_guard_class(self, op, arglocs, regalloc): + self._cmp_guard_class(op, arglocs, regalloc) + self.guard_success_cc = c.EQ + self._emit_guard(op, arglocs[2:]) + + def emit_guard_nonnull_class(self, op, arglocs, regalloc): + self.mc.cmp_op(0, arglocs[0].value, 1, imm=True, signed=False) + patch_pos = self.mc.currpos() + self.mc.trap() + self._cmp_guard_class(op, 
arglocs, regalloc) + pmc = OverwritingBuilder(self.mc, patch_pos, 1) + pmc.blt(self.mc.currpos() - patch_pos) + pmc.overwrite() + self.guard_success_cc = c.EQ + self._emit_guard(op, arglocs[2:]) + + def _cmp_guard_class(self, op, locs, regalloc): + offset = self.cpu.vtable_offset + if offset is not None: + # could be one instruction shorter, but don't care because + # it's not this case that is commonly translated + self.mc.load(r.SCRATCH.value, locs[0].value, offset) + self.mc.load_imm(r.SCRATCH2, locs[1].value) + self.mc.cmp_op(0, r.SCRATCH.value, r.SCRATCH2.value) + else: + expected_typeid = (self.cpu.gc_ll_descr + .get_typeid_from_classptr_if_gcremovetypeptr(locs[1].value)) + self._cmp_guard_gc_type(locs[0], expected_typeid) + + def _read_typeid(self, targetreg, loc_ptr): + # Note that the typeid half-word is at offset 0 on a little-endian + # machine; it is at offset 2 or 4 on a big-endian machine. + assert self.cpu.supports_guard_gc_type + if IS_PPC_32: + self.mc.lhz(targetreg.value, loc_ptr.value, 2 * IS_BIG_ENDIAN) + else: + self.mc.lwz(targetreg.value, loc_ptr.value, 4 * IS_BIG_ENDIAN) + + def _cmp_guard_gc_type(self, loc_ptr, expected_typeid): + self._read_typeid(r.SCRATCH2, loc_ptr) + assert 0 <= expected_typeid <= 0x7fffffff # 4 bytes are always enough + if expected_typeid > 0xffff: # if 2 bytes are not enough + self.mc.subis(r.SCRATCH2.value, r.SCRATCH2.value, + expected_typeid >> 16) + expected_typeid = expected_typeid & 0xffff + self.mc.cmp_op(0, r.SCRATCH2.value, expected_typeid, + imm=True, signed=False) + + def emit_guard_gc_type(self, op, arglocs, regalloc): + self._cmp_guard_gc_type(arglocs[0], arglocs[1].value) + self.guard_success_cc = c.EQ + self._emit_guard(op, arglocs[2:]) + + def emit_guard_is_object(self, op, arglocs, regalloc): + assert self.cpu.supports_guard_gc_type + loc_object = arglocs[0] + # idea: read the typeid, fetch one byte of the field 'infobits' from + # the big typeinfo table, and check the flag 'T_IS_RPYTHON_INSTANCE'. 
+ base_type_info, shift_by, sizeof_ti = ( + self.cpu.gc_ll_descr.get_translated_info_for_typeinfo()) + infobits_offset, IS_OBJECT_FLAG = ( + self.cpu.gc_ll_descr.get_translated_info_for_guard_is_object()) + + self._read_typeid(r.SCRATCH2, loc_object) + self.mc.load_imm(r.SCRATCH, base_type_info + infobits_offset) + assert shift_by == 0 # on PPC64; fixme for PPC32 + self.mc.lbzx(r.SCRATCH2.value, r.SCRATCH2.value, r.SCRATCH.value) + self.mc.andix(r.SCRATCH2.value, r.SCRATCH2.value, IS_OBJECT_FLAG & 0xff) + self.guard_success_cc = c.NE + self._emit_guard(op, arglocs[1:]) + + def emit_guard_subclass(self, op, arglocs, regalloc): + assert self.cpu.supports_guard_gc_type + loc_object = arglocs[0] + loc_check_against_class = arglocs[1] + offset = self.cpu.vtable_offset + offset2 = self.cpu.subclassrange_min_offset + if offset is not None: + # read this field to get the vtable pointer + self.mc.load(r.SCRATCH2.value, loc_object.value, offset) + # read the vtable's subclassrange_min field + assert _check_imm_arg(offset2) + self.mc.ld(r.SCRATCH2.value, r.SCRATCH2.value, offset2) + else: + # read the typeid + self._read_typeid(r.SCRATCH, loc_object) + # read the vtable's subclassrange_min field, as a single + # step with the correct offset + base_type_info, shift_by, sizeof_ti = ( + self.cpu.gc_ll_descr.get_translated_info_for_typeinfo()) + self.mc.load_imm(r.SCRATCH2, base_type_info + sizeof_ti + offset2) + assert shift_by == 0 # on PPC64; fixme for PPC32 + self.mc.ldx(r.SCRATCH2.value, r.SCRATCH2.value, r.SCRATCH.value) + # get the two bounds to check against + vtable_ptr = loc_check_against_class.getint() + vtable_ptr = rffi.cast(rclass.CLASSTYPE, vtable_ptr) + check_min = vtable_ptr.subclassrange_min + check_max = vtable_ptr.subclassrange_max + assert check_max > check_min + check_diff = check_max - check_min - 1 + # right now, a full PyPy uses less than 6000 numbers, + # so we'll assert here that it always fit inside 15 bits + assert 0 <= check_min <= 0x7fff + assert 0 
<= check_diff <= 0xffff + # check by doing the unsigned comparison (tmp - min) < (max - min) + self.mc.subi(r.SCRATCH2.value, r.SCRATCH2.value, check_min) + self.mc.cmp_op(0, r.SCRATCH2.value, check_diff, imm=True, signed=False) + # the guard passes if we get a result of "below or equal" + self.guard_success_cc = c.LE + self._emit_guard(op, arglocs[2:]) + + def emit_guard_not_invalidated(self, op, arglocs, regalloc): + self._emit_guard(op, arglocs, is_guard_not_invalidated=True) + + def emit_guard_not_forced(self, op, arglocs, regalloc): + ofs = self.cpu.get_ofs_of_frame_field('jf_descr') + self.mc.ld(r.SCRATCH.value, r.SPP.value, ofs) + self.mc.cmp_op(0, r.SCRATCH.value, 0, imm=True) + self.guard_success_cc = c.EQ + self._emit_guard(op, arglocs) + + def emit_guard_not_forced_2(self, op, arglocs, regalloc): + guard_token = self.build_guard_token(op, arglocs[0].value, arglocs[1:], + c.cond_none) + self._finish_gcmap = guard_token.gcmap + self._store_force_index(op) + self.store_info_on_descr(0, guard_token) + diff --git a/rpython/jit/backend/zarch/regalloc.py b/rpython/jit/backend/zarch/regalloc.py --- a/rpython/jit/backend/zarch/regalloc.py +++ b/rpython/jit/backend/zarch/regalloc.py @@ -8,7 +8,7 @@ INT, REF, FLOAT, VOID) from rpython.jit.metainterp.history import JitCellToken, TargetToken from rpython.jit.metainterp.resoperation import rop -from rpython.jit.backend.zarch import locations +from rpython.jit.backend.zarch import locations as l from rpython.rtyper.lltypesystem import rffi, lltype, rstr, llmemory from rpython.rtyper.lltypesystem.lloperation import llop from rpython.rtyper.annlowlevel import cast_instance_to_gcref @@ -16,7 +16,7 @@ from rpython.jit.backend.llsupport.descr import ArrayDescr import rpython.jit.backend.zarch.registers as r import rpython.jit.backend.zarch.conditions as c -import rpython.jit.backend.zarch.helper.regalloc as regallochelp +import rpython.jit.backend.zarch.helper.regalloc as helper from rpython.jit.backend.llsupport.descr 
import unpack_arraydescr from rpython.jit.backend.llsupport.descr import unpack_fielddescr from rpython.jit.backend.llsupport.descr import unpack_interiorfielddescr @@ -64,7 +64,7 @@ def convert_to_imm(self, c): adr = self.convert_to_adr(c) - return locations.ConstFloatLoc(adr) + return l.ConstFloatLoc(adr) def __init__(self, longevity, frame_manager=None, assembler=None): RegisterManager.__init__(self, longevity, frame_manager, assembler) @@ -74,7 +74,7 @@ def place_in_pool(self, var): offset = self.assembler.pool.place(var) - return locations.pool(offset, r.POOL) + return l.pool(offset, r.POOL) def ensure_reg(self, box): if isinstance(box, Const): @@ -116,7 +116,7 @@ def convert_to_imm(self, c): val = self.convert_to_int(c) - return locations.ImmLocation(val) + return l.ImmLocation(val) def ensure_reg(self, box): if isinstance(box, Const): @@ -143,8 +143,8 @@ self.base_ofs = base_ofs def frame_pos(self, loc, box_type): - #return locations.StackLocation(loc, get_fp_offset(self.base_ofs, loc), box_type) - return locations.StackLocation(loc, get_fp_offset(self.base_ofs, loc), box_type) + #return l.StackLocation(loc, get_fp_offset(self.base_ofs, loc), box_type) + return l.StackLocation(loc, get_fp_offset(self.base_ofs, loc), box_type) @staticmethod def frame_size(type): @@ -152,7 +152,7 @@ @staticmethod def get_loc_index(loc): - assert isinstance(loc, locations.StackLocation) + assert isinstance(loc, l.StackLocation) return loc.position @@ -350,7 +350,7 @@ gcmap[val // WORD // 8] |= r_uint(1) << (val % (WORD * 8)) for box, loc in self.fm.bindings.iteritems(): if box.type == REF and self.rm.is_still_alive(box): - assert isinstance(loc, locations.StackLocation) + assert isinstance(loc, l.StackLocation) val = loc.get_position() + r.JITFRAME_FIXED_SIZE gcmap[val // WORD // 8] |= r_uint(1) << (val % (WORD * 8)) return gcmap @@ -463,11 +463,103 @@ def prepare_increment_debug_counter(self, op): pass # XXX - prepare_int_add = regallochelp._prepare_int_binary_arith - 
prepare_float_add = regallochelp._prepare_float_binary_arith - prepare_float_sub = regallochelp._prepare_float_binary_arith - prepare_float_mul = regallochelp._prepare_float_binary_arith - prepare_float_div = regallochelp._prepare_float_binary_arith + prepare_int_add = helper.prepare_int_add_or_mul + prepare_int_sub = helper.prepare_int_sub + prepare_int_mul = helper.prepare_int_add_or_mul + + prepare_int_le = helper.prepare_cmp_op + prepare_int_lt = helper.prepare_cmp_op + prepare_int_ge = helper.prepare_cmp_op + prepare_int_gt = helper.prepare_cmp_op + prepare_int_eq = helper.prepare_cmp_op + prepare_int_ne = helper.prepare_cmp_op + + prepare_float_add = helper.prepare_binary_op + prepare_float_sub = helper.prepare_binary_op + prepare_float_mul = helper.prepare_binary_op + prepare_float_truediv = helper.prepare_binary_op + + def _prepare_guard(self, op, args=None): + if args is None: + args = [] + args.append(imm(self.fm.get_frame_depth())) + for arg in op.getfailargs(): + if arg: + args.append(self.loc(arg)) + else: + args.append(None) + self.possibly_free_vars(op.getfailargs()) + # + # generate_quick_failure() produces up to 14 instructions per guard + self.limit_loop_break -= 14 * 4 + # + return args + + def load_condition_into_cc(self, box): + if self.assembler.guard_success_cc == c.cond_none: + xxx + loc = self.ensure_reg(box) + mc = self.assembler.mc + mc.cmp_op(loc, l.imm(0), imm=True) + self.assembler.guard_success_cc = c.NE + + def _prepare_guard_cc(self, op): + self.load_condition_into_cc(op.getarg(0)) + return self._prepare_guard(op) + + prepare_guard_true = _prepare_guard_cc + prepare_guard_false = _prepare_guard_cc + prepare_guard_nonnull = _prepare_guard_cc + prepare_guard_isnull = _prepare_guard_cc + + def prepare_label(self, op): + descr = op.getdescr() + assert isinstance(descr, TargetToken) + inputargs = op.getarglist() + arglocs = [None] * len(inputargs) + # + # we use force_spill() on the boxes that are not going to be really + # used any more 
in the loop, but that are kept alive anyway + # by being in a next LABEL's or a JUMP's argument or fail_args + # of some guard + position = self.rm.position + for arg in inputargs: + assert not isinstance(arg, Const) + if self.last_real_usage.get(arg, -1) <= position: + self.force_spill_var(arg) + # + # we need to make sure that no variable is stored in spp (=r31) + for arg in inputargs: + assert self.loc(arg) is not r.SPP, ( + "variable stored in spp in prepare_label") + self.rm.bindings_to_frame_reg.clear() + # + for i in range(len(inputargs)): + arg = inputargs[i] + assert not isinstance(arg, Const) + loc = self.loc(arg) + assert loc is not r.SPP + arglocs[i] = loc + if loc.is_reg(): + self.fm.mark_as_free(arg) + # + # if we are too close to the start of the loop, the label's target may + # get overridden by redirect_call_assembler(). (rare case) + self.flush_loop() + # + descr._zarch_arglocs = arglocs + descr._ll_loop_code = self.assembler.mc.currpos() + descr._zarch_clt = self.assembler.current_clt + self.assembler.target_tokens_currently_compiling[descr] = None + self.possibly_free_vars_for_op(op) + # + # if the LABEL's descr is precisely the target of the JUMP at the + # end of the same loop, i.e. if what we are compiling is a single + # loop that ends up jumping to this LABEL, then we can now provide + # the hints about the expected position of the spilled variables. + jump_op = self.final_jump_op + if jump_op is not None and jump_op.getdescr() is descr: + self._compute_hint_frame_locations_from_descr(descr) def prepare_finish(self, op): descr = op.getdescr() _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit