Author: Armin Rigo <ar...@tunes.org>
Branch:
Changeset: r79450:95f8d1976013
Date: 2015-09-05 11:03 +0200
http://bitbucket.org/pypy/pypy/changeset/95f8d1976013/
Log: hg merge optimize-cond-call Remove the merging of operations with the following guards, which removes some amount of messy code. Instead, operations like INT_EQ check if they should leave the result in the condition code; if so, the next GUARD_TRUE or GUARD_FALSE will pick up the condition code without producing any more code. Finally, this simpler arrangement allows COND_CALL to also pick up the condition code, which would have been a total mess before. diff too long, truncating to 2000 out of 2119 lines diff --git a/rpython/jit/backend/arm/assembler.py b/rpython/jit/backend/arm/assembler.py --- a/rpython/jit/backend/arm/assembler.py +++ b/rpython/jit/backend/arm/assembler.py @@ -12,8 +12,7 @@ from rpython.jit.backend.arm.opassembler import ResOpAssembler from rpython.jit.backend.arm.regalloc import (Regalloc, CoreRegisterManager, check_imm_arg, VFPRegisterManager, - operations as regalloc_operations, - operations_with_guard as regalloc_operations_with_guard) + operations as regalloc_operations) from rpython.jit.backend.llsupport import jitframe, rewrite from rpython.jit.backend.llsupport.assembler import DEBUG_COUNTER, debug_bridge, BaseAssembler from rpython.jit.backend.llsupport.regalloc import get_scale, valid_addressing_size @@ -645,8 +644,10 @@ size_excluding_failure_stuff - loop_head) def _assemble(self, regalloc, inputargs, operations): + self.guard_success_cc = c.cond_none regalloc.compute_hint_frame_locations(operations) self._walk_operations(inputargs, operations, regalloc) + assert self.guard_success_cc == c.cond_none frame_depth = regalloc.get_final_frame_depth() jump_target_descr = regalloc.jump_target_descr if jump_target_descr is not None: @@ -927,6 +928,7 @@ def _walk_operations(self, inputargs, operations, regalloc): fcond = c.AL self._regalloc = regalloc + regalloc.operations = operations while regalloc.position() < len(operations) - 1: regalloc.next_instruction() i = regalloc.position() @@ -935,18 +937,7 @@ opnum = op.getopnum() if 
op.has_no_side_effect() and op.result not in regalloc.longevity: regalloc.possibly_free_vars_for_op(op) - elif self._regalloc.can_merge_with_next_guard(op, i, operations): - guard = operations[i + 1] - assert guard.is_guard() - arglocs = regalloc_operations_with_guard[opnum](regalloc, op, - guard, fcond) - fcond = asm_operations_with_guard[opnum](self, op, - guard, arglocs, regalloc, fcond) - assert fcond is not None - regalloc.next_instruction() - regalloc.possibly_free_vars_for_op(guard) - regalloc.possibly_free_vars(guard.getfailargs()) - elif not we_are_translated() and op.getopnum() == -124: + if not we_are_translated() and op.getopnum() == -124: regalloc.prepare_force_spill(op, fcond) else: arglocs = regalloc_operations[opnum](regalloc, op, fcond) @@ -962,6 +953,7 @@ regalloc.free_temp_vars() regalloc._check_invariants() self.mc.mark_op(None) # end of the loop + regalloc.operations = None def regalloc_emit_extra(self, op, arglocs, fcond, regalloc): # for calls to a function with a specifically-supported OS_xxx @@ -1516,21 +1508,11 @@ raise NotImplementedError(op) -def notimplemented_op_with_guard(self, op, guard_op, arglocs, regalloc, fcond): - print "[ARM/asm] %s with guard %s not implemented" % \ - (op.getopname(), guard_op.getopname()) - raise NotImplementedError(op) - asm_operations = [notimplemented_op] * (rop._LAST + 1) -asm_operations_with_guard = [notimplemented_op_with_guard] * (rop._LAST + 1) asm_extra_operations = {} for name, value in ResOpAssembler.__dict__.iteritems(): - if name.startswith('emit_guard_'): - opname = name[len('emit_guard_'):] - num = getattr(rop, opname.upper()) - asm_operations_with_guard[num] = value - elif name.startswith('emit_opx_'): + if name.startswith('emit_opx_'): opname = name[len('emit_opx_'):] num = getattr(EffectInfo, 'OS_' + opname.upper()) asm_extra_operations[num] = value diff --git a/rpython/jit/backend/arm/conditions.py b/rpython/jit/backend/arm/conditions.py --- a/rpython/jit/backend/arm/conditions.py +++ 
b/rpython/jit/backend/arm/conditions.py @@ -13,11 +13,13 @@ GT = 0xC LE = 0xD AL = 0xE +cond_none = -1 opposites = [NE, EQ, CC, CS, PL, MI, VC, VS, LS, HI, LT, GE, LE, GT, AL] def get_opposite_of(operation): + assert operation >= 0 return opposites[operation] # see mapping for floating poin according to diff --git a/rpython/jit/backend/arm/helper/assembler.py b/rpython/jit/backend/arm/helper/assembler.py --- a/rpython/jit/backend/arm/helper/assembler.py +++ b/rpython/jit/backend/arm/helper/assembler.py @@ -6,33 +6,32 @@ from rpython.rlib.rarithmetic import r_uint, r_longlong, intmask from rpython.jit.metainterp.resoperation import rop + +def flush_cc(asm, condition, result_loc): + # After emitting an instruction that leaves a boolean result in + # a condition code (cc), call this. In the common case, result_loc + # will be set to 'fp' by the regalloc, which in this case means + # "propagate it between this operation and the next guard by keeping + # it in the cc". In the uncommon case, result_loc is another + # register, and we emit a load from the cc into this register. 
+ assert asm.guard_success_cc == c.cond_none + if result_loc is r.fp: + asm.guard_success_cc = condition + else: + asm.mc.MOV_ri(result_loc.value, 1, condition) + asm.mc.MOV_ri(result_loc.value, 0, c.get_opposite_of(condition)) + + def gen_emit_op_unary_cmp(name, true_cond): - false_cond = c.get_opposite_of(true_cond) def f(self, op, arglocs, regalloc, fcond): assert fcond is not None reg, res = arglocs self.mc.CMP_ri(reg.value, 0) - self.mc.MOV_ri(res.value, 1, true_cond) - self.mc.MOV_ri(res.value, 0, false_cond) + flush_cc(self, true_cond, res) return fcond f.__name__ = 'emit_op_%s' % name return f -def gen_emit_guard_unary_cmp(name, true_cond): - false_cond = c.get_opposite_of(true_cond) - def f(self, op, guard, arglocs, regalloc, fcond): - assert fcond is not None - assert guard is not None - reg = arglocs[0] - self.mc.CMP_ri(reg.value, 0) - cond = true_cond - guard_opnum = guard.getopnum() - if guard_opnum == rop.GUARD_FALSE: - cond = false_cond - return self._emit_guard(guard, arglocs[1:], cond, save_exc=False) - f.__name__ = 'emit_guard_%s' % name - return f - def gen_emit_op_ri(name, opname): ri_op = getattr(InstrBuilder, '%s_ri' % opname) rr_op = getattr(InstrBuilder, '%s_rr' % opname) @@ -61,8 +60,7 @@ f.__name__ = 'emit_op_%s' % name return f -def gen_emit_cmp_op(name, condition): - inv = c.get_opposite_of(condition) +def gen_emit_cmp_op(name, true_cond): def f(self, op, arglocs, regalloc, fcond): l0, l1, res = arglocs @@ -70,32 +68,11 @@ self.mc.CMP_ri(l0.value, imm=l1.getint(), cond=fcond) else: self.mc.CMP_rr(l0.value, l1.value, cond=fcond) - self.mc.MOV_ri(res.value, 1, cond=condition) - self.mc.MOV_ri(res.value, 0, cond=inv) + flush_cc(self, true_cond, res) return fcond f.__name__ = 'emit_op_%s' % name return f -def gen_emit_cmp_op_guard(name, true_cond): - false_cond = c.get_opposite_of(true_cond) - def f(self, op, guard, arglocs, regalloc, fcond): - assert guard is not None - l0 = arglocs[0] - l1 = arglocs[1] - assert l0.is_core_reg() - - if 
l1.is_imm(): - self.mc.CMP_ri(l0.value, imm=l1.getint(), cond=fcond) - else: - self.mc.CMP_rr(l0.value, l1.value, cond=fcond) - guard_opnum = guard.getopnum() - cond = true_cond - if guard_opnum == rop.GUARD_FALSE: - cond = false_cond - return self._emit_guard(guard, arglocs[2:], cond, save_exc=False) - f.__name__ = 'emit_guard_%s' % name - return f - def gen_emit_float_op(name, opname): op_rr = getattr(InstrBuilder, opname) def f(self, op, arglocs, regalloc, fcond): @@ -104,6 +81,7 @@ return fcond f.__name__ = 'emit_op_%s' % name return f + def gen_emit_unary_float_op(name, opname): op_rr = getattr(InstrBuilder, opname) def f(self, op, arglocs, regalloc, fcond): @@ -113,34 +91,16 @@ f.__name__ = 'emit_op_%s' % name return f -def gen_emit_float_cmp_op(name, cond): - inv = c.get_opposite_of(cond) +def gen_emit_float_cmp_op(name, true_cond): def f(self, op, arglocs, regalloc, fcond): arg1, arg2, res = arglocs self.mc.VCMP(arg1.value, arg2.value) self.mc.VMRS(cond=fcond) - self.mc.MOV_ri(res.value, 1, cond=cond) - self.mc.MOV_ri(res.value, 0, cond=inv) + flush_cc(self, true_cond, res) return fcond f.__name__ = 'emit_op_%s' % name return f -def gen_emit_float_cmp_op_guard(name, true_cond): - false_cond = c.get_opposite_of(true_cond) - def f(self, op, guard, arglocs, regalloc, fcond): - assert guard is not None - arg1 = arglocs[0] - arg2 = arglocs[1] - self.mc.VCMP(arg1.value, arg2.value) - self.mc.VMRS(cond=fcond) - cond = true_cond - guard_opnum = guard.getopnum() - if guard_opnum == rop.GUARD_FALSE: - cond = false_cond - return self._emit_guard(guard, arglocs[2:], cond, save_exc=False) - f.__name__ = 'emit_guard_%s' % name - return f - class saved_registers(object): def __init__(self, cb, regs_to_save, vfp_regs_to_save=None): diff --git a/rpython/jit/backend/arm/helper/regalloc.py b/rpython/jit/backend/arm/helper/regalloc.py --- a/rpython/jit/backend/arm/helper/regalloc.py +++ b/rpython/jit/backend/arm/helper/regalloc.py @@ -50,42 +50,28 @@ f.__name__ = name return f 
-def prepare_float_op(name=None, base=True, float_result=True, guard=False): - if guard: - def f(self, op, guard_op, fcond): - locs = [] - loc1 = self.make_sure_var_in_reg(op.getarg(0)) - locs.append(loc1) - if base: - loc2 = self.make_sure_var_in_reg(op.getarg(1)) - locs.append(loc2) - self.possibly_free_vars_for_op(op) - self.free_temp_vars() - if guard_op is None: - res = self.force_allocate_reg(op.result) - assert float_result == (op.result.type == FLOAT) - locs.append(res) - return locs - else: - args = self._prepare_guard(guard_op, locs) - return args - else: - def f(self, op, fcond): - locs = [] - loc1 = self.make_sure_var_in_reg(op.getarg(0)) - locs.append(loc1) - if base: - loc2 = self.make_sure_var_in_reg(op.getarg(1)) - locs.append(loc2) - self.possibly_free_vars_for_op(op) - self.free_temp_vars() - res = self.force_allocate_reg(op.result) - assert float_result == (op.result.type == FLOAT) - locs.append(res) - return locs - if name: - f.__name__ = name - return f +def prepare_unary_op(self, op, fcond): + loc1 = self.make_sure_var_in_reg(op.getarg(0)) + self.possibly_free_vars_for_op(op) + self.free_temp_vars() + res = self.force_allocate_reg(op.result) + return [loc1, res] + +def prepare_two_regs_op(self, op, fcond): + loc1 = self.make_sure_var_in_reg(op.getarg(0)) + loc2 = self.make_sure_var_in_reg(op.getarg(1)) + self.possibly_free_vars_for_op(op) + self.free_temp_vars() + res = self.force_allocate_reg(op.result) + return [loc1, loc2, res] + +def prepare_float_cmp(self, op, fcond): + loc1 = self.make_sure_var_in_reg(op.getarg(0)) + loc2 = self.make_sure_var_in_reg(op.getarg(1)) + self.possibly_free_vars_for_op(op) + self.free_temp_vars() + res = self.force_allocate_reg_or_cc(op.result) + return [loc1, loc2, res] def prepare_op_by_helper_call(name): def f(self, op, fcond): @@ -106,43 +92,28 @@ f.__name__ = name return f -def prepare_cmp_op(name=None): - def f(self, op, guard_op, fcond): - assert fcond is not None - boxes = list(op.getarglist()) - arg0, 
arg1 = boxes - imm_a1 = check_imm_box(arg1) +def prepare_int_cmp(self, op, fcond): + assert fcond is not None + boxes = list(op.getarglist()) + arg0, arg1 = boxes + imm_a1 = check_imm_box(arg1) - l0 = self.make_sure_var_in_reg(arg0, forbidden_vars=boxes) - if imm_a1: - l1 = self.convert_to_imm(arg1) - else: - l1 = self.make_sure_var_in_reg(arg1, forbidden_vars=boxes) + l0 = self.make_sure_var_in_reg(arg0, forbidden_vars=boxes) + if imm_a1: + l1 = self.convert_to_imm(arg1) + else: + l1 = self.make_sure_var_in_reg(arg1, forbidden_vars=boxes) - self.possibly_free_vars_for_op(op) - self.free_temp_vars() - if guard_op is None: - res = self.force_allocate_reg(op.result) - return [l0, l1, res] - else: - args = self._prepare_guard(guard_op, [l0, l1]) - return args - if name: - f.__name__ = name - return f + self.possibly_free_vars_for_op(op) + self.free_temp_vars() + res = self.force_allocate_reg_or_cc(op.result) + return [l0, l1, res] -def prepare_op_unary_cmp(name=None): - def f(self, op, guard_op, fcond): - assert fcond is not None - a0 = op.getarg(0) - assert isinstance(a0, Box) - reg = self.make_sure_var_in_reg(a0) - self.possibly_free_vars_for_op(op) - if guard_op is None: - res = self.force_allocate_reg(op.result, [a0]) - return [reg, res] - else: - return self._prepare_guard(guard_op, [reg]) - if name: - f.__name__ = name - return f +def prepare_unary_cmp(self, op, fcond): + assert fcond is not None + a0 = op.getarg(0) + assert isinstance(a0, Box) + reg = self.make_sure_var_in_reg(a0) + self.possibly_free_vars_for_op(op) + res = self.force_allocate_reg_or_cc(op.result) + return [reg, res] diff --git a/rpython/jit/backend/arm/opassembler.py b/rpython/jit/backend/arm/opassembler.py --- a/rpython/jit/backend/arm/opassembler.py +++ b/rpython/jit/backend/arm/opassembler.py @@ -5,13 +5,10 @@ from rpython.jit.backend.arm.arch import WORD, DOUBLE_WORD, JITFRAME_FIXED_SIZE from rpython.jit.backend.arm.helper.assembler import (gen_emit_op_by_helper_call, 
gen_emit_op_unary_cmp, - gen_emit_guard_unary_cmp, gen_emit_op_ri, gen_emit_cmp_op, - gen_emit_cmp_op_guard, gen_emit_float_op, gen_emit_float_cmp_op, - gen_emit_float_cmp_op_guard, gen_emit_unary_float_op, saved_registers) from rpython.jit.backend.arm.helper.regalloc import check_imm_arg @@ -114,32 +111,25 @@ return fcond #ref: http://blogs.arm.com/software-enablement/detecting-overflow-from-mul/ - def emit_guard_int_mul_ovf(self, op, guard, arglocs, regalloc, fcond): + def emit_op_int_mul_ovf(self, op, arglocs, regalloc, fcond): reg1 = arglocs[0] reg2 = arglocs[1] res = arglocs[2] - failargs = arglocs[3:] self.mc.SMULL(res.value, r.ip.value, reg1.value, reg2.value, cond=fcond) self.mc.CMP_rr(r.ip.value, res.value, shifttype=shift.ASR, imm=31, cond=fcond) - - if guard.getopnum() == rop.GUARD_OVERFLOW: - fcond = self._emit_guard(guard, failargs, c.NE, save_exc=False) - elif guard.getopnum() == rop.GUARD_NO_OVERFLOW: - fcond = self._emit_guard(guard, failargs, c.EQ, save_exc=False) - else: - assert 0 + self.guard_success_cc = c.EQ return fcond - def emit_guard_int_add_ovf(self, op, guard, arglocs, regalloc, fcond): - self.int_add_impl(op, arglocs[0:3], regalloc, fcond, flags=True) - self._emit_guard_overflow(guard, arglocs[3:], fcond) + def emit_op_int_add_ovf(self, op, arglocs, regalloc, fcond): + fcond = self.int_add_impl(op, arglocs, regalloc, fcond, flags=True) + self.guard_success_cc = c.VC return fcond - def emit_guard_int_sub_ovf(self, op, guard, arglocs, regalloc, fcond): - self.int_sub_impl(op, arglocs[0:3], regalloc, fcond, flags=True) - self._emit_guard_overflow(guard, arglocs[3:], fcond) + def emit_op_int_sub_ovf(self, op, arglocs, regalloc, fcond): + fcond = self.int_sub_impl(op, arglocs, regalloc, fcond, flags=True) + self.guard_success_cc = c.VC return fcond emit_op_int_floordiv = gen_emit_op_by_helper_call('int_floordiv', 'DIV') @@ -160,37 +150,17 @@ emit_op_int_gt = gen_emit_cmp_op('int_gt', c.GT) emit_op_int_ge = gen_emit_cmp_op('int_ge', c.GE) - 
emit_guard_int_lt = gen_emit_cmp_op_guard('int_lt', c.LT) - emit_guard_int_le = gen_emit_cmp_op_guard('int_le', c.LE) - emit_guard_int_eq = gen_emit_cmp_op_guard('int_eq', c.EQ) - emit_guard_int_ne = gen_emit_cmp_op_guard('int_ne', c.NE) - emit_guard_int_gt = gen_emit_cmp_op_guard('int_gt', c.GT) - emit_guard_int_ge = gen_emit_cmp_op_guard('int_ge', c.GE) - emit_op_uint_le = gen_emit_cmp_op('uint_le', c.LS) emit_op_uint_gt = gen_emit_cmp_op('uint_gt', c.HI) emit_op_uint_lt = gen_emit_cmp_op('uint_lt', c.LO) emit_op_uint_ge = gen_emit_cmp_op('uint_ge', c.HS) - emit_guard_uint_le = gen_emit_cmp_op_guard('uint_le', c.LS) - emit_guard_uint_gt = gen_emit_cmp_op_guard('uint_gt', c.HI) - emit_guard_uint_lt = gen_emit_cmp_op_guard('uint_lt', c.LO) - emit_guard_uint_ge = gen_emit_cmp_op_guard('uint_ge', c.HS) - emit_op_ptr_eq = emit_op_instance_ptr_eq = emit_op_int_eq emit_op_ptr_ne = emit_op_instance_ptr_ne = emit_op_int_ne - emit_guard_ptr_eq = emit_guard_instance_ptr_eq = emit_guard_int_eq - emit_guard_ptr_ne = emit_guard_instance_ptr_ne = emit_guard_int_ne - - emit_op_int_add_ovf = emit_op_int_add - emit_op_int_sub_ovf = emit_op_int_sub emit_op_int_is_true = gen_emit_op_unary_cmp('int_is_true', c.NE) emit_op_int_is_zero = gen_emit_op_unary_cmp('int_is_zero', c.EQ) - emit_guard_int_is_true = gen_emit_guard_unary_cmp('int_is_true', c.NE) - emit_guard_int_is_zero = gen_emit_guard_unary_cmp('int_is_zero', c.EQ) - def emit_op_int_invert(self, op, arglocs, regalloc, fcond): reg, res = arglocs @@ -223,9 +193,15 @@ fcond=fcond) return token - def _emit_guard(self, op, arglocs, fcond, save_exc, + def _emit_guard(self, op, arglocs, save_exc, is_guard_not_invalidated=False, is_guard_not_forced=False): + if is_guard_not_invalidated: + fcond = c.cond_none + else: + fcond = self.guard_success_cc + self.guard_success_cc = c.cond_none + assert fcond != c.cond_none pos = self.mc.currpos() token = self.build_guard_token(op, arglocs[0].value, arglocs[1:], pos, fcond, save_exc, 
is_guard_not_invalidated, @@ -241,27 +217,13 @@ self.mc.BKPT() return c.AL - def _emit_guard_overflow(self, guard, failargs, fcond): - if guard.getopnum() == rop.GUARD_OVERFLOW: - fcond = self._emit_guard(guard, failargs, c.VS, save_exc=False) - elif guard.getopnum() == rop.GUARD_NO_OVERFLOW: - fcond = self._emit_guard(guard, failargs, c.VC, save_exc=False) - else: - assert 0 - return fcond - def emit_op_guard_true(self, op, arglocs, regalloc, fcond): - l0 = arglocs[0] - failargs = arglocs[1:] - self.mc.CMP_ri(l0.value, 0) - fcond = self._emit_guard(op, failargs, c.NE, save_exc=False) + fcond = self._emit_guard(op, arglocs, save_exc=False) return fcond def emit_op_guard_false(self, op, arglocs, regalloc, fcond): - l0 = arglocs[0] - failargs = arglocs[1:] - self.mc.CMP_ri(l0.value, 0) - fcond = self._emit_guard(op, failargs, c.EQ, save_exc=False) + self.guard_success_cc = c.get_opposite_of(self.guard_success_cc) + fcond = self._emit_guard(op, arglocs, save_exc=False) return fcond def emit_op_guard_value(self, op, arglocs, regalloc, fcond): @@ -278,27 +240,27 @@ assert l1.is_vfp_reg() self.mc.VCMP(l0.value, l1.value) self.mc.VMRS(cond=fcond) - fcond = self._emit_guard(op, failargs, c.EQ, save_exc=False) + self.guard_success_cc = c.EQ + fcond = self._emit_guard(op, failargs, save_exc=False) return fcond emit_op_guard_nonnull = emit_op_guard_true emit_op_guard_isnull = emit_op_guard_false - def emit_op_guard_no_overflow(self, op, arglocs, regalloc, fcond): - return self._emit_guard(op, arglocs, c.VC, save_exc=False) - - def emit_op_guard_overflow(self, op, arglocs, regalloc, fcond): - return self._emit_guard(op, arglocs, c.VS, save_exc=False) + emit_op_guard_no_overflow = emit_op_guard_true + emit_op_guard_overflow = emit_op_guard_false def emit_op_guard_class(self, op, arglocs, regalloc, fcond): self._cmp_guard_class(op, arglocs, regalloc, fcond) - self._emit_guard(op, arglocs[3:], c.EQ, save_exc=False) + self.guard_success_cc = c.EQ + self._emit_guard(op, 
arglocs[3:], save_exc=False) return fcond def emit_op_guard_nonnull_class(self, op, arglocs, regalloc, fcond): self.mc.CMP_ri(arglocs[0].value, 1) self._cmp_guard_class(op, arglocs, regalloc, c.HS) - self._emit_guard(op, arglocs[3:], c.EQ, save_exc=False) + self.guard_success_cc = c.EQ + self._emit_guard(op, arglocs[3:], save_exc=False) return fcond def _cmp_guard_class(self, op, locs, regalloc, fcond): @@ -315,18 +277,20 @@ self.mc.CMP_rr(r.ip.value, typeid.value, cond=fcond) def emit_op_guard_not_invalidated(self, op, locs, regalloc, fcond): - return self._emit_guard(op, locs, fcond, save_exc=False, + return self._emit_guard(op, locs, save_exc=False, is_guard_not_invalidated=True) def emit_op_label(self, op, arglocs, regalloc, fcond): self._check_frame_depth_debug(self.mc) return fcond - def cond_call(self, op, gcmap, cond_loc, call_loc, fcond): + def emit_op_cond_call(self, op, arglocs, regalloc, fcond): + [call_loc] = arglocs + gcmap = regalloc.get_gcmap([call_loc]) + assert call_loc is r.r4 - self.mc.TST_rr(cond_loc.value, cond_loc.value) jmp_adr = self.mc.currpos() - self.mc.BKPT() # patched later + self.mc.BKPT() # patched later: the conditional jump # self.push_gcmap(self.mc, gcmap, store=True) # @@ -344,8 +308,13 @@ self.mc.BL(cond_call_adr) self.pop_gcmap(self.mc) # never any result value + cond = c.get_opposite_of(self.guard_success_cc) + self.guard_success_cc = c.cond_none pmc = OverwritingBuilder(self.mc, jmp_adr, WORD) - pmc.B_offs(self.mc.currpos(), c.EQ) # equivalent to 0 as result of TST above + pmc.B_offs(self.mc.currpos(), cond) + # might be overridden again to skip over the following + # guard_no_exception too + self.previous_cond_call_jcond = jmp_adr, cond return fcond def emit_op_jump(self, op, arglocs, regalloc, fcond): @@ -441,8 +410,15 @@ failargs = arglocs[1:] self.mc.LDR_ri(loc.value, loc.value) self.mc.CMP_ri(loc.value, 0) - cond = self._emit_guard(op, failargs, c.EQ, save_exc=True) - return cond + self.guard_success_cc = c.EQ + fcond = 
self._emit_guard(op, failargs, save_exc=True) + # If the previous operation was a COND_CALL, overwrite its conditional + # jump to jump over this GUARD_NO_EXCEPTION as well, if we can + if self._find_nearby_operation(-1).getopnum() == rop.COND_CALL: + jmp_adr, prev_cond = self.previous_cond_call_jcond + pmc = OverwritingBuilder(self.mc, jmp_adr, WORD) + pmc.B_offs(self.mc.currpos(), prev_cond) + return fcond def emit_op_guard_exception(self, op, arglocs, regalloc, fcond): loc, loc1, resloc, pos_exc_value, pos_exception = arglocs[:5] @@ -451,7 +427,8 @@ self.mc.LDR_ri(r.ip.value, loc1.value) self.mc.CMP_rr(r.ip.value, loc.value) - self._emit_guard(op, failargs, c.EQ, save_exc=True) + self.guard_success_cc = c.EQ + self._emit_guard(op, failargs, save_exc=True) self._store_and_reset_exception(self.mc, resloc) return fcond @@ -975,16 +952,14 @@ def imm(self, v): return imm(v) - def emit_guard_call_assembler(self, op, guard_op, arglocs, regalloc, - fcond): + def emit_op_call_assembler(self, op, arglocs, regalloc, fcond): if len(arglocs) == 4: [argloc, vloc, result_loc, tmploc] = arglocs else: [argloc, result_loc, tmploc] = arglocs vloc = imm(0) - self.call_assembler(op, guard_op, argloc, vloc, result_loc, tmploc) - self._emit_guard_may_force(guard_op, - regalloc._prepare_guard(guard_op)) + self._store_force_index(self._find_nearby_operation(+1)) + self.call_assembler(op, argloc, vloc, result_loc, tmploc) return fcond def _call_assembler_emit_call(self, addr, argloc, resloc): @@ -1058,41 +1033,37 @@ mc.B(target) mc.copy_to_raw_memory(oldadr) - def emit_guard_call_may_force(self, op, guard_op, arglocs, regalloc, - fcond): - self._store_force_index(guard_op) - numargs = op.numargs() - callargs = arglocs[:numargs + 3] # extract the arguments to the call - guardargs = arglocs[len(callargs):] - # - self._emit_call(op, callargs, fcond=fcond) - self._emit_guard_may_force(guard_op, guardargs) - return fcond - - def _emit_guard_may_force(self, guard_op, arglocs): + def 
emit_op_guard_not_forced(self, op, arglocs, regalloc, fcond): ofs = self.cpu.get_ofs_of_frame_field('jf_descr') self.mc.LDR_ri(r.ip.value, r.fp.value, imm=ofs) self.mc.CMP_ri(r.ip.value, 0) - self._emit_guard(guard_op, arglocs, c.EQ, - save_exc=True, is_guard_not_forced=True) + self.guard_success_cc = c.EQ + self._emit_guard(op, arglocs, save_exc=True, is_guard_not_forced=True) + return fcond - def emit_guard_call_release_gil(self, op, guard_op, arglocs, regalloc, - fcond): - numargs = op.numargs() - callargs = arglocs[:numargs + 3] # extract the arguments to the call - guardargs = arglocs[len(callargs):] # extrat the arguments for the guard - self._store_force_index(guard_op) - self._emit_call(op, callargs, is_call_release_gil=True) - self._emit_guard_may_force(guard_op, guardargs) + def emit_op_call_may_force(self, op, arglocs, regalloc, fcond): + self._store_force_index(self._find_nearby_operation(+1)) + self._emit_call(op, arglocs, fcond=fcond) + return fcond + + def emit_op_call_release_gil(self, op, arglocs, regalloc, fcond): + self._store_force_index(self._find_nearby_operation(+1)) + self._emit_call(op, arglocs, is_call_release_gil=True) return fcond def _store_force_index(self, guard_op): + assert (guard_op.getopnum() == rop.GUARD_NOT_FORCED or + guard_op.getopnum() == rop.GUARD_NOT_FORCED_2) faildescr = guard_op.getdescr() ofs = self.cpu.get_ofs_of_frame_field('jf_force_descr') value = rffi.cast(lltype.Signed, cast_instance_to_gcref(faildescr)) self.mc.gen_load_int(r.ip.value, value) self.store_reg(self.mc, r.ip, r.fp, ofs) + def _find_nearby_operation(self, delta): + regalloc = self._regalloc + return regalloc.operations[regalloc.rm.position + delta] + def emit_op_call_malloc_gc(self, op, arglocs, regalloc, fcond): self.emit_op_call(op, arglocs, regalloc, fcond) self.propagate_memoryerror_if_r0_is_null() @@ -1125,13 +1096,6 @@ emit_op_float_gt = gen_emit_float_cmp_op('float_gt', c.GT) emit_op_float_ge = gen_emit_float_cmp_op('float_ge', c.GE) - 
emit_guard_float_lt = gen_emit_float_cmp_op_guard('float_lt', c.VFP_LT) - emit_guard_float_le = gen_emit_float_cmp_op_guard('float_le', c.VFP_LE) - emit_guard_float_eq = gen_emit_float_cmp_op_guard('float_eq', c.EQ) - emit_guard_float_ne = gen_emit_float_cmp_op_guard('float_ne', c.NE) - emit_guard_float_gt = gen_emit_float_cmp_op_guard('float_gt', c.GT) - emit_guard_float_ge = gen_emit_float_cmp_op_guard('float_ge', c.GE) - def emit_op_cast_float_to_int(self, op, arglocs, regalloc, fcond): arg, res = arglocs assert arg.is_vfp_reg() diff --git a/rpython/jit/backend/arm/regalloc.py b/rpython/jit/backend/arm/regalloc.py --- a/rpython/jit/backend/arm/regalloc.py +++ b/rpython/jit/backend/arm/regalloc.py @@ -5,13 +5,16 @@ RegisterManager, TempBox, compute_vars_longevity, BaseRegalloc, \ get_scale from rpython.jit.backend.arm import registers as r +from rpython.jit.backend.arm import conditions as c from rpython.jit.backend.arm import locations from rpython.jit.backend.arm.locations import imm, get_fp_offset from rpython.jit.backend.arm.helper.regalloc import (prepare_op_by_helper_call, - prepare_op_unary_cmp, + prepare_unary_cmp, prepare_op_ri, - prepare_cmp_op, - prepare_float_op, + prepare_int_cmp, + prepare_unary_op, + prepare_two_regs_op, + prepare_float_cmp, check_imm_arg, check_imm_box, VMEM_imm_size, @@ -146,6 +149,7 @@ box_types = None # or a list of acceptable types no_lower_byte_regs = all_regs save_around_call_regs = r.caller_resp + frame_reg = r.fp def __init__(self, longevity, frame_manager=None, assembler=None): RegisterManager.__init__(self, longevity, frame_manager, assembler) @@ -235,6 +239,18 @@ return self.rm.force_allocate_reg(var, forbidden_vars, selected_reg, need_lower_byte) + def force_allocate_reg_or_cc(self, var, forbidden_vars=[]): + assert var.type == INT + if self.next_op_can_accept_cc(self.operations, self.rm.position): + # hack: return the 'fp' location to mean "lives in CC". 
This + # fp will not actually be used, and the location will be freed + # after the next op as usual. + self.rm.force_allocate_frame_reg(var) + return r.fp + else: + # else, return a regular register (not fp). + return self.rm.force_allocate_reg(var) + def try_allocate_reg(self, v, selected_reg=None, need_lower_byte=False): if v.type == FLOAT: return self.vfprm.try_allocate_reg(v, selected_reg, @@ -467,25 +483,6 @@ resloc = self.force_allocate_reg(op.result) return [argloc, imm(numbytes), resloc] - def prepare_guard_int_mul_ovf(self, op, guard, fcond): - boxes = op.getarglist() - reg1 = self.make_sure_var_in_reg(boxes[0], forbidden_vars=boxes) - reg2 = self.make_sure_var_in_reg(boxes[1], forbidden_vars=boxes) - res = self.force_allocate_reg(op.result) - return self._prepare_guard(guard, [reg1, reg2, res]) - - def prepare_guard_int_add_ovf(self, op, guard, fcond): - locs = self._prepare_op_int_add(op, fcond) - res = self.force_allocate_reg(op.result) - locs.append(res) - return self._prepare_guard(guard, locs) - - def prepare_guard_int_sub_ovf(self, op, guard, fcond): - locs = self._prepare_op_int_sub(op, fcond) - res = self.force_allocate_reg(op.result) - locs.append(res) - return self._prepare_guard(guard, locs) - prepare_op_int_floordiv = prepare_op_by_helper_call('int_floordiv') prepare_op_int_mod = prepare_op_by_helper_call('int_mod') prepare_op_uint_floordiv = prepare_op_by_helper_call('unit_floordiv') @@ -500,58 +497,36 @@ prepare_op_uint_rshift = prepare_op_ri('uint_rshift', imm_size=0x1F, allow_zero=False, commutative=False) - prepare_op_int_lt = prepare_cmp_op('int_lt') - prepare_op_int_le = prepare_cmp_op('int_le') - prepare_op_int_eq = prepare_cmp_op('int_eq') - prepare_op_int_ne = prepare_cmp_op('int_ne') - prepare_op_int_gt = prepare_cmp_op('int_gt') - prepare_op_int_ge = prepare_cmp_op('int_ge') + prepare_op_int_lt = prepare_int_cmp + prepare_op_int_le = prepare_int_cmp + prepare_op_int_eq = prepare_int_cmp + prepare_op_int_ne = prepare_int_cmp + 
prepare_op_int_gt = prepare_int_cmp + prepare_op_int_ge = prepare_int_cmp - prepare_op_uint_le = prepare_cmp_op('uint_le') - prepare_op_uint_gt = prepare_cmp_op('uint_gt') + prepare_op_uint_le = prepare_int_cmp + prepare_op_uint_gt = prepare_int_cmp - prepare_op_uint_lt = prepare_cmp_op('uint_lt') - prepare_op_uint_ge = prepare_cmp_op('uint_ge') + prepare_op_uint_lt = prepare_int_cmp + prepare_op_uint_ge = prepare_int_cmp prepare_op_ptr_eq = prepare_op_instance_ptr_eq = prepare_op_int_eq prepare_op_ptr_ne = prepare_op_instance_ptr_ne = prepare_op_int_ne - prepare_guard_int_lt = prepare_cmp_op('guard_int_lt') - prepare_guard_int_le = prepare_cmp_op('guard_int_le') - prepare_guard_int_eq = prepare_cmp_op('guard_int_eq') - prepare_guard_int_ne = prepare_cmp_op('guard_int_ne') - prepare_guard_int_gt = prepare_cmp_op('guard_int_gt') - prepare_guard_int_ge = prepare_cmp_op('guard_int_ge') - - prepare_guard_uint_le = prepare_cmp_op('guard_uint_le') - prepare_guard_uint_gt = prepare_cmp_op('guard_uint_gt') - - prepare_guard_uint_lt = prepare_cmp_op('guard_uint_lt') - prepare_guard_uint_ge = prepare_cmp_op('guard_uint_ge') - - prepare_guard_ptr_eq = prepare_guard_instance_ptr_eq = prepare_guard_int_eq - prepare_guard_ptr_ne = prepare_guard_instance_ptr_ne = prepare_guard_int_ne - prepare_op_int_add_ovf = prepare_op_int_add prepare_op_int_sub_ovf = prepare_op_int_sub + prepare_op_int_mul_ovf = prepare_op_int_mul - prepare_op_int_is_true = prepare_op_unary_cmp('int_is_true') - prepare_op_int_is_zero = prepare_op_unary_cmp('int_is_zero') + prepare_op_int_is_true = prepare_unary_cmp + prepare_op_int_is_zero = prepare_unary_cmp - prepare_guard_int_is_true = prepare_op_unary_cmp('int_is_true') - prepare_guard_int_is_zero = prepare_op_unary_cmp('int_is_zero') - - def prepare_op_int_neg(self, op, fcond): - l0 = self.make_sure_var_in_reg(op.getarg(0)) - self.possibly_free_vars_for_op(op) - self.free_temp_vars() - resloc = self.force_allocate_reg(op.result) - return [l0, resloc] - - 
prepare_op_int_invert = prepare_op_int_neg + prepare_op_int_neg = prepare_unary_op + prepare_op_int_invert = prepare_unary_op def prepare_op_call(self, op, fcond): - effectinfo = op.getdescr().get_extra_info() + calldescr = op.getdescr() + assert calldescr is not None + effectinfo = calldescr.get_extra_info() if effectinfo is not None: oopspecindex = effectinfo.oopspecindex if oopspecindex in (EffectInfo.OS_LLONG_ADD, @@ -603,13 +578,12 @@ def _call(self, op, arglocs, force_store=[], save_all_regs=False): # spill variables that need to be saved around calls - self.vfprm.before_call(save_all_regs=save_all_regs) + self.vfprm.before_call(force_store, save_all_regs=save_all_regs) if not save_all_regs: gcrootmap = self.cpu.gc_ll_descr.gcrootmap if gcrootmap and gcrootmap.is_shadow_stack: save_all_regs = 2 - self.rm.before_call(save_all_regs=save_all_regs) - self.before_call_called = True + self.rm.before_call(force_store, save_all_regs=save_all_regs) resloc = None if op.result: resloc = self.after_call(op.result) @@ -666,14 +640,25 @@ locs = [imm(fail_descr)] return locs - def prepare_op_guard_true(self, op, fcond): - l0 = self.make_sure_var_in_reg(op.getarg(0)) - args = self._prepare_guard(op, [l0]) + def load_condition_into_cc(self, box): + if self.assembler.guard_success_cc == c.cond_none: + loc = self.loc(box) + if not loc.is_core_reg(): + assert loc.is_stack() + self.assembler.regalloc_mov(loc, r.lr) + loc = r.lr + self.assembler.mc.CMP_ri(loc.value, 0) + self.assembler.guard_success_cc = c.NE + + def _prepare_guard_cc(self, op, fcond): + self.load_condition_into_cc(op.getarg(0)) + args = self._prepare_guard(op, []) return args - prepare_op_guard_false = prepare_op_guard_true - prepare_op_guard_nonnull = prepare_op_guard_true - prepare_op_guard_isnull = prepare_op_guard_true + prepare_op_guard_true = _prepare_guard_cc + prepare_op_guard_false = _prepare_guard_cc + prepare_op_guard_nonnull = _prepare_guard_cc + prepare_op_guard_isnull = _prepare_guard_cc def 
prepare_op_guard_value(self, op, fcond): boxes = op.getarglist() @@ -697,6 +682,7 @@ prepare_op_guard_overflow = prepare_op_guard_no_overflow prepare_op_guard_not_invalidated = prepare_op_guard_no_overflow + prepare_op_guard_not_forced = prepare_op_guard_no_overflow def prepare_op_guard_exception(self, op, fcond): boxes = op.getarglist() @@ -1188,9 +1174,8 @@ arg = op.getarg(i) self.make_sure_var_in_reg(arg, args_so_far, selected_reg=reg) args_so_far.append(arg) - loc_cond = self.make_sure_var_in_reg(op.getarg(0), args_so_far) - gcmap = self.get_gcmap([tmpreg]) - self.assembler.cond_call(op, gcmap, loc_cond, tmpreg, fcond) + self.load_condition_into_cc(op.getarg(0)) + return [tmpreg] def prepare_op_force_token(self, op, fcond): # XXX for now we return a regular reg @@ -1244,19 +1229,16 @@ self.assembler.store_force_descr(op, fail_locs[1:], fail_locs[0].value) self.possibly_free_vars(op.getfailargs()) - def prepare_guard_call_may_force(self, op, guard_op, fcond): - args = self._prepare_call(op, save_all_regs=True) - return self._prepare_guard(guard_op, args) + def prepare_op_call_may_force(self, op, fcond): + return self._prepare_call(op, save_all_regs=True) - def prepare_guard_call_release_gil(self, op, guard_op, fcond): - args = self._prepare_call(op, save_all_regs=True, first_arg_index=2) - return self._prepare_guard(guard_op, args) + def prepare_op_call_release_gil(self, op, fcond): + return self._prepare_call(op, save_all_regs=True, first_arg_index=2) - def prepare_guard_call_assembler(self, op, guard_op, fcond): - locs = self.locs_for_call_assembler(op, guard_op) + def prepare_op_call_assembler(self, op, fcond): + locs = self.locs_for_call_assembler(op) tmploc = self.get_scratch_reg(INT, selected_reg=r.r0) resloc = self._call(op, locs + [tmploc], save_all_regs=True) - self.possibly_free_vars(guard_op.getfailargs()) return locs + [resloc, tmploc] def _prepare_args_for_new_op(self, new_args): @@ -1271,39 +1253,18 @@ arglocs.append(t) return arglocs - 
prepare_op_float_add = prepare_float_op(name='prepare_op_float_add') - prepare_op_float_sub = prepare_float_op(name='prepare_op_float_sub') - prepare_op_float_mul = prepare_float_op(name='prepare_op_float_mul') - prepare_op_float_truediv = prepare_float_op(name='prepare_op_float_truediv') - prepare_op_float_lt = prepare_float_op(float_result=False, - name='prepare_op_float_lt') - prepare_op_float_le = prepare_float_op(float_result=False, - name='prepare_op_float_le') - prepare_op_float_eq = prepare_float_op(float_result=False, - name='prepare_op_float_eq') - prepare_op_float_ne = prepare_float_op(float_result=False, - name='prepare_op_float_ne') - prepare_op_float_gt = prepare_float_op(float_result=False, - name='prepare_op_float_gt') - prepare_op_float_ge = prepare_float_op(float_result=False, - name='prepare_op_float_ge') - prepare_op_float_neg = prepare_float_op(base=False, - name='prepare_op_float_neg') - prepare_op_float_abs = prepare_float_op(base=False, - name='prepare_op_float_abs') - - prepare_guard_float_lt = prepare_float_op(guard=True, - float_result=False, name='prepare_guard_float_lt') - prepare_guard_float_le = prepare_float_op(guard=True, - float_result=False, name='prepare_guard_float_le') - prepare_guard_float_eq = prepare_float_op(guard=True, - float_result=False, name='prepare_guard_float_eq') - prepare_guard_float_ne = prepare_float_op(guard=True, - float_result=False, name='prepare_guard_float_ne') - prepare_guard_float_gt = prepare_float_op(guard=True, - float_result=False, name='prepare_guard_float_gt') - prepare_guard_float_ge = prepare_float_op(guard=True, - float_result=False, name='prepare_guard_float_ge') + prepare_op_float_add = prepare_two_regs_op + prepare_op_float_sub = prepare_two_regs_op + prepare_op_float_mul = prepare_two_regs_op + prepare_op_float_truediv = prepare_two_regs_op + prepare_op_float_lt = prepare_float_cmp + prepare_op_float_le = prepare_float_cmp + prepare_op_float_eq = prepare_float_cmp + prepare_op_float_ne = 
prepare_float_cmp + prepare_op_float_gt = prepare_float_cmp + prepare_op_float_ge = prepare_float_cmp + prepare_op_float_neg = prepare_unary_op + prepare_op_float_abs = prepare_unary_op def _prepare_op_math_sqrt(self, op, fcond): loc = self.make_sure_var_in_reg(op.getarg(1)) @@ -1327,10 +1288,8 @@ self.force_spill_var(op.getarg(0)) return [] - prepare_op_convert_float_bytes_to_longlong = prepare_float_op(base=False, - name='prepare_op_convert_float_bytes_to_longlong') - prepare_op_convert_longlong_bytes_to_float = prepare_float_op(base=False, - name='prepare_op_convert_longlong_bytes_to_float') + prepare_op_convert_float_bytes_to_longlong = prepare_unary_op + prepare_op_convert_longlong_bytes_to_float = prepare_unary_op #def prepare_op_read_timestamp(self, op, fcond): # loc = self.get_scratch_reg(INT) @@ -1348,22 +1307,12 @@ return [loc1, res] -def add_none_argument(fn): - return lambda self, op, fcond: fn(self, op, None, fcond) - - def notimplemented(self, op, fcond): print "[ARM/regalloc] %s not implemented" % op.getopname() raise NotImplementedError(op) -def notimplemented_with_guard(self, op, guard_op, fcond): - print "[ARM/regalloc] %s with guard %s not implemented" % \ - (op.getopname(), guard_op.getopname()) - raise NotImplementedError(op) - operations = [notimplemented] * (rop._LAST + 1) -operations_with_guard = [notimplemented_with_guard] * (rop._LAST + 1) for key, value in rop.__dict__.items(): @@ -1374,13 +1323,3 @@ if hasattr(Regalloc, methname): func = getattr(Regalloc, methname).im_func operations[value] = func - -for key, value in rop.__dict__.items(): - key = key.lower() - if key.startswith('_'): - continue - methname = 'prepare_guard_%s' % key - if hasattr(Regalloc, methname): - func = getattr(Regalloc, methname).im_func - operations_with_guard[value] = func - operations[value] = add_none_argument(func) diff --git a/rpython/jit/backend/arm/test/conftest.py b/rpython/jit/backend/arm/test/conftest.py --- a/rpython/jit/backend/arm/test/conftest.py +++ 
b/rpython/jit/backend/arm/test/conftest.py @@ -1,21 +1,12 @@ """ -This conftest adds an option to run the translation tests which by default will -be disabled. -Also it disables the backend tests on non ARMv7 platforms +This disables the backend tests on non ARMv7 platforms. +Note that you need "--slow" to run translation tests. """ import py, os from rpython.jit.backend import detect_cpu cpu = detect_cpu.autodetect() -def pytest_addoption(parser): - group = parser.getgroup('translation test options') - group.addoption('--run-translation-tests', - action="store_true", - default=False, - dest="run_translation_tests", - help="run tests that translate code") - def pytest_collect_directory(path, parent): if not cpu.startswith('arm'): py.test.skip("ARM(v7) tests skipped: cpu is %r" % (cpu,)) diff --git a/rpython/jit/backend/llsupport/assembler.py b/rpython/jit/backend/llsupport/assembler.py --- a/rpython/jit/backend/llsupport/assembler.py +++ b/rpython/jit/backend/llsupport/assembler.py @@ -212,8 +212,7 @@ self.codemap_builder.leave_portal_frame(op.getarg(0).getint(), self.mc.get_relative_pos()) - def call_assembler(self, op, guard_op, argloc, vloc, result_loc, tmploc): - self._store_force_index(guard_op) + def call_assembler(self, op, argloc, vloc, result_loc, tmploc): descr = op.getdescr() assert isinstance(descr, JitCellToken) # @@ -262,9 +261,6 @@ # # Here we join Path A and Path B again self._call_assembler_patch_jmp(jmp_location) - # XXX here should be emitted guard_not_forced, but due - # to incompatibilities in how it's done, we leave it for the - # caller to deal with @specialize.argtype(1) def _inject_debugging_code(self, looptoken, operations, tp, number): diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py --- a/rpython/jit/backend/llsupport/regalloc.py +++ b/rpython/jit/backend/llsupport/regalloc.py @@ -639,31 +639,26 @@ if looptoken.compiled_loop_token is not None: # <- for tests 
looptoken.compiled_loop_token._ll_initial_locs = locs - def can_merge_with_next_guard(self, op, i, operations): - if (op.getopnum() == rop.CALL_MAY_FORCE or - op.getopnum() == rop.CALL_ASSEMBLER or - op.getopnum() == rop.CALL_RELEASE_GIL): - assert operations[i + 1].getopnum() == rop.GUARD_NOT_FORCED - return True - if not op.is_comparison(): - if op.is_ovf(): - if (operations[i + 1].getopnum() != rop.GUARD_NO_OVERFLOW and - operations[i + 1].getopnum() != rop.GUARD_OVERFLOW): - not_implemented("int_xxx_ovf not followed by " - "guard_(no)_overflow") - return True + def next_op_can_accept_cc(self, operations, i): + op = operations[i] + next_op = operations[i + 1] + opnum = next_op.getopnum() + if (opnum != rop.GUARD_TRUE and opnum != rop.GUARD_FALSE + and opnum != rop.COND_CALL): return False - if (operations[i + 1].getopnum() != rop.GUARD_TRUE and - operations[i + 1].getopnum() != rop.GUARD_FALSE): + if next_op.getarg(0) is not op.result: return False - if operations[i + 1].getarg(0) is not op.result: + if self.longevity[op.result][1] > i + 1: return False - if (self.longevity[op.result][1] > i + 1 or - op.result in operations[i + 1].getfailargs()): - return False + if opnum != rop.COND_CALL: + if op.result in operations[i + 1].getfailargs(): + return False + else: + if op.result in operations[i + 1].getarglist()[1:]: + return False return True - def locs_for_call_assembler(self, op, guard_op): + def locs_for_call_assembler(self, op): descr = op.getdescr() assert isinstance(descr, JitCellToken) if op.numargs() == 2: diff --git a/rpython/jit/backend/test/runner_test.py b/rpython/jit/backend/test/runner_test.py --- a/rpython/jit/backend/test/runner_test.py +++ b/rpython/jit/backend/test/runner_test.py @@ -2293,7 +2293,7 @@ value |= 32768 assert s.data.tid == value - def test_cond_call(self): + def test_cond_call_1(self): def func_void(*args): called.append(args) @@ -2330,6 +2330,52 @@ assert longlong.getrealfloat(self.cpu.get_float_value(frame, 6)) == 1.2 assert 
longlong.getrealfloat(self.cpu.get_float_value(frame, 7)) == 3.4 + def test_cond_call_2(self): + def func_void(*args): + called.append(args) + + FUNC = self.FuncType([lltype.Signed, lltype.Signed], lltype.Void) + func_ptr = llhelper(lltype.Ptr(FUNC), func_void) + calldescr = self.cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT, + EffectInfo.MOST_GENERAL) + + for (operation, arg1, arg2_if_true, arg2_if_false) in [ + ('int_lt', -5, 2, -5), + ('int_le', 5, 5, -6), + ('int_eq', 11, 11, 12), + ('int_ne', 11, 12, 11), + ('int_gt', 8, -1, 8), + ('int_xor', 7, 3, 7), # test without a comparison at all + ('int_is_true', 4242, 1, 0), + ('int_is_zero', 4242, 0, 1), + ('float_lt', -0.5, 0.2, -0.5), + ('float_eq', 1.1, 1.1, 1.2), + ]: + called = [] + + ops = ''' + [%s, %s, i3, i4] + i2 = %s(%s) + cond_call(i2, ConstClass(func_ptr), i3, i4, descr=calldescr) + guard_no_exception(descr=faildescr) [] + finish() + ''' % ("i0" if operation.startswith('int') else "f0", + "i1" if operation.startswith('int') else "f1", + operation, + ("i1" if operation.startswith('int_is_') else + "i0, i1" if operation.startswith('int') else + "f0, f1")) + loop = parse(ops, namespace={'func_ptr': func_ptr, + 'calldescr': calldescr, + 'faildescr': BasicFailDescr()}) + looptoken = JitCellToken() + self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken) + frame = self.cpu.execute_token(looptoken, arg1, arg2_if_false, 0, 0) + assert called == [] + frame = self.cpu.execute_token(looptoken, arg1, arg2_if_true, + 67, 89) + assert called == [(67, 89)] + def test_force_operations_returning_void(self): values = [] def maybe_force(token, flag): diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -726,8 +726,10 @@ def _assemble(self, regalloc, inputargs, operations): self._regalloc = regalloc + self.guard_success_cc = rx86.cond_none regalloc.compute_hint_frame_locations(operations) 
regalloc.walk_operations(inputargs, operations) + assert self.guard_success_cc == rx86.cond_none if we_are_translated() or self.cpu.dont_keepalive_stuff: self._regalloc = None # else keep it around for debugging frame_depth = regalloc.get_final_frame_depth() @@ -922,8 +924,8 @@ oopspecindex = effectinfo.oopspecindex genop_math_list[oopspecindex](self, op, arglocs, resloc) - def regalloc_perform_with_guard(self, op, guard_op, faillocs, - arglocs, resloc, frame_depth): + def regalloc_perform_guard(self, guard_op, faillocs, arglocs, resloc, + frame_depth): faildescr = guard_op.getdescr() assert isinstance(faildescr, AbstractFailDescr) failargs = guard_op.getfailargs() @@ -931,21 +933,12 @@ guard_token = self.implement_guard_recovery(guard_opnum, faildescr, failargs, faillocs, frame_depth) - if op is None: - dispatch_opnum = guard_opnum - else: - dispatch_opnum = op.getopnum() - genop_guard_list[dispatch_opnum](self, op, guard_op, guard_token, - arglocs, resloc) + genop_guard_list[guard_opnum](self, guard_op, guard_token, + arglocs, resloc) if not we_are_translated(): # must be added by the genop_guard_list[]() assert guard_token is self.pending_guard_tokens[-1] - def regalloc_perform_guard(self, guard_op, faillocs, arglocs, resloc, - frame_depth): - self.regalloc_perform_with_guard(None, guard_op, faillocs, arglocs, - resloc, frame_depth) - def load_effective_addr(self, sizereg, baseofs, scale, result, frm=imm0): self.mc.LEA(result, addr_add(frm, sizereg, baseofs, scale)) @@ -977,88 +970,70 @@ self.mc.LEA_rm(result_loc.value, (loc.value, delta)) return genop_binary_or_lea + def flush_cc(self, cond, result_loc): + # After emitting a instruction that leaves a boolean result in + # a condition code (cc), call this. In the common case, result_loc + # will be set to ebp by the regalloc, which in this case means + # "propagate it between this operation and the next guard by keeping + # it in the cc". 
In the uncommon case, result_loc is another + # register, and we emit a load from the cc into this register. + assert self.guard_success_cc == rx86.cond_none + if result_loc is ebp: + self.guard_success_cc = cond + else: + rl = result_loc.lowest8bits() + self.mc.SET_ir(cond, rl.value) + self.mc.MOVZX8_rr(result_loc.value, rl.value) + def _cmpop(cond, rev_cond): + cond = rx86.Conditions[cond] + rev_cond = rx86.Conditions[rev_cond] + # def genop_cmp(self, op, arglocs, result_loc): - rl = result_loc.lowest8bits() if isinstance(op.getarg(0), Const): self.mc.CMP(arglocs[1], arglocs[0]) - self.mc.SET_ir(rx86.Conditions[rev_cond], rl.value) + self.flush_cc(rev_cond, result_loc) else: self.mc.CMP(arglocs[0], arglocs[1]) - self.mc.SET_ir(rx86.Conditions[cond], rl.value) - self.mc.MOVZX8_rr(result_loc.value, rl.value) + self.flush_cc(cond, result_loc) return genop_cmp - def _cmpop_float(cond, rev_cond, is_ne=False): - def genop_cmp(self, op, arglocs, result_loc): - if isinstance(arglocs[0], RegLoc): + def _if_parity_clear_zero_and_carry(self): + self.mc.J_il8(rx86.Conditions['NP'], 0) + jnp_location = self.mc.get_relative_pos() + # CMP EBP, 0: as EBP cannot be null here, that operation should + # always clear zero and carry + self.mc.CMP_ri(ebp.value, 0) + # patch the JNP above + offset = self.mc.get_relative_pos() - jnp_location + assert 0 < offset <= 127 + self.mc.overwrite(jnp_location-1, chr(offset)) + + def _cmpop_float(cond, rev_cond): + is_ne = cond == 'NE' + need_direct_p = 'A' not in cond + need_rev_p = 'A' not in rev_cond + cond_contains_e = ('E' in cond) ^ ('N' in cond) + cond = rx86.Conditions[cond] + rev_cond = rx86.Conditions[rev_cond] + # + def genop_cmp_float(self, op, arglocs, result_loc): + if need_direct_p: + direct_case = not isinstance(arglocs[1], RegLoc) + else: + direct_case = isinstance(arglocs[0], RegLoc) + if direct_case: self.mc.UCOMISD(arglocs[0], arglocs[1]) checkcond = cond + need_p = need_direct_p else: self.mc.UCOMISD(arglocs[1], arglocs[0]) 
checkcond = rev_cond - - tmp1 = result_loc.lowest8bits() - if IS_X86_32: - tmp2 = result_loc.higher8bits() - elif IS_X86_64: - tmp2 = X86_64_SCRATCH_REG.lowest8bits() - - self.mc.SET_ir(rx86.Conditions[checkcond], tmp1.value) - if is_ne: - self.mc.SET_ir(rx86.Conditions['P'], tmp2.value) - self.mc.OR8_rr(tmp1.value, tmp2.value) - else: - self.mc.SET_ir(rx86.Conditions['NP'], tmp2.value) - self.mc.AND8_rr(tmp1.value, tmp2.value) - self.mc.MOVZX8_rr(result_loc.value, tmp1.value) - return genop_cmp - - def _cmpop_guard(cond, rev_cond, false_cond, false_rev_cond): - def genop_cmp_guard(self, op, guard_op, guard_token, arglocs, result_loc): - guard_opnum = guard_op.getopnum() - if isinstance(op.getarg(0), Const): - self.mc.CMP(arglocs[1], arglocs[0]) - if guard_opnum == rop.GUARD_FALSE: - self.implement_guard(guard_token, rev_cond) - else: - self.implement_guard(guard_token, false_rev_cond) - else: - self.mc.CMP(arglocs[0], arglocs[1]) - if guard_opnum == rop.GUARD_FALSE: - self.implement_guard(guard_token, cond) - else: - self.implement_guard(guard_token, false_cond) - return genop_cmp_guard - - def _cmpop_guard_float(cond, rev_cond, false_cond, false_rev_cond): - need_direct_jp = 'A' not in cond - need_rev_jp = 'A' not in rev_cond - def genop_cmp_guard_float(self, op, guard_op, guard_token, arglocs, - result_loc): - guard_opnum = guard_op.getopnum() - if isinstance(arglocs[0], RegLoc): - self.mc.UCOMISD(arglocs[0], arglocs[1]) - checkcond = cond - checkfalsecond = false_cond - need_jp = need_direct_jp - else: - self.mc.UCOMISD(arglocs[1], arglocs[0]) - checkcond = rev_cond - checkfalsecond = false_rev_cond - need_jp = need_rev_jp - if guard_opnum == rop.GUARD_FALSE: - if need_jp: - self.mc.J_il8(rx86.Conditions['P'], 6) - self.implement_guard(guard_token, checkcond) - else: - if need_jp: - self.mc.J_il8(rx86.Conditions['P'], 2) - self.mc.J_il8(rx86.Conditions[checkcond], 5) - self.implement_guard(guard_token) - else: - self.implement_guard(guard_token, checkfalsecond) 
- return genop_cmp_guard_float + need_p = need_rev_p + if need_p: + self._if_parity_clear_zero_and_carry() + self.flush_cc(checkcond, result_loc) + return genop_cmp_float def simple_call(self, fnloc, arglocs, result_loc=eax): if result_loc is xmm0: @@ -1121,37 +1096,17 @@ genop_ptr_eq = genop_instance_ptr_eq = genop_int_eq genop_ptr_ne = genop_instance_ptr_ne = genop_int_ne - genop_float_lt = _cmpop_float('B', 'A') - genop_float_le = _cmpop_float('BE', 'AE') - genop_float_ne = _cmpop_float('NE', 'NE', is_ne=True) - genop_float_eq = _cmpop_float('E', 'E') - genop_float_gt = _cmpop_float('A', 'B') - genop_float_ge = _cmpop_float('AE', 'BE') - genop_uint_gt = _cmpop("A", "B") genop_uint_lt = _cmpop("B", "A") genop_uint_le = _cmpop("BE", "AE") genop_uint_ge = _cmpop("AE", "BE") - genop_guard_int_lt = _cmpop_guard("L", "G", "GE", "LE") - genop_guard_int_le = _cmpop_guard("LE", "GE", "G", "L") - genop_guard_int_eq = _cmpop_guard("E", "E", "NE", "NE") - genop_guard_int_ne = _cmpop_guard("NE", "NE", "E", "E") - genop_guard_int_gt = _cmpop_guard("G", "L", "LE", "GE") - genop_guard_int_ge = _cmpop_guard("GE", "LE", "L", "G") - genop_guard_ptr_eq = genop_guard_instance_ptr_eq = genop_guard_int_eq - genop_guard_ptr_ne = genop_guard_instance_ptr_ne = genop_guard_int_ne - - genop_guard_uint_gt = _cmpop_guard("A", "B", "BE", "AE") - genop_guard_uint_lt = _cmpop_guard("B", "A", "AE", "BE") - genop_guard_uint_le = _cmpop_guard("BE", "AE", "A", "B") - genop_guard_uint_ge = _cmpop_guard("AE", "BE", "B", "A") - - genop_guard_float_lt = _cmpop_guard_float("B", "A", "AE","BE") - genop_guard_float_le = _cmpop_guard_float("BE","AE", "A", "B") - genop_guard_float_eq = _cmpop_guard_float("E", "E", "NE","NE") - genop_guard_float_gt = _cmpop_guard_float("A", "B", "BE","AE") - genop_guard_float_ge = _cmpop_guard_float("AE","BE", "B", "A") + genop_float_lt = _cmpop_float("B", "A") + genop_float_le = _cmpop_float("BE","AE") + genop_float_eq = _cmpop_float("E", "E") + genop_float_ne = 
_cmpop_float("NE", "NE") + genop_float_gt = _cmpop_float("A", "B") + genop_float_ge = _cmpop_float("AE","BE") def genop_math_sqrt(self, op, arglocs, resloc): self.mc.SQRTSD(arglocs[0], resloc) @@ -1181,20 +1136,6 @@ else: raise AssertionError("bad number of bytes") - def genop_guard_float_ne(self, op, guard_op, guard_token, arglocs, result_loc): - guard_opnum = guard_op.getopnum() - if isinstance(arglocs[0], RegLoc): - self.mc.UCOMISD(arglocs[0], arglocs[1]) - else: - self.mc.UCOMISD(arglocs[1], arglocs[0]) - if guard_opnum == rop.GUARD_TRUE: - self.mc.J_il8(rx86.Conditions['P'], 6) - self.implement_guard(guard_token, 'E') - else: - self.mc.J_il8(rx86.Conditions['P'], 2) - self.mc.J_il8(rx86.Conditions['E'], 5) - self.implement_guard(guard_token) - def genop_float_neg(self, op, arglocs, resloc): # Following what gcc does: res = x ^ 0x8000000000000000 self.mc.XORPD(arglocs[0], heap(self.float_const_neg_addr)) @@ -1241,33 +1182,20 @@ else: self.mov(loc0, resloc) - def genop_guard_int_is_true(self, op, guard_op, guard_token, arglocs, resloc): - guard_opnum = guard_op.getopnum() - self.mc.CMP(arglocs[0], imm0) - if guard_opnum == rop.GUARD_TRUE: - self.implement_guard(guard_token, 'Z') + def test_location(self, loc): + assert not isinstance(loc, ImmedLoc) + if isinstance(loc, RegLoc): + self.mc.TEST_rr(loc.value, loc.value) # more compact else: - self.implement_guard(guard_token, 'NZ') + self.mc.CMP(loc, imm0) # works from memory too def genop_int_is_true(self, op, arglocs, resloc): - self.mc.CMP(arglocs[0], imm0) - rl = resloc.lowest8bits() - self.mc.SET_ir(rx86.Conditions['NE'], rl.value) - self.mc.MOVZX8(resloc, rl) - - def genop_guard_int_is_zero(self, op, guard_op, guard_token, arglocs, resloc): - guard_opnum = guard_op.getopnum() - self.mc.CMP(arglocs[0], imm0) - if guard_opnum == rop.GUARD_TRUE: - self.implement_guard(guard_token, 'NZ') - else: - self.implement_guard(guard_token, 'Z') + self.test_location(arglocs[0]) + self.flush_cc(rx86.Conditions['NZ'], 
resloc) def genop_int_is_zero(self, op, arglocs, resloc): - self.mc.CMP(arglocs[0], imm0) - rl = resloc.lowest8bits() - self.mc.SET_ir(rx86.Conditions['E'], rl.value) - self.mc.MOVZX8(resloc, rl) + self.test_location(arglocs[0]) + self.flush_cc(rx86.Conditions['Z'], resloc) def genop_same_as(self, op, arglocs, resloc): self.mov(arglocs[0], resloc) @@ -1618,30 +1546,40 @@ self.mc.MOVD32_xr(resloc.value, eax.value) self.mc.PUNPCKLDQ_xx(resloc.value, loc1.value) - def genop_guard_guard_true(self, ign_1, guard_op, guard_token, locs, ign_2): - loc = locs[0] - self.mc.TEST(loc, loc) - self.implement_guard(guard_token, 'Z') + def genop_guard_guard_true(self, guard_op, guard_token, locs, resloc): + self.implement_guard(guard_token) genop_guard_guard_nonnull = genop_guard_guard_true - def genop_guard_guard_no_exception(self, ign_1, guard_op, guard_token, - locs, ign_2): + def genop_guard_guard_false(self, guard_op, guard_token, locs, resloc): + self.guard_success_cc = rx86.invert_condition(self.guard_success_cc) + self.implement_guard(guard_token) + genop_guard_guard_isnull = genop_guard_guard_false + + def genop_guard_guard_no_exception(self, guard_op, guard_token, locs, ign): self.mc.CMP(heap(self.cpu.pos_exception()), imm0) - self.implement_guard(guard_token, 'NZ') + self.guard_success_cc = rx86.Conditions['Z'] + self.implement_guard(guard_token) + # If the previous operation was a COND_CALL, overwrite its conditional + # jump to jump over this GUARD_NO_EXCEPTION as well, if we can + if self._find_nearby_operation(-1).getopnum() == rop.COND_CALL: + jmp_adr = self.previous_cond_call_jcond + offset = self.mc.get_relative_pos() - jmp_adr + if offset <= 127: + self.mc.overwrite(jmp_adr-1, chr(offset)) - def genop_guard_guard_not_invalidated(self, ign_1, guard_op, guard_token, - locs, ign_2): + def genop_guard_guard_not_invalidated(self, guard_op, guard_token, + locs, ign): pos = self.mc.get_relative_pos() + 1 # after potential jmp guard_token.pos_jump_offset = pos 
self.pending_guard_tokens.append(guard_token) - def genop_guard_guard_exception(self, ign_1, guard_op, guard_token, - locs, resloc): + def genop_guard_guard_exception(self, guard_op, guard_token, locs, resloc): loc = locs[0] loc1 = locs[1] self.mc.MOV(loc1, heap(self.cpu.pos_exception())) self.mc.CMP(loc1, loc) - self.implement_guard(guard_token, 'NE') + self.guard_success_cc = rx86.Conditions['E'] + self.implement_guard(guard_token) self._store_and_reset_exception(self.mc, resloc) def _store_and_reset_exception(self, mc, excvalloc=None, exctploc=None, @@ -1674,41 +1612,29 @@ mc.MOV(heap(self.cpu.pos_exc_value()), tmploc) mc.MOV(heap(self.cpu.pos_exception()), exctploc) - def _gen_guard_overflow(self, guard_op, guard_token): - guard_opnum = guard_op.getopnum() - if guard_opnum == rop.GUARD_NO_OVERFLOW: - self.implement_guard(guard_token, 'O') - elif guard_opnum == rop.GUARD_OVERFLOW: - self.implement_guard(guard_token, 'NO') - else: - not_implemented("int_xxx_ovf followed by %s" % - guard_op.getopname()) + def genop_int_add_ovf(self, op, arglocs, resloc): + self.genop_int_add(op, arglocs, resloc) + self.guard_success_cc = rx86.Conditions['NO'] - def genop_guard_int_add_ovf(self, op, guard_op, guard_token, arglocs, result_loc): - self.mc.ADD(arglocs[0], arglocs[1]) - return self._gen_guard_overflow(guard_op, guard_token) + def genop_int_sub_ovf(self, op, arglocs, resloc): + self.genop_int_sub(op, arglocs, resloc) + self.guard_success_cc = rx86.Conditions['NO'] - def genop_guard_int_sub_ovf(self, op, guard_op, guard_token, arglocs, result_loc): - self.mc.SUB(arglocs[0], arglocs[1]) - return self._gen_guard_overflow(guard_op, guard_token) + def genop_int_mul_ovf(self, op, arglocs, resloc): + self.genop_int_mul(op, arglocs, resloc) + self.guard_success_cc = rx86.Conditions['NO'] - def genop_guard_int_mul_ovf(self, op, guard_op, guard_token, arglocs, result_loc): - self.mc.IMUL(arglocs[0], arglocs[1]) - return self._gen_guard_overflow(guard_op, guard_token) + 
genop_guard_guard_no_overflow = genop_guard_guard_true + genop_guard_guard_overflow = genop_guard_guard_false - def genop_guard_guard_false(self, ign_1, guard_op, guard_token, locs, ign_2): - loc = locs[0] - self.mc.TEST(loc, loc) - self.implement_guard(guard_token, 'NZ') - genop_guard_guard_isnull = genop_guard_guard_false - - def genop_guard_guard_value(self, ign_1, guard_op, guard_token, locs, ign_2): + def genop_guard_guard_value(self, guard_op, guard_token, locs, ign): if guard_op.getarg(0).type == FLOAT: assert guard_op.getarg(1).type == FLOAT self.mc.UCOMISD(locs[0], locs[1]) else: self.mc.CMP(locs[0], locs[1]) - self.implement_guard(guard_token, 'NE') + self.guard_success_cc = rx86.Conditions['E'] + self.implement_guard(guard_token) def _cmp_guard_class(self, locs): offset = self.cpu.vtable_offset @@ -1743,12 +1669,12 @@ elif IS_X86_64: self.mc.CMP32_mi((locs[0].value, 0), expected_typeid) - def genop_guard_guard_class(self, ign_1, guard_op, guard_token, locs, ign_2): + def genop_guard_guard_class(self, guard_op, guard_token, locs, ign): self._cmp_guard_class(locs) - self.implement_guard(guard_token, 'NE') + self.guard_success_cc = rx86.Conditions['E'] + self.implement_guard(guard_token) - def genop_guard_guard_nonnull_class(self, ign_1, guard_op, - guard_token, locs, ign_2): + def genop_guard_guard_nonnull_class(self, guard_op, guard_token, locs, ign): self.mc.CMP(locs[0], imm1) # Patched below self.mc.J_il8(rx86.Conditions['B'], 0) @@ -1759,7 +1685,8 @@ assert 0 < offset <= 127 self.mc.overwrite(jb_location-1, chr(offset)) # - self.implement_guard(guard_token, 'NE') + self.guard_success_cc = rx86.Conditions['E'] + self.implement_guard(guard_token) def implement_guard_recovery(self, guard_opnum, faildescr, failargs, fail_locs, frame_depth): @@ -1924,13 +1851,11 @@ # exit function self._call_footer() - def implement_guard(self, guard_token, condition=None): + def implement_guard(self, guard_token): # These jumps are patched later. 
- if condition: - self.mc.J_il(rx86.Conditions[condition], 0) - else: - self.mc.JMP_l(0) - self.mc.force_frame_size(DEFAULT_FRAME_BYTES) + assert self.guard_success_cc >= 0 + self.mc.J_il(rx86.invert_condition(self.guard_success_cc), 0) + self.guard_success_cc = rx86.cond_none guard_token.pos_jump_offset = self.mc.get_relative_pos() - 4 self.pending_guard_tokens.append(guard_token) @@ -1964,42 +1889,44 @@ cb.emit() def _store_force_index(self, guard_op): + assert (guard_op.getopnum() == rop.GUARD_NOT_FORCED or + guard_op.getopnum() == rop.GUARD_NOT_FORCED_2) faildescr = guard_op.getdescr() ofs = self.cpu.get_ofs_of_frame_field('jf_force_descr') self.mc.MOV(raw_stack(ofs), imm(rffi.cast(lltype.Signed, cast_instance_to_gcref(faildescr)))) - def _emit_guard_not_forced(self, guard_token): + def _find_nearby_operation(self, delta): + regalloc = self._regalloc + return regalloc.operations[regalloc.rm.position + delta] + + def genop_guard_guard_not_forced(self, guard_op, guard_token, locs, resloc): ofs = self.cpu.get_ofs_of_frame_field('jf_descr') self.mc.CMP_bi(ofs, 0) - self.implement_guard(guard_token, 'NE') + self.guard_success_cc = rx86.Conditions['E'] + self.implement_guard(guard_token) - def genop_guard_call_may_force(self, op, guard_op, guard_token, - arglocs, result_loc): - self._store_force_index(guard_op) + def genop_call_may_force(self, op, arglocs, result_loc): + self._store_force_index(self._find_nearby_operation(+1)) self._genop_call(op, arglocs, result_loc) - self._emit_guard_not_forced(guard_token) - def genop_guard_call_release_gil(self, op, guard_op, guard_token, - arglocs, result_loc): - self._store_force_index(guard_op) + def genop_call_release_gil(self, op, arglocs, result_loc): + self._store_force_index(self._find_nearby_operation(+1)) self._genop_call(op, arglocs, result_loc, is_call_release_gil=True) - self._emit_guard_not_forced(guard_token) def imm(self, v): return imm(v) # ------------------- CALL ASSEMBLER -------------------------- - def 
genop_guard_call_assembler(self, op, guard_op, guard_token, - arglocs, result_loc): + def genop_call_assembler(self, op, arglocs, result_loc): if len(arglocs) == 2: [argloc, vloc] = arglocs else: [argloc] = arglocs vloc = self.imm(0) - self.call_assembler(op, guard_op, argloc, vloc, result_loc, eax) - self._emit_guard_not_forced(guard_token) + self._store_force_index(self._find_nearby_operation(+1)) + self.call_assembler(op, argloc, vloc, result_loc, eax) def _call_assembler_emit_call(self, addr, argloc, _): threadlocal_loc = RawEspLoc(THREADLOCAL_OFS, INT) @@ -2200,10 +2127,9 @@ not_implemented("not implemented operation with res: %s" % op.getopname()) - def not_implemented_op_guard(self, op, guard_op, - failaddr, arglocs, resloc): + def not_implemented_op_guard(self, guard_op, guard_token, locs, resloc): not_implemented("not implemented operation (guard): %s" % - op.getopname()) + guard_op.getopname()) def closing_jump(self, target_token): target = target_token._ll_loop_code @@ -2216,10 +2142,12 @@ def label(self): self._check_frame_depth_debug(self.mc) - def cond_call(self, op, gcmap, loc_cond, imm_func, arglocs): - self.mc.TEST(loc_cond, loc_cond) - self.mc.J_il8(rx86.Conditions['Z'], 0) # patched later + def cond_call(self, op, gcmap, imm_func, arglocs): + assert self.guard_success_cc >= 0 + self.mc.J_il8(rx86.invert_condition(self.guard_success_cc), 0) + # patched later jmp_adr = self.mc.get_relative_pos() + self.guard_success_cc = rx86.cond_none # self.push_gcmap(self.mc, gcmap, store=True) # @@ -2260,8 +2188,9 @@ offset = self.mc.get_relative_pos() - jmp_adr assert 0 < offset <= 127 self.mc.overwrite(jmp_adr-1, chr(offset)) - # XXX if the next operation is a GUARD_NO_EXCEPTION, we should - # somehow jump over it too in the fast path + # might be overridden again to skip over the following + # guard_no_exception too + self.previous_cond_call_jcond = jmp_adr def malloc_cond(self, nursery_free_adr, nursery_top_adr, size, gcmap): assert size & (WORD-1) == 0 # 
must be correctly aligned @@ -2454,7 +2383,7 @@ opname = name[len('genop_discard_'):] num = getattr(rop, opname.upper()) genop_discard_list[num] = value - elif name.startswith('genop_guard_') and name != 'genop_guard_exception': + elif name.startswith('genop_guard_'): opname = name[len('genop_guard_'):] num = getattr(rop, opname.upper()) genop_guard_list[num] = value diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py --- a/rpython/jit/backend/x86/regalloc.py +++ b/rpython/jit/backend/x86/regalloc.py @@ -215,6 +215,18 @@ return self.rm.force_allocate_reg(var, forbidden_vars, selected_reg, need_lower_byte) + def force_allocate_reg_or_cc(self, var): + assert var.type == INT + if self.next_op_can_accept_cc(self.operations, self.rm.position): + # hack: return the ebp location to mean "lives in CC". This + # ebp will not actually be used, and the location will be freed + # after the next op as usual. + self.rm.force_allocate_frame_reg(var) + return ebp + else: + # else, return a regular register (not ebp). 
+ return self.rm.force_allocate_reg(var, need_lower_byte=True) + def force_spill_var(self, var): if var.type == FLOAT: return self.xrm.force_spill_var(var) @@ -278,20 +290,8 @@ self.assembler.dump('%s <- %s(%s)' % (result_loc, op, arglocs)) self.assembler.regalloc_perform_math(op, arglocs, result_loc) - def locs_for_fail(self, guard_op): - return [self.loc(v) for v in guard_op.getfailargs()] - - def perform_with_guard(self, op, guard_op, arglocs, result_loc): - faillocs = self.locs_for_fail(guard_op) - self.rm.position += 1 - self.xrm.position += 1 - self.assembler.regalloc_perform_with_guard(op, guard_op, faillocs, - arglocs, result_loc, - self.fm.get_frame_depth()) - self.possibly_free_vars(guard_op.getfailargs()) - def perform_guard(self, guard_op, arglocs, result_loc): - faillocs = self.locs_for_fail(guard_op) + faillocs = [self.loc(v) for v in guard_op.getfailargs()] if not we_are_translated(): if result_loc is not None: self.assembler.dump('%s <- %s(%s)' % (result_loc, guard_op, @@ -310,7 +310,7 @@ def walk_operations(self, inputargs, operations): i = 0 - #self.operations = operations + self.operations = operations while i < len(operations): op = operations[i] self.assembler.mc.mark_op(op) @@ -321,10 +321,7 @@ i += 1 self.possibly_free_vars_for_op(op) continue - if self.can_merge_with_next_guard(op, i, operations): - oplist_with_guard[op.getopnum()](self, op, operations[i + 1]) - i += 1 - elif not we_are_translated() and op.getopnum() == -124: + if not we_are_translated() and op.getopnum() == -124: self._consider_force_spill(op) else: oplist[op.getopnum()](self, op) @@ -336,6 +333,7 @@ assert not self.xrm.reg_bindings self.flush_loop() self.assembler.mc.mark_op(None) # end of the loop + self.operations = None for arg in inputargs: self.possibly_free_var(arg) @@ -363,14 +361,19 @@ return self.xrm.loc(v) return self.rm.loc(v) - def _consider_guard(self, op): - loc = self.rm.make_sure_var_in_reg(op.getarg(0)) - self.perform_guard(op, [loc], None) + def 
load_condition_into_cc(self, box): + if self.assembler.guard_success_cc == rx86.cond_none: + self.assembler.test_location(self.loc(box)) + self.assembler.guard_success_cc = rx86.Conditions['NZ'] - consider_guard_true = _consider_guard - consider_guard_false = _consider_guard - consider_guard_nonnull = _consider_guard - consider_guard_isnull = _consider_guard + def _consider_guard_cc(self, op): + self.load_condition_into_cc(op.getarg(0)) + self.perform_guard(op, [], None) + + consider_guard_true = _consider_guard_cc + consider_guard_false = _consider_guard_cc + consider_guard_nonnull = _consider_guard_cc + consider_guard_isnull = _consider_guard_cc def consider_finish(self, op): # the frame is in ebp, but we have to point where in the frame is @@ -415,6 +418,7 @@ consider_guard_no_overflow = consider_guard_no_exception consider_guard_overflow = consider_guard_no_exception + consider_guard_not_forced = consider_guard_no_exception def consider_guard_value(self, op): x = self.make_sure_var_in_reg(op.getarg(0)) @@ -482,17 +486,9 @@ consider_int_or = _consider_binop_symm consider_int_xor = _consider_binop_symm - def _consider_binop_with_guard(self, op, guard_op): - loc, argloc = self._consider_binop_part(op) - self.perform_with_guard(op, guard_op, [loc, argloc], loc) - - def _consider_binop_with_guard_symm(self, op, guard_op): - loc, argloc = self._consider_binop_part(op, symm=True) - self.perform_with_guard(op, guard_op, [loc, argloc], loc) - - consider_int_mul_ovf = _consider_binop_with_guard_symm - consider_int_sub_ovf = _consider_binop_with_guard - consider_int_add_ovf = _consider_binop_with_guard_symm + consider_int_mul_ovf = _consider_binop_symm + consider_int_sub_ovf = _consider_binop + consider_int_add_ovf = _consider_binop_symm def consider_int_neg(self, op): res = self.rm.force_result_in_reg(op.result, op.getarg(0)) @@ -541,7 +537,7 @@ consider_uint_floordiv = consider_int_floordiv - def _consider_compop(self, op, guard_op): + def _consider_compop(self, op): vx 
= op.getarg(0) vy = op.getarg(1) arglocs = [self.loc(vx), self.loc(vy)] @@ -551,12 +547,9 @@ pass else: arglocs[0] = self.rm.make_sure_var_in_reg(vx) - if guard_op is None: - loc = self.rm.force_allocate_reg(op.result, args, - need_lower_byte=True) - self.perform(op, arglocs, loc) - else: - self.perform_with_guard(op, guard_op, arglocs, None) + self.possibly_free_vars(args) + loc = self.force_allocate_reg_or_cc(op.result) + self.perform(op, arglocs, loc) consider_int_lt = _consider_compop consider_int_gt = _consider_compop @@ -582,7 +575,7 @@ consider_float_mul = _consider_float_op # xxx could be _symm consider_float_truediv = _consider_float_op - def _consider_float_cmp(self, op, guard_op): + def _consider_float_cmp(self, op): vx = op.getarg(0) vy = op.getarg(1) arglocs = [self.loc(vx), self.loc(vy)] @@ -592,11 +585,9 @@ arglocs[1] = self.xrm.make_sure_var_in_reg(vy) else: arglocs[0] = self.xrm.make_sure_var_in_reg(vx) - if guard_op is None: - res = self.rm.force_allocate_reg(op.result, need_lower_byte=True) - self.perform(op, arglocs, res) - else: - self.perform_with_guard(op, guard_op, arglocs, None) + self.possibly_free_vars(op.getarglist()) + loc = self.force_allocate_reg_or_cc(op.result) + self.perform(op, arglocs, loc) consider_float_lt = _consider_float_cmp consider_float_le = _consider_float_cmp @@ -737,7 +728,7 @@ else: self._consider_call(op) - def _call(self, op, arglocs, force_store=[], guard_not_forced_op=None): + def _call(self, op, arglocs, force_store=[], guard_not_forced=False): # we need to save registers on the stack: # # - at least the non-callee-saved registers @@ -750,7 +741,7 @@ # grab_frame_values() would not be able to locate values in # callee-saved registers. 
# - save_all_regs = guard_not_forced_op is not None + save_all_regs = guard_not_forced self.xrm.before_call(force_store, save_all_regs=save_all_regs) if not save_all_regs: gcrootmap = self.assembler.cpu.gc_ll_descr.gcrootmap @@ -768,12 +759,9 @@ resloc = self.rm.after_call(op.result) else: resloc = None - if guard_not_forced_op is not None: - self.perform_with_guard(op, guard_not_forced_op, arglocs, resloc) - else: - self.perform(op, arglocs, resloc) + self.perform(op, arglocs, resloc) - def _consider_call(self, op, guard_not_forced_op=None, first_arg_index=1): _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit