Author: Richard Plangger <planri...@gmail.com>
Branch: s390x-backend
Changeset: r82899:c63346ce0b33
Date: 2016-03-09 09:46 +0100
http://bitbucket.org/pypy/pypy/changeset/c63346ce0b33/
Log: merged the speed improvements from s390x-enhance-speed

diff --git a/rpython/jit/backend/zarch/assembler.py b/rpython/jit/backend/zarch/assembler.py
--- a/rpython/jit/backend/zarch/assembler.py
+++ b/rpython/jit/backend/zarch/assembler.py
@@ -50,7 +50,7 @@
         self.gcrootmap_retaddr_forced = 0
         self.failure_recovery_code = [0, 0, 0, 0]
         self.wb_slowpath = [0,0,0,0,0]
-        # self.pool = None
+        self.pool = None
 
     def setup(self, looptoken):
         BaseAssembler.setup(self, looptoken)
@@ -58,7 +58,7 @@
         if we_are_translated():
             self.debug = False
         self.current_clt = looptoken.compiled_loop_token
-        # POOL self.pool = LiteralPool()
+        self.pool = LiteralPool()
         self.mc = InstrBuilder(None)
         self.pending_guard_tokens = []
         self.pending_guard_tokens_recovered = 0
@@ -76,7 +76,7 @@
         self.current_clt = None
         self._regalloc = None
         self.mc = None
-        # self.pool = None
+        self.pool = None
 
     def target_arglocs(self, looptoken):
@@ -350,8 +350,8 @@
 
         gcrootmap = self.cpu.gc_ll_descr.gcrootmap
         if gcrootmap and gcrootmap.is_shadow_stack:
-            diff = mc.load_imm_plus(r.r5, gcrootmap.get_root_stack_top_addr())
-            mc.load(r.r5, r.r5, diff)
+            diff = mc.load_imm(r.r5, gcrootmap.get_root_stack_top_addr())
+            mc.load(r.r5, r.r5, 0)
             mc.store(r.r2, r.r5, -WORD)
 
         self._pop_core_regs_from_jitframe(mc, r.MANAGED_REGS)
@@ -636,7 +636,7 @@
         #
         operations = regalloc.prepare_loop(inputargs, operations,
                                            looptoken, clt.allgcrefs)
-        # POOL self.pool.pre_assemble(self, operations)
+        self.pool.pre_assemble(self, operations)
         entrypos = self.mc.get_relative_pos()
         self._call_header_with_stack_check()
         looppos = self.mc.get_relative_pos()
@@ -645,7 +645,7 @@
         self.update_frame_depth(frame_depth_no_fixed_size + JITFRAME_FIXED_SIZE)
         #
         size_excluding_failure_stuff = self.mc.get_relative_pos()
-        # POOL self.pool.post_assemble(self)
+        #self.pool.post_assemble(self)
         self.write_pending_failure_recoveries()
         full_size = self.mc.get_relative_pos()
         #
@@ -704,13 +704,13 @@
                                              operations,
                                              self.current_clt.allgcrefs,
                                              self.current_clt.frame_info)
-        # POOL self.pool.pre_assemble(self, operations, bridge=True)
+        self.pool.pre_assemble(self, operations, bridge=True)
         startpos = self.mc.get_relative_pos()
-        # POOL self.mc.LARL(r.POOL, l.halfword(self.pool.pool_start - startpos))
+        self.mc.LARL(r.POOL, l.halfword(self.pool.pool_start - startpos))
         self._check_frame_depth(self.mc, regalloc.get_gcmap())
         frame_depth_no_fixed_size = self._assemble(regalloc, inputargs, operations)
         codeendpos = self.mc.get_relative_pos()
-        # POOL self.pool.post_assemble(self)
+        #self.pool.post_assemble(self)
         self.write_pending_failure_recoveries()
         fullsize = self.mc.get_relative_pos()
         #
@@ -735,7 +735,6 @@
         # 'faildescr.adr_jump_offset' is the address of an instruction that is a
         # conditional jump. We must patch this conditional jump to go
         # to 'adr_new_target'.
-        # Updates the pool address
         mc = InstrBuilder()
         mc.b_abs(adr_new_target)
         mc.copy_to_raw_memory(faildescr.adr_jump_offset)
@@ -922,14 +921,17 @@
                 return
             assert 0, "not supported location"
         elif prev_loc.is_in_pool():
+            if loc.is_core_reg():
+                self.mc.LG(loc, prev_loc)
+                return
             # move immediate value to fp register
             if loc.is_fp_reg():
-                self.mc.LD(loc, prev_loc)
+                self.mc.LDY(loc, prev_loc)
                 return
             # move immediate value to memory
             elif loc.is_stack():
                 offset = loc.value
-                self.mc.LD(r.FP_SCRATCH, prev_loc)
+                self.mc.LDY(r.FP_SCRATCH, prev_loc)
                 self.mc.STDY(r.FP_SCRATCH, l.addr(offset, r.SPP))
                 return
             assert 0, "not supported location"
@@ -976,9 +978,8 @@
         if gcrootmap:
             if gcrootmap.is_shadow_stack:
                 if shadowstack_reg is None:
-                    diff = mc.load_imm_plus(r.SPP,
-                                            gcrootmap.get_root_stack_top_addr())
-                    mc.load(r.SPP, r.SPP, diff)
+                    diff = mc.load_imm(r.SPP, gcrootmap.get_root_stack_top_addr())
+                    mc.load(r.SPP, r.SPP, 0)
                     shadowstack_reg = r.SPP
             mc.load(r.SPP, shadowstack_reg, -WORD)
         wbdescr = self.cpu.gc_ll_descr.write_barrier_descr
@@ -1019,7 +1020,7 @@
         # Build a new stackframe of size STD_FRAME_SIZE_IN_BYTES
         fpoff = JIT_ENTER_EXTRA_STACK_SPACE
         self.mc.STMG(r.r6, r.r15, l.addr(-fpoff+6*WORD, r.SP))
-        # POOL self.mc.LARL(r.POOL, l.halfword(self.pool.pool_start - self.mc.get_relative_pos()))
+        self.mc.LARL(r.POOL, l.halfword(self.pool.pool_start - self.mc.get_relative_pos()))
         # f8 through f15 are saved registers (= non volatile)
         # TODO it would be good to detect if any float is used in the loop
         # and to skip this push/pop whenever no float operation occurs
@@ -1046,38 +1047,39 @@
     def _call_header_shadowstack(self, gcrootmap):
         # we need to put one word into the shadowstack: the jitframe (SPP)
         # we saved all registers to the stack
-        RCS1 = r.r2
-        RCS2 = r.r3
-        RCS3 = r.r4
+        RCS1 = r.r3
+        RCS2 = r.r4
+        RCS3 = r.r5
         mc = self.mc
-        diff = mc.load_imm_plus(RCS1, gcrootmap.get_root_stack_top_addr())
-        mc.load(RCS2, RCS1, diff)  # ld RCS2, [rootstacktop]
+        mc.load_imm(RCS1, gcrootmap.get_root_stack_top_addr())
+        mc.load(RCS2, RCS1, 0)  # ld RCS2, [rootstacktop]
         #
         mc.LGR(RCS3, RCS2)
         mc.AGHI(RCS3, l.imm(WORD))  # add RCS3, RCS2, WORD
         mc.store(r.SPP, RCS2, 0)  # std SPP, RCS2
         #
-        mc.store(RCS3, RCS1, diff)  # std RCS3, [rootstacktop]
+        mc.store(RCS3, RCS1, 0)  # std RCS3, [rootstacktop]
 
     def _call_footer_shadowstack(self, gcrootmap):
         # r6 -> r15 can be used freely, they will be restored by
         # _call_footer after this call
-        RCS1 = r.r9
-        RCS2 = r.r10
+        RCS1 = r.r8
+        RCS2 = r.r7
         mc = self.mc
-        diff = mc.load_imm_plus(RCS1, gcrootmap.get_root_stack_top_addr())
-        mc.load(RCS2, RCS1, diff)  # ld RCS2, [rootstacktop]
+        mc.load_imm(RCS1, gcrootmap.get_root_stack_top_addr())
+        mc.load(RCS2, RCS1, 0)  # ld RCS2, [rootstacktop]
         mc.AGHI(RCS2, l.imm(-WORD))  # sub RCS2, RCS2, WORD
-        mc.store(RCS2, RCS1, diff)  # std RCS2, [rootstacktop]
+        mc.store(RCS2, RCS1, 0)  # std RCS2, [rootstacktop]
 
     def _call_footer(self):
-        # the return value is the jitframe
-        self.mc.LGR(r.r2, r.SPP)
 
         gcrootmap = self.cpu.gc_ll_descr.gcrootmap
         if gcrootmap and gcrootmap.is_shadow_stack:
             self._call_footer_shadowstack(gcrootmap)
 
+        # the return value is the jitframe
+        self.mc.LGR(r.r2, r.SPP)
+
         size = STD_FRAME_SIZE_IN_BYTES
         # f8 through f15 are saved registers (= non volatile)
         # TODO it would be good to detect if any float is used in the loop
@@ -1180,11 +1182,9 @@
     # ASSEMBLER EMISSION
 
     def emit_label(self, op, arglocs, regalloc):
-        pass
-        # POOL
-        #offset = self.pool.pool_start - self.mc.get_relative_pos()
+        offset = self.pool.pool_start - self.mc.get_relative_pos()
         # load the pool address at each label
-        #self.mc.LARL(r.POOL, l.halfword(offset))
+        self.mc.LARL(r.POOL, l.halfword(offset))
 
     def emit_jump(self, op, arglocs, regalloc):
         # The backend's logic assumes that the target code is in a piece of
@@ -1201,7 +1201,7 @@
         if descr in self.target_tokens_currently_compiling:
             # a label has a LARL instruction that does not need
             # to be executed, thus remove the first opcode
-            self.mc.b_offset(descr._ll_loop_code)  # POOL + self.mc.LARL_byte_count)
+            self.mc.b_offset(descr._ll_loop_code + self.mc.LARL_byte_count)
         else:
             # POOL
             #offset = self.pool.get_descr_offset(descr) + \
@@ -1249,11 +1249,11 @@
             gcmap = self._finish_gcmap
         else:
             gcmap = lltype.nullptr(jitframe.GCMAP)
-        self.load_gcmap(self.mc, r.r2, gcmap)
+        self.load_gcmap(self.mc, r.r9, gcmap)
 
-        self.mc.load_imm(r.r3, fail_descr_loc.getint())
-        self.mc.STG(r.r3, l.addr(ofs, r.SPP))
-        self.mc.STG(r.r2, l.addr(ofs2, r.SPP))
+        self.mc.load_imm(r.r10, fail_descr_loc.getint())
+        self.mc.STG(r.r9, l.addr(ofs2, r.SPP))
+        self.mc.STG(r.r10, l.addr(ofs, r.SPP))
 
         # exit function
         self._call_footer()
diff --git a/rpython/jit/backend/zarch/codebuilder.py b/rpython/jit/backend/zarch/codebuilder.py
--- a/rpython/jit/backend/zarch/codebuilder.py
+++ b/rpython/jit/backend/zarch/codebuilder.py
@@ -35,7 +35,6 @@
         GuardToken.__init__(self, cpu, gcmap, descr, failargs, faillocs,
                             guard_opnum, frame_depth)
         self.fcond = fcond
-        # POOL self._pool_offset = -1
 
 class AbstractZARCHBuilder(object):
diff --git a/rpython/jit/backend/zarch/helper/assembler.py b/rpython/jit/backend/zarch/helper/assembler.py
--- a/rpython/jit/backend/zarch/helper/assembler.py
+++ b/rpython/jit/backend/zarch/helper/assembler.py
@@ -12,8 +12,7 @@
     l1 = arglocs[1]
     assert not l0.is_imm()
     # do the comparison
-    # POOL self.mc.cmp_op(l0, l1, pool=l1.is_in_pool(), imm=l1.is_imm(), signed=signed, fp=fp)
-    self.mc.cmp_op(l0, l1, imm=l1.is_imm(), signed=signed, fp=fp)
+    self.mc.cmp_op(l0, l1, pool=l1.is_in_pool(), imm=l1.is_imm(), signed=signed, fp=fp)
     self.flush_cc(condition, arglocs[2])
 
@@ -30,31 +29,21 @@
         f.name = 'emit_shift_' + func
     return f
 
-def gen_emit_rr(rr_func):
+def gen_emit_rr_rp(rr_func, rp_func):
     def f(self, op, arglocs, regalloc):
         l0, l1 = arglocs
-        getattr(self.mc, rr_func)(l0, l1)
+        if l1.is_in_pool():
+            getattr(self.mc, rp_func)(l0, l1)
+        else:
+            getattr(self.mc, rr_func)(l0, l1)
     return f
 
-# POOL
-#def gen_emit_rr_or_rpool(rr_func, rp_func):
-#    """ the parameters can either be both in registers or
-#        the first is in the register, second in literal pool.
-#    """
-#    def f(self, op, arglocs, regalloc):
-#        l0, l1 = arglocs
-#        if l1.is_imm() and not l1.is_in_pool():
-#            assert 0, "logical imm must reside in pool!"
-#        if l1.is_in_pool():
-#            getattr(self.mc, rp_func)(l0, l1)
-#        else:
-#            getattr(self.mc, rr_func)(l0, l1)
-#    return f
-
-def gen_emit_rr_rh_ri(rr_func, rh_func, ri_func):
+def gen_emit_rr_rh_ri_rp(rr_func, rh_func, ri_func, rp_func):
     def emit(self, op, arglocs, regalloc):
         l0, l1 = arglocs
-        if l1.is_imm():
+        if l1.is_in_pool():
+            getattr(self.mc, rp_func)(l0, l1)
+        elif l1.is_imm():
             if check_imm_value(l1.value):
                 getattr(self.mc, rh_func)(l0, l1)
             else:
@@ -63,27 +52,18 @@
             getattr(self.mc, rr_func)(l0, l1)
     return emit
 
-# POOL
-#def gen_emit_imm_pool_rr(imm_func, pool_func, rr_func):
-#    def emit(self, op, arglocs, regalloc):
-#        l0, l1 = arglocs
-#        if l1.is_in_pool():
-#            getattr(self.mc, pool_func)(l0, l1)
-#        elif l1.is_imm():
-#            getattr(self.mc, imm_func)(l0, l1)
-#        else:
-#            getattr(self.mc, rr_func)(l0, l1)
-#    return emit
-
-def gen_emit_div_mod(rr_func):
+def gen_emit_div_mod(rr_func, rp_func):
     def emit(self, op, arglocs, regalloc):
         lr, lq, l1 = arglocs
         # lr == remainer, lq == quotient
         # when entering the function lr contains the dividend
         # after this operation either lr or lq is used further
         assert not l1.is_imm(), "imm divider not supported"
-        # remainer is always a even register r0, r2, ... , r14
+        # remainer is always an even register r0, r2, ... , r14
         assert lr.is_even()
         assert lq.is_odd()
         self.mc.XGR(lr, lr)
-        getattr(self.mc,rr_func)(lr, l1)
+        if l1.is_in_pool():
+            getattr(self.mc,rp_func)(lr, l1)
+        else:
+            getattr(self.mc,rr_func)(lr, l1)
     return emit
diff --git a/rpython/jit/backend/zarch/helper/regalloc.py b/rpython/jit/backend/zarch/helper/regalloc.py
--- a/rpython/jit/backend/zarch/helper/regalloc.py
+++ b/rpython/jit/backend/zarch/helper/regalloc.py
@@ -26,8 +26,7 @@
     if check_imm32(a1):
         l1 = imm(a1.getint())
     else:
-        # POOL l1 = self.ensure_reg_or_pool(a1)
-        l1 = self.ensure_reg(a1)
+        l1 = self.ensure_reg_or_pool(a1)
     l0 = self.force_result_in_reg(op, a0)
     return [l0, l1]
 
@@ -39,7 +38,7 @@
     if check_imm32(a1):
         l1 = imm(a1.getint())
     else:
-        l1 = self.ensure_reg(a1)
+        l1 = self.ensure_reg_or_pool(a1)
     l0 = self.force_result_in_reg(op, a0)
     return [l0, l1]
 
@@ -51,7 +50,7 @@
     if check_imm32(a1):
         l1 = imm(a1.getint())
     else:
-        l1 = self.ensure_reg(a1)
+        l1 = self.ensure_reg_or_pool(a1)
     lr,lq = self.rm.ensure_even_odd_pair(a0, op, bind_first=False)
     return [lr, lq, l1]
 
@@ -61,7 +60,7 @@
     a1 = op.getarg(1)
     l1 = self.ensure_reg(a1)
     if isinstance(a0, Const):
-        loc = self.ensure_reg(a0)
+        loc = self.ensure_reg_or_pool(a0)
         lr,lq = self.rm.ensure_even_odd_pair(a0, op,
                               bind_first=modulus, must_exist=False,
                               move_regs=False)
@@ -78,7 +77,6 @@
     a0 = op.getarg(0)
     a1 = op.getarg(1)
     # sub is not commotative, thus cannot swap operands
-    # POOL l1 = self.ensure_reg_or_pool(a1)
     l0 = self.ensure_reg(a0)
     l1 = self.ensure_reg(a1)
     res = self.force_allocate_reg(op)
diff --git a/rpython/jit/backend/zarch/opassembler.py b/rpython/jit/backend/zarch/opassembler.py
--- a/rpython/jit/backend/zarch/opassembler.py
+++ b/rpython/jit/backend/zarch/opassembler.py
@@ -3,7 +3,7 @@
     STD_FRAME_SIZE_IN_BYTES)
 from rpython.jit.backend.zarch.arch import THREADLOCAL_ADDR_OFFSET
 from rpython.jit.backend.zarch.helper.assembler import (gen_emit_cmp_op,
-    gen_emit_rr, gen_emit_shift, gen_emit_rr_rh_ri, gen_emit_div_mod)
+    gen_emit_rr_rp, gen_emit_shift, gen_emit_rr_rh_ri_rp, gen_emit_div_mod)
 from rpython.jit.backend.zarch.helper.regalloc import (check_imm,
     check_imm_value)
 from rpython.jit.metainterp.history import (ConstInt)
@@ -28,7 +28,7 @@
 class IntOpAssembler(object):
     _mixin_ = True
 
-    emit_int_add = gen_emit_rr_rh_ri('AGR', 'AGHI', 'AGFI')
+    emit_int_add = gen_emit_rr_rh_ri_rp('AGR', 'AGHI', 'AGFI', 'AG')
     emit_int_add_ovf = emit_int_add
 
     emit_nursery_ptr_increment = emit_int_add
@@ -36,25 +36,16 @@
     def emit_int_sub(self, op, arglocs, regalloc):
         res, l0, l1 = arglocs
         self.mc.SGRK(res, l0, l1)
-        # POOL
-        #if l1.is_imm() and not l1.is_in_pool():
-        #    assert 0, "logical imm must reside in pool!"
-        #if l1.is_in_pool():
-        #    self.mc.SG(l0, l1)
-        #else:
-        #    self.mc.SGR(l0, l1)
 
     emit_int_sub_ovf = emit_int_sub
 
-    emit_int_mul = gen_emit_rr_rh_ri('MSGR', 'MGHI', 'MSGFI')
+    emit_int_mul = gen_emit_rr_rh_ri_rp('MSGR', 'MGHI', 'MSGFI', 'MSG')
 
     def emit_int_mul_ovf(self, op, arglocs, regalloc):
         lr, lq, l1 = arglocs
-        # POOL
-        # if l1.is_in_pool():
-        #     self.mc.LG(r.SCRATCH, l1)
-        #     l1 = r.SCRATCH
-        # elif
-        if l1.is_imm():
+        if l1.is_in_pool():
+            self.mc.LG(r.SCRATCH, l1)
+            l1 = r.SCRATCH
+        elif l1.is_imm():
             self.mc.LGFI(r.SCRATCH, l1)
             l1 = r.SCRATCH
         else:
@@ -169,11 +160,11 @@
             omc.BRC(c.ANY, l.imm(label_end - jmp_neither_lqlr_overflow))
             omc.overwrite()
 
-    emit_int_floordiv = gen_emit_div_mod('DSGR')
-    emit_uint_floordiv = gen_emit_div_mod('DLGR')
+    emit_int_floordiv = gen_emit_div_mod('DSGR', 'DSG')
+    emit_uint_floordiv = gen_emit_div_mod('DLGR', 'DLG')
     # NOTE division sets one register with the modulo value, thus
     # the regalloc ensures the right register survives.
-    emit_int_mod = gen_emit_div_mod('DSGR')
+    emit_int_mod = gen_emit_div_mod('DSGR', 'DSG')
 
     def emit_int_invert(self, op, arglocs, regalloc):
         l0, = arglocs
@@ -213,9 +204,9 @@
             self.mc.CGHI(l0, l.imm(0))
         self.flush_cc(c.NE, res)
 
-    emit_int_and = gen_emit_rr("NGR")
-    emit_int_or = gen_emit_rr("OGR")
-    emit_int_xor = gen_emit_rr("XGR")
+    emit_int_and = gen_emit_rr_rp("NGR", "NG")
+    emit_int_or = gen_emit_rr_rp("OGR", "OG")
+    emit_int_xor = gen_emit_rr_rp("XGR", "XG")
 
     emit_int_rshift = gen_emit_shift("SRAG")
     emit_int_lshift = gen_emit_shift("SLLG")
@@ -242,10 +233,10 @@
 class FloatOpAssembler(object):
     _mixin_ = True
 
-    emit_float_add = gen_emit_rr('ADBR')
-    emit_float_sub = gen_emit_rr('SDBR')
-    emit_float_mul = gen_emit_rr('MDBR')
-    emit_float_truediv = gen_emit_rr('DDBR')
+    emit_float_add = gen_emit_rr_rp('ADBR', 'ADB')
+    emit_float_sub = gen_emit_rr_rp('SDBR', 'SDB')
+    emit_float_mul = gen_emit_rr_rp('MDBR', 'MDB')
+    emit_float_truediv = gen_emit_rr_rp('DDBR', 'DDB')
 
     # Support for NaNs: S390X sets condition code to 0x3 (unordered)
     # whenever any operand is nan.
@@ -1072,7 +1063,7 @@
         self._store_force_index(self._find_nearby_operation(regalloc, +1))
         # 'result_loc' is either r2, f0 or None
         self.call_assembler(op, argloc, vloc, result_loc, r.r2)
-        # POOL self.mc.LARL(r.POOL, l.halfword(self.pool.pool_start - self.mc.get_relative_pos()))
+        self.mc.LARL(r.POOL, l.halfword(self.pool.pool_start - self.mc.get_relative_pos()))
 
     emit_call_assembler_i = _genop_call_assembler
     emit_call_assembler_r = _genop_call_assembler
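[Editorial sketch, not part of the commit.] The hunks above switch the backend back to a literal pool: constants are written as 8-byte slots in front of the generated code, LARL points r.POOL at that block, and RX-style instructions (AG, MSG, NG, ADB, ...) fetch their second operand from a pool slot instead of rebuilding it from immediates. A minimal sketch of the bookkeeping under stated assumptions — LiteralPoolSketch is a made-up name, and the real pool writes through asm.mc.write_i64 rather than collecting bytes in a list:

import struct

class LiteralPoolSketch(object):
    def __init__(self):
        self.size = 0          # grows by 8 per distinct constant
        self.offset_map = {}   # unique 64-bit value -> byte offset in the pool
        self.data = []         # stands in for asm.mc.write_i64(uvalue)

    def ensure_value(self, uvalue):
        # every distinct value gets exactly one slot; duplicates reuse it
        if uvalue not in self.offset_map:
            self.offset_map[uvalue] = self.size
            self.size += 8
            self.data.append(struct.pack(">q", uvalue))  # big-endian, as on s390x
        return self.offset_map[uvalue]

pool = LiteralPoolSketch()
assert pool.ensure_value(2**44) == 0
assert pool.ensure_value(2**55) == 8
assert pool.ensure_value(2**44) == 0   # deduplicated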
diff --git a/rpython/jit/backend/zarch/pool.py b/rpython/jit/backend/zarch/pool.py
--- a/rpython/jit/backend/zarch/pool.py
+++ b/rpython/jit/backend/zarch/pool.py
@@ -1,5 +1,6 @@
 from rpython.jit.backend.zarch import registers as r
 from rpython.jit.backend.zarch import locations as l
+from rpython.rlib import rgil
 from rpython.jit.metainterp.history import (INT, REF, FLOAT,
     TargetToken)
 from rpython.rlib.objectmodel import we_are_translated
@@ -19,81 +20,25 @@
         self.size = 0
         # the offset to index the pool
         self.pool_start = 0
-        self.label_offset = 0
-        self.label_count = 0
         # for constant offsets
         self.offset_map = {}
         # for descriptors
         self.offset_descr = {}
-        self.constant_64_zeros = -1
-        self.constant_64_ones = -1
-        self.constant_64_sign_bit = -1
-        self.constant_max_64_positive = -1
+
+    def reset(self):
+        self.pool_start = 0
+        self.size = 0
+        self.offset_map = {}
+        self.offset_descr = {}
 
     def ensure_can_hold_constants(self, asm, op):
-        opnum = op.getopnum()
-        if op.is_guard():
-            # 1x gcmap pointer
-            # 1x target address
-            self.offset_descr[op.getdescr()] = self.size
-            self.allocate_slot(2*8)
-        elif op.getopnum() == rop.JUMP:
-            descr = op.getdescr()
-            if descr not in asm.target_tokens_currently_compiling:
-                # this is a 'long' jump instead of a relative jump
-                self.offset_descr[descr] = self.size
-                self.allocate_slot(8)
-        elif op.getopnum() == rop.LABEL:
-            descr = op.getdescr()
-            if descr not in asm.target_tokens_currently_compiling:
-                # this is a 'long' jump instead of a relative jump
-                self.offset_descr[descr] = self.size
-                self.allocate_slot(8)
-        elif op.getopnum() == rop.INT_INVERT:
-            self.constant_64_ones = 1 # we need constant ones!!!
-        elif op.getopnum() == rop.INT_MUL_OVF:
-            self.constant_64_sign_bit = 1
-            self.constant_max_64_positive = 1
-        elif opnum == rop.INT_RSHIFT or opnum == rop.INT_LSHIFT or \
-             opnum == rop.UINT_RSHIFT:
-            a0 = op.getarg(0)
-            if a0.is_constant():
-                self.reserve_literal(8, a0)
+        # allocates 8 bytes in memory for pointers, long integers or floats
+        if op.is_jit_debug():
             return
-        elif opnum == rop.GC_STORE or opnum == rop.GC_STORE_INDEXED:
-            arg = op.getarg(0)
-            if arg.is_constant():
-                self.reserve_literal(8, arg)
-            arg = op.getarg(1)
-            if arg.is_constant():
-                self.reserve_literal(8, arg)
-            arg = op.getarg(2)
-            if arg.is_constant():
-                self.reserve_literal(8, arg)
-            return
-        elif opnum in (rop.GC_LOAD_F,
-                       rop.GC_LOAD_I,
-                       rop.GC_LOAD_R,) \
-             or opnum in (rop.GC_LOAD_INDEXED_F,
-                       rop.GC_LOAD_INDEXED_R,
-                       rop.GC_LOAD_INDEXED_I,):
-            arg = op.getarg(0)
-            if arg.is_constant():
-                self.reserve_literal(8, arg)
-            arg = op.getarg(1)
-            if arg.is_constant():
-                self.reserve_literal(8, arg)
-            return
-        elif op.is_call_release_gil():
-            for arg in op.getarglist()[1:]:
-                if arg.is_constant():
-                    self.reserve_literal(8, arg)
-            return
-        elif opnum == rop.COND_CALL_GC_WB_ARRAY:
-            self.constant_64_ones = 1 # we need constant ones!!!
+
         for arg in op.getarglist():
             if arg.is_constant():
-                self.reserve_literal(8, arg)
+                self.reserve_literal(8, arg, asm)
 
     def contains_constant(self, unique_val):
         return unique_val in self.offset_map
@@ -101,6 +46,10 @@
     def get_descr_offset(self, descr):
         return self.offset_descr[descr]
 
+    def contains_box(self, box):
+        uvalue = self.unique_value(box)
+        return self.contains_constant(uvalue)
+
     def get_offset(self, box):
         assert box.is_constant()
         uvalue = self.unique_value(box)
@@ -108,11 +57,6 @@
         assert self.offset_map[uvalue] >= 0
         return self.offset_map[uvalue]
 
-    def get_direct_offset(self, unique_val):
-        """ Get the offset directly using a unique value,
-            use get_offset if you have a Const box """
-        return self.offset_map[unique_val]
-
     def unique_value(self, val):
         if val.type == FLOAT:
             if val.getfloat() == 0.0:
@@ -124,21 +68,14 @@
             assert val.type == REF
             return rffi.cast(lltype.Signed, val.getref_base())
 
-    def reserve_literal(self, size, box):
+    def reserve_literal(self, size, box, asm):
         uvalue = self.unique_value(box)
-        if uvalue not in self.offset_map:
-            self.offset_map[uvalue] = self.size
-            self.allocate_slot(size)
-
-    def reset(self):
-        self.pool_start = 0
-        self.label_offset = 0
-        self.size = 0
-        self.offset_map = {}
-        self.constant_64_zeros = -1
-        self.constant_64_ones = -1
-        self.constant_64_sign_bit = -1
-        self.constant_max_64_positive = -1
+        if box.type == INT and -2**31 <= uvalue <= 2**31-1:
+            # we do not allocate non 64 bit values, these
+            # can be loaded as imm by LGHI/LGFI
+            return
+        #
+        self._ensure_value(uvalue, asm)
 
     def check_size(self, size=-1):
         if size == -1:
@@ -149,18 +86,19 @@
             llop.debug_print(lltype.Void, msg)
             raise PoolOverflow(msg)
 
+    def _ensure_value(self, uvalue, asm):
+        if uvalue not in self.offset_map:
+            self.offset_map[uvalue] = self.size
+            self.allocate_slot(8)
+            asm.mc.write_i64(uvalue)
+        return self.offset_map[uvalue]
+
     def allocate_slot(self, size):
         val = self.size + size
         self.check_size(val)
         self.size = val
         assert val >= 0
 
-    def ensure_value(self, val):
-        if val not in self.offset_map:
-            self.offset_map[val] = self.size
-            self.allocate_slot(8)
-        return self.offset_map[val]
-
     def pre_assemble(self, asm, operations, bridge=False):
         # O(len(operations)). I do not think there is a way
         # around this.
@@ -179,27 +117,34 @@
         self.pool_start = asm.mc.get_relative_pos()
         for op in operations:
             self.ensure_can_hold_constants(asm, op)
-        self.ensure_value(asm.cpu.pos_exc_value())
+        self._ensure_value(asm.cpu.pos_exc_value(), asm)
+        # the top of shadow stack
+        gcrootmap = asm.cpu.gc_ll_descr.gcrootmap
+        if gcrootmap and gcrootmap.is_shadow_stack:
+            self._ensure_value(gcrootmap.get_root_stack_top_addr(), asm)
+        # endaddr of insert stack check
+        endaddr, lengthaddr, _ = asm.cpu.insert_stack_check()
+        self._ensure_value(endaddr, asm)
+        # fast gil
+        fastgil = rffi.cast(lltype.Signed, rgil.gil_fetch_fastgil())
+        self._ensure_value(fastgil, asm)
         # TODO add more values that are loaded with load_imm
-        if self.size == 0:
-            # no pool needed!
-            return
-        assert self.size % 2 == 0, "not aligned properly"
-        if self.constant_64_ones != -1:
-            self.constant_64_ones = self.ensure_value(-1)
-        if self.constant_64_zeros != -1:
-            self.constant_64_zeros = self.ensure_value(0x0)
-        if self.constant_64_sign_bit != -1:
-            self.constant_64_sign_bit = self.ensure_value(-2**63) # == 0x8000000000000000
-        if self.constant_max_64_positive != -1:
-            self.constant_max_64_positive = self.ensure_value(0x7fffFFFFffffFFFF)
-        asm.mc.write('\x00' * self.size)
-        wrote = 0
-        for val, offset in self.offset_map.items():
-            self.overwrite_64(asm.mc, offset, val)
-            wrote += 8
 
-    def overwrite_64(self, mc, index, value):
+    # XXX def post_assemble(self, asm):
+    # XXX     mc = asm.mc
+    # XXX     pending_guard_tokens = asm.pending_guard_tokens
+    # XXX     if self.size == 0:
+    # XXX         return
+    # XXX     for guard_token in pending_guard_tokens:
+    # XXX         descr = guard_token.faildescr
+    # XXX         offset = self.offset_descr[descr]
+    # XXX         assert isinstance(offset, int)
+    # XXX         assert offset >= 0
+    # XXX         assert guard_token._pool_offset != -1
+    # XXX         ptr = rffi.cast(lltype.Signed, guard_token.gcmap)
+    # XXX         self._overwrite_64(mc, offset + RECOVERY_GCMAP_POOL_OFFSET, ptr)
+
+    def _overwrite_64(self, mc, index, value):
         index += self.pool_start
         mc.overwrite(index,   chr(value >> 56 & 0xff))
@@ -210,17 +155,3 @@
         mc.overwrite(index+5, chr(value >> 16 & 0xff))
         mc.overwrite(index+6, chr(value >> 8 & 0xff))
         mc.overwrite(index+7, chr(value & 0xff))
-
-    def post_assemble(self, asm):
-        mc = asm.mc
-        pending_guard_tokens = asm.pending_guard_tokens
-        if self.size == 0:
-            return
-        for guard_token in pending_guard_tokens:
-            descr = guard_token.faildescr
-            offset = self.offset_descr[descr]
-            assert isinstance(offset, int)
-            assert offset >= 0
-            assert guard_token._pool_offset != -1
-            ptr = rffi.cast(lltype.Signed, guard_token.gcmap)
-            self.overwrite_64(mc, offset + RECOVERY_GCMAP_POOL_OFFSET, ptr)
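[Editorial sketch, not part of the commit.] Two details of the new pool are worth spelling out. First, reserve_literal now skips integer constants that fit a 32-bit signed immediate, because LGHI (16-bit) and LGFI (32-bit) can materialize those in a register without a memory load; only wider values pay for a pool slot. A sketch of that boundary (needs_pool_slot is a hypothetical name), using the same constants the updated test_pool.py exercises:

def needs_pool_slot(value):
    # 32-bit signed immediates are loaded via LGHI/LGFI, not the pool
    return not (-2**31 <= value <= 2**31 - 1)

assert not needs_pool_slot(1)        # fits LGHI
assert not needs_pool_slot(-2**31)   # still fits LGFI
assert needs_pool_slot(2**31)        # one past the immediate range
assert needs_pool_slot(-2**33)       # the constant added to test_constants_arith

Second, _overwrite_64 patches an already-emitted slot byte by byte, most significant byte first, matching the big-endian slot layout; here a bytearray stands in for the machine-code buffer:

def overwrite_64(buf, index, value):
    value &= 0xFFFFFFFFFFFFFFFF       # view as unsigned 64-bit
    for i in range(8):
        buf[index + i] = (value >> (56 - 8 * i)) & 0xFF

buf = bytearray(8)
overwrite_64(buf, 0, 0x0123456789ABCDEF)
assert list(buf) == [0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF]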
diff --git a/rpython/jit/backend/zarch/regalloc.py b/rpython/jit/backend/zarch/regalloc.py
--- a/rpython/jit/backend/zarch/regalloc.py
+++ b/rpython/jit/backend/zarch/regalloc.py
@@ -62,44 +62,24 @@
     assert set(save_around_call_regs).issubset(all_regs)
     pool = None
 
-    def convert_to_adr(self, c):
-        assert isinstance(c, ConstFloat)
-        adr = self.assembler.datablockwrapper.malloc_aligned(8, 8)
-        x = c.getfloatstorage()
-        rffi.cast(rffi.CArrayPtr(longlong.FLOATSTORAGE), adr)[0] = x
-        return adr
-
-    def convert_to_imm(self, c):
-        adr = self.convert_to_adr(c)
-        return l.ConstFloatLoc(adr)
-
-    # POOL
-    #def convert_to_imm(self, c):
-    #    off = self.pool.get_offset(c)
-    #    return l.pool(off, float=True)
-
     def __init__(self, longevity, frame_manager=None, assembler=None):
         RegisterManager.__init__(self, longevity, frame_manager, assembler)
 
     def call_result_location(self, v):
         return r.FPR_RETURN
 
-    # POOL
-    # def place_in_pool(self, var):
-    #     offset = self.assembler.pool.get_offset(var)
-    #     return l.pool(offset, float=True)
+    def convert_to_imm(self, c):
+        return l.pool(self.assembler.pool.get_offset(c), float=True)
 
-    # POOL
-    #def ensure_reg_or_pool(self, box):
-    #    if isinstance(box, Const):
-    #        loc = self.get_scratch_reg()
-    #        immvalue = self.convert_to_int(box)
-    #        self.assembler.mc.load_imm(loc, immvalue)
-    #    else:
-    #        assert box in self.temp_boxes
-    #        loc = self.make_sure_var_in_reg(box,
-    #                forbidden_vars=self.temp_boxes)
-    #    return loc
+    def ensure_reg_or_pool(self, box):
+        if isinstance(box, Const):
+            offset = self.assembler.pool.get_offset(box)
+            return l.pool(offset, float=True)
+        else:
+            assert box in self.temp_boxes
+            loc = self.make_sure_var_in_reg(box,
+                    forbidden_vars=self.temp_boxes)
+            return loc
 
     def get_scratch_reg(self):
         box = TempVar()
@@ -109,21 +89,14 @@
 
     def ensure_reg(self, box):
         if isinstance(box, Const):
-            # POOL
-            #poolloc = self.place_in_pool(box)
-            #tmp = TempVar()
-            #reg = self.force_allocate_reg(tmp, self.temp_boxes)
-            #self.temp_boxes.append(tmp)
-            #assert poolloc.displace >= 0
-            #if poolloc.displace <= 2**12-1:
-            #    self.assembler.mc.LD(reg, poolloc)
-            #else:
-            #    self.assembler.mc.LDY(reg, poolloc)
-            loc = self.get_scratch_reg()
-            immadrvalue = self.convert_to_adr(box)
-            mc = self.assembler.mc
-            mc.load_imm(r.SCRATCH, immadrvalue)
-            mc.LD(loc, l.addr(0, r.SCRATCH))
+            offset = self.assembler.pool.get_offset(box)
+            poolloc = l.pool(offset, float=True)
+            reg = self.get_scratch_reg()
+            if poolloc.displace <= 2**11-1:
+                self.assembler.mc.LD(reg, poolloc)
+            else:
+                self.assembler.mc.LDY(reg, poolloc)
+            return reg
         else:
             assert box in self.temp_boxes
             loc = self.make_sure_var_in_reg(box,
@@ -159,32 +132,25 @@
         assert isinstance(c, ConstPtr)
         return rffi.cast(lltype.Signed, c.value)
 
+    def ensure_reg_or_pool(self, box):
+        if isinstance(box, Const):
+            if self.assembler.pool.contains_box(box):
+                offset = self.assembler.pool.get_offset(box)
+                return l.pool(offset)
+            else:
+                return self.ensure_reg(box)
+        else:
+            assert box in self.temp_boxes
+            loc = self.make_sure_var_in_reg(box,
+                    forbidden_vars=self.temp_boxes)
+            return loc
+
     def convert_to_imm(self, c):
-        val = self.convert_to_int(c)
-        return l.imm(val)
+        if self.assembler.pool.contains_box(c):
+            return l.pool(self.assembler.pool.get_offset(c))
+        immvalue = self.convert_to_int(c)
+        return l.imm(immvalue)
 
-    # POOL
-    #def convert_to_imm(self, c):
-    #    off = self.pool.get_offset(c)
-    #    return l.pool(off)
-
-    #def ensure_reg_or_pool(self, box):
-    #    if isinstance(box, Const):
-    #        offset = self.assembler.pool.get_offset(box)
-    #        return l.pool(offset)
-    #    else:
-    #        assert box in self.temp_boxes
-    #        loc = self.make_sure_var_in_reg(box,
-    #                forbidden_vars=self.temp_boxes)
-    #    return loc
-
-    # POOL
-    #offset = self.assembler.pool.get_offset(box)
-    #poolloc = l.pool(offset)
-    #tmp = TempInt()
-    #reg = self.force_allocate_reg(tmp, forbidden_vars=self.temp_boxes)
-    #self.temp_boxes.append(tmp)
-    #self.assembler.mc.LG(reg, poolloc)
     def ensure_reg(self, box):
         if isinstance(box, Const):
             loc = self.get_scratch_reg()
@@ -388,10 +354,10 @@
         self.rm = ZARCHRegisterManager(self.longevity,
                                        frame_manager = self.fm,
                                        assembler = self.assembler)
-        #self.rm.pool = self.assembler.pool
+        self.rm.pool = self.assembler.pool
        self.fprm = FPRegisterManager(self.longevity, frame_manager = self.fm,
                                       assembler = self.assembler)
-        #self.fprm.pool = self.assembler.pool
+        self.fprm.pool = self.assembler.pool
         return operations
 
     def prepare_loop(self, inputargs, operations, looptoken, allgcrefs):
@@ -607,12 +573,11 @@
         else:
             return self.rm.call_result_location(v)
 
-    # POOL
-    #def ensure_reg_or_pool(self, box):
-    #    if box.type == FLOAT:
-    #        return self.fprm.ensure_reg_or_pool(box)
-    #    else:
-    #        return self.rm.ensure_reg_or_pool(box)
+    def ensure_reg_or_pool(self, box):
+        if box.type == FLOAT:
+            return self.fprm.ensure_reg_or_pool(box)
+        else:
+            return self.rm.ensure_reg_or_pool(box)
 
     def ensure_reg(self, box):
         if box.type == FLOAT:
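[Editorial sketch, not part of the commit.] On the regalloc side, ensure_reg for float constants now picks the load form by displacement size: LD (RX format) encodes a 12-bit unsigned displacement, LDY (RXY format) a 20-bit signed one, and the code above switches to LDY conservatively past 2**11-1. A sketch of that choice, with pick_load_mnemonic as a made-up helper:

def pick_load_mnemonic(displace):
    # mirrors the branch in FPRegisterManager.ensure_reg; the cutoff is
    # conservative, since LD could in principle reach up to 2**12-1
    if 0 <= displace <= 2**11 - 1:
        return "LD"    # short-displacement form
    return "LDY"       # long-displacement form

assert pick_load_mnemonic(100) == "LD"
assert pick_load_mnemonic(4000) == "LDY"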
diff --git a/rpython/jit/backend/zarch/registers.py b/rpython/jit/backend/zarch/registers.py
--- a/rpython/jit/backend/zarch/registers.py
+++ b/rpython/jit/backend/zarch/registers.py
@@ -7,7 +7,7 @@
 [r0,r1,r2,r3,r4,r5,r6,r7,r8,
  r9,r10,r11,r12,r13,r14,r15] = registers
 
-MANAGED_REGS = [r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r13] # keep this list sorted (asc)!
+MANAGED_REGS = [r2,r3,r4,r5,r6,r7,r8,r9,r10,r11] # keep this list sorted (asc)!
 MANAGED_REG_PAIRS = [(r2,r3), (r4,r5), (r6,r7), (r8,r9), (r10,r11)]
 VOLATILES = [r2,r3,r4,r5,r6]
 SP = r15
@@ -39,6 +39,7 @@
 for _r in MANAGED_FP_REGS:
     ALL_REG_INDEXES[_r] = len(ALL_REG_INDEXES)
 # NOT used, but keeps JITFRAME_FIXED_SIZE even
+ALL_REG_INDEXES[f15] = len(ALL_REG_INDEXES)
 JITFRAME_FIXED_SIZE = len(ALL_REG_INDEXES)
 
 def odd_reg(r):
diff --git a/rpython/jit/backend/zarch/test/test_pool.py b/rpython/jit/backend/zarch/test/test_pool.py
--- a/rpython/jit/backend/zarch/test/test_pool.py
+++ b/rpython/jit/backend/zarch/test/test_pool.py
@@ -12,13 +12,18 @@
 from rpython.jit.backend.detect_cpu import getcpuclass
 from rpython.jit.tool.oparser import parse
 
+class FakeAsm(object):
+    def write_i64(self, val):
+        pass
+
 class TestPoolZARCH(object):
     def setup_class(self):
         self.calldescr = None
 
     def setup_method(self, name):
         self.pool = LiteralPool()
-        self.asm = None
+        self.asm = FakeAsm()
+        self.asm.mc = FakeAsm()
         self.cpu = getcpuclass()(None, None)
         self.cpu.setup_once()
 
@@ -34,20 +39,20 @@
             return False
 
     def test_constant_in_call_malloc(self):
-        c = ConstPtr(rffi.cast(llmemory.GCREF, 0xdeadbeef))
+        c = ConstPtr(rffi.cast(llmemory.GCREF, 0xdeadbeef1234))
         self.ensure_can_hold(rop.CALL_MALLOC_GC, [c], descr=self.calldescr)
         assert self.const_in_pool(c)
-        assert self.const_in_pool(ConstPtr(rffi.cast(llmemory.GCREF, 0xdeadbeef)))
+        assert self.const_in_pool(ConstPtr(rffi.cast(llmemory.GCREF, 0xdeadbeef1234)))
 
     @py.test.mark.parametrize('opnum',
             [rop.INT_ADD, rop.INT_SUB, rop.INT_MUL])
     def test_constants_arith(self, opnum):
         for c1 in [ConstInt(1), ConstInt(2**44), InputArgInt(1)]:
-            for c2 in [InputArgInt(1), ConstInt(1), ConstInt(2**55)]:
+            for c2 in [InputArgInt(1), ConstInt(-2**33), ConstInt(2**55)]:
                 self.ensure_can_hold(opnum, [c1,c2])
-                if c1.is_constant():
+                if c1.is_constant() and not -2**31 <= c1.getint() <= 2**31-1:
                     assert self.const_in_pool(c1)
-                if c2.is_constant():
+                if c2.is_constant() and not -2**31 <= c1.getint() <= 2**31-1:
                     assert self.const_in_pool(c2)
 
     def test_pool_overflow(self):
diff --git a/rpython/jit/backend/zarch/test/test_runner.py b/rpython/jit/backend/zarch/test/test_runner.py
--- a/rpython/jit/backend/zarch/test/test_runner.py
+++ b/rpython/jit/backend/zarch/test/test_runner.py
@@ -24,6 +24,6 @@
         cpu.setup_once()
         return cpu
 
-    add_loop_instructions = "lg; lgr; agr; cgfi; jge; j;$"
-    bridge_loop_instructions = "lg; cgfi; jnl; lghi; " \
-                               "iilf;( iihf;)? iilf;( iihf;)? basr; iilf;( iihf;)? br;$"
+    add_loop_instructions = "lg; lgr; larl; agr; cgfi; jge; j;$"
+    bridge_loop_instructions = "larl; lg; cgfi; jnl; lghi; " \
+                               "(lgfi|iilf);( iihf;)? (lgfi|iilf);( iihf;)? basr; (lgfi|iilf);( iihf;)? br;$"

_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit