Author: Richard Plangger <planri...@gmail.com>
Branch: s390x-backend
Changeset: r81928:6a1b2984c003
Date: 2016-01-25 13:31 +0100
http://bitbucket.org/pypy/pypy/changeset/6a1b2984c003/
Log: rewritten many calls to use one stack frame less diff --git a/rpython/jit/backend/zarch/arch.py b/rpython/jit/backend/zarch/arch.py --- a/rpython/jit/backend/zarch/arch.py +++ b/rpython/jit/backend/zarch/arch.py @@ -34,7 +34,8 @@ # in reverse order to SP STD_FRAME_SIZE_IN_BYTES = 160 -THREADLOCAL_ADDR_OFFSET = 16 # at position of r2, but r2 is never saved!! +THREADLOCAL_ON_ENTER_JIT = 8 +THREADLOCAL_ADDR_OFFSET = STD_FRAME_SIZE_IN_BYTES + THREADLOCAL_ON_ENTER_JIT assert STD_FRAME_SIZE_IN_BYTES % 2 == 0 diff --git a/rpython/jit/backend/zarch/assembler.py b/rpython/jit/backend/zarch/assembler.py --- a/rpython/jit/backend/zarch/assembler.py +++ b/rpython/jit/backend/zarch/assembler.py @@ -16,7 +16,8 @@ from rpython.jit.backend.zarch.arch import (WORD, STD_FRAME_SIZE_IN_BYTES, THREADLOCAL_ADDR_OFFSET, RECOVERY_GCMAP_POOL_OFFSET, RECOVERY_TARGET_POOL_OFFSET, - JUMPABS_TARGET_ADDR__POOL_OFFSET, JUMPABS_POOL_ADDR_POOL_OFFSET) + JUMPABS_TARGET_ADDR__POOL_OFFSET, JUMPABS_POOL_ADDR_POOL_OFFSET, + THREADLOCAL_ON_ENTER_JIT) from rpython.jit.backend.zarch.opassembler import OpAssembler from rpython.jit.backend.zarch.regalloc import Regalloc from rpython.jit.codewriter.effectinfo import EffectInfo @@ -382,7 +383,7 @@ """ # signature of these cond_call_slowpath functions: # * on entry, r12 contains the function to call - # * r3, r4, r5, r6 contain arguments for the call + # * r2, r3, r4, r5 contain arguments for the call # * r0 is the gcmap # * the old value of these regs must already be stored in the jitframe # * on exit, all registers are restored from the jitframe @@ -391,6 +392,8 @@ self.mc = mc ofs2 = self.cpu.get_ofs_of_frame_field('jf_gcmap') mc.STG(r.SCRATCH2, l.addr(ofs2,r.SPP)) + mc.STMG(r.r14,r.r15,l.addr(14*WORD, r.SP)) + mc.push_std_frame() # copy registers to the frame, with the exception of r3 to r6 and r12, # because these have already been saved by the caller. 
Note that @@ -406,21 +409,21 @@ reg is not r.r4 and reg is not r.r5 and reg is not r.r12] - self._push_core_regs_to_jitframe(mc, regs + [r.r14]) + self._push_core_regs_to_jitframe(mc, regs) if supports_floats: self._push_fp_regs_to_jitframe(mc) # allocate a stack frame! - mc.push_std_frame() mc.raw_call(r.r12) - mc.pop_std_frame() # Finish self._reload_frame_if_necessary(mc) - self._pop_core_regs_from_jitframe(mc, saved_regs + [r.r14]) + self._pop_core_regs_from_jitframe(mc, saved_regs) if supports_floats: self._pop_fp_regs_from_jitframe(mc) + size = STD_FRAME_SIZE_IN_BYTES + mc.LMG(r.r14, r.r15, l.addr(size+14*WORD, r.SP)) mc.BCR(c.ANY, r.RETURN) self.mc = None return mc.materialize(self.cpu, []) @@ -446,8 +449,11 @@ mc.STG(r.SCRATCH, l.addr(ofs2, r.SPP)) saved_regs = [reg for reg in r.MANAGED_REGS if reg is not r.RES and reg is not r.RSZ] - self._push_core_regs_to_jitframe(mc, saved_regs + [r.r14]) + self._push_core_regs_to_jitframe(mc, saved_regs) self._push_fp_regs_to_jitframe(mc) + # alloc a frame for the callee + mc.STMG(r.r14, r.r15, l.addr(14*WORD, r.SP)) + mc.push_std_frame() # if kind == 'fixed': addr = self.cpu.gc_ll_descr.get_malloc_slowpath_addr() @@ -478,10 +484,8 @@ # Do the call addr = rffi.cast(lltype.Signed, addr) - mc.push_std_frame() mc.load_imm(mc.RAW_CALL_REG, addr) mc.raw_call() - mc.pop_std_frame() self._reload_frame_if_necessary(mc) @@ -490,7 +494,7 @@ # emit_call_malloc_gc()). 
self.propagate_memoryerror_if_r2_is_null() - self._pop_core_regs_from_jitframe(mc, saved_regs + [r.r14]) + self._pop_core_regs_from_jitframe(mc, saved_regs) self._pop_fp_regs_from_jitframe(mc) nursery_free_adr = self.cpu.gc_ll_descr.get_nursery_free_addr() @@ -501,6 +505,8 @@ # r.RSZ is loaded from [SCRATCH], to make the caller's store a no-op here mc.load(r.RSZ, r.r1, 0) # + size = STD_FRAME_SIZE_IN_BYTES + mc.LMG(r.r14, r.r15, l.addr(size+14*WORD, r.SP)) mc.BCR(c.ANY, r.r14) self.mc = None return mc.materialize(self.cpu, []) @@ -517,7 +523,7 @@ mc = InstrBuilder() # # store the link backwards - self.mc.STMG(r.r14, r.r15, l.addr(14*WORD, r.SP)) + mc.STMG(r.r14, r.r15, l.addr(14*WORD, r.SP)) mc.push_std_frame() mc.LGR(r.r2, r.SP) @@ -532,7 +538,7 @@ mc.cmp_op(r.SCRATCH, l.imm(0), imm=True) # size = STD_FRAME_SIZE_IN_BYTES - self.mc.LMG(r.r14, r.r15, l.addr(size+14*WORD, r.SP)) # restore the link + mc.LMG(r.r14, r.r15, l.addr(size+14*WORD, r.SP)) # restore the link # So we return to our caller, conditionally if "EQ" mc.BCR(c.EQ, r.r14) mc.trap() # debug if this is EVER executed! @@ -590,11 +596,11 @@ # LGHI r0, ... (4 bytes) # sum -> (14 bytes) mc.write('\x00'*14) - self.mc.push_std_frame() + mc.push_std_frame() mc.load_imm(r.RETURN, self._frame_realloc_slowpath) self.load_gcmap(mc, r.r1, gcmap) mc.raw_call() - self.mc.pop_std_frame() + mc.pop_std_frame() self.frame_depth_to_patch.append((patch_pos, mc.currpos())) @@ -1006,8 +1012,8 @@ # save the back chain self.mc.STG(r.SP, l.addr(0, r.SP)) - # save r3, the second argument, to THREADLOCAL_ADDR_OFFSET - self.mc.STG(r.r3, l.addr(THREADLOCAL_ADDR_OFFSET, r.SP)) + # save r3, the second argument, to the thread local position + self.mc.STG(r.r3, l.addr(THREADLOCAL_ON_ENTER_JIT, r.SP)) # push a standard frame for any call self.mc.push_std_frame() @@ -1418,9 +1424,7 @@ raise AssertionError(kind) # # call! 
- mc.push_std_frame() mc.branch_absolute(addr) - mc.pop_std_frame() jmp_location = mc.currpos() mc.reserve_cond_jump(short=True) # jump forward, patched later diff --git a/rpython/jit/backend/zarch/callbuilder.py b/rpython/jit/backend/zarch/callbuilder.py --- a/rpython/jit/backend/zarch/callbuilder.py +++ b/rpython/jit/backend/zarch/callbuilder.py @@ -62,6 +62,7 @@ # called function will in turn call further functions (which must be passed the # address of the new frame). This stack grows downwards from high addresses # """ + self.subtracted_to_sp = 0 gpr_regs = 0 fpr_regs = 0 @@ -83,18 +84,18 @@ stack_params.append(i) self.subtracted_to_sp += len(stack_params) * WORD - base = -len(stack_params) * WORD + base = len(stack_params) * WORD if self.is_call_release_gil: self.subtracted_to_sp += 8*WORD - base -= 8*WORD - # one additional owrd for remap frame layout + base += 8*WORD + # one additional word for remap frame layout # regalloc_push will overwrite -8(r.SP) and destroy # a parameter if we would not reserve that space - base -= WORD - self.subtracted_to_sp += WORD + # base += WORD + # TODO self.subtracted_to_sp += WORD for idx,i in enumerate(stack_params): loc = arglocs[i] - offset = base + 8 * idx + offset = STD_FRAME_SIZE_IN_BYTES - base + 8 * idx if loc.type == FLOAT: if loc.is_fp_reg(): src = loc @@ -148,15 +149,23 @@ def emit_raw_call(self): # always allocate a stack frame for the new function # save the SP back chain - self.mc.STG(r.SP, l.addr(-self.subtracted_to_sp, r.SP)) + #self.mc.STG(r.SP, l.addr(-self.subtracted_to_sp, r.SP)) # move the frame pointer if self.subtracted_to_sp != 0: self.mc.LAY(r.SP, l.addr(-self.subtracted_to_sp, r.SP)) self.mc.raw_call() + + + def restore_stack_pointer(self): + # it must at LEAST be 160 bytes + if self.subtracted_to_sp != 0: + self.mc.LAY(r.SP, l.addr(self.subtracted_to_sp, r.SP)) + + def load_result(self): + assert (self.resloc is None or + self.resloc is r.GPR_RETURN or + self.resloc is r.FPR_RETURN) # - 
self.ensure_correct_signzero_extension() - - def ensure_correct_signzero_extension(self): if self.restype == 'i' and self.ressize != WORD: # we must be sure! libffi (s390x impl) will not return # a sane 64 bit zero/sign extended value. fix for this @@ -177,25 +186,14 @@ else: assert 0, "cannot zero extend size %d" % self.ressize - - def restore_stack_pointer(self): - # it must at LEAST be 160 bytes - if self.subtracted_to_sp != 0: - self.mc.LAY(r.SP, l.addr(self.subtracted_to_sp, r.SP)) - - def load_result(self): - assert (self.resloc is None or - self.resloc is r.GPR_RETURN or - self.resloc is r.FPR_RETURN) - - def call_releasegil_addr_and_move_real_arguments(self, fastgil): assert self.is_call_release_gil RSHADOWOLD = self.RSHADOWOLD RSHADOWPTR = self.RSHADOWPTR RFASTGILPTR = self.RFASTGILPTR # - self.mc.STMG(r.r8, r.r13, l.addr(-7*WORD, r.SP)) + pos = STD_FRAME_SIZE_IN_BYTES - 7*WORD + self.mc.STMG(r.r8, r.r13, l.addr(pos, r.SP)) # 6 registers, 1 for a floating point return value! # registered by prepare_arguments! 
# @@ -268,26 +266,27 @@ PARAM_SAVE_AREA_OFFSET = 0 if reg is not None: # save 1 word below the stack pointer + pos = STD_FRAME_SIZE_IN_BYTES if reg.is_core_reg(): - self.mc.STG(reg, l.addr(-1*WORD, r.SP)) + self.mc.STG(reg, l.addr(pos-1*WORD, r.SP)) elif reg.is_fp_reg(): - self.mc.STD(reg, l.addr(-1*WORD, r.SP)) - self.mc.push_std_frame(8*WORD) + self.mc.STD(reg, l.addr(pos-1*WORD, r.SP)) self.mc.load_imm(self.mc.RAW_CALL_REG, self.asm.reacqgil_addr) self.mc.raw_call() - self.mc.pop_std_frame(8*WORD) if reg is not None: + pos = STD_FRAME_SIZE_IN_BYTES if reg.is_core_reg(): - self.mc.LG(reg, l.addr(-1*WORD, r.SP)) + self.mc.LG(reg, l.addr(pos-1*WORD, r.SP)) elif reg.is_fp_reg(): - self.mc.LD(reg, l.addr(-1*WORD, r.SP)) + self.mc.LD(reg, l.addr(pos-1*WORD, r.SP)) # replace b1_location with BEQ(here) pmc = OverwritingBuilder(self.mc, b1_location, 1) pmc.BRCL(c.EQ, l.imm(self.mc.currpos() - b1_location)) pmc.overwrite() - self.mc.LMG(r.r8, r.r13, l.addr(-7*WORD, r.SP)) + pos = STD_FRAME_SIZE_IN_BYTES - 7*WORD + self.mc.LMG(r.r8, r.r13, l.addr(pos, r.SP)) def write_real_errno(self, save_err): if save_err & rffi.RFFI_READSAVED_ERRNO: diff --git a/rpython/jit/backend/zarch/codebuilder.py b/rpython/jit/backend/zarch/codebuilder.py --- a/rpython/jit/backend/zarch/codebuilder.py +++ b/rpython/jit/backend/zarch/codebuilder.py @@ -198,7 +198,7 @@ function pointer, which means on big-endian that it is actually the address of a three-words descriptor. 
""" - self.BASR(r.RETURN, call_reg) + self.BASR(r.r14, call_reg) def reserve_cond_jump(self, short=False): self.trap() # conditional jump, patched later diff --git a/rpython/jit/backend/zarch/opassembler.py b/rpython/jit/backend/zarch/opassembler.py --- a/rpython/jit/backend/zarch/opassembler.py +++ b/rpython/jit/backend/zarch/opassembler.py @@ -530,11 +530,7 @@ mc.LGR(r.r0, loc_base) # unusual argument location mc.load_imm(r.r14, self.wb_slowpath[helper_num]) - # alloc a stack frame - mc.push_std_frame() mc.BASR(r.r14, r.r14) - # destory the frame - mc.pop_std_frame() if card_marking_mask: # The helper ends again with a check of the flag in the object. _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit