Author: Richard Plangger <r...@pasra.at> Branch: vecopt-merge Changeset: r79159:0a4078644343 Date: 2015-08-23 17:24 +0200 http://bitbucket.org/pypy/pypy/changeset/0a4078644343/
Log: an all-new stitch bridge that considers register mapping. works for accumulation values as well diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -590,9 +590,57 @@ rawstart, fullsize) return AsmInfo(ops_offset, startpos + rawstart, codeendpos - startpos, rawstart) - def stitch_bridge(self, faildescr, target): - assert target.rawstart != 0 - self.patch_jump_for_descr(faildescr, target.rawstart) + def stitch_bridge(self, faildescr, version): + """ Stitching means that one can enter a bridge with a complete different register + allocation. This needs remapping which is done here for both normal registers + and accumulation registers. + Why? Because this only generates a very small junk of memory, instead of + duplicating the loop assembler! + """ + asminfo, bridge_faildescr, compiled_version, looptoken = version._compiled + assert asminfo.rawstart != 0 + self.mc = codebuf.MachineCodeBlockWrapper() + allblocks = self.get_asmmemmgr_blocks(looptoken) + self.datablockwrapper = MachineDataBlockWrapper(self.cpu.asmmemmgr, + allblocks) + frame_info = self.datablockwrapper.malloc_aligned( + jitframe.JITFRAMEINFO_SIZE, alignment=WORD) + + self.mc.force_frame_size(DEFAULT_FRAME_BYTES) + # if accumulation is saved at the guard, we need to update it here! + guard_locs = self.rebuild_faillocs_from_descr(faildescr, version.inputargs) + bridge_locs = self.rebuild_faillocs_from_descr(bridge_faildescr, compiled_version.inputargs) + guard_accum_info = faildescr.rd_accum_list + # O(n^2), but usually you only have at most 1 fail argument + while guard_accum_info: + bridge_accum_info = bridge_faildescr.rd_accum_list + while bridge_accum_info: + if bridge_accum_info.scalar_position == guard_accum_info.scalar_position: + # the mapping might be wrong! 
+ if bridge_accum_info.vector_loc is not guard_accum_info.vector_loc: + self.mov(guard_accum_info.vector_loc, bridge_accum_info.vector_loc) + bridge_accum_info = bridge_accum_info.prev + guard_accum_info = guard_accum_info.prev + + # register mapping is most likely NOT valid, thus remap it in this + # short piece of assembler + assert len(guard_locs) == len(bridge_locs) + for i,gloc in enumerate(guard_locs): + bloc = bridge_locs[i] + bstack = bloc.location_code() == 'b' + gstack = gloc.location_code() == 'b' + if bstack and gstack: + pass + elif gloc is not bloc: + self.mov(gloc, bloc) + self.mc.JMP_l(0) + self.mc.force_frame_size(DEFAULT_FRAME_BYTES) + offset = self.mc.get_relative_pos() - 4 + rawstart = self.materialize_loop(looptoken) + # update the exit target + self._patch_jump_for_descr(rawstart + offset, asminfo.rawstart) + # update the guard to jump right to this custom piece of assembler + self.patch_jump_for_descr(faildescr, rawstart) def write_pending_failure_recoveries(self, regalloc): # for each pending guard, generate the code of the recovery stub @@ -732,6 +780,10 @@ def patch_jump_for_descr(self, faildescr, adr_new_target): adr_jump_offset = faildescr.adr_jump_offset + self._patch_jump_for_descr(adr_jump_offset, adr_new_target) + faildescr.adr_jump_offset = 0 # means "patched" + + def _patch_jump_for_descr(self, adr_jump_offset, adr_new_target): assert adr_jump_offset != 0 offset = adr_new_target - (adr_jump_offset + 4) # If the new target fits within a rel32 of the jump, just patch @@ -752,7 +804,6 @@ p = rffi.cast(rffi.INTP, adr_jump_offset) adr_target = adr_jump_offset + 4 + rffi.cast(lltype.Signed, p[0]) mc.copy_to_raw_memory(adr_target) - faildescr.adr_jump_offset = 0 # means "patched" def fixup_target_tokens(self, rawstart): for targettoken in self.target_tokens_currently_compiling: diff --git a/rpython/jit/metainterp/compile.py b/rpython/jit/metainterp/compile.py --- a/rpython/jit/metainterp/compile.py +++ b/rpython/jit/metainterp/compile.py 
@@ -209,9 +209,8 @@ version.operations, jitcell_token) record_loop_or_bridge(metainterp_sd, vl) assert asminfo is not None - version._compiled = asminfo + version._compiled = (asminfo, faildescr, faildescr.version, jitcell_token) faildescr.version = None - # stitch the rest of the traces for lv in loop.versions: if not lv.compiled(): # the version was never compiled, do not bother @@ -221,7 +220,7 @@ assert isinstance(faildescr, CompileLoopVersionDescr) version = faildescr.version if version and version.compiled(): - cpu.stitch_bridge(faildescr, version._compiled) + cpu.stitch_bridge(faildescr, version) faildescr.version = None loop.versions = None _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit