Author: Armin Rigo <ar...@tunes.org> Branch: guard-compatible Changeset: r94073:c4f28bd30a85 Date: 2018-03-22 15:14 +0100 http://bitbucket.org/pypy/pypy/changeset/c4f28bd30a85/
Log: hg merge vtune diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -177,7 +177,8 @@ self.pop_gcmap(mc) # cancel the push_gcmap(store=True) in the caller self._pop_all_regs_from_frame(mc, [], self.cpu.supports_floats) mc.RET() - self._frame_realloc_slowpath = mc.materialize(self.cpu, []) + self._frame_realloc_slowpath = self.materialize(mc, [], + "frame_realloc") def _build_cond_call_slowpath(self, supports_floats, callee_only): """ This builds a general call slowpath, for whatever call happens to @@ -215,7 +216,7 @@ self._pop_all_regs_from_frame(mc, [eax], supports_floats, callee_only) mc.RET() self.flush_pending_slowpaths(mc) - return mc.materialize(self.cpu, []) + return self.materialize(mc, [], "cond_call") def _build_malloc_slowpath(self, kind): """ While arriving on slowpath, we have a gcpattern on stack 0. @@ -305,7 +306,7 @@ mc.JMP(imm(self.propagate_exception_path)) self.flush_pending_slowpaths(mc) # - rawstart = mc.materialize(self.cpu, []) + rawstart = self.materialize(mc, [], "malloc") return rawstart def _build_propagate_exception_path(self): @@ -323,7 +324,7 @@ self.mc.MOV(RawEbpLoc(ofs), imm(propagate_exception_descr)) # self._call_footer() - rawstart = self.mc.materialize(self.cpu, []) + rawstart = self.materialize(self.mc, [], "propagate_exception") self.propagate_exception_path = rawstart self.mc = None @@ -368,7 +369,7 @@ mc.ADD_ri(esp.value, WORD) mc.JMP(imm(self.propagate_exception_path)) # - rawstart = mc.materialize(self.cpu, []) + rawstart = self.materialize(mc, [], "stack_check") self.stack_check_slowpath = rawstart def _build_wb_slowpath(self, withcards, withfloats=False, for_frame=False): @@ -469,7 +470,7 @@ mc.LEA_rs(esp.value, 7 * WORD) mc.RET() - rawstart = mc.materialize(self.cpu, []) + rawstart = self.materialize(mc, [], "write_barrier") if for_frame: self.wb_slowpath[4] = rawstart else: @@ -567,6 +568,7 
@@ ops_offset=ops_offset) self.fixup_target_tokens(rawstart) + self.materialize_done(rawstart, full_size, "loop%d" % looptoken.number) self.teardown() # oprofile support if self.cpu.profile_agent is not None: @@ -644,6 +646,8 @@ self.fixup_target_tokens(rawstart) self.update_frame_depth(frame_depth) + self.materialize_done(rawstart, fullsize, + "loop%d" % original_loop_token.number) self.teardown() # oprofile support if self.cpu.profile_agent is not None: @@ -710,11 +714,14 @@ self.mc.JMP_l(0) self.mc.writeimm32(0) self.mc.force_frame_size(DEFAULT_FRAME_BYTES) + fullsize = self.mc.get_relative_pos() rawstart = self.materialize_loop(looptoken) # update the jump (above) to the real trace self._patch_jump_to(rawstart + offset, asminfo.rawstart) # update the guard to jump right to this custom piece of assembler self.patch_jump_for_descr(faildescr, rawstart) + self.materialize_done(rawstart, fullsize, + "loop%d" % looptoken.number) def _patch_jump_to(self, adr_jump_offset, adr_new_target): assert adr_jump_offset != 0 @@ -892,13 +899,30 @@ mc.writeimm32(allocated_depth) mc.copy_to_raw_memory(adr) + def get_asmmemmgr_blocks(self, looptoken): + clt = looptoken.compiled_loop_token + if clt.asmmemmgr_blocks is None: + clt.asmmemmgr_blocks = [] + return clt.asmmemmgr_blocks + + def materialize_done(self, rawstart, size, funcname): + from rpython.jit.backend.x86.vtune import rpy_vtune_register + with rffi.scoped_str2charp("rpyjit." 
+ funcname) as p: + rpy_vtune_register(p, rawstart, size) + + def materialize(self, mc, allblocks, funcname, gcrootmap=None): + size = mc.get_relative_pos() + rawstart = mc.materialize(self.cpu, allblocks, gcrootmap=gcrootmap) + self.materialize_done(rawstart, size, funcname) + return rawstart + def materialize_loop(self, looptoken): self.datablockwrapper.done() # finish using cpu.asmmemmgr self.datablockwrapper = None allblocks = self.get_asmmemmgr_blocks(looptoken) size = self.mc.get_relative_pos() - res = self.mc.materialize(self.cpu, allblocks, - self.cpu.gc_ll_descr.gcrootmap) + res = self.materialize(self.mc, allblocks, + gcrootmap=self.cpu.gc_ll_descr.gcrootmap) if self.cpu.HAS_CODEMAP: self.cpu.codemap.register_codemap( self.codemap_builder.get_final_bytecode(res, size)) @@ -2072,7 +2096,7 @@ # now we return from the complete frame, which starts from # _call_header_with_stack_check(). The _call_footer below does it. self._call_footer() - rawstart = mc.materialize(self.cpu, []) + rawstart = self.materialize(mc, [], "failure_recovery") self.failure_recovery_code[exc + 2 * withfloats] = rawstart self.mc = None diff --git a/rpython/jit/backend/x86/guard_compat.py b/rpython/jit/backend/x86/guard_compat.py --- a/rpython/jit/backend/x86/guard_compat.py +++ b/rpython/jit/backend/x86/guard_compat.py @@ -288,7 +288,7 @@ mc.JMP_s(0) assembler.flush_pending_slowpaths(mc) - assembler.guard_compat_search_tree = mc.materialize(assembler.cpu, []) + assembler.guard_compat_search_tree = assembler.materialize(mc, [], "guard_compat_search_tree") def build_once_guard_compat_recovery(assembler): @@ -303,7 +303,7 @@ target = assembler.get_target_for_failure_recovery_of_guard_compat() mc.JMP(regloc.imm(target)) - assembler.guard_compat_recovery = mc.materialize(assembler.cpu, []) + assembler.guard_compat_recovery = assembler.materialize(mc, [], "guard_compat_recovery") def generate_recovery_stub(assembler, guard_token): diff --git a/rpython/jit/backend/x86/vtune.py b/rpython/jit/backend/x86/vtune.py new file mode 100644 ---
/dev/null +++ b/rpython/jit/backend/x86/vtune.py @@ -0,0 +1,43 @@ +""" +Support for VTune Amplifier +""" + +from rpython.rtyper.lltypesystem import lltype, rffi +from rpython.translator.tool.cbuild import ExternalCompilationInfo + + +eci = ExternalCompilationInfo( + post_include_bits=[""" +RPY_EXTERN void rpy_vtune_register(char *, long, long); +"""], + include_dirs=["/opt/intel/system_studio_2018/vtune_amplifier/include"], + libraries=["dl"], # otherwise, iJIT_IsProfilingActive() just returns 0 + separate_module_sources=[""" +#include "/opt/intel/system_studio_2018/vtune_amplifier/sdk/src/ittnotify/jitprofiling.c" + +RPY_EXTERN void rpy_vtune_register(char *funcname, Signed addr, Signed size) +{ + iJIT_Method_Load_V2 jmethod = {0}; + + if (iJIT_IsProfilingActive() != iJIT_SAMPLING_ON) { + return; + } + + jmethod.method_id = iJIT_GetNewMethodID(); + jmethod.method_name = funcname; + jmethod.method_load_address = (void *)addr; + jmethod.method_size = size; + jmethod.module_name = "rpyjit"; + + iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED_V2, + (void*)&jmethod); +} +"""]) + +rpy_vtune_register = rffi.llexternal( + "rpy_vtune_register", + [rffi.CCHARP, lltype.Signed, lltype.Signed], + lltype.Void, + compilation_info=eci, + _nowrapper=True, + sandboxsafe=True) _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit