Author: Richard Plangger <planri...@gmail.com> Branch: vmprof-native Changeset: r90020:dffb13845738 Date: 2017-02-09 12:22 +0100 http://bitbucket.org/pypy/pypy/changeset/dffb13845738/
Log: apply changes made in pypy source repo, and never ported back to vmprof source diff --git a/pypy/module/_vmprof/test/test__vmprof.py b/pypy/module/_vmprof/test/test__vmprof.py --- a/pypy/module/_vmprof/test/test__vmprof.py +++ b/pypy/module/_vmprof/test/test__vmprof.py @@ -24,10 +24,11 @@ i += 5 * WORD # header assert s[i ] == '\x05' # MARKER_HEADER assert s[i + 1] == '\x00' # 0 - assert s[i + 2] == '\x02' # VERSION_THREAD_ID - assert s[i + 3] == chr(4) # len('pypy') - assert s[i + 4: i + 8] == 'pypy' - i += 8 + assert s[i + 2] == '\x06' # VERSION_TIMESTAMP + assert s[i + 3] == '\x08' # PROFILE_RPYTHON + assert s[i + 4] == chr(4) # len('pypy') + assert s[i + 5: i + 9] == 'pypy' + i += 9 while i < len(s): if s[i] == '\x03': break @@ -41,6 +42,8 @@ _, size = struct.unpack("ll", s[i:i + 2 * WORD]) count += 1 i += 2 * WORD + size + elif s[i] == '\x06': + i += 8+8+8 else: raise AssertionError(ord(s[i])) return count diff --git a/pypy/module/_vmprof/test/test_direct.py b/pypy/module/_vmprof/test/test_direct.py --- a/pypy/module/_vmprof/test/test_direct.py +++ b/pypy/module/_vmprof/test/test_direct.py @@ -43,7 +43,7 @@ } -""" + open(str(srcdir.join("vmprof_get_custom_offset.h"))).read(), include_dirs=[str(srcdir)]) +""" + open(str(srcdir.join("shared/vmprof_get_custom_offset.h"))).read(), include_dirs=[str(srcdir)]) class TestDirect(object): def test_infrastructure(self): diff --git a/rpython/rlib/rvmprof/rvmprof.py b/rpython/rlib/rvmprof/rvmprof.py --- a/rpython/rlib/rvmprof/rvmprof.py +++ b/rpython/rlib/rvmprof/rvmprof.py @@ -185,15 +185,16 @@ This function can only be called once during translation. It generates a C function called __vmprof_eval_vmprof which is used by - the C source code as an extern function. This is necessary while walking - the native stack. If you see __vmprof_eval_vmprof defined twice during + the vmprof C source code and is bound as an extern function. + This is necessary while walking the native stack. 
+ If you see __vmprof_eval_vmprof defined twice during translation, read on: To remove this restriction do the following: *) Extend the macro IS_VMPROF_EVAL in the vmprof source repo to check several sybmols. - *) Give each function provided to this decorator a unique name in C + *) Give each function provided to this decorator a unique symbol name in C """ if _hack_update_stack_untranslated: from rpython.rtyper.annlowlevel import llhelper diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_common.h b/rpython/rlib/rvmprof/src/shared/vmprof_common.h --- a/rpython/rlib/rvmprof/src/shared/vmprof_common.h +++ b/rpython/rlib/rvmprof/src/shared/vmprof_common.h @@ -60,8 +60,9 @@ char *vmprof_init(int fd, double interval, int memory, int lines, const char *interp_name, int native) { - if (interval < 1e-6 || interval >= 1.0) + if (!(interval >= 1e-6 && interval < 1.0)) { /* also if it is NaN */ return "bad value for 'interval'"; + } prepare_interval_usec = (int)(interval * 1000000.0); if (prepare_concurrent_bufs() < 0) diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_get_custom_offset.h b/rpython/rlib/rvmprof/src/shared/vmprof_get_custom_offset.h --- a/rpython/rlib/rvmprof/src/shared/vmprof_get_custom_offset.h +++ b/rpython/rlib/rvmprof/src/shared/vmprof_get_custom_offset.h @@ -1,120 +1,53 @@ #pragma once -#ifdef PYPY_JIT_CODEMAP void *pypy_find_codemap_at_addr(long addr, long *start_addr); long pypy_yield_codemap_at_addr(void *codemap_raw, long addr, long *current_pos_addr); -long pypy_jit_stack_depth_at_loc(long loc); -#endif +#define MAX_INLINE_DEPTH 384 -#ifdef CPYTHON_GET_CUSTOM_OFFSET -static void *tramp_start, *tramp_end; -#endif - - -static ptrdiff_t vmprof_unw_get_custom_offset(void* ip, void *cp) { - -#if defined(PYPY_JIT_CODEMAP) - - intptr_t ip_l = (intptr_t)ip; - return pypy_jit_stack_depth_at_loc(ip_l); - -#elif defined(CPYTHON_GET_CUSTOM_OFFSET) - - if (ip >= tramp_start && ip <= tramp_end) { - // XXX the return value is wrong for all the places before 
push and - // after pop, fix - void *bp; - void *sp; - - /* This is a stage2 trampoline created by hotpatch: - - push %rbx - push %rbp - mov %rsp,%rbp - and $0xfffffffffffffff0,%rsp // make sure the stack is aligned - movabs $0x7ffff687bb10,%rbx - callq *%rbx - leaveq - pop %rbx - retq - - the stack layout is like this: - - +-----------+ high addresses - | ret addr | - +-----------+ - | saved rbx | start of the function frame - +-----------+ - | saved rbp | - +-----------+ - | ........ | <-- rbp - +-----------+ low addresses - - So, the trampoline frame starts at rbp+16, and the return address, - is at rbp+24. The vmprof API requires us to return the offset of - the frame relative to sp, hence we have this weird computation. - - XXX (antocuni): I think we could change the API to return directly - the frame address instead of the offset; however, this require a - change in the PyPy code too - */ - - unw_get_reg (cp, UNW_REG_SP, (unw_word_t*)&sp); - unw_get_reg (cp, UNW_X86_64_RBP, (unw_word_t*)&bp); - return bp+16+8-sp; - } - return -1; - -#else - - return -1; - -#endif -} - -static long vmprof_write_header_for_jit_addr(void **result, long n, - void *ip, int max_depth) +static long vmprof_write_header_for_jit_addr(intptr_t *result, long n, + intptr_t addr, int max_depth) { #ifdef PYPY_JIT_CODEMAP void *codemap; long current_pos = 0; - intptr_t id; + intptr_t ident, local_stack[MAX_INLINE_DEPTH]; + long m; long start_addr = 0; - intptr_t addr = (intptr_t)ip; - int start, k; - void *tmp; codemap = pypy_find_codemap_at_addr(addr, &start_addr); - if (codemap == NULL) - // not a jit code at all + if (codemap == NULL || n >= max_depth - 2) + // not a jit code at all or almost max depth return n; // modify the last entry to point to start address and not the random one // in the middle - result[n - 1] = (void*)start_addr; - result[n] = (void*)2; - n++; - start = n; - while (n < max_depth) { - id = pypy_yield_codemap_at_addr(codemap, addr, &current_pos); - if (id == -1) +
result[n++] = VMPROF_ASSEMBLER_TAG; + result[n++] = start_addr; + + // build the list of code idents corresponding to the current + // position inside this particular piece of assembler. If (very + // unlikely) we get more than MAX_INLINE_DEPTH recursion levels + // all inlined inside this single piece of assembler, then stop: + // there will be some missing frames then. Otherwise, we need to + // first collect 'local_stack' and then write it to 'result' in the + // opposite order, stopping at 'max_depth'. Previous versions of + // the code would write the oldest calls and then stop---whereas + // what we really need it to write the newest calls and then stop. + m = 0; + while (m < MAX_INLINE_DEPTH) { + ident = pypy_yield_codemap_at_addr(codemap, addr, &current_pos); + if (ident == -1) // finish break; - if (id == 0) + if (ident == 0) continue; // not main codemap - result[n++] = (void *)id; + local_stack[m++] = ident; } - k = 0; - while (k < (n - start) / 2) { - tmp = result[start + k]; - result[start + k] = result[n - k - 1]; - result[n - k - 1] = tmp; - k++; - } - if (n < max_depth) { - result[n++] = (void*)3; + while (m > 0 && n < max_depth) { + result[n++] = VMPROF_JITTED_TAG; + result[n++] = local_stack[--m]; } #endif return n; _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit