Author: Richard Plangger <planri...@gmail.com> Branch: vmprof-native Changeset: r90697:ac30c079910e Date: 2017-03-14 15:00 +0100 http://bitbucket.org/pypy/pypy/changeset/ac30c079910e/
Log: copy over changes made to vmprof-python diff --git a/rpython/rlib/rvmprof/src/vmprof_main.h b/rpython/rlib/rvmprof/src/vmprof_main.h --- a/rpython/rlib/rvmprof/src/vmprof_main.h +++ b/rpython/rlib/rvmprof/src/vmprof_main.h @@ -1,3 +1,5 @@ +#pragma once + /* VMPROF * * statistical sampling profiler specifically designed to profile programs @@ -10,45 +12,49 @@ * * Tested only on gcc, linux, x86_64. * - * Copyright (C) 2014-2015 + * Copyright (C) 2014-2017 * Antonio Cuni - anto.c...@gmail.com * Maciej Fijalkowski - fij...@gmail.com * Armin Rigo - ar...@tunes.org + * Richard Plangger - planri...@gmail.com * */ #define _GNU_SOURCE 1 #include <dlfcn.h> +#include <pthread.h> +#include <unistd.h> #include <assert.h> -#include <pthread.h> +#include <errno.h> +#include <stdio.h> +#include <fcntl.h> +#include <time.h> +#include <sys/types.h> +#include <sys/stat.h> #include <sys/time.h> -#include <errno.h> -#include <unistd.h> -#include <stddef.h> -#include <stdio.h> -#include <sys/types.h> -#include <signal.h> -#include <sys/stat.h> -#include <unistd.h> -#include <fcntl.h> -#include "vmprof_stack.h" + +#include "vmprof.h" + +#include "vmp_stack.h" #include "vmprof_getpc.h" #include "vmprof_mt.h" -#include "vmprof_get_custom_offset.h" #include "vmprof_common.h" +#include "compat.h" + +#if defined(__unix__) +#include "rss_unix.h" +#elif defined(__APPLE__) +#include "rss_darwin.h" +#endif + /************************************************************/ -static long prepare_interval_usec; -static long saved_profile_file; -static struct profbuf_s *volatile current_codes; static void *(*mainloop_get_virtual_ip)(char *) = 0; - -static int opened_profile(char *interp_name); +static int opened_profile(const char *interp_name, int memory, int proflines, int native); static void flush_codes(void); - /************************************************************/ /* value: last bit is 1 if signals must be ignored; all other bits @@ -79,24 +85,26 @@ static char atfork_hook_installed = 
0; -static intptr_t get_current_thread_id(void) +/* ************************************************************* + * functions to dump the stack trace + * ************************************************************* + */ + +int get_stack_trace(PY_THREAD_STATE_T * current, void** result, int max_depth, intptr_t pc) { - /* xxx This function is a hack on two fronts: - - - It assumes that pthread_self() is async-signal-safe. This - should be true on Linux. I hope it is also true elsewhere. - - - It abuses pthread_self() by assuming it just returns an - integer. According to comments in CPython's source code, the - platforms where it is not the case are rare nowadays. - - An alternative would be to try to look if the information is - available in the ucontext_t in the caller. - */ - return (intptr_t)pthread_self(); + PY_STACK_FRAME_T * frame; +#ifdef RPYTHON_VMPROF + // do nothing here, + frame = (PY_STACK_FRAME_T*)current; +#else + if (!current) { + return 0; + } + frame = current->frame; +#endif + return vmp_walk_and_record_stack(frame, result, max_depth, 1, pc); } - /* ************************************************************* * the signal handler * ************************************************************* @@ -112,9 +120,67 @@ longjmp(restore_point, SIGSEGV); } +int _vmprof_sample_stack(struct profbuf_s *p, PY_THREAD_STATE_T * tstate, ucontext_t * uc) +{ + int depth; + struct prof_stacktrace_s *st = (struct prof_stacktrace_s *)p->data; + st->marker = MARKER_STACKTRACE; + st->count = 1; +#ifdef RPYTHON_VMPROF + depth = get_stack_trace(get_vmprof_stack(), st->stack, MAX_STACK_DEPTH-1, (intptr_t)GetPC(uc)); +#else + depth = get_stack_trace(tstate, st->stack, MAX_STACK_DEPTH-1, (intptr_t)NULL); +#endif + if (depth == 0) { + return 0; + } + st->depth = depth; + st->stack[depth++] = tstate; + long rss = get_current_proc_rss(); + if (rss >= 0) + st->stack[depth++] = (void*)rss; + p->data_offset = offsetof(struct prof_stacktrace_s, marker); + p->data_size = (depth * 
sizeof(void *) + + sizeof(struct prof_stacktrace_s) - + offsetof(struct prof_stacktrace_s, marker)); + return 1; +} + +#ifndef RPYTHON_VMPROF +static PY_THREAD_STATE_T * _get_pystate_for_this_thread(void) { + // see issue 116 on github.com/vmprof/vmprof-python. + // PyGILState_GetThisThreadState(); can hang forever + // + PyInterpreterState * istate; + PyThreadState * state; + long mythread_id; + + istate = PyInterpreterState_Head(); + if (istate == NULL) { + return NULL; + } + mythread_id = PyThread_get_thread_ident(); + // fish fish fish, it will NOT lock the keymutex in pythread + do { + state = PyInterpreterState_ThreadHead(istate); + do { + if (state->thread_id == mythread_id) { + return state; + } + } while ((state = PyThreadState_Next(state)) != NULL); + } while ((istate = PyInterpreterState_Next(istate)) != NULL); + + // uh? not found? + return NULL; +} +#endif + static void sigprof_handler(int sig_nr, siginfo_t* info, void *ucontext) { -#ifdef __APPLE__ + int commit; + PY_THREAD_STATE_T * tstate = NULL; + void (*prevhandler)(int); +#ifndef RPYTHON_VMPROF // TERRIBLE HACK AHEAD // on OS X, the thread local storage is sometimes uninitialized // when the signal handler runs - it means it's impossible to read errno @@ -122,48 +188,46 @@ // it seems impossible to read the register gs. 
// here we register segfault handler (all guarded by a spinlock) and call // longjmp in case segfault happens while reading a thread local + // + // We do the same error detection for linux to ensure that + // get_current_thread_state returns a sane result while (__sync_lock_test_and_set(&spinlock, 1)) { } - signal(SIGSEGV, &segfault_handler); + prevhandler = signal(SIGSEGV, &segfault_handler); int fault_code = setjmp(restore_point); if (fault_code == 0) { pthread_self(); - get_current_thread_id(); + tstate = _get_pystate_for_this_thread(); } else { - signal(SIGSEGV, SIG_DFL); - __sync_synchronize(); - spinlock = 0; - return; + signal(SIGSEGV, prevhandler); + __sync_lock_release(&spinlock); + return; } - signal(SIGSEGV, SIG_DFL); - __sync_synchronize(); - spinlock = 0; + signal(SIGSEGV, prevhandler); + __sync_lock_release(&spinlock); #endif + long val = __sync_fetch_and_add(&signal_handler_value, 2L); if ((val & 1) == 0) { int saved_errno = errno; - int fd = profile_file; + int fd = vmp_profile_fileno(); assert(fd >= 0); struct profbuf_s *p = reserve_buffer(fd); if (p == NULL) { /* ignore this signal: there are no free buffers right now */ - } - else { - int depth; - struct prof_stacktrace_s *st = (struct prof_stacktrace_s *)p->data; - st->marker = MARKER_STACKTRACE; - st->count = 1; - depth = get_stack_trace(get_vmprof_stack(), st->stack, - MAX_STACK_DEPTH-2, GetPC((ucontext_t*)ucontext)); - st->depth = depth; - st->stack[depth++] = get_current_thread_id(); - p->data_offset = offsetof(struct prof_stacktrace_s, marker); - p->data_size = (depth * sizeof(void *) + - sizeof(struct prof_stacktrace_s) - - offsetof(struct prof_stacktrace_s, marker)); - commit_buffer(fd, p); + } else { +#ifdef RPYTHON_VMPORF + commit = _vmprof_sample_stack(p, NULL, (ucontext_t*)ucontext); +#else + commit = _vmprof_sample_stack(p, tstate, (ucontext_t*)ucontext); +#endif + if (commit) { + commit_buffer(fd, p); + } else { + cancel_buffer(p); + } } errno = saved_errno; @@ -173,6 +237,7 @@ } + 
/* ************************************************************* * the setup and teardown functions * ************************************************************* @@ -197,58 +262,53 @@ return 0; } -static int itimer_which = ITIMER_PROF; - static int install_sigprof_timer(void) { - struct itimerval timer; + static struct itimerval timer; timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = profile_interval_usec; timer.it_value = timer.it_interval; - if (setitimer(itimer_which, &timer, NULL) == 0) - return 0; /* normal path */ - - if (errno == EINVAL) { - /* on WSL, only ITIMER_REAL is supported */ - if (setitimer(ITIMER_REAL, &timer, NULL) == 0) { - fprintf(stderr, "warning: setitimer(): ITIMER_PROF not " - "available, using ITIMER_REAL instead. " - "Multithreaded programs and programs " - "doing a lot of I/O won't give correct " - "results.\n"); - itimer_which = ITIMER_REAL; - return 0; - } - } - return -1; + if (setitimer(ITIMER_PROF, &timer, NULL) != 0) + return -1; + return 0; } static int remove_sigprof_timer(void) { - struct itimerval timer; + static struct itimerval timer; timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 0; timer.it_value.tv_sec = 0; timer.it_value.tv_usec = 0; - if (setitimer(itimer_which, &timer, NULL) != 0) + if (setitimer(ITIMER_PROF, &timer, NULL) != 0) return -1; return 0; } static void atfork_disable_timer(void) { if (profile_interval_usec > 0) { - saved_profile_file = profile_file; - profile_file = -1; remove_sigprof_timer(); +#ifndef RPYTHON_VMPROF + is_enabled = 0; +#endif } } static void atfork_enable_timer(void) { if (profile_interval_usec > 0) { - profile_file = saved_profile_file; install_sigprof_timer(); +#ifndef RPYTHON_VMPROF + is_enabled = 1; +#endif } } +static void atfork_close_profile_file(void) { + int fd = vmp_profile_fileno(); + if (fd != -1) + close(fd); + vmp_set_profile_fileno(-1); +} + static int install_pthread_atfork_hooks(void) { /* this is needed to prevent the problems described there: - 
http://code.google.com/p/gperftools/issues/detail?id=278 @@ -262,20 +322,69 @@ */ if (atfork_hook_installed) return 0; - int ret = pthread_atfork(atfork_disable_timer, atfork_enable_timer, NULL); + int ret = pthread_atfork(atfork_disable_timer, atfork_enable_timer, atfork_close_profile_file); if (ret != 0) return -1; atfork_hook_installed = 1; return 0; } +#ifdef VMP_SUPPORTS_NATIVE_PROFILING +void init_cpyprof(int native) +{ + // skip this if native should not be enabled + if (!native) { + vmp_native_disable(); + return; + } +#if CPYTHON_HAS_FRAME_EVALUATION + PyThreadState *tstate = PyThreadState_GET(); + tstate->interp->eval_frame = vmprof_eval; + _default_eval_loop = _PyEval_EvalFrameDefault; +#elif defined(RPYTHON_VMPROF) + // do nothing here, the stack is maintained by rpython + // no need for a trampoline +#else + if (vmp_patch_callee_trampoline(PyEval_EvalFrameEx, + vmprof_eval, (void*)&_default_eval_loop) == 0) { + } else { + fprintf(stderr, "FATAL: could not insert trampline, try with --no-native\n"); + // TODO dump the first few bytes and tell them to create an issue! + exit(-1); + } +#endif + vmp_native_enable(); +} + +static void disable_cpyprof(void) +{ + vmp_native_disable(); +#if CPYTHON_HAS_FRAME_EVALUATION + PyThreadState *tstate = PyThreadState_GET(); + tstate->interp->eval_frame = _PyEval_EvalFrameDefault; +#elif defined(RPYTHON_VMPROF) + // TODO nothing? 
+#else + if (vmp_unpatch_callee_trampoline(PyEval_EvalFrameEx) > 0) { + fprintf(stderr, "FATAL: could not remove trampoline\n"); + exit(-1); + } +#endif + dump_native_symbols(vmp_profile_fileno()); +} +#endif + RPY_EXTERN -int vmprof_enable(void) +int vmprof_enable(int memory, int native) { - assert(profile_file >= 0); +#ifdef VMP_SUPPORTS_NATIVE_PROFILING + init_cpyprof(native); +#endif + assert(vmp_profile_fileno() >= 0); assert(prepare_interval_usec > 0); profile_interval_usec = prepare_interval_usec; - + if (memory && setup_rss() == -1) + goto error; if (install_pthread_atfork_hooks() == -1) goto error; if (install_sigprof_handler() == -1) @@ -286,32 +395,19 @@ return 0; error: - profile_file = -1; + vmp_set_profile_fileno(-1); profile_interval_usec = 0; return -1; } -static int _write_all(const char *buf, size_t bufsize) + +int close_profile(void) { - while (bufsize > 0) { - ssize_t count = write(profile_file, buf, bufsize); - if (count <= 0) - return -1; /* failed */ - buf += count; - bufsize -= count; - } - return 0; -} + (void)vmp_write_time_now(MARKER_TRAILER); -static int close_profile(void) -{ - char marker = MARKER_TRAILER; - - if (_write_all(&marker, 1) < 0) - return -1; - + teardown_rss(); /* don't close() the file descriptor from here */ - profile_file = -1; + vmp_set_profile_fileno(-1); return 0; } @@ -320,29 +416,29 @@ { vmprof_ignore_signals(1); profile_interval_usec = 0; +#ifdef VMP_SUPPORTS_NATIVE_PROFILING + disable_cpyprof(); +#endif if (remove_sigprof_timer() == -1) return -1; if (remove_sigprof_handler() == -1) return -1; flush_codes(); - if (shutdown_concurrent_bufs(profile_file) < 0) + if (shutdown_concurrent_bufs(vmp_profile_fileno()) < 0) return -1; return close_profile(); } RPY_EXTERN -int vmprof_register_virtual_function(char *code_name, long code_uid, +int vmprof_register_virtual_function(char *code_name, intptr_t code_uid, int auto_retry) { long namelen = strnlen(code_name, 1023); - long blocklen = 1 + 2 * sizeof(long) + namelen; + 
long blocklen = 1 + sizeof(intptr_t) + sizeof(long) + namelen; struct profbuf_s *p; char *t; - if (profile_file == -1) - return 0; // silently don't write it - retry: p = current_codes; if (p != NULL) { @@ -352,7 +448,7 @@ size_t freesize = SINGLE_BUF_SIZE - p->data_size; if (freesize < (size_t)blocklen) { /* full: flush it */ - commit_buffer(profile_file, p); + commit_buffer(vmp_profile_fileno(), p); p = NULL; } } @@ -363,7 +459,7 @@ } if (p == NULL) { - p = reserve_buffer(profile_file); + p = reserve_buffer(vmp_profile_fileno()); if (p == NULL) { /* can't get a free block; should almost never be the case. Spin loop if allowed, or return a failure code @@ -381,14 +477,14 @@ p->data_size += blocklen; assert(p->data_size <= SINGLE_BUF_SIZE); *t++ = MARKER_VIRTUAL_IP; - memcpy(t, &code_uid, sizeof(long)); t += sizeof(long); + memcpy(t, &code_uid, sizeof(intptr_t)); t += sizeof(intptr_t); memcpy(t, &namelen, sizeof(long)); t += sizeof(long); memcpy(t, code_name, namelen); /* try to reattach 'p' to 'current_codes' */ if (!__sync_bool_compare_and_swap(&current_codes, NULL, p)) { /* failed, flush it */ - commit_buffer(profile_file, p); + commit_buffer(vmp_profile_fileno(), p); } return 0; } @@ -398,6 +494,6 @@ struct profbuf_s *p = current_codes; if (p != NULL) { current_codes = NULL; - commit_buffer(profile_file, p); + commit_buffer(vmp_profile_fileno(), p); } } _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit