Author: Richard Plangger <planri...@gmail.com> Branch: Changeset: r91967:e19ef006ba32 Date: 2017-07-23 16:46 -0400 http://bitbucket.org/pypy/pypy/changeset/e19ef006ba32/
Log: reapply fix diff too long, truncating to 2000 out of 2191 lines diff --git a/pypy/module/_vmprof/test/test__vmprof.py b/pypy/module/_vmprof/test/test__vmprof.py --- a/pypy/module/_vmprof/test/test__vmprof.py +++ b/pypy/module/_vmprof/test/test__vmprof.py @@ -115,3 +115,31 @@ assert fd1.read() == tmpfile.read() _vmprof.disable() assert _vmprof.get_profile_path() is None + + def test_stop_sampling(self): + import os + import _vmprof + tmpfile = open(self.tmpfilename, 'wb') + native = 1 + def f(): + import sys + import math + j = sys.maxsize + for i in range(500): + j = math.sqrt(j) + _vmprof.enable(tmpfile.fileno(), 0.01, 0, native, 0, 0) + # get_vmprof_stack() always returns 0 here! + # see vmprof_common.c and assume RPYTHON_LL2CTYPES is defined! + f() + fileno = _vmprof.stop_sampling() + pos = os.lseek(fileno, 0, os.SEEK_CUR) + f() + pos2 = os.lseek(fileno, 0, os.SEEK_CUR) + assert pos == pos2 + _vmprof.start_sampling() + f() + fileno = _vmprof.stop_sampling() + pos3 = os.lseek(fileno, 0, os.SEEK_CUR) + assert pos3 > pos + _vmprof.disable() + diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -20,7 +20,8 @@ compile_extra = ['-DRPYTHON_VMPROF', '-O3'] separate_module_files = [ - SHARED.join('symboltable.c') + SHARED.join('symboltable.c'), + SHARED.join('vmprof_unix.c') ] if sys.platform.startswith('linux'): separate_module_files += [ @@ -40,7 +41,7 @@ compile_extra += ['-DVMPROF_LINUX'] elif sys.platform == 'win32': compile_extra = ['-DRPYTHON_VMPROF', '-DVMPROF_WINDOWS'] - separate_module_files = [SHARED.join('vmprof_main_win32.c')] + separate_module_files = [SHARED.join('vmprof_win.c')] _libs = [] else: # Guessing a BSD-like Unix platform @@ -58,7 +59,9 @@ SHARED.join('compat.c'), SHARED.join('machine.c'), SHARED.join('vmp_stack.c'), - SHARED.join('vmprof_main.c'), + SHARED.join('vmprof_mt.c'), + SHARED.join('vmprof_memory.c'), + SHARED.join('vmprof_common.c'), 
# symbol table already in separate_module_files ] + separate_module_files, post_include_bits=[], diff --git a/rpython/rlib/rvmprof/src/rvmprof.c b/rpython/rlib/rvmprof/src/rvmprof.c --- a/rpython/rlib/rvmprof/src/rvmprof.c +++ b/rpython/rlib/rvmprof/src/rvmprof.c @@ -15,9 +15,9 @@ #include "shared/vmprof_get_custom_offset.h" #ifdef VMPROF_UNIX -#include "shared/vmprof_main.h" +#include "shared/vmprof_unix.h" #else -#include "shared/vmprof_main_win32.h" +#include "shared/vmprof_win.h" #endif diff --git a/rpython/rlib/rvmprof/src/shared/_vmprof.c b/rpython/rlib/rvmprof/src/shared/_vmprof.c --- a/rpython/rlib/rvmprof/src/shared/_vmprof.c +++ b/rpython/rlib/rvmprof/src/shared/_vmprof.c @@ -9,8 +9,8 @@ #include <signal.h> #include "_vmprof.h" +#include "vmprof_common.h" -static volatile int is_enabled = 0; static destructor Original_code_dealloc = 0; static PyObject* (*_default_eval_loop)(PyFrameObject *, int) = 0; @@ -18,9 +18,9 @@ #include "trampoline.h" #include "machine.h" #include "symboltable.h" -#include "vmprof_main.h" +#include "vmprof_unix.h" #else -#include "vmprof_main_win32.h" +#include "vmprof_win.h" #endif #include "vmp_stack.h" @@ -156,7 +156,7 @@ static void cpyprof_code_dealloc(PyObject *co) { - if (is_enabled) { + if (vmprof_is_enabled()) { emit_code_object((PyCodeObject *)co); /* xxx error return values are ignored */ } @@ -187,7 +187,7 @@ return NULL; } - if (is_enabled) { + if (vmprof_is_enabled()) { PyErr_SetString(PyExc_ValueError, "vmprof is already enabled"); return NULL; } @@ -217,13 +217,13 @@ return NULL; } - is_enabled = 1; + vmprof_set_enabled(1); Py_RETURN_NONE; } static PyObject * vmp_is_enabled(PyObject *module, PyObject *noargs) { - if (is_enabled) { + if (vmprof_is_enabled()) { Py_RETURN_TRUE; } Py_RETURN_FALSE; @@ -237,7 +237,7 @@ return NULL; } - is_enabled = 0; + vmprof_set_enabled(0); if (PyErr_Occurred()) return NULL; @@ -362,7 +362,7 @@ #ifdef VMPROF_UNIX static PyObject * vmp_get_profile_path(PyObject *module, PyObject *noargs) 
{ PyObject * o; - if (is_enabled) { + if (vmprof_is_enabled()) { char buffer[4096]; buffer[0] = 0; ssize_t buffer_len = vmp_fd_to_path(vmp_profile_fileno(), buffer, 4096); @@ -382,21 +382,19 @@ insert_real_time_thread(PyObject *module, PyObject * noargs) { ssize_t thread_count; - if (!is_enabled) { + if (!vmprof_is_enabled()) { PyErr_SetString(PyExc_ValueError, "vmprof is not enabled"); return NULL; } - if (signal_type != SIGALRM) { + if (vmprof_get_signal_type() != SIGALRM) { PyErr_SetString(PyExc_ValueError, "vmprof is not in real time mode"); return NULL; } - while (__sync_lock_test_and_set(&spinlock, 1)) { - } - + vmprof_aquire_lock(); thread_count = insert_thread(pthread_self(), -1); - __sync_lock_release(&spinlock); + vmprof_release_lock(); return PyLong_FromSsize_t(thread_count); } @@ -405,21 +403,19 @@ remove_real_time_thread(PyObject *module, PyObject * noargs) { ssize_t thread_count; - if (!is_enabled) { + if (!vmprof_is_enabled()) { PyErr_SetString(PyExc_ValueError, "vmprof is not enabled"); return NULL; } - if (signal_type != SIGALRM) { + if (vmprof_get_signal_type() != SIGALRM) { PyErr_SetString(PyExc_ValueError, "vmprof is not in real time mode"); return NULL; } - while (__sync_lock_test_and_set(&spinlock, 1)) { - } - + vmprof_aquire_lock(); thread_count = remove_thread(pthread_self(), -1); - __sync_lock_release(&spinlock); + vmprof_release_lock(); return PyLong_FromSsize_t(thread_count); } diff --git a/rpython/rlib/rvmprof/src/shared/machine.c b/rpython/rlib/rvmprof/src/shared/machine.c --- a/rpython/rlib/rvmprof/src/shared/machine.c +++ b/rpython/rlib/rvmprof/src/shared/machine.c @@ -27,6 +27,8 @@ #endif #elif __linux__ return "linux"; +#elif __FreeBSD__ + return "freebsd" #else #error "Unknown compiler" #endif @@ -38,7 +40,7 @@ char proffs[24]; (void)snprintf(proffs, 24, "/proc/self/fd/%d", fd); return readlink(proffs, buffer, buffer_len); -#elif defined(VMPROF_UNIX) +#elif defined(VMPROF_UNIX) && !defined(__FreeBSD__) fcntl(fd, F_GETPATH, buffer); 
return strlen(buffer); #endif diff --git a/rpython/rlib/rvmprof/src/shared/vmp_stack.c b/rpython/rlib/rvmprof/src/shared/vmp_stack.c --- a/rpython/rlib/rvmprof/src/shared/vmp_stack.c +++ b/rpython/rlib/rvmprof/src/shared/vmp_stack.c @@ -523,7 +523,7 @@ int vmp_native_enable(void) { #ifdef VMPROF_LINUX - if (!unw_get_reg) { + if (libhandle == NULL) { if ((libhandle = dlopen(LIBUNWIND, RTLD_LAZY | RTLD_LOCAL)) == NULL) { goto bail_out; } @@ -570,6 +570,7 @@ vmprof_error = dlerror(); fprintf(stderr, "could not close libunwind at runtime. error: %s\n", vmprof_error); } + libhandle = NULL; } vmp_native_traces_enabled = 0; diff --git a/rpython/rlib/rvmprof/src/shared/vmprof.h b/rpython/rlib/rvmprof/src/shared/vmprof.h --- a/rpython/rlib/rvmprof/src/shared/vmprof.h +++ b/rpython/rlib/rvmprof/src/shared/vmprof.h @@ -1,5 +1,11 @@ #pragma once +#define _GNU_SOURCE 1 + +#ifndef RPYTHON_VMPROF +#include <Python.h> +#endif + #ifdef VMPROF_UNIX #include <unistd.h> #endif @@ -79,3 +85,7 @@ #endif +void set_current_codes(void * to); +int opened_profile(const char *interp_name, int memory, int proflines, int native, int real_time); +void flush_codes(void); + diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_common.c b/rpython/rlib/rvmprof/src/shared/vmprof_common.c new file mode 100644 --- /dev/null +++ b/rpython/rlib/rvmprof/src/shared/vmprof_common.c @@ -0,0 +1,303 @@ +#include "vmprof_common.h" + +#include <assert.h> +#include <errno.h> + +#ifdef RPYTHON_VMPROF +#ifdef RPYTHON_LL2CTYPES + /* only for testing: ll2ctypes sets RPY_EXTERN from the command-line */ + +#else +# include "common_header.h" +# include "structdef.h" +# include "src/threadlocal.h" +# include "rvmprof.h" +# include "forwarddecl.h" +#endif +#endif + +#ifdef VMP_SUPPORTS_NATIVE_PROFILING +#include "vmp_stack.h" // reduces warings +#endif + + +static volatile int is_enabled = 0; +static long prepare_interval_usec = 0; +static long profile_interval_usec = 0; + +#ifdef VMPROF_UNIX +static int signal_type = 
SIGPROF; +static int itimer_type = ITIMER_PROF; +static pthread_t *threads = NULL; +static size_t threads_size = 0; +static size_t thread_count = 0; +static size_t threads_size_step = 8; +#endif + +int vmprof_get_itimer_type(void) { + return itimer_type; +} + +int vmprof_is_enabled(void) { + return is_enabled; +} + +void vmprof_set_enabled(int value) { + is_enabled = value; +} + +long vmprof_get_prepare_interval_usec(void) { + return prepare_interval_usec; +} + +long vmprof_get_profile_interval_usec(void) { + return profile_interval_usec; +} + +void vmprof_set_prepare_interval_usec(long value) { + prepare_interval_usec = value; +} + +void vmprof_set_profile_interval_usec(long value) { + profile_interval_usec = value; +} + +int vmprof_get_signal_type(void) { + return signal_type; +} + +char *vmprof_init(int fd, double interval, int memory, + int proflines, const char *interp_name, int native, int real_time) +{ + if (!(interval >= 1e-6 && interval < 1.0)) { /* also if it is NaN */ + return "bad value for 'interval'"; + } + prepare_interval_usec = (int)(interval * 1000000.0); + + if (prepare_concurrent_bufs() < 0) + return "out of memory"; +#if VMPROF_UNIX + if (real_time) { + signal_type = SIGALRM; + itimer_type = ITIMER_REAL; + } else { + signal_type = SIGPROF; + itimer_type = ITIMER_PROF; + } + set_current_codes(NULL); + assert(fd >= 0); +#else + if (memory) { + return "memory tracking only supported on unix"; + } + if (native) { + return "native profiling only supported on unix"; + } +#endif + vmp_set_profile_fileno(fd); + if (opened_profile(interp_name, memory, proflines, native, real_time) < 0) { + vmp_set_profile_fileno(0); + return strerror(errno); + } + return NULL; +} + +int opened_profile(const char *interp_name, int memory, int proflines, int native, int real_time) +{ + int success; + int bits; + struct { + long hdr[5]; + char interp_name[259]; + } header; + + const char * machine; + size_t namelen = strnlen(interp_name, 255); + + machine = 
vmp_machine_os_name(); + + header.hdr[0] = 0; + header.hdr[1] = 3; + header.hdr[2] = 0; + header.hdr[3] = prepare_interval_usec; + if (strstr(machine, "win64") != 0) { + header.hdr[4] = 1; + } else { + header.hdr[4] = 0; + } + header.interp_name[0] = MARKER_HEADER; + header.interp_name[1] = '\x00'; + header.interp_name[2] = VERSION_TIMESTAMP; + header.interp_name[3] = memory*PROFILE_MEMORY + proflines*PROFILE_LINES + \ + native*PROFILE_NATIVE + real_time*PROFILE_REAL_TIME; +#ifdef RPYTHON_VMPROF + header.interp_name[3] += PROFILE_RPYTHON; +#endif + header.interp_name[4] = (char)namelen; + + memcpy(&header.interp_name[5], interp_name, namelen); + success = vmp_write_all((char*)&header, 5 * sizeof(long) + 5 + namelen); + if (success < 0) { + return success; + } + + /* Write the time and the zone to the log file, profiling will start now */ + (void)vmp_write_time_now(MARKER_TIME_N_ZONE); + + /* write some more meta information */ + vmp_write_meta("os", machine); + bits = vmp_machine_bits(); + if (bits == 64) { + vmp_write_meta("bits", "64"); + } else if (bits == 32) { + vmp_write_meta("bits", "32"); + } + + return success; +} + + +/* Seems that CPython 3.5.1 made our job harder. Did not find out how + to do that without these hacks. We can't use PyThreadState_GET(), + because that calls PyThreadState_Get() which fails an assert if the + result is NULL. 
*/ +#if PY_MAJOR_VERSION >= 3 && !defined(_Py_atomic_load_relaxed) + /* this was abruptly un-defined in 3.5.1 */ +void *volatile _PyThreadState_Current; + /* XXX simple volatile access is assumed atomic */ +# define _Py_atomic_load_relaxed(pp) (*(pp)) +#endif + +#ifdef RPYTHON_VMPROF +#ifndef RPYTHON_LL2CTYPES +PY_STACK_FRAME_T *get_vmprof_stack(void) +{ + struct pypy_threadlocal_s *tl; + _OP_THREADLOCALREF_ADDR_SIGHANDLER(tl); + if (tl == NULL) { + return NULL; + } else { + return tl->vmprof_tl_stack; + } +} +#else +PY_STACK_FRAME_T *get_vmprof_stack(void) +{ + return 0; +} +#endif + +intptr_t vmprof_get_traceback(void *stack, void *ucontext, + intptr_t *result_p, intptr_t result_length) +{ + int n; + int enabled; +#ifdef VMPROF_WINDOWS + intptr_t pc = 0; /* XXX implement me */ +#else + intptr_t pc = ucontext ? (intptr_t)GetPC((ucontext_t *)ucontext) : 0; +#endif + if (stack == NULL) { + stack = get_vmprof_stack(); + } +#ifdef VMP_SUPPORTS_NATIVE_PROFILING + enabled = vmp_native_enabled(); + vmp_native_disable(); +#endif + n = get_stack_trace(stack, result_p, result_length - 2, pc); +#ifdef VMP_SUPPORTS_NATIVE_PROFILING + if (enabled) { + vmp_native_enable(); + } +#endif + return (intptr_t)n; +} +#endif + +#ifdef VMPROF_UNIX + +ssize_t search_thread(pthread_t tid, ssize_t i) +{ + if (i < 0) + i = 0; + while ((size_t)i < thread_count) { + if (pthread_equal(threads[i], tid)) + return i; + i++; + } + return -1; +} + +ssize_t insert_thread(pthread_t tid, ssize_t i) +{ + assert(signal_type == SIGALRM); + i = search_thread(tid, i); + if (i > 0) + return -1; + if (thread_count == threads_size) { + threads_size += threads_size_step; + threads = realloc(threads, sizeof(pid_t) * threads_size); + assert(threads != NULL); + memset(threads + thread_count, 0, sizeof(pid_t) * threads_size_step); + } + threads[thread_count++] = tid; + return thread_count; +} + +ssize_t remove_thread(pthread_t tid, ssize_t i) +{ + assert(signal_type == SIGALRM); + if (thread_count == 0) + return 
-1; + if (threads == NULL) + return -1; + i = search_thread(tid, i); + if (i < 0) + return -1; + threads[i] = threads[--thread_count]; + threads[thread_count] = 0; + return thread_count; +} + +ssize_t remove_threads(void) +{ + assert(signal_type == SIGALRM); + if (threads != NULL) { + free(threads); + threads = NULL; + } + thread_count = 0; + threads_size = 0; + return 0; +} + +int broadcast_signal_for_threads(void) +{ + int done = 1; + size_t i = 0; + pthread_t self = pthread_self(); + pthread_t tid; + while (i < thread_count) { + tid = threads[i]; + if (pthread_equal(tid, self)) { + done = 0; + } else if (pthread_kill(tid, SIGALRM)) { + remove_thread(tid, i); + } + i++; + } + return done; +} + +int is_main_thread(void) +{ +#ifdef VMPROF_LINUX + pid_t pid = getpid(); + pid_t tid = (pid_t) syscall(SYS_gettid); + return (pid == tid); +#elif defined(VMPROF_APPLE) + return pthread_main_np(); +#endif +} + +#endif diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_common.h b/rpython/rlib/rvmprof/src/shared/vmprof_common.h --- a/rpython/rlib/rvmprof/src/shared/vmprof_common.h +++ b/rpython/rlib/rvmprof/src/shared/vmprof_common.h @@ -8,84 +8,27 @@ #include <time.h> #include <stdlib.h> -#ifndef VMPROF_WINDOWS +#ifdef VMPROF_UNIX #include <sys/time.h> #include "vmprof_mt.h" +#include <signal.h> +#include <pthread.h> #endif +#include "vmprof_getpc.h" + #ifdef VMPROF_LINUX #include <syscall.h> #endif #define MAX_FUNC_NAME 1024 -static long prepare_interval_usec = 0; -static long profile_interval_usec = 0; - -static int opened_profile(const char *interp_name, int memory, int proflines, int native, int real_time); - -#ifdef VMPROF_UNIX -static int signal_type = SIGPROF; -static int itimer_type = ITIMER_PROF; -static pthread_t *threads = NULL; -static size_t threads_size = 0; -static size_t thread_count = 0; -static size_t threads_size_step = 8; -static struct profbuf_s *volatile current_codes; -#endif - #ifdef VMPROF_UNIX -static inline ssize_t search_thread(pthread_t tid, 
ssize_t i) { - if (i < 0) - i = 0; - while ((size_t)i < thread_count) { - if (pthread_equal(threads[i], tid)) - return i; - i++; - } - return -1; -} - -ssize_t insert_thread(pthread_t tid, ssize_t i) { - assert(signal_type == SIGALRM); - i = search_thread(tid, i); - if (i > 0) - return -1; - if (thread_count == threads_size) { - threads_size += threads_size_step; - threads = realloc(threads, sizeof(pid_t) * threads_size); - assert(threads != NULL); - memset(threads + thread_count, 0, sizeof(pid_t) * threads_size_step); - } - threads[thread_count++] = tid; - return thread_count; -} - -ssize_t remove_thread(pthread_t tid, ssize_t i) { - assert(signal_type == SIGALRM); - if (thread_count == 0) - return -1; - if (threads == NULL) - return -1; - i = search_thread(tid, i); - if (i < 0) - return -1; - threads[i] = threads[--thread_count]; - threads[thread_count] = 0; - return thread_count; -} - -ssize_t remove_threads(void) { - assert(signal_type == SIGALRM); - if (threads != NULL) { - free(threads); - threads = NULL; - } - thread_count = 0; - threads_size = 0; - return 0; -} +ssize_t search_thread(pthread_t tid, ssize_t i); +ssize_t insert_thread(pthread_t tid, ssize_t i); +ssize_t remove_thread(pthread_t tid, ssize_t i); +ssize_t remove_threads(void); #endif @@ -130,95 +73,9 @@ RPY_EXTERN char *vmprof_init(int fd, double interval, int memory, - int proflines, const char *interp_name, int native, int real_time) -{ - if (!(interval >= 1e-6 && interval < 1.0)) { /* also if it is NaN */ - return "bad value for 'interval'"; - } - prepare_interval_usec = (int)(interval * 1000000.0); + int proflines, const char *interp_name, int native, int real_time); - if (prepare_concurrent_bufs() < 0) - return "out of memory"; -#if VMPROF_UNIX - if (real_time) { - signal_type = SIGALRM; - itimer_type = ITIMER_REAL; - } else { - signal_type = SIGPROF; - itimer_type = ITIMER_PROF; - } - current_codes = NULL; - assert(fd >= 0); -#else - if (memory) { - return "memory tracking only supported 
on unix"; - } - if (native) { - return "native profiling only supported on unix"; - } -#endif - vmp_set_profile_fileno(fd); - if (opened_profile(interp_name, memory, proflines, native, real_time) < 0) { - vmp_set_profile_fileno(0); - return strerror(errno); - } - return NULL; -} - -static int opened_profile(const char *interp_name, int memory, int proflines, int native, int real_time) -{ - int success; - int bits; - struct { - long hdr[5]; - char interp_name[259]; - } header; - - const char * machine; - size_t namelen = strnlen(interp_name, 255); - - machine = vmp_machine_os_name(); - - header.hdr[0] = 0; - header.hdr[1] = 3; - header.hdr[2] = 0; - header.hdr[3] = prepare_interval_usec; - if (strstr(machine, "win64") != 0) { - header.hdr[4] = 1; - } else { - header.hdr[4] = 0; - } - header.interp_name[0] = MARKER_HEADER; - header.interp_name[1] = '\x00'; - header.interp_name[2] = VERSION_TIMESTAMP; - header.interp_name[3] = memory*PROFILE_MEMORY + proflines*PROFILE_LINES + \ - native*PROFILE_NATIVE + real_time*PROFILE_REAL_TIME; -#ifdef RPYTHON_VMPROF - header.interp_name[3] += PROFILE_RPYTHON; -#endif - header.interp_name[4] = (char)namelen; - - memcpy(&header.interp_name[5], interp_name, namelen); - success = vmp_write_all((char*)&header, 5 * sizeof(long) + 5 + namelen); - if (success < 0) { - return success; - } - - /* Write the time and the zone to the log file, profiling will start now */ - (void)vmp_write_time_now(MARKER_TIME_N_ZONE); - - /* write some more meta information */ - vmp_write_meta("os", machine); - bits = vmp_machine_bits(); - if (bits == 64) { - vmp_write_meta("bits", "64"); - } else if (bits == 32) { - vmp_write_meta("bits", "32"); - } - - return success; -} - +int opened_profile(const char *interp_name, int memory, int proflines, int native, int real_time); /* Seems that CPython 3.5.1 made our job harder. Did not find out how to do that without these hacks. 
We can't use PyThreadState_GET(), @@ -233,46 +90,22 @@ #ifdef RPYTHON_VMPROF #ifndef RPYTHON_LL2CTYPES -static PY_STACK_FRAME_T *get_vmprof_stack(void) -{ - struct pypy_threadlocal_s *tl; - _OP_THREADLOCALREF_ADDR_SIGHANDLER(tl); - if (tl == NULL) - return NULL; - else - return tl->vmprof_tl_stack; -} -#else -static PY_STACK_FRAME_T *get_vmprof_stack(void) -{ - return 0; -} +PY_STACK_FRAME_T *get_vmprof_stack(void); +#endif +RPY_EXTERN +intptr_t vmprof_get_traceback(void *stack, void *ucontext, + intptr_t *result_p, intptr_t result_length); #endif -RPY_EXTERN -intptr_t vmprof_get_traceback(void *stack, void *ucontext, - intptr_t *result_p, intptr_t result_length) -{ - int n; - int enabled; -#ifdef VMPROF_WINDOWS - intptr_t pc = 0; /* XXX implement me */ -#else - intptr_t pc = ucontext ? (intptr_t)GetPC((ucontext_t *)ucontext) : 0; +int vmprof_get_signal_type(void); +long vmprof_get_prepare_interval_usec(void); +long vmprof_get_profile_interval_usec(void); +void vmprof_set_prepare_interval_usec(long value); +void vmprof_set_profile_interval_usec(long value); +int vmprof_is_enabled(void); +void vmprof_set_enabled(int value); +int vmprof_get_itimer_type(void); +#ifdef VMPROF_UNIX +int broadcast_signal_for_threads(void); +int is_main_thread(void); #endif - if (stack == NULL) { - stack = get_vmprof_stack(); - } -#ifdef VMP_SUPPORTS_NATIVE_PROFILING - enabled = vmp_native_enabled(); - vmp_native_disable(); -#endif - n = get_stack_trace(stack, result_p, result_length - 2, pc); -#ifdef VMP_SUPPORTS_NATIVE_PROFILING - if (enabled) { - vmp_native_enable(); - } -#endif - return (intptr_t)n; -} -#endif diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_getpc.h b/rpython/rlib/rvmprof/src/shared/vmprof_getpc.h --- a/rpython/rlib/rvmprof/src/shared/vmprof_getpc.h +++ b/rpython/rlib/rvmprof/src/shared/vmprof_getpc.h @@ -142,6 +142,7 @@ // the right value for your system, and add it to the list in // vmrpof_config.h #else + static intptr_t GetPC(ucontext_t *signal_ucontext) { 
return signal_ucontext->PC_FROM_UCONTEXT; // defined in config.h } diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_memory.c b/rpython/rlib/rvmprof/src/shared/vmprof_memory.c new file mode 100644 --- /dev/null +++ b/rpython/rlib/rvmprof/src/shared/vmprof_memory.c @@ -0,0 +1,81 @@ +#include "vmprof_memory.h" + +#ifdef VMPROF_APPLE +/* On OS X we can get RSS using the Mach API. */ +#include <mach/mach.h> +#include <mach/message.h> +#include <mach/kern_return.h> +#include <mach/task_info.h> + +static mach_port_t mach_task; +#else +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <string.h> +/* On '''normal''' Unices we can get RSS from '/proc/<pid>/status'. */ +static int proc_file = -1; +#endif + +int setup_rss(void) +{ +#ifdef VMPROF_LINUX + char buf[128]; + + sprintf(buf, "/proc/%d/status", getpid()); + proc_file = open(buf, O_RDONLY); + return proc_file; +#elif defined(VMPROF_APPLE) + mach_task = mach_task_self(); + return 0; +#else + return 0; +#endif +} + +int teardown_rss(void) +{ +#ifdef VMPROF_LINUX + close(proc_file); + proc_file = -1; + return 0; +#else + return 0; +#endif +} + +long get_current_proc_rss(void) +{ +#ifdef VMPROF_LINUX + char buf[1024]; + int i = 0; + + if (lseek(proc_file, 0, SEEK_SET) == -1) + return -1; + if (read(proc_file, buf, 1024) == -1) + return -1; + while (i < 1020) { + if (strncmp(buf + i, "VmRSS:\t", 7) == 0) { + i += 7; + return atoi(buf + i); + } + i++; + } + return -1; +#elif defined(VMPROF_APPLE) + mach_msg_type_number_t out_count = MACH_TASK_BASIC_INFO_COUNT; + mach_task_basic_info_data_t taskinfo = { .resident_size = 0 }; + + kern_return_t error = task_info(mach_task, MACH_TASK_BASIC_INFO, (task_info_t)&taskinfo, &out_count); + if (error == KERN_SUCCESS) { + return (long)(taskinfo.resident_size / 1024); + } else { + return -1; + } +#else + return -1; // not implemented +#endif +} diff --git 
a/rpython/rlib/rvmprof/src/shared/vmprof_memory.h b/rpython/rlib/rvmprof/src/shared/vmprof_memory.h new file mode 100644 --- /dev/null +++ b/rpython/rlib/rvmprof/src/shared/vmprof_memory.h @@ -0,0 +1,5 @@ +#pragma once + +int setup_rss(void); +int teardown_rss(void); +long get_current_proc_rss(void); diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_mt.c b/rpython/rlib/rvmprof/src/shared/vmprof_mt.c new file mode 100644 --- /dev/null +++ b/rpython/rlib/rvmprof/src/shared/vmprof_mt.c @@ -0,0 +1,181 @@ +#include "vmprof_mt.h" +/* Support for multithreaded write() operations (implementation) */ + +#include <assert.h> + +#if defined(__i386__) || defined(__amd64__) + static inline void write_fence(void) { asm("" : : : "memory"); } +#else + static inline void write_fence(void) { __sync_synchronize(); } +#endif + +static char volatile profbuf_state[MAX_NUM_BUFFERS]; +static struct profbuf_s *profbuf_all_buffers = NULL; +static int volatile profbuf_write_lock = 2; +static long profbuf_pending_write; + + +static void unprepare_concurrent_bufs(void) +{ + if (profbuf_all_buffers != NULL) { + munmap(profbuf_all_buffers, sizeof(struct profbuf_s) * MAX_NUM_BUFFERS); + profbuf_all_buffers = NULL; + } +} + +int prepare_concurrent_bufs(void) +{ + assert(sizeof(struct profbuf_s) == 8192); + + unprepare_concurrent_bufs(); + profbuf_all_buffers = mmap(NULL, sizeof(struct profbuf_s) * MAX_NUM_BUFFERS, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + -1, 0); + if (profbuf_all_buffers == MAP_FAILED) { + profbuf_all_buffers = NULL; + return -1; + } + memset((char *)profbuf_state, PROFBUF_UNUSED, sizeof(profbuf_state)); + profbuf_write_lock = 0; + profbuf_pending_write = -1; + return 0; +} + +static int _write_single_ready_buffer(int fd, long i) +{ + /* Try to write to disk the buffer number 'i'. This function must + only be called while we hold the write lock. 
*/ + assert(profbuf_write_lock != 0); + + if (profbuf_pending_write >= 0) { + /* A partially written buffer is waiting. We'll write the + rest of this buffer now, instead of 'i'. */ + i = profbuf_pending_write; + assert(profbuf_state[i] == PROFBUF_READY); + } + + if (profbuf_state[i] != PROFBUF_READY) { + /* this used to be a race condition: the buffer was written by a + different thread already, nothing to do now */ + return 0; + } + + int err; + struct profbuf_s *p = &profbuf_all_buffers[i]; + ssize_t count = write(fd, p->data + p->data_offset, p->data_size); + if (count == p->data_size) { + profbuf_state[i] = PROFBUF_UNUSED; + profbuf_pending_write = -1; + } + else { + if (count > 0) { + p->data_offset += count; + p->data_size -= count; + } + profbuf_pending_write = i; + if (count < 0) + return -1; + } + return 0; +} + +static void _write_ready_buffers(int fd) +{ + long i; + int has_write_lock = 0; + + for (i = 0; i < MAX_NUM_BUFFERS; i++) { + if (profbuf_state[i] == PROFBUF_READY) { + if (!has_write_lock) { + if (!__sync_bool_compare_and_swap(&profbuf_write_lock, 0, 1)) + return; /* can't acquire the write lock, give up */ + has_write_lock = 1; + } + if (_write_single_ready_buffer(fd, i) < 0) + break; + } + } + if (has_write_lock) + profbuf_write_lock = 0; +} + +struct profbuf_s *reserve_buffer(int fd) +{ + /* Tries to enter a region of code that fills one buffer. If + successful, returns the profbuf_s. It fails only if the + concurrent buffers are all busy (extreme multithreaded usage). + + This might call write() to emit the data sitting in + previously-prepared buffers. In case of write() error, the + error is ignored but unwritten data stays in the buffers. 
+ */ + long i; + + _write_ready_buffers(fd); + + for (i = 0; i < MAX_NUM_BUFFERS; i++) { + if (profbuf_state[i] == PROFBUF_UNUSED && + __sync_bool_compare_and_swap(&profbuf_state[i], PROFBUF_UNUSED, + PROFBUF_FILLING)) { + struct profbuf_s *p = &profbuf_all_buffers[i]; + p->data_size = 0; + p->data_offset = 0; + return p; + } + } + /* no unused buffer found */ + return NULL; +} + +void commit_buffer(int fd, struct profbuf_s *buf) +{ + /* Leaves a region of code that filled 'buf'. + + This might call write() to emit the data now ready. In case of + write() error, the error is ignored but unwritten data stays in + the buffers. + */ + + /* Make sure every thread sees the full content of 'buf' */ + write_fence(); + + /* Then set the 'ready' flag */ + long i = buf - profbuf_all_buffers; + assert(profbuf_state[i] == PROFBUF_FILLING); + profbuf_state[i] = PROFBUF_READY; + + if (!__sync_bool_compare_and_swap(&profbuf_write_lock, 0, 1)) { + /* can't acquire the write lock, ignore */ + } + else { + _write_single_ready_buffer(fd, i); + profbuf_write_lock = 0; + } +} + +void cancel_buffer(struct profbuf_s *buf) +{ + long i = buf - profbuf_all_buffers; + assert(profbuf_state[i] == PROFBUF_FILLING); + profbuf_state[i] = PROFBUF_UNUSED; +} + +int shutdown_concurrent_bufs(int fd) +{ + /* no signal handler can be running concurrently here, because we + already did vmprof_ignore_signals(1) */ + assert(profbuf_write_lock == 0); + profbuf_write_lock = 2; + + /* last attempt to flush buffers */ + int i; + for (i = 0; i < MAX_NUM_BUFFERS; i++) { + while (profbuf_state[i] == PROFBUF_READY) { + if (_write_single_ready_buffer(fd, i) < 0) + return -1; + } + } + unprepare_concurrent_bufs(); + return 0; +} diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_mt.h b/rpython/rlib/rvmprof/src/shared/vmprof_mt.h --- a/rpython/rlib/rvmprof/src/shared/vmprof_mt.h +++ b/rpython/rlib/rvmprof/src/shared/vmprof_mt.h @@ -1,11 +1,11 @@ #pragma once /* Support for multithreaded write() operations */ 
+#include "vmprof.h" + #include <string.h> #include <sys/mman.h> -#include "vmprof.h" - /* The idea is that we have MAX_NUM_BUFFERS available, all of size SINGLE_BUF_SIZE. Threads and signal handlers can ask to reserve a buffer, fill it, and finally "commit" it, at which point its @@ -29,12 +29,6 @@ */ #define MAX_NUM_BUFFERS 20 -#if defined(__i386__) || defined(__amd64__) - static inline void write_fence(void) { asm("" : : : "memory"); } -#else - static inline void write_fence(void) { __sync_synchronize(); } -#endif - #ifndef MAP_ANONYMOUS #define MAP_ANONYMOUS MAP_ANON #endif @@ -50,173 +44,8 @@ char data[SINGLE_BUF_SIZE]; }; -static char volatile profbuf_state[MAX_NUM_BUFFERS]; -static struct profbuf_s *profbuf_all_buffers = NULL; -static int volatile profbuf_write_lock = 2; -static long profbuf_pending_write; - - -static void unprepare_concurrent_bufs(void) -{ - if (profbuf_all_buffers != NULL) { - munmap(profbuf_all_buffers, sizeof(struct profbuf_s) * MAX_NUM_BUFFERS); - profbuf_all_buffers = NULL; - } -} - -static int prepare_concurrent_bufs(void) -{ - assert(sizeof(struct profbuf_s) == 8192); - - unprepare_concurrent_bufs(); - profbuf_all_buffers = mmap(NULL, sizeof(struct profbuf_s) * MAX_NUM_BUFFERS, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, - -1, 0); - if (profbuf_all_buffers == MAP_FAILED) { - profbuf_all_buffers = NULL; - return -1; - } - memset((char *)profbuf_state, PROFBUF_UNUSED, sizeof(profbuf_state)); - profbuf_write_lock = 0; - profbuf_pending_write = -1; - return 0; -} - -static int _write_single_ready_buffer(int fd, long i) -{ - /* Try to write to disk the buffer number 'i'. This function must - only be called while we hold the write lock. */ - assert(profbuf_write_lock != 0); - - if (profbuf_pending_write >= 0) { - /* A partially written buffer is waiting. We'll write the - rest of this buffer now, instead of 'i'. 
*/ - i = profbuf_pending_write; - assert(profbuf_state[i] == PROFBUF_READY); - } - - if (profbuf_state[i] != PROFBUF_READY) { - /* this used to be a race condition: the buffer was written by a - different thread already, nothing to do now */ - return 0; - } - - int err; - struct profbuf_s *p = &profbuf_all_buffers[i]; - ssize_t count = write(fd, p->data + p->data_offset, p->data_size); - if (count == p->data_size) { - profbuf_state[i] = PROFBUF_UNUSED; - profbuf_pending_write = -1; - } - else { - if (count > 0) { - p->data_offset += count; - p->data_size -= count; - } - profbuf_pending_write = i; - if (count < 0) - return -1; - } - return 0; -} - -static void _write_ready_buffers(int fd) -{ - long i; - int has_write_lock = 0; - - for (i = 0; i < MAX_NUM_BUFFERS; i++) { - if (profbuf_state[i] == PROFBUF_READY) { - if (!has_write_lock) { - if (!__sync_bool_compare_and_swap(&profbuf_write_lock, 0, 1)) - return; /* can't acquire the write lock, give up */ - has_write_lock = 1; - } - if (_write_single_ready_buffer(fd, i) < 0) - break; - } - } - if (has_write_lock) - profbuf_write_lock = 0; -} - -static struct profbuf_s *reserve_buffer(int fd) -{ - /* Tries to enter a region of code that fills one buffer. If - successful, returns the profbuf_s. It fails only if the - concurrent buffers are all busy (extreme multithreaded usage). - - This might call write() to emit the data sitting in - previously-prepared buffers. In case of write() error, the - error is ignored but unwritten data stays in the buffers. 
- */ - long i; - - _write_ready_buffers(fd); - - for (i = 0; i < MAX_NUM_BUFFERS; i++) { - if (profbuf_state[i] == PROFBUF_UNUSED && - __sync_bool_compare_and_swap(&profbuf_state[i], PROFBUF_UNUSED, - PROFBUF_FILLING)) { - struct profbuf_s *p = &profbuf_all_buffers[i]; - p->data_size = 0; - p->data_offset = 0; - return p; - } - } - /* no unused buffer found */ - return NULL; -} - -static void commit_buffer(int fd, struct profbuf_s *buf) -{ - /* Leaves a region of code that filled 'buf'. - - This might call write() to emit the data now ready. In case of - write() error, the error is ignored but unwritten data stays in - the buffers. - */ - - /* Make sure every thread sees the full content of 'buf' */ - write_fence(); - - /* Then set the 'ready' flag */ - long i = buf - profbuf_all_buffers; - assert(profbuf_state[i] == PROFBUF_FILLING); - profbuf_state[i] = PROFBUF_READY; - - if (!__sync_bool_compare_and_swap(&profbuf_write_lock, 0, 1)) { - /* can't acquire the write lock, ignore */ - } - else { - _write_single_ready_buffer(fd, i); - profbuf_write_lock = 0; - } -} - -static void cancel_buffer(struct profbuf_s *buf) -{ - long i = buf - profbuf_all_buffers; - assert(profbuf_state[i] == PROFBUF_FILLING); - profbuf_state[i] = PROFBUF_UNUSED; -} - -static int shutdown_concurrent_bufs(int fd) -{ - /* no signal handler can be running concurrently here, because we - already did vmprof_ignore_signals(1) */ - assert(profbuf_write_lock == 0); - profbuf_write_lock = 2; - - /* last attempt to flush buffers */ - int i; - for (i = 0; i < MAX_NUM_BUFFERS; i++) { - while (profbuf_state[i] == PROFBUF_READY) { - if (_write_single_ready_buffer(fd, i) < 0) - return -1; - } - } - unprepare_concurrent_bufs(); - return 0; -} +int prepare_concurrent_bufs(void); +struct profbuf_s *reserve_buffer(int fd); +void commit_buffer(int fd, struct profbuf_s *buf); +void cancel_buffer(struct profbuf_s *buf); +int shutdown_concurrent_bufs(int fd); diff --git 
a/rpython/rlib/rvmprof/src/shared/vmprof_unix.c b/rpython/rlib/rvmprof/src/shared/vmprof_unix.c new file mode 100644 --- /dev/null +++ b/rpython/rlib/rvmprof/src/shared/vmprof_unix.c @@ -0,0 +1,496 @@ +#include "vmprof_unix.h" + +#ifdef VMPROF_UNIX + +#if VMPROF_LINUX +#include <syscall.h> +#endif + + +#include <dlfcn.h> +#include <pthread.h> +#include <unistd.h> +#include <assert.h> +#include <errno.h> +#include <stdio.h> +#include <fcntl.h> +#include <time.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/time.h> + +#include "vmp_stack.h" +#include "vmprof_mt.h" +#include "vmprof_getpc.h" +#include "vmprof_common.h" +#include "vmprof_memory.h" +#include "compat.h" + + + +/* value: LSB bit is 1 if signals must be ignored; all other bits + are a counter for how many threads are currently in a signal handler */ +static long volatile signal_handler_ignore = 1; +static long volatile signal_handler_entries = 0; +static char atfork_hook_installed = 0; +static volatile int spinlock; +static jmp_buf restore_point; +static struct profbuf_s *volatile current_codes; + + +void vmprof_ignore_signals(int ignored) +{ + if (ignored) { + /* set the last bit, and wait until concurrently-running signal + handlers finish */ + __sync_add_and_fetch(&signal_handler_ignore, 1L); + while (signal_handler_entries != 0L) { + usleep(1); + } + } else { + __sync_sub_and_fetch(&signal_handler_ignore, 1L); + } +} + +long vmprof_enter_signal(void) +{ + __sync_fetch_and_add(&signal_handler_entries, 1L); + return signal_handler_ignore; +} + +long vmprof_exit_signal(void) +{ + return __sync_sub_and_fetch(&signal_handler_entries, 1L); +} + +int install_pthread_atfork_hooks(void) { + /* this is needed to prevent the problems described there: + - http://code.google.com/p/gperftools/issues/detail?id=278 + - http://lists.debian.org/debian-glibc/2010/03/msg00161.html + + TL;DR: if the RSS of the process is large enough, the clone() syscall + will be interrupted by the SIGPROF before it can 
complete, then + retried, interrupted again and so on, in an endless loop. The + solution is to disable the timer around the fork, and re-enable it + only inside the parent. + */ + if (atfork_hook_installed) + return 0; + int ret = pthread_atfork(atfork_disable_timer, atfork_enable_timer, atfork_close_profile_file); + if (ret != 0) + return -1; + atfork_hook_installed = 1; + return 0; +} + +void segfault_handler(int arg) +{ + longjmp(restore_point, SIGSEGV); +} + +int _vmprof_sample_stack(struct profbuf_s *p, PY_THREAD_STATE_T * tstate, ucontext_t * uc) +{ + int depth; + struct prof_stacktrace_s *st = (struct prof_stacktrace_s *)p->data; + st->marker = MARKER_STACKTRACE; + st->count = 1; +#ifdef RPYTHON_VMPROF + depth = get_stack_trace(get_vmprof_stack(), st->stack, MAX_STACK_DEPTH-1, (intptr_t)GetPC(uc)); +#else + depth = get_stack_trace(tstate, st->stack, MAX_STACK_DEPTH-1, (intptr_t)NULL); +#endif + // useful for tests (see test_stop_sampling) +#ifndef RPYTHON_LL2CTYPES + if (depth == 0) { + return 0; + } +#endif + st->depth = depth; + st->stack[depth++] = tstate; + long rss = get_current_proc_rss(); + if (rss >= 0) + st->stack[depth++] = (void*)rss; + p->data_offset = offsetof(struct prof_stacktrace_s, marker); + p->data_size = (depth * sizeof(void *) + + sizeof(struct prof_stacktrace_s) - + offsetof(struct prof_stacktrace_s, marker)); + return 1; +} + +#ifndef RPYTHON_VMPROF +PY_THREAD_STATE_T * _get_pystate_for_this_thread(void) { + // see issue 116 on github.com/vmprof/vmprof-python. 
+ // PyGILState_GetThisThreadState(); can hang forever + // + PyInterpreterState * istate; + PyThreadState * state; + long mythread_id; + + mythread_id = PyThread_get_thread_ident(); + istate = PyInterpreterState_Head(); + if (istate == NULL) { + fprintf(stderr, "WARNING: interp state head is null (for thread id %ld)\n", mythread_id); + return NULL; + } + // fish fish fish, it will NOT lock the keymutex in pythread + do { + state = PyInterpreterState_ThreadHead(istate); + do { + if (state->thread_id == mythread_id) { + return state; + } + } while ((state = PyThreadState_Next(state)) != NULL); + } while ((istate = PyInterpreterState_Next(istate)) != NULL); + + // uh? not found? + fprintf(stderr, "WARNING: cannot find thread state (for thread id %ld), sample will be thrown away\n", mythread_id); + return NULL; +} +#endif + +void flush_codes(void) +{ + struct profbuf_s *p = current_codes; + if (p != NULL) { + current_codes = NULL; + commit_buffer(vmp_profile_fileno(), p); + } +} + +void set_current_codes(void * to) { + current_codes = to; +} + +#endif + +void vmprof_aquire_lock(void) { + while (__sync_lock_test_and_set(&spinlock, 1)) { + } +} + +void vmprof_release_lock(void) { + __sync_lock_release(&spinlock); +} + +void sigprof_handler(int sig_nr, siginfo_t* info, void *ucontext) +{ + int commit; + PY_THREAD_STATE_T * tstate = NULL; + void (*prevhandler)(int); + +#ifndef RPYTHON_VMPROF + + // Even though the docs say that this function call is for 'esoteric use' + // it seems to be correctly set when the interpreter is teared down! + if (!Py_IsInitialized()) { + return; + } + + // TERRIBLE HACK AHEAD + // on OS X, the thread local storage is sometimes uninitialized + // when the signal handler runs - it means it's impossible to read errno + // or call any syscall or read PyThread_Current or pthread_self. Additionally, + // it seems impossible to read the register gs. 
+ // here we register segfault handler (all guarded by a spinlock) and call + // longjmp in case segfault happens while reading a thread local + // + // We do the same error detection for linux to ensure that + // get_current_thread_state returns a sane result + while (__sync_lock_test_and_set(&spinlock, 1)) { + } + +#ifdef VMPROF_UNIX + // SIGNAL ABUSE AHEAD + // On linux, the prof timer will deliver the signal to the thread which triggered the timer, + // because these timers are based on process and system time, and as such, are thread-aware. + // For the real timer, the signal gets delivered to the main thread, seemingly always. + // Consequently if we want to sample multiple threads, we need to forward this signal. + if (vmprof_get_signal_type() == SIGALRM) { + if (is_main_thread() && broadcast_signal_for_threads()) { + __sync_lock_release(&spinlock); + return; + } + } +#endif + + prevhandler = signal(SIGSEGV, &segfault_handler); + int fault_code = setjmp(restore_point); + if (fault_code == 0) { + pthread_self(); + tstate = _get_pystate_for_this_thread(); + } else { + signal(SIGSEGV, prevhandler); + __sync_lock_release(&spinlock); + return; + } + signal(SIGSEGV, prevhandler); + __sync_lock_release(&spinlock); +#endif + + long val = vmprof_enter_signal(); + + if (val == 0) { + int saved_errno = errno; + int fd = vmp_profile_fileno(); + assert(fd >= 0); + + struct profbuf_s *p = reserve_buffer(fd); + if (p == NULL) { + /* ignore this signal: there are no free buffers right now */ + } else { +#ifdef RPYTHON_VMPROF + commit = _vmprof_sample_stack(p, NULL, (ucontext_t*)ucontext); +#else + commit = _vmprof_sample_stack(p, tstate, (ucontext_t*)ucontext); +#endif + if (commit) { + commit_buffer(fd, p); + } else { +#ifndef RPYTHON_VMPROF + fprintf(stderr, "WARNING: canceled buffer, no stack trace was written\n"); +#else + fprintf(stderr, "WARNING: canceled buffer, no stack trace was written\n"); +#endif + cancel_buffer(p); + } + } + + errno = saved_errno; + } + + 
vmprof_exit_signal(); +} + +int install_sigprof_handler(void) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = sigprof_handler; + sa.sa_flags = SA_RESTART | SA_SIGINFO; + if (sigemptyset(&sa.sa_mask) == -1 || + sigaction(vmprof_get_signal_type(), &sa, NULL) == -1) + return -1; + return 0; +} + +int remove_sigprof_handler(void) +{ + struct sigaction ign_sigint, prev; + ign_sigint.sa_handler = SIG_IGN; + ign_sigint.sa_flags = 0; + sigemptyset(&ign_sigint.sa_mask); + + if (sigaction(vmprof_get_signal_type(), &ign_sigint, NULL) < 0) { + fprintf(stderr, "Could not remove the signal handler (for profiling)\n"); + return -1; + } + return 0; +} + +int install_sigprof_timer(void) +{ + static struct itimerval timer; + timer.it_interval.tv_sec = 0; + timer.it_interval.tv_usec = (int)vmprof_get_profile_interval_usec(); + timer.it_value = timer.it_interval; + if (setitimer(vmprof_get_itimer_type(), &timer, NULL) != 0) + return -1; + return 0; +} + +int remove_sigprof_timer(void) +{ + static struct itimerval timer; + timerclear(&(timer.it_interval)); + timerclear(&(timer.it_value)); + if (setitimer(vmprof_get_itimer_type(), &timer, NULL) != 0) { + fprintf(stderr, "Could not disable the signal handler (for profiling)\n"); + return -1; + } + return 0; +} + +void atfork_disable_timer(void) +{ + if (vmprof_get_profile_interval_usec() > 0) { + remove_sigprof_timer(); + vmprof_set_enabled(0); + } +} + +void atfork_close_profile_file(void) +{ + int fd = vmp_profile_fileno(); + if (fd != -1) + close(fd); + vmp_set_profile_fileno(-1); +} +void atfork_enable_timer(void) +{ + if (vmprof_get_profile_interval_usec() > 0) { + install_sigprof_timer(); + vmprof_set_enabled(1); + } +} + +#ifdef VMP_SUPPORTS_NATIVE_PROFILING +void init_cpyprof(int native) +{ + // skip this if native should not be enabled + if (!native) { + vmp_native_disable(); + return; + } + vmp_native_enable(); +} + +static void disable_cpyprof(void) +{ + vmp_native_disable(); +} +#endif + +int 
vmprof_enable(int memory, int native, int real_time) +{ +#ifdef VMP_SUPPORTS_NATIVE_PROFILING + init_cpyprof(native); +#endif + assert(vmp_profile_fileno() >= 0); + assert(vmprof_get_prepare_interval_usec() > 0); + vmprof_set_profile_interval_usec(vmprof_get_prepare_interval_usec()); + if (memory && setup_rss() == -1) + goto error; +#if VMPROF_UNIX + if (real_time && insert_thread(pthread_self(), -1) == -1) + goto error; +#endif + if (install_pthread_atfork_hooks() == -1) + goto error; + if (install_sigprof_handler() == -1) + goto error; + if (install_sigprof_timer() == -1) + goto error; + vmprof_ignore_signals(0); + return 0; + + error: + vmp_set_profile_fileno(-1); + vmprof_set_profile_interval_usec(0); + return -1; +} + + +int close_profile(void) +{ + int fileno = vmp_profile_fileno(); + fsync(fileno); + (void)vmp_write_time_now(MARKER_TRAILER); + teardown_rss(); + + /* don't close() the file descriptor from here */ + vmp_set_profile_fileno(-1); + return 0; +} + +int vmprof_disable(void) +{ + vmprof_ignore_signals(1); + vmprof_set_profile_interval_usec(0); +#ifdef VMP_SUPPORTS_NATIVE_PROFILING + disable_cpyprof(); +#endif + + if (remove_sigprof_timer() == -1) { + return -1; + } + if (remove_sigprof_handler() == -1) { + return -1; + } +#ifdef VMPROF_UNIX + if ((vmprof_get_signal_type() == SIGALRM) && remove_threads() == -1) { + return -1; + } +#endif + flush_codes(); + if (shutdown_concurrent_bufs(vmp_profile_fileno()) < 0) + return -1; + return close_profile(); +} + +int vmprof_register_virtual_function(char *code_name, intptr_t code_uid, + int auto_retry) +{ + long namelen = strnlen(code_name, 1023); + long blocklen = 1 + sizeof(intptr_t) + sizeof(long) + namelen; + struct profbuf_s *p; + char *t; + + retry: + p = current_codes; + if (p != NULL) { + if (__sync_bool_compare_and_swap(&current_codes, p, NULL)) { + /* grabbed 'current_codes': we will append the current block + to it if it contains enough room */ + size_t freesize = SINGLE_BUF_SIZE - p->data_size; + if 
(freesize < (size_t)blocklen) { + /* full: flush it */ + commit_buffer(vmp_profile_fileno(), p); + p = NULL; + } + } + else { + /* compare-and-swap failed, don't try again */ + p = NULL; + } + } + + if (p == NULL) { + p = reserve_buffer(vmp_profile_fileno()); + if (p == NULL) { + /* can't get a free block; should almost never be the + case. Spin loop if allowed, or return a failure code + if not (e.g. we're in a signal handler) */ + if (auto_retry > 0) { + auto_retry--; + usleep(1); + goto retry; + } + return -1; + } + } + + t = p->data + p->data_size; + p->data_size += blocklen; + assert(p->data_size <= SINGLE_BUF_SIZE); + *t++ = MARKER_VIRTUAL_IP; + memcpy(t, &code_uid, sizeof(intptr_t)); t += sizeof(intptr_t); + memcpy(t, &namelen, sizeof(long)); t += sizeof(long); + memcpy(t, code_name, namelen); + + /* try to reattach 'p' to 'current_codes' */ + if (!__sync_bool_compare_and_swap(&current_codes, NULL, p)) { + /* failed, flush it */ + commit_buffer(vmp_profile_fileno(), p); + } + return 0; +} + +int get_stack_trace(PY_THREAD_STATE_T * current, void** result, int max_depth, intptr_t pc) +{ + PY_STACK_FRAME_T * frame; +#ifdef RPYTHON_VMPROF + // do nothing here, + frame = (PY_STACK_FRAME_T*)current; +#else + if (current == NULL) { + fprintf(stderr, "WARNING: get_stack_trace, current is NULL\n"); + return 0; + } + frame = current->frame; +#endif + if (frame == NULL) { + fprintf(stderr, "WARNING: get_stack_trace, frame is NULL\n"); + return 0; + } + return vmp_walk_and_record_stack(frame, result, max_depth, 1, pc); +} diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_unix.h b/rpython/rlib/rvmprof/src/shared/vmprof_unix.h new file mode 100644 --- /dev/null +++ b/rpython/rlib/rvmprof/src/shared/vmprof_unix.h @@ -0,0 +1,86 @@ +#pragma once + +/* VMPROF + * + * statistical sampling profiler specifically designed to profile programs + * which run on a Virtual Machine and/or bytecode interpreter, such as Python, + * etc. 
+ * + * The logic to dump the C stack traces is partly stolen from the code in + * gperftools. + * The file "getpc.h" has been entirely copied from gperftools. + * + * Tested only on gcc, linux, x86_64. + * + * Copyright (C) 2014-2017 + * Antonio Cuni - anto.c...@gmail.com + * Maciej Fijalkowski - fij...@gmail.com + * Armin Rigo - ar...@tunes.org + * Richard Plangger - planri...@gmail.com + * + */ + +#include "vmprof.h" + +#include "vmprof_mt.h" + +#include <signal.h> + +RPY_EXTERN void vmprof_ignore_signals(int ignored); +RPY_EXTERN long vmprof_enter_signal(void); +RPY_EXTERN long vmprof_exit_signal(void); + +/* ************************************************************* + * functions to dump the stack trace + * ************************************************************* + */ + +#ifndef RPYTHON_VMPROF +PY_THREAD_STATE_T * _get_pystate_for_this_thread(void); +#endif +int get_stack_trace(PY_THREAD_STATE_T * current, void** result, int max_depth, intptr_t pc); + +/* ************************************************************* + * the signal handler + * ************************************************************* + */ + +#include <setjmp.h> + +void segfault_handler(int arg); +int _vmprof_sample_stack(struct profbuf_s *p, PY_THREAD_STATE_T * tstate, ucontext_t * uc); +void sigprof_handler(int sig_nr, siginfo_t* info, void *ucontext); + + +/* ************************************************************* + * the setup and teardown functions + * ************************************************************* + */ + +int install_sigprof_handler(void); +int remove_sigprof_handler(void); +int install_sigprof_timer(void); +int remove_sigprof_timer(void); +void atfork_disable_timer(void); +void atfork_enable_timer(void); +void atfork_close_profile_file(void); +int install_pthread_atfork_hooks(void); + +#ifdef VMP_SUPPORTS_NATIVE_PROFILING +void init_cpyprof(int native); +static void disable_cpyprof(void); +#endif + +int close_profile(void); + +RPY_EXTERN +int 
vmprof_enable(int memory, int native, int real_time); +RPY_EXTERN +int vmprof_disable(void); +RPY_EXTERN +int vmprof_register_virtual_function(char *code_name, intptr_t code_uid, + int auto_retry); + + +void vmprof_aquire_lock(void); +void vmprof_release_lock(void); diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_win.c b/rpython/rlib/rvmprof/src/shared/vmprof_win.c new file mode 100644 --- /dev/null +++ b/rpython/rlib/rvmprof/src/shared/vmprof_win.c @@ -0,0 +1,42 @@ +// cannot include this header because it also has definitions +#include "windows.h" +#include "compat.h" +#include "vmp_stack.h" + +HANDLE write_mutex; + +int prepare_concurrent_bufs(void) +{ + if (!(write_mutex = CreateMutex(NULL, FALSE, NULL))) + return -1; + return 0; +} + +#include <tlhelp32.h> + +int vmp_write_all(const char *buf, size_t bufsize) +{ + int res; + int fd; + int count; + + res = WaitForSingleObject(write_mutex, INFINITE); + fd = vmp_profile_fileno(); + + if (fd == -1) { + ReleaseMutex(write_mutex); + return -1; + } + while (bufsize > 0) { + count = _write(fd, buf, (long)bufsize); + if (count <= 0) { + ReleaseMutex(write_mutex); + return -1; /* failed */ + } + buf += count; + bufsize -= count; + } + ReleaseMutex(write_mutex); + return 0; +} + diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_win.h b/rpython/rlib/rvmprof/src/shared/vmprof_win.h new file mode 100644 --- /dev/null +++ b/rpython/rlib/rvmprof/src/shared/vmprof_win.h @@ -0,0 +1,203 @@ +#pragma once + +#include "windows.h" +#include "compat.h" +#include "vmp_stack.h" + +HANDLE write_mutex; + +int prepare_concurrent_bufs(void); + +#include "vmprof_common.h" +#include <tlhelp32.h> _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit