Author: Richard Plangger <[email protected]>
Branch:
Changeset: r91967:e19ef006ba32
Date: 2017-07-23 16:46 -0400
http://bitbucket.org/pypy/pypy/changeset/e19ef006ba32/
Log: reapply fix
diff too long, truncating to 2000 out of 2191 lines
diff --git a/pypy/module/_vmprof/test/test__vmprof.py b/pypy/module/_vmprof/test/test__vmprof.py
--- a/pypy/module/_vmprof/test/test__vmprof.py
+++ b/pypy/module/_vmprof/test/test__vmprof.py
@@ -115,3 +115,31 @@
assert fd1.read() == tmpfile.read()
_vmprof.disable()
assert _vmprof.get_profile_path() is None
+
+ def test_stop_sampling(self):
+ import os
+ import _vmprof
+ tmpfile = open(self.tmpfilename, 'wb')
+ native = 1
+ def f():
+ import sys
+ import math
+ j = sys.maxsize
+ for i in range(500):
+ j = math.sqrt(j)
+ _vmprof.enable(tmpfile.fileno(), 0.01, 0, native, 0, 0)
+ # get_vmprof_stack() always returns 0 here!
+ # see vmprof_common.c and assume RPYTHON_LL2CTYPES is defined!
+ f()
+ fileno = _vmprof.stop_sampling()
+ pos = os.lseek(fileno, 0, os.SEEK_CUR)
+ f()
+ pos2 = os.lseek(fileno, 0, os.SEEK_CUR)
+ assert pos == pos2
+ _vmprof.start_sampling()
+ f()
+ fileno = _vmprof.stop_sampling()
+ pos3 = os.lseek(fileno, 0, os.SEEK_CUR)
+ assert pos3 > pos
+ _vmprof.disable()
+
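For reference, stop_sampling() and start_sampling() themselves fall outside the truncated diff; a plausible C-level sketch, assuming they only disarm and re-arm the profiling timer around the unchanged profile file descriptor (which is why the test can compare lseek() positions across calls):

    /* sketch only, not the actual implementation; uses the helpers
       defined in vmprof_unix.c below */
    int vmprof_stop_sampling_sketch(void)
    {
        vmprof_ignore_signals(1);      /* quiesce the signal handler */
        remove_sigprof_timer();        /* no more SIGPROF/SIGALRM ticks */
        return vmp_profile_fileno();   /* caller may lseek() this fd */
    }

    int vmprof_start_sampling_sketch(void)
    {
        if (install_sigprof_timer() == -1)
            return -1;
        vmprof_ignore_signals(0);
        return 0;
    }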
diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py
--- a/rpython/rlib/rvmprof/cintf.py
+++ b/rpython/rlib/rvmprof/cintf.py
@@ -20,7 +20,8 @@
compile_extra = ['-DRPYTHON_VMPROF', '-O3']
separate_module_files = [
- SHARED.join('symboltable.c')
+ SHARED.join('symboltable.c'),
+ SHARED.join('vmprof_unix.c')
]
if sys.platform.startswith('linux'):
separate_module_files += [
@@ -40,7 +41,7 @@
compile_extra += ['-DVMPROF_LINUX']
elif sys.platform == 'win32':
compile_extra = ['-DRPYTHON_VMPROF', '-DVMPROF_WINDOWS']
- separate_module_files = [SHARED.join('vmprof_main_win32.c')]
+ separate_module_files = [SHARED.join('vmprof_win.c')]
_libs = []
else:
# Guessing a BSD-like Unix platform
@@ -58,7 +59,9 @@
SHARED.join('compat.c'),
SHARED.join('machine.c'),
SHARED.join('vmp_stack.c'),
- SHARED.join('vmprof_main.c'),
+ SHARED.join('vmprof_mt.c'),
+ SHARED.join('vmprof_memory.c'),
+ SHARED.join('vmprof_common.c'),
# symbol table already in separate_module_files
] + separate_module_files,
post_include_bits=[],
diff --git a/rpython/rlib/rvmprof/src/rvmprof.c b/rpython/rlib/rvmprof/src/rvmprof.c
--- a/rpython/rlib/rvmprof/src/rvmprof.c
+++ b/rpython/rlib/rvmprof/src/rvmprof.c
@@ -15,9 +15,9 @@
#include "shared/vmprof_get_custom_offset.h"
#ifdef VMPROF_UNIX
-#include "shared/vmprof_main.h"
+#include "shared/vmprof_unix.h"
#else
-#include "shared/vmprof_main_win32.h"
+#include "shared/vmprof_win.h"
#endif
diff --git a/rpython/rlib/rvmprof/src/shared/_vmprof.c b/rpython/rlib/rvmprof/src/shared/_vmprof.c
--- a/rpython/rlib/rvmprof/src/shared/_vmprof.c
+++ b/rpython/rlib/rvmprof/src/shared/_vmprof.c
@@ -9,8 +9,8 @@
#include <signal.h>
#include "_vmprof.h"
+#include "vmprof_common.h"
-static volatile int is_enabled = 0;
static destructor Original_code_dealloc = 0;
static PyObject* (*_default_eval_loop)(PyFrameObject *, int) = 0;
@@ -18,9 +18,9 @@
#include "trampoline.h"
#include "machine.h"
#include "symboltable.h"
-#include "vmprof_main.h"
+#include "vmprof_unix.h"
#else
-#include "vmprof_main_win32.h"
+#include "vmprof_win.h"
#endif
#include "vmp_stack.h"
@@ -156,7 +156,7 @@
static void cpyprof_code_dealloc(PyObject *co)
{
- if (is_enabled) {
+ if (vmprof_is_enabled()) {
emit_code_object((PyCodeObject *)co);
/* xxx error return values are ignored */
}
@@ -187,7 +187,7 @@
return NULL;
}
- if (is_enabled) {
+ if (vmprof_is_enabled()) {
PyErr_SetString(PyExc_ValueError, "vmprof is already enabled");
return NULL;
}
@@ -217,13 +217,13 @@
return NULL;
}
- is_enabled = 1;
+ vmprof_set_enabled(1);
Py_RETURN_NONE;
}
static PyObject * vmp_is_enabled(PyObject *module, PyObject *noargs) {
- if (is_enabled) {
+ if (vmprof_is_enabled()) {
Py_RETURN_TRUE;
}
Py_RETURN_FALSE;
@@ -237,7 +237,7 @@
return NULL;
}
- is_enabled = 0;
+ vmprof_set_enabled(0);
if (PyErr_Occurred())
return NULL;
@@ -362,7 +362,7 @@
#ifdef VMPROF_UNIX
static PyObject * vmp_get_profile_path(PyObject *module, PyObject *noargs) {
PyObject * o;
- if (is_enabled) {
+ if (vmprof_is_enabled()) {
char buffer[4096];
buffer[0] = 0;
        ssize_t buffer_len = vmp_fd_to_path(vmp_profile_fileno(), buffer, 4096);
@@ -382,21 +382,19 @@
insert_real_time_thread(PyObject *module, PyObject * noargs) {
ssize_t thread_count;
- if (!is_enabled) {
+ if (!vmprof_is_enabled()) {
PyErr_SetString(PyExc_ValueError, "vmprof is not enabled");
return NULL;
}
- if (signal_type != SIGALRM) {
+ if (vmprof_get_signal_type() != SIGALRM) {
PyErr_SetString(PyExc_ValueError, "vmprof is not in real time mode");
return NULL;
}
- while (__sync_lock_test_and_set(&spinlock, 1)) {
- }
-
+ vmprof_aquire_lock();
thread_count = insert_thread(pthread_self(), -1);
- __sync_lock_release(&spinlock);
+ vmprof_release_lock();
return PyLong_FromSsize_t(thread_count);
}
@@ -405,21 +403,19 @@
remove_real_time_thread(PyObject *module, PyObject * noargs) {
ssize_t thread_count;
- if (!is_enabled) {
+ if (!vmprof_is_enabled()) {
PyErr_SetString(PyExc_ValueError, "vmprof is not enabled");
return NULL;
}
- if (signal_type != SIGALRM) {
+ if (vmprof_get_signal_type() != SIGALRM) {
PyErr_SetString(PyExc_ValueError, "vmprof is not in real time mode");
return NULL;
}
- while (__sync_lock_test_and_set(&spinlock, 1)) {
- }
-
+ vmprof_aquire_lock();
thread_count = remove_thread(pthread_self(), -1);
- __sync_lock_release(&spinlock);
+ vmprof_release_lock();
return PyLong_FromSsize_t(thread_count);
}
diff --git a/rpython/rlib/rvmprof/src/shared/machine.c b/rpython/rlib/rvmprof/src/shared/machine.c
--- a/rpython/rlib/rvmprof/src/shared/machine.c
+++ b/rpython/rlib/rvmprof/src/shared/machine.c
@@ -27,6 +27,8 @@
#endif
#elif __linux__
return "linux";
+#elif __FreeBSD__
+    return "freebsd";
#else
#error "Unknown compiler"
#endif
@@ -38,7 +40,7 @@
char proffs[24];
(void)snprintf(proffs, 24, "/proc/self/fd/%d", fd);
return readlink(proffs, buffer, buffer_len);
-#elif defined(VMPROF_UNIX)
+#elif defined(VMPROF_UNIX) && !defined(__FreeBSD__)
fcntl(fd, F_GETPATH, buffer);
return strlen(buffer);
#endif
diff --git a/rpython/rlib/rvmprof/src/shared/vmp_stack.c b/rpython/rlib/rvmprof/src/shared/vmp_stack.c
--- a/rpython/rlib/rvmprof/src/shared/vmp_stack.c
+++ b/rpython/rlib/rvmprof/src/shared/vmp_stack.c
@@ -523,7 +523,7 @@
int vmp_native_enable(void) {
#ifdef VMPROF_LINUX
- if (!unw_get_reg) {
+ if (libhandle == NULL) {
if ((libhandle = dlopen(LIBUNWIND, RTLD_LAZY | RTLD_LOCAL)) == NULL) {
goto bail_out;
}
@@ -570,6 +570,7 @@
vmprof_error = dlerror();
        fprintf(stderr, "could not close libunwind at runtime. error: %s\n", vmprof_error);
}
+ libhandle = NULL;
}
vmp_native_traces_enabled = 0;
diff --git a/rpython/rlib/rvmprof/src/shared/vmprof.h b/rpython/rlib/rvmprof/src/shared/vmprof.h
--- a/rpython/rlib/rvmprof/src/shared/vmprof.h
+++ b/rpython/rlib/rvmprof/src/shared/vmprof.h
@@ -1,5 +1,11 @@
#pragma once
+#define _GNU_SOURCE 1
+
+#ifndef RPYTHON_VMPROF
+#include <Python.h>
+#endif
+
#ifdef VMPROF_UNIX
#include <unistd.h>
#endif
@@ -79,3 +85,7 @@
#endif
+void set_current_codes(void * to);
+int opened_profile(const char *interp_name, int memory, int proflines, int native, int real_time);
+void flush_codes(void);
+
diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_common.c b/rpython/rlib/rvmprof/src/shared/vmprof_common.c
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/rvmprof/src/shared/vmprof_common.c
@@ -0,0 +1,303 @@
+#include "vmprof_common.h"
+
+#include <assert.h>
+#include <errno.h>
+
+#ifdef RPYTHON_VMPROF
+#ifdef RPYTHON_LL2CTYPES
+ /* only for testing: ll2ctypes sets RPY_EXTERN from the command-line */
+
+#else
+# include "common_header.h"
+# include "structdef.h"
+# include "src/threadlocal.h"
+# include "rvmprof.h"
+# include "forwarddecl.h"
+#endif
+#endif
+
+#ifdef VMP_SUPPORTS_NATIVE_PROFILING
+#include "vmp_stack.h" // reduces warnings
+#endif
+
+
+static volatile int is_enabled = 0;
+static long prepare_interval_usec = 0;
+static long profile_interval_usec = 0;
+
+#ifdef VMPROF_UNIX
+static int signal_type = SIGPROF;
+static int itimer_type = ITIMER_PROF;
+static pthread_t *threads = NULL;
+static size_t threads_size = 0;
+static size_t thread_count = 0;
+static size_t threads_size_step = 8;
+#endif
+
+int vmprof_get_itimer_type(void) {
+ return itimer_type;
+}
+
+int vmprof_is_enabled(void) {
+ return is_enabled;
+}
+
+void vmprof_set_enabled(int value) {
+ is_enabled = value;
+}
+
+long vmprof_get_prepare_interval_usec(void) {
+ return prepare_interval_usec;
+}
+
+long vmprof_get_profile_interval_usec(void) {
+ return profile_interval_usec;
+}
+
+void vmprof_set_prepare_interval_usec(long value) {
+ prepare_interval_usec = value;
+}
+
+void vmprof_set_profile_interval_usec(long value) {
+ profile_interval_usec = value;
+}
+
+int vmprof_get_signal_type(void) {
+ return signal_type;
+}
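These accessors replace the statics that previously lived in vmprof_common.h, where every translation unit got its own copy of the state. A minimal sketch of the new calling convention, mirroring the is_enabled -> vmprof_is_enabled() changes in _vmprof.c above:

    /* sketch: client code queries profiler state through the accessors;
       the variables themselves are now private to vmprof_common.c */
    #include "vmprof_common.h"

    static void maybe_emit(void)
    {
        if (!vmprof_is_enabled())
            return;
        /* ... profiling is active, safe to emit records ... */
    }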
+
+char *vmprof_init(int fd, double interval, int memory,
+                  int proflines, const char *interp_name, int native, int real_time)
+{
+ if (!(interval >= 1e-6 && interval < 1.0)) { /* also if it is NaN */
+ return "bad value for 'interval'";
+ }
+ prepare_interval_usec = (int)(interval * 1000000.0);
+
+ if (prepare_concurrent_bufs() < 0)
+ return "out of memory";
+#if VMPROF_UNIX
+ if (real_time) {
+ signal_type = SIGALRM;
+ itimer_type = ITIMER_REAL;
+ } else {
+ signal_type = SIGPROF;
+ itimer_type = ITIMER_PROF;
+ }
+ set_current_codes(NULL);
+ assert(fd >= 0);
+#else
+ if (memory) {
+ return "memory tracking only supported on unix";
+ }
+ if (native) {
+ return "native profiling only supported on unix";
+ }
+#endif
+ vmp_set_profile_fileno(fd);
+    if (opened_profile(interp_name, memory, proflines, native, real_time) < 0) {
+ vmp_set_profile_fileno(0);
+ return strerror(errno);
+ }
+ return NULL;
+}
+
+int opened_profile(const char *interp_name, int memory, int proflines, int native, int real_time)
+{
+ int success;
+ int bits;
+ struct {
+ long hdr[5];
+ char interp_name[259];
+ } header;
+
+ const char * machine;
+ size_t namelen = strnlen(interp_name, 255);
+
+ machine = vmp_machine_os_name();
+
+ header.hdr[0] = 0;
+ header.hdr[1] = 3;
+ header.hdr[2] = 0;
+ header.hdr[3] = prepare_interval_usec;
+ if (strstr(machine, "win64") != 0) {
+ header.hdr[4] = 1;
+ } else {
+ header.hdr[4] = 0;
+ }
+ header.interp_name[0] = MARKER_HEADER;
+ header.interp_name[1] = '\x00';
+ header.interp_name[2] = VERSION_TIMESTAMP;
+ header.interp_name[3] = memory*PROFILE_MEMORY + proflines*PROFILE_LINES + \
+                            native*PROFILE_NATIVE + real_time*PROFILE_REAL_TIME;
+#ifdef RPYTHON_VMPROF
+ header.interp_name[3] += PROFILE_RPYTHON;
+#endif
+ header.interp_name[4] = (char)namelen;
+
+ memcpy(&header.interp_name[5], interp_name, namelen);
+ success = vmp_write_all((char*)&header, 5 * sizeof(long) + 5 + namelen);
+ if (success < 0) {
+ return success;
+ }
+
+ /* Write the time and the zone to the log file, profiling will start now */
+ (void)vmp_write_time_now(MARKER_TIME_N_ZONE);
+
+ /* write some more meta information */
+ vmp_write_meta("os", machine);
+ bits = vmp_machine_bits();
+ if (bits == 64) {
+ vmp_write_meta("bits", "64");
+ } else if (bits == 32) {
+ vmp_write_meta("bits", "32");
+ }
+
+ return success;
+}
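The byte layout emitted above is: five native longs (0, 3, 0, the interval in usec, a win64 flag), then MARKER_HEADER, a zero byte, VERSION_TIMESTAMP, a flags byte, a one-byte name length, and the interpreter name. A hedged reader-side sketch; the struct and function names are illustrative (this is not vmprof's actual reader) and it assumes the reader shares the writer's sizeof(long):

    #include <stdio.h>

    struct header_fields {
        long hdr[5];
        unsigned char marker, zero, version, flags, namelen;
        char name[256];
    };

    static int read_profile_header(FILE *f, struct header_fields *h)
    {
        unsigned char meta[5];
        if (fread(h->hdr, sizeof(long), 5, f) != 5) return -1;
        if (fread(meta, 1, 5, f) != 5) return -1;
        h->marker = meta[0]; h->zero = meta[1]; h->version = meta[2];
        h->flags = meta[3];  h->namelen = meta[4];
        if (fread(h->name, 1, h->namelen, f) != h->namelen) return -1;
        h->name[h->namelen] = '\0';
        return 0;
    }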
+
+
+/* Seems that CPython 3.5.1 made our job harder. Did not find out how
+ to do that without these hacks. We can't use PyThreadState_GET(),
+ because that calls PyThreadState_Get() which fails an assert if the
+ result is NULL. */
+#if PY_MAJOR_VERSION >= 3 && !defined(_Py_atomic_load_relaxed)
+ /* this was abruptly un-defined in 3.5.1 */
+void *volatile _PyThreadState_Current;
+ /* XXX simple volatile access is assumed atomic */
+# define _Py_atomic_load_relaxed(pp) (*(pp))
+#endif
+
+#ifdef RPYTHON_VMPROF
+#ifndef RPYTHON_LL2CTYPES
+PY_STACK_FRAME_T *get_vmprof_stack(void)
+{
+ struct pypy_threadlocal_s *tl;
+ _OP_THREADLOCALREF_ADDR_SIGHANDLER(tl);
+ if (tl == NULL) {
+ return NULL;
+ } else {
+ return tl->vmprof_tl_stack;
+ }
+}
+#else
+PY_STACK_FRAME_T *get_vmprof_stack(void)
+{
+ return 0;
+}
+#endif
+
+intptr_t vmprof_get_traceback(void *stack, void *ucontext,
+ intptr_t *result_p, intptr_t result_length)
+{
+ int n;
+ int enabled;
+#ifdef VMPROF_WINDOWS
+ intptr_t pc = 0; /* XXX implement me */
+#else
+ intptr_t pc = ucontext ? (intptr_t)GetPC((ucontext_t *)ucontext) : 0;
+#endif
+ if (stack == NULL) {
+ stack = get_vmprof_stack();
+ }
+#ifdef VMP_SUPPORTS_NATIVE_PROFILING
+ enabled = vmp_native_enabled();
+ vmp_native_disable();
+#endif
+ n = get_stack_trace(stack, result_p, result_length - 2, pc);
+#ifdef VMP_SUPPORTS_NATIVE_PROFILING
+ if (enabled) {
+ vmp_native_enable();
+ }
+#endif
+ return (intptr_t)n;
+}
+#endif
+
+#ifdef VMPROF_UNIX
+
+ssize_t search_thread(pthread_t tid, ssize_t i)
+{
+ if (i < 0)
+ i = 0;
+ while ((size_t)i < thread_count) {
+ if (pthread_equal(threads[i], tid))
+ return i;
+ i++;
+ }
+ return -1;
+}
+
+ssize_t insert_thread(pthread_t tid, ssize_t i)
+{
+ assert(signal_type == SIGALRM);
+ i = search_thread(tid, i);
+ if (i > 0)
+ return -1;
+ if (thread_count == threads_size) {
+ threads_size += threads_size_step;
+ threads = realloc(threads, sizeof(pid_t) * threads_size);
+ assert(threads != NULL);
+ memset(threads + thread_count, 0, sizeof(pid_t) * threads_size_step);
+ }
+ threads[thread_count++] = tid;
+ return thread_count;
+}
+
+ssize_t remove_thread(pthread_t tid, ssize_t i)
+{
+ assert(signal_type == SIGALRM);
+ if (thread_count == 0)
+ return -1;
+ if (threads == NULL)
+ return -1;
+ i = search_thread(tid, i);
+ if (i < 0)
+ return -1;
+ threads[i] = threads[--thread_count];
+ threads[thread_count] = 0;
+ return thread_count;
+}
+
+ssize_t remove_threads(void)
+{
+ assert(signal_type == SIGALRM);
+ if (threads != NULL) {
+ free(threads);
+ threads = NULL;
+ }
+ thread_count = 0;
+ threads_size = 0;
+ return 0;
+}
+
+int broadcast_signal_for_threads(void)
+{
+ int done = 1;
+ size_t i = 0;
+ pthread_t self = pthread_self();
+ pthread_t tid;
+ while (i < thread_count) {
+ tid = threads[i];
+ if (pthread_equal(tid, self)) {
+ done = 0;
+ } else if (pthread_kill(tid, SIGALRM)) {
+ remove_thread(tid, i);
+ }
+ i++;
+ }
+ return done;
+}
+
+int is_main_thread(void)
+{
+#ifdef VMPROF_LINUX
+ pid_t pid = getpid();
+ pid_t tid = (pid_t) syscall(SYS_gettid);
+ return (pid == tid);
+#elif defined(VMPROF_APPLE)
+ return pthread_main_np();
+#endif
+}
+
+#endif
diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_common.h b/rpython/rlib/rvmprof/src/shared/vmprof_common.h
--- a/rpython/rlib/rvmprof/src/shared/vmprof_common.h
+++ b/rpython/rlib/rvmprof/src/shared/vmprof_common.h
@@ -8,84 +8,27 @@
#include <time.h>
#include <stdlib.h>
-#ifndef VMPROF_WINDOWS
+#ifdef VMPROF_UNIX
#include <sys/time.h>
#include "vmprof_mt.h"
+#include <signal.h>
+#include <pthread.h>
#endif
+#include "vmprof_getpc.h"
+
#ifdef VMPROF_LINUX
#include <syscall.h>
#endif
#define MAX_FUNC_NAME 1024
-static long prepare_interval_usec = 0;
-static long profile_interval_usec = 0;
-
-static int opened_profile(const char *interp_name, int memory, int proflines, int native, int real_time);
-
-#ifdef VMPROF_UNIX
-static int signal_type = SIGPROF;
-static int itimer_type = ITIMER_PROF;
-static pthread_t *threads = NULL;
-static size_t threads_size = 0;
-static size_t thread_count = 0;
-static size_t threads_size_step = 8;
-static struct profbuf_s *volatile current_codes;
-#endif
-
#ifdef VMPROF_UNIX
-static inline ssize_t search_thread(pthread_t tid, ssize_t i) {
- if (i < 0)
- i = 0;
- while ((size_t)i < thread_count) {
- if (pthread_equal(threads[i], tid))
- return i;
- i++;
- }
- return -1;
-}
-
-ssize_t insert_thread(pthread_t tid, ssize_t i) {
- assert(signal_type == SIGALRM);
- i = search_thread(tid, i);
- if (i > 0)
- return -1;
- if (thread_count == threads_size) {
- threads_size += threads_size_step;
- threads = realloc(threads, sizeof(pid_t) * threads_size);
- assert(threads != NULL);
- memset(threads + thread_count, 0, sizeof(pid_t) * threads_size_step);
- }
- threads[thread_count++] = tid;
- return thread_count;
-}
-
-ssize_t remove_thread(pthread_t tid, ssize_t i) {
- assert(signal_type == SIGALRM);
- if (thread_count == 0)
- return -1;
- if (threads == NULL)
- return -1;
- i = search_thread(tid, i);
- if (i < 0)
- return -1;
- threads[i] = threads[--thread_count];
- threads[thread_count] = 0;
- return thread_count;
-}
-
-ssize_t remove_threads(void) {
- assert(signal_type == SIGALRM);
- if (threads != NULL) {
- free(threads);
- threads = NULL;
- }
- thread_count = 0;
- threads_size = 0;
- return 0;
-}
+ssize_t search_thread(pthread_t tid, ssize_t i);
+ssize_t insert_thread(pthread_t tid, ssize_t i);
+ssize_t remove_thread(pthread_t tid, ssize_t i);
+ssize_t remove_threads(void);
#endif
@@ -130,95 +73,9 @@
RPY_EXTERN
char *vmprof_init(int fd, double interval, int memory,
-                  int proflines, const char *interp_name, int native, int real_time)
-{
- if (!(interval >= 1e-6 && interval < 1.0)) { /* also if it is NaN */
- return "bad value for 'interval'";
- }
- prepare_interval_usec = (int)(interval * 1000000.0);
+                  int proflines, const char *interp_name, int native, int real_time);
- if (prepare_concurrent_bufs() < 0)
- return "out of memory";
-#if VMPROF_UNIX
- if (real_time) {
- signal_type = SIGALRM;
- itimer_type = ITIMER_REAL;
- } else {
- signal_type = SIGPROF;
- itimer_type = ITIMER_PROF;
- }
- current_codes = NULL;
- assert(fd >= 0);
-#else
- if (memory) {
- return "memory tracking only supported on unix";
- }
- if (native) {
- return "native profiling only supported on unix";
- }
-#endif
- vmp_set_profile_fileno(fd);
-    if (opened_profile(interp_name, memory, proflines, native, real_time) < 0) {
- vmp_set_profile_fileno(0);
- return strerror(errno);
- }
- return NULL;
-}
-
-static int opened_profile(const char *interp_name, int memory, int proflines, int native, int real_time)
-{
- int success;
- int bits;
- struct {
- long hdr[5];
- char interp_name[259];
- } header;
-
- const char * machine;
- size_t namelen = strnlen(interp_name, 255);
-
- machine = vmp_machine_os_name();
-
- header.hdr[0] = 0;
- header.hdr[1] = 3;
- header.hdr[2] = 0;
- header.hdr[3] = prepare_interval_usec;
- if (strstr(machine, "win64") != 0) {
- header.hdr[4] = 1;
- } else {
- header.hdr[4] = 0;
- }
- header.interp_name[0] = MARKER_HEADER;
- header.interp_name[1] = '\x00';
- header.interp_name[2] = VERSION_TIMESTAMP;
- header.interp_name[3] = memory*PROFILE_MEMORY + proflines*PROFILE_LINES + \
-                            native*PROFILE_NATIVE + real_time*PROFILE_REAL_TIME;
-#ifdef RPYTHON_VMPROF
- header.interp_name[3] += PROFILE_RPYTHON;
-#endif
- header.interp_name[4] = (char)namelen;
-
- memcpy(&header.interp_name[5], interp_name, namelen);
- success = vmp_write_all((char*)&header, 5 * sizeof(long) + 5 + namelen);
- if (success < 0) {
- return success;
- }
-
- /* Write the time and the zone to the log file, profiling will start now */
- (void)vmp_write_time_now(MARKER_TIME_N_ZONE);
-
- /* write some more meta information */
- vmp_write_meta("os", machine);
- bits = vmp_machine_bits();
- if (bits == 64) {
- vmp_write_meta("bits", "64");
- } else if (bits == 32) {
- vmp_write_meta("bits", "32");
- }
-
- return success;
-}
-
+int opened_profile(const char *interp_name, int memory, int proflines, int native, int real_time);
/* Seems that CPython 3.5.1 made our job harder. Did not find out how
to do that without these hacks. We can't use PyThreadState_GET(),
@@ -233,46 +90,22 @@
#ifdef RPYTHON_VMPROF
#ifndef RPYTHON_LL2CTYPES
-static PY_STACK_FRAME_T *get_vmprof_stack(void)
-{
- struct pypy_threadlocal_s *tl;
- _OP_THREADLOCALREF_ADDR_SIGHANDLER(tl);
- if (tl == NULL)
- return NULL;
- else
- return tl->vmprof_tl_stack;
-}
-#else
-static PY_STACK_FRAME_T *get_vmprof_stack(void)
-{
- return 0;
-}
+PY_STACK_FRAME_T *get_vmprof_stack(void);
+#endif
+RPY_EXTERN
+intptr_t vmprof_get_traceback(void *stack, void *ucontext,
+ intptr_t *result_p, intptr_t result_length);
#endif
-RPY_EXTERN
-intptr_t vmprof_get_traceback(void *stack, void *ucontext,
- intptr_t *result_p, intptr_t result_length)
-{
- int n;
- int enabled;
-#ifdef VMPROF_WINDOWS
- intptr_t pc = 0; /* XXX implement me */
-#else
- intptr_t pc = ucontext ? (intptr_t)GetPC((ucontext_t *)ucontext) : 0;
+int vmprof_get_signal_type(void);
+long vmprof_get_prepare_interval_usec(void);
+long vmprof_get_profile_interval_usec(void);
+void vmprof_set_prepare_interval_usec(long value);
+void vmprof_set_profile_interval_usec(long value);
+int vmprof_is_enabled(void);
+void vmprof_set_enabled(int value);
+int vmprof_get_itimer_type(void);
+#ifdef VMPROF_UNIX
+int broadcast_signal_for_threads(void);
+int is_main_thread(void);
#endif
- if (stack == NULL) {
- stack = get_vmprof_stack();
- }
-#ifdef VMP_SUPPORTS_NATIVE_PROFILING
- enabled = vmp_native_enabled();
- vmp_native_disable();
-#endif
- n = get_stack_trace(stack, result_p, result_length - 2, pc);
-#ifdef VMP_SUPPORTS_NATIVE_PROFILING
- if (enabled) {
- vmp_native_enable();
- }
-#endif
- return (intptr_t)n;
-}
-#endif
diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_getpc.h b/rpython/rlib/rvmprof/src/shared/vmprof_getpc.h
--- a/rpython/rlib/rvmprof/src/shared/vmprof_getpc.h
+++ b/rpython/rlib/rvmprof/src/shared/vmprof_getpc.h
@@ -142,6 +142,7 @@
// the right value for your system, and add it to the list in
// vmrpof_config.h
#else
+
static intptr_t GetPC(ucontext_t *signal_ucontext) {
return signal_ucontext->PC_FROM_UCONTEXT; // defined in config.h
}
diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_memory.c b/rpython/rlib/rvmprof/src/shared/vmprof_memory.c
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/rvmprof/src/shared/vmprof_memory.c
@@ -0,0 +1,81 @@
+#include "vmprof_memory.h"
+
+#ifdef VMPROF_APPLE
+/* On OS X we can get RSS using the Mach API. */
+#include <mach/mach.h>
+#include <mach/message.h>
+#include <mach/kern_return.h>
+#include <mach/task_info.h>
+
+static mach_port_t mach_task;
+#else
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+/* On '''normal''' Unices we can get RSS from '/proc/<pid>/status'. */
+static int proc_file = -1;
+#endif
+
+int setup_rss(void)
+{
+#ifdef VMPROF_LINUX
+ char buf[128];
+
+ sprintf(buf, "/proc/%d/status", getpid());
+ proc_file = open(buf, O_RDONLY);
+ return proc_file;
+#elif defined(VMPROF_APPLE)
+ mach_task = mach_task_self();
+ return 0;
+#else
+ return 0;
+#endif
+}
+
+int teardown_rss(void)
+{
+#ifdef VMPROF_LINUX
+ close(proc_file);
+ proc_file = -1;
+ return 0;
+#else
+ return 0;
+#endif
+}
+
+long get_current_proc_rss(void)
+{
+#ifdef VMPROF_LINUX
+ char buf[1024];
+ int i = 0;
+
+ if (lseek(proc_file, 0, SEEK_SET) == -1)
+ return -1;
+ if (read(proc_file, buf, 1024) == -1)
+ return -1;
+ while (i < 1020) {
+ if (strncmp(buf + i, "VmRSS:\t", 7) == 0) {
+ i += 7;
+ return atoi(buf + i);
+ }
+ i++;
+ }
+ return -1;
+#elif defined(VMPROF_APPLE)
+ mach_msg_type_number_t out_count = MACH_TASK_BASIC_INFO_COUNT;
+ mach_task_basic_info_data_t taskinfo = { .resident_size = 0 };
+
+    kern_return_t error = task_info(mach_task, MACH_TASK_BASIC_INFO, (task_info_t)&taskinfo, &out_count);
+ if (error == KERN_SUCCESS) {
+ return (long)(taskinfo.resident_size / 1024);
+ } else {
+ return -1;
+ }
+#else
+ return -1; // not implemented
+#endif
+}
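A minimal usage sketch for these helpers (declared in vmprof_memory.h below): on Linux, setup_rss() opens /proc/<pid>/status once, and each get_current_proc_rss() call re-reads it and parses the VmRSS line, returning kilobytes or -1:

    #include <stdio.h>
    #include "vmprof_memory.h"

    int main(void)
    {
        if (setup_rss() == -1)
            return 1;
        long kb = get_current_proc_rss();   /* -1 where unsupported */
        printf("VmRSS: %ld kB\n", kb);
        teardown_rss();
        return 0;
    }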
diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_memory.h b/rpython/rlib/rvmprof/src/shared/vmprof_memory.h
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/rvmprof/src/shared/vmprof_memory.h
@@ -0,0 +1,5 @@
+#pragma once
+
+int setup_rss(void);
+int teardown_rss(void);
+long get_current_proc_rss(void);
diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_mt.c b/rpython/rlib/rvmprof/src/shared/vmprof_mt.c
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/rvmprof/src/shared/vmprof_mt.c
@@ -0,0 +1,181 @@
+#include "vmprof_mt.h"
+/* Support for multithreaded write() operations (implementation) */
+
+#include <assert.h>
+
+#if defined(__i386__) || defined(__amd64__)
+ static inline void write_fence(void) { asm("" : : : "memory"); }
+#else
+ static inline void write_fence(void) { __sync_synchronize(); }
+#endif
+
+static char volatile profbuf_state[MAX_NUM_BUFFERS];
+static struct profbuf_s *profbuf_all_buffers = NULL;
+static int volatile profbuf_write_lock = 2;
+static long profbuf_pending_write;
+
+
+static void unprepare_concurrent_bufs(void)
+{
+ if (profbuf_all_buffers != NULL) {
+        munmap(profbuf_all_buffers, sizeof(struct profbuf_s) * MAX_NUM_BUFFERS);
+ profbuf_all_buffers = NULL;
+ }
+}
+
+int prepare_concurrent_bufs(void)
+{
+ assert(sizeof(struct profbuf_s) == 8192);
+
+ unprepare_concurrent_bufs();
+    profbuf_all_buffers = mmap(NULL, sizeof(struct profbuf_s) * MAX_NUM_BUFFERS,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ -1, 0);
+ if (profbuf_all_buffers == MAP_FAILED) {
+ profbuf_all_buffers = NULL;
+ return -1;
+ }
+ memset((char *)profbuf_state, PROFBUF_UNUSED, sizeof(profbuf_state));
+ profbuf_write_lock = 0;
+ profbuf_pending_write = -1;
+ return 0;
+}
+
+static int _write_single_ready_buffer(int fd, long i)
+{
+ /* Try to write to disk the buffer number 'i'. This function must
+ only be called while we hold the write lock. */
+ assert(profbuf_write_lock != 0);
+
+ if (profbuf_pending_write >= 0) {
+ /* A partially written buffer is waiting. We'll write the
+ rest of this buffer now, instead of 'i'. */
+ i = profbuf_pending_write;
+ assert(profbuf_state[i] == PROFBUF_READY);
+ }
+
+ if (profbuf_state[i] != PROFBUF_READY) {
+ /* this used to be a race condition: the buffer was written by a
+ different thread already, nothing to do now */
+ return 0;
+ }
+
+ int err;
+ struct profbuf_s *p = &profbuf_all_buffers[i];
+ ssize_t count = write(fd, p->data + p->data_offset, p->data_size);
+ if (count == p->data_size) {
+ profbuf_state[i] = PROFBUF_UNUSED;
+ profbuf_pending_write = -1;
+ }
+ else {
+ if (count > 0) {
+ p->data_offset += count;
+ p->data_size -= count;
+ }
+ profbuf_pending_write = i;
+ if (count < 0)
+ return -1;
+ }
+ return 0;
+}
+
+static void _write_ready_buffers(int fd)
+{
+ long i;
+ int has_write_lock = 0;
+
+ for (i = 0; i < MAX_NUM_BUFFERS; i++) {
+ if (profbuf_state[i] == PROFBUF_READY) {
+ if (!has_write_lock) {
+ if (!__sync_bool_compare_and_swap(&profbuf_write_lock, 0, 1))
+ return; /* can't acquire the write lock, give up */
+ has_write_lock = 1;
+ }
+ if (_write_single_ready_buffer(fd, i) < 0)
+ break;
+ }
+ }
+ if (has_write_lock)
+ profbuf_write_lock = 0;
+}
+
+struct profbuf_s *reserve_buffer(int fd)
+{
+ /* Tries to enter a region of code that fills one buffer. If
+ successful, returns the profbuf_s. It fails only if the
+ concurrent buffers are all busy (extreme multithreaded usage).
+
+ This might call write() to emit the data sitting in
+ previously-prepared buffers. In case of write() error, the
+ error is ignored but unwritten data stays in the buffers.
+ */
+ long i;
+
+ _write_ready_buffers(fd);
+
+ for (i = 0; i < MAX_NUM_BUFFERS; i++) {
+ if (profbuf_state[i] == PROFBUF_UNUSED &&
+ __sync_bool_compare_and_swap(&profbuf_state[i], PROFBUF_UNUSED,
+ PROFBUF_FILLING)) {
+ struct profbuf_s *p = &profbuf_all_buffers[i];
+ p->data_size = 0;
+ p->data_offset = 0;
+ return p;
+ }
+ }
+ /* no unused buffer found */
+ return NULL;
+}
+
+void commit_buffer(int fd, struct profbuf_s *buf)
+{
+ /* Leaves a region of code that filled 'buf'.
+
+ This might call write() to emit the data now ready. In case of
+ write() error, the error is ignored but unwritten data stays in
+ the buffers.
+ */
+
+ /* Make sure every thread sees the full content of 'buf' */
+ write_fence();
+
+ /* Then set the 'ready' flag */
+ long i = buf - profbuf_all_buffers;
+ assert(profbuf_state[i] == PROFBUF_FILLING);
+ profbuf_state[i] = PROFBUF_READY;
+
+ if (!__sync_bool_compare_and_swap(&profbuf_write_lock, 0, 1)) {
+ /* can't acquire the write lock, ignore */
+ }
+ else {
+ _write_single_ready_buffer(fd, i);
+ profbuf_write_lock = 0;
+ }
+}
+
+void cancel_buffer(struct profbuf_s *buf)
+{
+ long i = buf - profbuf_all_buffers;
+ assert(profbuf_state[i] == PROFBUF_FILLING);
+ profbuf_state[i] = PROFBUF_UNUSED;
+}
+
+int shutdown_concurrent_bufs(int fd)
+{
+ /* no signal handler can be running concurrently here, because we
+ already did vmprof_ignore_signals(1) */
+ assert(profbuf_write_lock == 0);
+ profbuf_write_lock = 2;
+
+ /* last attempt to flush buffers */
+ int i;
+ for (i = 0; i < MAX_NUM_BUFFERS; i++) {
+ while (profbuf_state[i] == PROFBUF_READY) {
+ if (_write_single_ready_buffer(fd, i) < 0)
+ return -1;
+ }
+ }
+ unprepare_concurrent_bufs();
+ return 0;
+}
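A usage sketch of the buffer protocol now exported from vmprof_mt.h: reserve a buffer, fill data and data_size, then either commit (which may write() immediately or leave the buffer pending) or cancel; sigprof_handler in vmprof_unix.c below follows exactly this pattern:

    #include <string.h>
    #include "vmprof_mt.h"

    /* sketch: emit one opaque record through the concurrent buffers */
    static int emit_record(int fd, const char *payload, size_t len)
    {
        struct profbuf_s *p = reserve_buffer(fd);
        if (p == NULL)
            return -1;               /* all buffers busy: drop the record */
        if (len > SINGLE_BUF_SIZE) {
            cancel_buffer(p);
            return -1;
        }
        memcpy(p->data, payload, len);
        p->data_size = len;
        commit_buffer(fd, p);        /* may write() now or stay pending */
        return 0;
    }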
diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_mt.h b/rpython/rlib/rvmprof/src/shared/vmprof_mt.h
--- a/rpython/rlib/rvmprof/src/shared/vmprof_mt.h
+++ b/rpython/rlib/rvmprof/src/shared/vmprof_mt.h
@@ -1,11 +1,11 @@
#pragma once
/* Support for multithreaded write() operations */
+#include "vmprof.h"
+
#include <string.h>
#include <sys/mman.h>
-#include "vmprof.h"
-
/* The idea is that we have MAX_NUM_BUFFERS available, all of size
SINGLE_BUF_SIZE. Threads and signal handlers can ask to reserve a
buffer, fill it, and finally "commit" it, at which point its
@@ -29,12 +29,6 @@
*/
#define MAX_NUM_BUFFERS 20
-#if defined(__i386__) || defined(__amd64__)
- static inline void write_fence(void) { asm("" : : : "memory"); }
-#else
- static inline void write_fence(void) { __sync_synchronize(); }
-#endif
-
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif
@@ -50,173 +44,8 @@
char data[SINGLE_BUF_SIZE];
};
-static char volatile profbuf_state[MAX_NUM_BUFFERS];
-static struct profbuf_s *profbuf_all_buffers = NULL;
-static int volatile profbuf_write_lock = 2;
-static long profbuf_pending_write;
-
-
-static void unprepare_concurrent_bufs(void)
-{
- if (profbuf_all_buffers != NULL) {
-        munmap(profbuf_all_buffers, sizeof(struct profbuf_s) * MAX_NUM_BUFFERS);
- profbuf_all_buffers = NULL;
- }
-}
-
-static int prepare_concurrent_bufs(void)
-{
- assert(sizeof(struct profbuf_s) == 8192);
-
- unprepare_concurrent_bufs();
-    profbuf_all_buffers = mmap(NULL, sizeof(struct profbuf_s) * MAX_NUM_BUFFERS,
- PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS,
- -1, 0);
- if (profbuf_all_buffers == MAP_FAILED) {
- profbuf_all_buffers = NULL;
- return -1;
- }
- memset((char *)profbuf_state, PROFBUF_UNUSED, sizeof(profbuf_state));
- profbuf_write_lock = 0;
- profbuf_pending_write = -1;
- return 0;
-}
-
-static int _write_single_ready_buffer(int fd, long i)
-{
- /* Try to write to disk the buffer number 'i'. This function must
- only be called while we hold the write lock. */
- assert(profbuf_write_lock != 0);
-
- if (profbuf_pending_write >= 0) {
- /* A partially written buffer is waiting. We'll write the
- rest of this buffer now, instead of 'i'. */
- i = profbuf_pending_write;
- assert(profbuf_state[i] == PROFBUF_READY);
- }
-
- if (profbuf_state[i] != PROFBUF_READY) {
- /* this used to be a race condition: the buffer was written by a
- different thread already, nothing to do now */
- return 0;
- }
-
- int err;
- struct profbuf_s *p = &profbuf_all_buffers[i];
- ssize_t count = write(fd, p->data + p->data_offset, p->data_size);
- if (count == p->data_size) {
- profbuf_state[i] = PROFBUF_UNUSED;
- profbuf_pending_write = -1;
- }
- else {
- if (count > 0) {
- p->data_offset += count;
- p->data_size -= count;
- }
- profbuf_pending_write = i;
- if (count < 0)
- return -1;
- }
- return 0;
-}
-
-static void _write_ready_buffers(int fd)
-{
- long i;
- int has_write_lock = 0;
-
- for (i = 0; i < MAX_NUM_BUFFERS; i++) {
- if (profbuf_state[i] == PROFBUF_READY) {
- if (!has_write_lock) {
- if (!__sync_bool_compare_and_swap(&profbuf_write_lock, 0, 1))
- return; /* can't acquire the write lock, give up */
- has_write_lock = 1;
- }
- if (_write_single_ready_buffer(fd, i) < 0)
- break;
- }
- }
- if (has_write_lock)
- profbuf_write_lock = 0;
-}
-
-static struct profbuf_s *reserve_buffer(int fd)
-{
- /* Tries to enter a region of code that fills one buffer. If
- successful, returns the profbuf_s. It fails only if the
- concurrent buffers are all busy (extreme multithreaded usage).
-
- This might call write() to emit the data sitting in
- previously-prepared buffers. In case of write() error, the
- error is ignored but unwritten data stays in the buffers.
- */
- long i;
-
- _write_ready_buffers(fd);
-
- for (i = 0; i < MAX_NUM_BUFFERS; i++) {
- if (profbuf_state[i] == PROFBUF_UNUSED &&
- __sync_bool_compare_and_swap(&profbuf_state[i], PROFBUF_UNUSED,
- PROFBUF_FILLING)) {
- struct profbuf_s *p = &profbuf_all_buffers[i];
- p->data_size = 0;
- p->data_offset = 0;
- return p;
- }
- }
- /* no unused buffer found */
- return NULL;
-}
-
-static void commit_buffer(int fd, struct profbuf_s *buf)
-{
- /* Leaves a region of code that filled 'buf'.
-
- This might call write() to emit the data now ready. In case of
- write() error, the error is ignored but unwritten data stays in
- the buffers.
- */
-
- /* Make sure every thread sees the full content of 'buf' */
- write_fence();
-
- /* Then set the 'ready' flag */
- long i = buf - profbuf_all_buffers;
- assert(profbuf_state[i] == PROFBUF_FILLING);
- profbuf_state[i] = PROFBUF_READY;
-
- if (!__sync_bool_compare_and_swap(&profbuf_write_lock, 0, 1)) {
- /* can't acquire the write lock, ignore */
- }
- else {
- _write_single_ready_buffer(fd, i);
- profbuf_write_lock = 0;
- }
-}
-
-static void cancel_buffer(struct profbuf_s *buf)
-{
- long i = buf - profbuf_all_buffers;
- assert(profbuf_state[i] == PROFBUF_FILLING);
- profbuf_state[i] = PROFBUF_UNUSED;
-}
-
-static int shutdown_concurrent_bufs(int fd)
-{
- /* no signal handler can be running concurrently here, because we
- already did vmprof_ignore_signals(1) */
- assert(profbuf_write_lock == 0);
- profbuf_write_lock = 2;
-
- /* last attempt to flush buffers */
- int i;
- for (i = 0; i < MAX_NUM_BUFFERS; i++) {
- while (profbuf_state[i] == PROFBUF_READY) {
- if (_write_single_ready_buffer(fd, i) < 0)
- return -1;
- }
- }
- unprepare_concurrent_bufs();
- return 0;
-}
+int prepare_concurrent_bufs(void);
+struct profbuf_s *reserve_buffer(int fd);
+void commit_buffer(int fd, struct profbuf_s *buf);
+void cancel_buffer(struct profbuf_s *buf);
+int shutdown_concurrent_bufs(int fd);
diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_unix.c b/rpython/rlib/rvmprof/src/shared/vmprof_unix.c
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/rvmprof/src/shared/vmprof_unix.c
@@ -0,0 +1,496 @@
+#include "vmprof_unix.h"
+
+#ifdef VMPROF_UNIX
+
+#if VMPROF_LINUX
+#include <syscall.h>
+#endif
+
+
+#include <dlfcn.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+
+#include "vmp_stack.h"
+#include "vmprof_mt.h"
+#include "vmprof_getpc.h"
+#include "vmprof_common.h"
+#include "vmprof_memory.h"
+#include "compat.h"
+
+
+
+/* 'signal_handler_ignore' is nonzero while signals must be ignored
+   (vmprof_ignore_signals nests); 'signal_handler_entries' counts how
+   many threads are currently running the signal handler */
+static long volatile signal_handler_ignore = 1;
+static long volatile signal_handler_entries = 0;
+static char atfork_hook_installed = 0;
+static volatile int spinlock;
+static jmp_buf restore_point;
+static struct profbuf_s *volatile current_codes;
+
+
+void vmprof_ignore_signals(int ignored)
+{
+ if (ignored) {
+        /* raise the ignore counter, and wait until concurrently-running signal
+ handlers finish */
+ __sync_add_and_fetch(&signal_handler_ignore, 1L);
+ while (signal_handler_entries != 0L) {
+ usleep(1);
+ }
+ } else {
+ __sync_sub_and_fetch(&signal_handler_ignore, 1L);
+ }
+}
+
+long vmprof_enter_signal(void)
+{
+ __sync_fetch_and_add(&signal_handler_entries, 1L);
+ return signal_handler_ignore;
+}
+
+long vmprof_exit_signal(void)
+{
+ return __sync_sub_and_fetch(&signal_handler_entries, 1L);
+}
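Together, these three functions let a writer quiesce the sampler: raising the ignore count also waits for handlers already in flight to drain. A sketch of wrapping a non-reentrant operation, assuming the caller always pairs the two calls:

    #include "vmprof_unix.h"

    /* sketch: run 'critical' with the signal handler standing aside */
    static void with_sampler_paused(void (*critical)(void))
    {
        vmprof_ignore_signals(1);   /* new samples bail out; handlers
                                       already running have finished */
        critical();
        vmprof_ignore_signals(0);
    }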
+
+int install_pthread_atfork_hooks(void) {
+ /* this is needed to prevent the problems described there:
+ - http://code.google.com/p/gperftools/issues/detail?id=278
+ - http://lists.debian.org/debian-glibc/2010/03/msg00161.html
+
+ TL;DR: if the RSS of the process is large enough, the clone() syscall
+ will be interrupted by the SIGPROF before it can complete, then
+ retried, interrupted again and so on, in an endless loop. The
+ solution is to disable the timer around the fork, and re-enable it
+ only inside the parent.
+ */
+ if (atfork_hook_installed)
+ return 0;
    int ret = pthread_atfork(atfork_disable_timer, atfork_enable_timer, atfork_close_profile_file);
+ if (ret != 0)
+ return -1;
+ atfork_hook_installed = 1;
+ return 0;
+}
+
+void segfault_handler(int arg)
+{
+ longjmp(restore_point, SIGSEGV);
+}
+
+int _vmprof_sample_stack(struct profbuf_s *p, PY_THREAD_STATE_T * tstate, ucontext_t * uc)
+{
+ int depth;
+ struct prof_stacktrace_s *st = (struct prof_stacktrace_s *)p->data;
+ st->marker = MARKER_STACKTRACE;
+ st->count = 1;
+#ifdef RPYTHON_VMPROF
+    depth = get_stack_trace(get_vmprof_stack(), st->stack, MAX_STACK_DEPTH-1, (intptr_t)GetPC(uc));
+#else
+    depth = get_stack_trace(tstate, st->stack, MAX_STACK_DEPTH-1, (intptr_t)NULL);
+#endif
+ // useful for tests (see test_stop_sampling)
+#ifndef RPYTHON_LL2CTYPES
+ if (depth == 0) {
+ return 0;
+ }
+#endif
+ st->depth = depth;
+ st->stack[depth++] = tstate;
+ long rss = get_current_proc_rss();
+ if (rss >= 0)
+ st->stack[depth++] = (void*)rss;
+ p->data_offset = offsetof(struct prof_stacktrace_s, marker);
+ p->data_size = (depth * sizeof(void *) +
+ sizeof(struct prof_stacktrace_s) -
+ offsetof(struct prof_stacktrace_s, marker));
+ return 1;
+}
+
+#ifndef RPYTHON_VMPROF
+PY_THREAD_STATE_T * _get_pystate_for_this_thread(void) {
+ // see issue 116 on github.com/vmprof/vmprof-python.
+ // PyGILState_GetThisThreadState(); can hang forever
+ //
+ PyInterpreterState * istate;
+ PyThreadState * state;
+ long mythread_id;
+
+ mythread_id = PyThread_get_thread_ident();
+ istate = PyInterpreterState_Head();
+ if (istate == NULL) {
        fprintf(stderr, "WARNING: interp state head is null (for thread id %ld)\n", mythread_id);
+ return NULL;
+ }
+ // fish fish fish, it will NOT lock the keymutex in pythread
+ do {
+ state = PyInterpreterState_ThreadHead(istate);
+ do {
+ if (state->thread_id == mythread_id) {
+ return state;
+ }
+ } while ((state = PyThreadState_Next(state)) != NULL);
+ } while ((istate = PyInterpreterState_Next(istate)) != NULL);
+
+ // uh? not found?
    fprintf(stderr, "WARNING: cannot find thread state (for thread id %ld), sample will be thrown away\n", mythread_id);
+ return NULL;
+}
+#endif
+
+void flush_codes(void)
+{
+ struct profbuf_s *p = current_codes;
+ if (p != NULL) {
+ current_codes = NULL;
+ commit_buffer(vmp_profile_fileno(), p);
+ }
+}
+
+void set_current_codes(void * to) {
+ current_codes = to;
+}
+
+#endif
+
+void vmprof_aquire_lock(void) {
+ while (__sync_lock_test_and_set(&spinlock, 1)) {
+ }
+}
+
+void vmprof_release_lock(void) {
+ __sync_lock_release(&spinlock);
+}
+
+void sigprof_handler(int sig_nr, siginfo_t* info, void *ucontext)
+{
+ int commit;
+ PY_THREAD_STATE_T * tstate = NULL;
+ void (*prevhandler)(int);
+
+#ifndef RPYTHON_VMPROF
+
+    // Even though the docs say that this function call is for 'esoteric use',
+    // it seems to be correctly set when the interpreter is torn down!
+ if (!Py_IsInitialized()) {
+ return;
+ }
+
+ // TERRIBLE HACK AHEAD
+ // on OS X, the thread local storage is sometimes uninitialized
+ // when the signal handler runs - it means it's impossible to read errno
+    // or call any syscall or read PyThread_Current or pthread_self. Additionally,
+ // it seems impossible to read the register gs.
+ // here we register segfault handler (all guarded by a spinlock) and call
+ // longjmp in case segfault happens while reading a thread local
+ //
+ // We do the same error detection for linux to ensure that
+ // get_current_thread_state returns a sane result
+ while (__sync_lock_test_and_set(&spinlock, 1)) {
+ }
+
+#ifdef VMPROF_UNIX
+ // SIGNAL ABUSE AHEAD
+    // On linux, the prof timer will deliver the signal to the thread which triggered the timer,
+    // because these timers are based on process and system time, and as such, are thread-aware.
+    // For the real timer, the signal gets delivered to the main thread, seemingly always.
+    // Consequently if we want to sample multiple threads, we need to forward this signal.
+ if (vmprof_get_signal_type() == SIGALRM) {
+ if (is_main_thread() && broadcast_signal_for_threads()) {
+ __sync_lock_release(&spinlock);
+ return;
+ }
+ }
+#endif
+
+ prevhandler = signal(SIGSEGV, &segfault_handler);
+ int fault_code = setjmp(restore_point);
+ if (fault_code == 0) {
+ pthread_self();
+ tstate = _get_pystate_for_this_thread();
+ } else {
+ signal(SIGSEGV, prevhandler);
+ __sync_lock_release(&spinlock);
+ return;
+ }
+ signal(SIGSEGV, prevhandler);
+ __sync_lock_release(&spinlock);
+#endif
+
+ long val = vmprof_enter_signal();
+
+ if (val == 0) {
+ int saved_errno = errno;
+ int fd = vmp_profile_fileno();
+ assert(fd >= 0);
+
+ struct profbuf_s *p = reserve_buffer(fd);
+ if (p == NULL) {
+ /* ignore this signal: there are no free buffers right now */
+ } else {
+#ifdef RPYTHON_VMPROF
+ commit = _vmprof_sample_stack(p, NULL, (ucontext_t*)ucontext);
+#else
+ commit = _vmprof_sample_stack(p, tstate, (ucontext_t*)ucontext);
+#endif
+ if (commit) {
+ commit_buffer(fd, p);
+ } else {
+#ifndef RPYTHON_VMPROF
+            fprintf(stderr, "WARNING: canceled buffer, no stack trace was written\n");
+#else
+            fprintf(stderr, "WARNING: canceled buffer, no stack trace was written\n");
+#endif
+ cancel_buffer(p);
+ }
+ }
+
+ errno = saved_errno;
+ }
+
+ vmprof_exit_signal();
+}
+
+int install_sigprof_handler(void)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = sigprof_handler;
+ sa.sa_flags = SA_RESTART | SA_SIGINFO;
+ if (sigemptyset(&sa.sa_mask) == -1 ||
+ sigaction(vmprof_get_signal_type(), &sa, NULL) == -1)
+ return -1;
+ return 0;
+}
+
+int remove_sigprof_handler(void)
+{
+ struct sigaction ign_sigint, prev;
+ ign_sigint.sa_handler = SIG_IGN;
+ ign_sigint.sa_flags = 0;
+ sigemptyset(&ign_sigint.sa_mask);
+
+ if (sigaction(vmprof_get_signal_type(), &ign_sigint, NULL) < 0) {
+        fprintf(stderr, "Could not remove the signal handler (for profiling)\n");
+ return -1;
+ }
+ return 0;
+}
+
+int install_sigprof_timer(void)
+{
+ static struct itimerval timer;
+ timer.it_interval.tv_sec = 0;
+ timer.it_interval.tv_usec = (int)vmprof_get_profile_interval_usec();
+ timer.it_value = timer.it_interval;
+ if (setitimer(vmprof_get_itimer_type(), &timer, NULL) != 0)
+ return -1;
+ return 0;
+}
+
+int remove_sigprof_timer(void)
+{
+ static struct itimerval timer;
+ timerclear(&(timer.it_interval));
+ timerclear(&(timer.it_value));
+ if (setitimer(vmprof_get_itimer_type(), &timer, NULL) != 0) {
+        fprintf(stderr, "Could not disable the signal handler (for profiling)\n");
+ return -1;
+ }
+ return 0;
+}
+
+void atfork_disable_timer(void)
+{
+ if (vmprof_get_profile_interval_usec() > 0) {
+ remove_sigprof_timer();
+ vmprof_set_enabled(0);
+ }
+}
+
+void atfork_close_profile_file(void)
+{
+ int fd = vmp_profile_fileno();
+ if (fd != -1)
+ close(fd);
+ vmp_set_profile_fileno(-1);
+}
+void atfork_enable_timer(void)
+{
+ if (vmprof_get_profile_interval_usec() > 0) {
+ install_sigprof_timer();
+ vmprof_set_enabled(1);
+ }
+}
+
+#ifdef VMP_SUPPORTS_NATIVE_PROFILING
+void init_cpyprof(int native)
+{
+ // skip this if native should not be enabled
+ if (!native) {
+ vmp_native_disable();
+ return;
+ }
+ vmp_native_enable();
+}
+
+static void disable_cpyprof(void)
+{
+ vmp_native_disable();
+}
+#endif
+
+int vmprof_enable(int memory, int native, int real_time)
+{
+#ifdef VMP_SUPPORTS_NATIVE_PROFILING
+ init_cpyprof(native);
+#endif
+ assert(vmp_profile_fileno() >= 0);
+ assert(vmprof_get_prepare_interval_usec() > 0);
+ vmprof_set_profile_interval_usec(vmprof_get_prepare_interval_usec());
+ if (memory && setup_rss() == -1)
+ goto error;
+#if VMPROF_UNIX
+ if (real_time && insert_thread(pthread_self(), -1) == -1)
+ goto error;
+#endif
+ if (install_pthread_atfork_hooks() == -1)
+ goto error;
+ if (install_sigprof_handler() == -1)
+ goto error;
+ if (install_sigprof_timer() == -1)
+ goto error;
+ vmprof_ignore_signals(0);
+ return 0;
+
+ error:
+ vmp_set_profile_fileno(-1);
+ vmprof_set_profile_interval_usec(0);
+ return -1;
+}
+
+
+int close_profile(void)
+{
+ int fileno = vmp_profile_fileno();
+ fsync(fileno);
+ (void)vmp_write_time_now(MARKER_TRAILER);
+ teardown_rss();
+
+ /* don't close() the file descriptor from here */
+ vmp_set_profile_fileno(-1);
+ return 0;
+}
+
+int vmprof_disable(void)
+{
+ vmprof_ignore_signals(1);
+ vmprof_set_profile_interval_usec(0);
+#ifdef VMP_SUPPORTS_NATIVE_PROFILING
+ disable_cpyprof();
+#endif
+
+ if (remove_sigprof_timer() == -1) {
+ return -1;
+ }
+ if (remove_sigprof_handler() == -1) {
+ return -1;
+ }
+#ifdef VMPROF_UNIX
+ if ((vmprof_get_signal_type() == SIGALRM) && remove_threads() == -1) {
+ return -1;
+ }
+#endif
+ flush_codes();
+ if (shutdown_concurrent_bufs(vmp_profile_fileno()) < 0)
+ return -1;
+ return close_profile();
+}
+
+int vmprof_register_virtual_function(char *code_name, intptr_t code_uid,
+ int auto_retry)
+{
+ long namelen = strnlen(code_name, 1023);
+ long blocklen = 1 + sizeof(intptr_t) + sizeof(long) + namelen;
+ struct profbuf_s *p;
+ char *t;
+
+ retry:
+ p = current_codes;
+ if (p != NULL) {
+        if (__sync_bool_compare_and_swap(&current_codes, p, NULL)) {
+ /* grabbed 'current_codes': we will append the current block
+ to it if it contains enough room */
+ size_t freesize = SINGLE_BUF_SIZE - p->data_size;
+ if (freesize < (size_t)blocklen) {
+ /* full: flush it */
+ commit_buffer(vmp_profile_fileno(), p);
+ p = NULL;
+ }
+ }
+ else {
+ /* compare-and-swap failed, don't try again */
+ p = NULL;
+ }
+ }
+
+ if (p == NULL) {
+ p = reserve_buffer(vmp_profile_fileno());
+ if (p == NULL) {
+ /* can't get a free block; should almost never be the
+ case. Spin loop if allowed, or return a failure code
+ if not (e.g. we're in a signal handler) */
+ if (auto_retry > 0) {
+ auto_retry--;
+ usleep(1);
+ goto retry;
+ }
+ return -1;
+ }
+ }
+
+ t = p->data + p->data_size;
+ p->data_size += blocklen;
+ assert(p->data_size <= SINGLE_BUF_SIZE);
+ *t++ = MARKER_VIRTUAL_IP;
+ memcpy(t, &code_uid, sizeof(intptr_t)); t += sizeof(intptr_t);
+ memcpy(t, &namelen, sizeof(long)); t += sizeof(long);
+ memcpy(t, code_name, namelen);
+
+ /* try to reattach 'p' to 'current_codes' */
+    if (!__sync_bool_compare_and_swap(&current_codes, NULL, p)) {
+ /* failed, flush it */
+ commit_buffer(vmp_profile_fileno(), p);
+ }
+ return 0;
+}
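A sketch of the intended call from an interpreter when a code object is assigned a virtual address; the name and uid are made-up illustrative values, and auto_retry bounds the usleep() spin when every buffer is busy (pass 0 from a signal handler):

    #include "vmprof_unix.h"

    static void example_register(void)
    {
        if (vmprof_register_virtual_function("py:example:1:<module>",
                                             (intptr_t)0x1000, 5) < 0) {
            /* all buffers still busy after retrying: entry dropped */
        }
    }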
+
+int get_stack_trace(PY_THREAD_STATE_T * current, void** result, int max_depth, intptr_t pc)
+{
+ PY_STACK_FRAME_T * frame;
+#ifdef RPYTHON_VMPROF
+ // do nothing here,
+ frame = (PY_STACK_FRAME_T*)current;
+#else
+ if (current == NULL) {
+ fprintf(stderr, "WARNING: get_stack_trace, current is NULL\n");
+ return 0;
+ }
+ frame = current->frame;
+#endif
+ if (frame == NULL) {
+ fprintf(stderr, "WARNING: get_stack_trace, frame is NULL\n");
+ return 0;
+ }
+ return vmp_walk_and_record_stack(frame, result, max_depth, 1, pc);
+}
diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_unix.h b/rpython/rlib/rvmprof/src/shared/vmprof_unix.h
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/rvmprof/src/shared/vmprof_unix.h
@@ -0,0 +1,86 @@
+#pragma once
+
+/* VMPROF
+ *
+ * statistical sampling profiler specifically designed to profile programs
+ * which run on a Virtual Machine and/or bytecode interpreter, such as Python,
+ * etc.
+ *
+ * The logic to dump the C stack traces is partly stolen from the code in
+ * gperftools.
+ * The file "getpc.h" has been entirely copied from gperftools.
+ *
+ * Tested only on gcc, linux, x86_64.
+ *
+ * Copyright (C) 2014-2017
+ * Antonio Cuni - [email protected]
+ * Maciej Fijalkowski - [email protected]
+ * Armin Rigo - [email protected]
+ * Richard Plangger - [email protected]
+ *
+ */
+
+#include "vmprof.h"
+
+#include "vmprof_mt.h"
+
+#include <signal.h>
+
+RPY_EXTERN void vmprof_ignore_signals(int ignored);
+RPY_EXTERN long vmprof_enter_signal(void);
+RPY_EXTERN long vmprof_exit_signal(void);
+
+/* *************************************************************
+ * functions to dump the stack trace
+ * *************************************************************
+ */
+
+#ifndef RPYTHON_VMPROF
+PY_THREAD_STATE_T * _get_pystate_for_this_thread(void);
+#endif
+int get_stack_trace(PY_THREAD_STATE_T * current, void** result, int max_depth, intptr_t pc);
+
+/* *************************************************************
+ * the signal handler
+ * *************************************************************
+ */
+
+#include <setjmp.h>
+
+void segfault_handler(int arg);
+int _vmprof_sample_stack(struct profbuf_s *p, PY_THREAD_STATE_T * tstate, ucontext_t * uc);
+void sigprof_handler(int sig_nr, siginfo_t* info, void *ucontext);
+
+
+/* *************************************************************
+ * the setup and teardown functions
+ * *************************************************************
+ */
+
+int install_sigprof_handler(void);
+int remove_sigprof_handler(void);
+int install_sigprof_timer(void);
+int remove_sigprof_timer(void);
+void atfork_disable_timer(void);
+void atfork_enable_timer(void);
+void atfork_close_profile_file(void);
+int install_pthread_atfork_hooks(void);
+
+#ifdef VMP_SUPPORTS_NATIVE_PROFILING
+void init_cpyprof(int native);
+static void disable_cpyprof(void);
+#endif
+
+int close_profile(void);
+
+RPY_EXTERN
+int vmprof_enable(int memory, int native, int real_time);
+RPY_EXTERN
+int vmprof_disable(void);
+RPY_EXTERN
+int vmprof_register_virtual_function(char *code_name, intptr_t code_uid,
+ int auto_retry);
+
+
+void vmprof_aquire_lock(void);
+void vmprof_release_lock(void);
diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_win.c b/rpython/rlib/rvmprof/src/shared/vmprof_win.c
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/rvmprof/src/shared/vmprof_win.c
@@ -0,0 +1,42 @@
+// cannot include this header because it also has definitions
+#include "windows.h"
+#include "compat.h"
+#include "vmp_stack.h"
+
+HANDLE write_mutex;
+
+int prepare_concurrent_bufs(void)
+{
+ if (!(write_mutex = CreateMutex(NULL, FALSE, NULL)))
+ return -1;
+ return 0;
+}
+
+#include <tlhelp32.h>
+
+int vmp_write_all(const char *buf, size_t bufsize)
+{
+ int res;
+ int fd;
+ int count;
+
+ res = WaitForSingleObject(write_mutex, INFINITE);
+ fd = vmp_profile_fileno();
+
+ if (fd == -1) {
+ ReleaseMutex(write_mutex);
+ return -1;
+ }
+ while (bufsize > 0) {
+ count = _write(fd, buf, (long)bufsize);
+ if (count <= 0) {
+ ReleaseMutex(write_mutex);
+ return -1; /* failed */
+ }
+ buf += count;
+ bufsize -= count;
+ }
+ ReleaseMutex(write_mutex);
+ return 0;
+}
+
diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_win.h b/rpython/rlib/rvmprof/src/shared/vmprof_win.h
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/rvmprof/src/shared/vmprof_win.h
@@ -0,0 +1,203 @@
+#pragma once
+
+#include "windows.h"
+#include "compat.h"
+#include "vmp_stack.h"
+
+HANDLE write_mutex;
+
+int prepare_concurrent_bufs(void);
+
+#include "vmprof_common.h"
+#include <tlhelp32.h>