Author: Carl Friedrich Bolz <[email protected]>
Branch: value-profiling
Changeset: r79065:bbcbd47d4cec
Date: 2015-08-19 18:25 +0200
http://bitbucket.org/pypy/pypy/changeset/bbcbd47d4cec/
Log: merge default
diff too long, truncating to 2000 out of 2204 lines
diff --git a/_pytest/assertion/rewrite.py b/_pytest/assertion/rewrite.py
--- a/_pytest/assertion/rewrite.py
+++ b/_pytest/assertion/rewrite.py
@@ -308,7 +308,10 @@
if (len(data) != 8 or data[:4] != imp.get_magic() or
struct.unpack("<l", data[4:])[0] != mtime):
return None
- co = marshal.load(fp)
+ try:
+ co = marshal.load(fp)
+ except ValueError:
+ return None # e.g. bad marshal data because of pypy/cpython mix
if not isinstance(co, types.CodeType):
# That's interesting....
return None
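
For context, the guarded load this hunk produces, as a standalone sketch
(hypothetical helper mirroring the method above; Python 2, as in the source):

    import imp, marshal, struct, types

    def read_pyc(fp, mtime):
        data = fp.read(8)
        if (len(data) != 8 or data[:4] != imp.get_magic() or
                struct.unpack("<l", data[4:])[0] != mtime):
            return None
        try:
            co = marshal.load(fp)
        except ValueError:
            return None   # e.g. bad marshal data from a pypy/cpython mix
        if not isinstance(co, types.CodeType):
            return None
        return co
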
diff --git a/pypy/doc/embedding.rst b/pypy/doc/embedding.rst
--- a/pypy/doc/embedding.rst
+++ b/pypy/doc/embedding.rst
@@ -46,7 +46,11 @@
source. It'll acquire the GIL.
Note: this is meant to be called *only once* or a few times at most. See
- the `more complete example`_ below.
+ the `more complete example`_ below. In PyPy <= 2.6.0, the globals
+ dictionary is *reused* across multiple calls, giving potentially
+ strange results (e.g. objects dying too early). In PyPy >= 2.6.1,
+   you get a new globals dictionary for every call (but then all the globals
+   dictionaries are kept alive forever, in ``sys._pypy_execute_source``).
.. function:: int pypy_execute_source_ptr(char* source, void* ptr);
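
For illustration, a minimal ctypes sketch of the behaviour documented above
(assumptions: PyPy >= 2.6.1, and placeholder library/home paths):

    import ctypes

    lib = ctypes.CDLL("libpypy-c.so", mode=ctypes.RTLD_GLOBAL)  # placeholder
    lib.rpython_startup_code()
    lib.pypy_setup_home("/opt/pypy/bin/libpypy-c.so", 1)        # placeholder

    # Each call runs in a fresh globals dictionary (all of them kept alive
    # in sys._pypy_execute_source), so 'x' does not leak into the second call.
    lib.pypy_execute_source("x = 42")
    lib.pypy_execute_source("assert 'x' not in globals()")
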
diff --git a/pypy/goal/targetpypystandalone.py b/pypy/goal/targetpypystandalone.py
--- a/pypy/goal/targetpypystandalone.py
+++ b/pypy/goal/targetpypystandalone.py
@@ -128,13 +128,7 @@
@entrypoint('main', [rffi.CCHARP], c_name='pypy_execute_source')
def pypy_execute_source(ll_source):
- after = rffi.aroundstate.after
- if after: after()
- source = rffi.charp2str(ll_source)
- res = _pypy_execute_source(source)
- before = rffi.aroundstate.before
- if before: before()
- return rffi.cast(rffi.INT, res)
+ return pypy_execute_source_ptr(ll_source, 0)
@entrypoint('main', [rffi.CCHARP, lltype.Signed],
c_name='pypy_execute_source_ptr')
@@ -142,9 +136,7 @@
after = rffi.aroundstate.after
if after: after()
source = rffi.charp2str(ll_source)
- space.setitem(w_globals, space.wrap('c_argument'),
- space.wrap(ll_ptr))
- res = _pypy_execute_source(source)
+ res = _pypy_execute_source(source, ll_ptr)
before = rffi.aroundstate.before
if before: before()
return rffi.cast(rffi.INT, res)
@@ -169,15 +161,21 @@
before = rffi.aroundstate.before
if before: before()
- w_globals = space.newdict()
- space.setitem(w_globals, space.wrap('__builtins__'),
- space.builtin_modules['__builtin__'])
-
- def _pypy_execute_source(source):
+ def _pypy_execute_source(source, c_argument):
try:
- compiler = space.createcompiler()
- stmt = compiler.compile(source, 'c callback', 'exec', 0)
- stmt.exec_code(space, w_globals, w_globals)
+ w_globals = space.newdict(module=True)
+ space.setitem(w_globals, space.wrap('__builtins__'),
+ space.builtin_modules['__builtin__'])
+ space.setitem(w_globals, space.wrap('c_argument'),
+ space.wrap(c_argument))
+ space.appexec([space.wrap(source), w_globals], """(src, glob):
+ import sys
+ stmt = compile(src, 'c callback', 'exec')
+ if not hasattr(sys, '_pypy_execute_source'):
+ sys._pypy_execute_source = []
+ sys._pypy_execute_source.append(glob)
+ exec stmt in glob
+ """)
except OperationError, e:
debug("OperationError:")
debug(" operror-type: " + e.w_type.getname(space))
diff --git a/pypy/module/_vmprof/test/test__vmprof.py b/pypy/module/_vmprof/test/test__vmprof.py
--- a/pypy/module/_vmprof/test/test__vmprof.py
+++ b/pypy/module/_vmprof/test/test__vmprof.py
@@ -21,11 +21,12 @@
i = 0
count = 0
i += 5 * WORD # header
- assert s[i] == '\x04'
- i += 1 # marker
- assert s[i] == '\x04'
- i += 1 # length
- i += len('pypy')
+ assert s[i ] == '\x05' # MARKER_HEADER
+ assert s[i + 1] == '\x00' # 0
+ assert s[i + 2] == '\x01' # VERSION_THREAD_ID
+ assert s[i + 3] == chr(4) # len('pypy')
+ assert s[i + 4: i + 8] == 'pypy'
+ i += 8
while i < len(s):
if s[i] == '\x03':
break
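
The new header layout that this test checks, as a small parsing sketch
(hypothetical helper; WORD = 8 assumes a 64-bit build):

    def parse_header(s, WORD=8):
        i = 5 * WORD                      # skip the 5-word numeric header
        assert s[i]     == '\x05'         # MARKER_HEADER
        assert s[i + 1] == '\x00'
        assert s[i + 2] == '\x01'         # VERSION_THREAD_ID
        namelen = ord(s[i + 3])           # len('pypy') == 4
        name = s[i + 4:i + 4 + namelen]   # 'pypy'
        return name, i + 4 + namelen      # offset where the records start
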
diff --git a/pypy/module/_vmprof/test/test_direct.py b/pypy/module/_vmprof/test/test_direct.py
--- a/pypy/module/_vmprof/test/test_direct.py
+++ b/pypy/module/_vmprof/test/test_direct.py
@@ -42,7 +42,7 @@
}
-""" + open(str(srcdir.join("rvmprof_get_custom_offset.h"))).read())
+""" + open(str(srcdir.join("vmprof_get_custom_offset.h"))).read())
class TestDirect(object):
def test_infrastructure(self):
diff --git a/pypy/module/struct/formatiterator.py b/pypy/module/struct/formatiterator.py
--- a/pypy/module/struct/formatiterator.py
+++ b/pypy/module/struct/formatiterator.py
@@ -82,7 +82,13 @@
            w_index = space.int(w_obj)   # wrapped float -> wrapped int or long
if w_index is None:
raise StructError("cannot convert argument to integer")
- return getattr(space, meth)(w_index)
+ method = getattr(space, meth)
+ try:
+ return method(w_index)
+ except OperationError as e:
+ if e.match(self.space, self.space.w_OverflowError):
+ raise StructError("argument out of range")
+ raise
def accept_bool_arg(self):
w_obj = self.accept_obj_arg()
diff --git a/pypy/module/struct/test/test_struct.py b/pypy/module/struct/test/test_struct.py
--- a/pypy/module/struct/test/test_struct.py
+++ b/pypy/module/struct/test/test_struct.py
@@ -428,6 +428,9 @@
assert s.unpack(s.pack(42)) == (42,)
assert s.unpack_from(memoryview(s.pack(42))) == (42,)
+ def test_overflow(self):
+ raises(self.struct.error, self.struct.pack, 'i', 1<<65)
+
class AppTestStructBuffer(object):
spaceconfig = dict(usemodules=['struct', '__pypy__'])
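
At the app level the new behaviour looks like this (a sketch of the expected
outcome; the exact message may differ):

    import struct

    try:
        struct.pack('i', 1 << 65)   # far outside any 32-bit int
    except struct.error:
        pass   # "argument out of range", not a raw OverflowError
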
diff --git a/rpython/flowspace/objspace.py b/rpython/flowspace/objspace.py
--- a/rpython/flowspace/objspace.py
+++ b/rpython/flowspace/objspace.py
@@ -13,6 +13,11 @@
def _assert_rpythonic(func):
"""Raise ValueError if ``func`` is obviously not RPython"""
+ try:
+ func.func_code.co_cellvars
+ except AttributeError:
+ raise ValueError("%r is not RPython: it is likely an unexpected "
+ "built-in function or type" % (func,))
if func.func_doc and func.func_doc.lstrip().startswith('NOT_RPYTHON'):
raise ValueError("%r is tagged as NOT_RPYTHON" % (func,))
if func.func_code.co_cellvars:
diff --git a/rpython/flowspace/test/test_objspace.py b/rpython/flowspace/test/test_objspace.py
--- a/rpython/flowspace/test/test_objspace.py
+++ b/rpython/flowspace/test/test_objspace.py
@@ -1363,6 +1363,15 @@
simplify_graph(graph)
assert self.all_operations(graph) == {'bool': 1, 'inplace_add': 1}
+ def test_unexpected_builtin_function(self):
+ import itertools
+ e = py.test.raises(ValueError, build_flow, itertools.permutations)
+ assert ' is not RPython:' in str(e.value)
+ e = py.test.raises(ValueError, build_flow, itertools.tee)
+ assert ' is not RPython:' in str(e.value)
+ e = py.test.raises(ValueError, build_flow, Exception.__init__)
+ assert ' is not RPython:' in str(e.value)
+
DATA = {'x': 5,
'y': 6}
diff --git a/rpython/jit/backend/detect_cpu.py b/rpython/jit/backend/detect_cpu.py
--- a/rpython/jit/backend/detect_cpu.py
+++ b/rpython/jit/backend/detect_cpu.py
@@ -63,6 +63,7 @@
'AMD64': MODEL_X86, # win64
'armv7l': MODEL_ARM,
'armv6l': MODEL_ARM,
+ 'arm': MODEL_ARM, # freebsd
}.get(mach)
if result is None:
diff --git a/rpython/jit/backend/llsupport/src/codemap.c b/rpython/jit/backend/llsupport/src/codemap.c
--- a/rpython/jit/backend/llsupport/src/codemap.c
+++ b/rpython/jit/backend/llsupport/src/codemap.c
@@ -6,9 +6,9 @@
#endif
#ifdef RPYTHON_VMPROF
-RPY_EXTERN void rpython_vmprof_ignore_signals(int ignored);
+RPY_EXTERN void vmprof_ignore_signals(int ignored);
static void pypy_codemap_invalid_set(int ignored) {
- rpython_vmprof_ignore_signals(ignored);
+ vmprof_ignore_signals(ignored);
}
#else
static void pypy_codemap_invalid_set(int ignored) {
diff --git a/rpython/jit/backend/tool/viewcode.py b/rpython/jit/backend/tool/viewcode.py
--- a/rpython/jit/backend/tool/viewcode.py
+++ b/rpython/jit/backend/tool/viewcode.py
@@ -17,18 +17,6 @@
import subprocess
from bisect import bisect_left
-# don't use rpython.tool.udir here to avoid removing old usessions which
-# might still contain interesting executables
-udir = py.path.local.make_numbered_dir(prefix='viewcode-', keep=2)
-tmpfile = str(udir.join('dump.tmp'))
-
-# hack hack
-import rpython.tool
-mod = new.module('rpython.tool.udir')
-mod.udir = udir
-sys.modules['rpython.tool.udir'] = mod
-rpython.tool.udir = mod
-
# ____________________________________________________________
# Some support code from Psyco. There is more over there,
# I am porting it in a lazy fashion... See py-utils/xam.py
@@ -438,6 +426,18 @@
# ____________________________________________________________
if __name__ == '__main__':
+ # don't use rpython.tool.udir here to avoid removing old usessions which
+ # might still contain interesting executables
+ udir = py.path.local.make_numbered_dir(prefix='viewcode-', keep=2)
+ tmpfile = str(udir.join('dump.tmp'))
+
+ # hack hack
+ import rpython.tool
+ mod = new.module('rpython.tool.udir')
+ mod.udir = udir
+ sys.modules['rpython.tool.udir'] = mod
+ rpython.tool.udir = mod
+
if '--text' in sys.argv:
sys.argv.remove('--text')
showgraph = False
@@ -463,3 +463,7 @@
world.show(showtext=True)
else:
world.showtextonly()
+else:
+ from rpython.tool.udir import udir
+ tmpfile = str(udir.join('dump.tmp'))
+
diff --git a/rpython/rlib/jit.py b/rpython/rlib/jit.py
--- a/rpython/rlib/jit.py
+++ b/rpython/rlib/jit.py
@@ -1087,6 +1087,16 @@
"""
assert value is not None and type(value) is cls
+def ll_record_exact_class(ll_value, ll_cls):
+ from rpython.rlib.debug import ll_assert
+ from rpython.rtyper.lltypesystem.lloperation import llop
+ from rpython.rtyper.lltypesystem import lltype
+ from rpython.rtyper.rclass import ll_type
+    ll_assert(ll_value != lltype.nullptr(lltype.typeOf(ll_value).TO),
+              "record_exact_class called with None argument")
+    ll_assert(ll_type(ll_value) is ll_cls,
+              "record_exact_class called with invalid arguments")
+ llop.jit_record_exact_class(lltype.Void, ll_value, ll_cls)
+
+
class Entry(ExtRegistryEntry):
_about_ = record_exact_class
@@ -1099,12 +1109,10 @@
from rpython.rtyper import rclass
classrepr = rclass.get_type_repr(hop.rtyper)
-
- hop.exception_cannot_occur()
v_inst = hop.inputarg(hop.args_r[0], arg=0)
v_cls = hop.inputarg(classrepr, arg=1)
- return hop.genop('jit_record_exact_class', [v_inst, v_cls],
- resulttype=lltype.Void)
+ hop.exception_is_here()
+ return hop.gendirectcall(ll_record_exact_class, v_inst, v_cls)
def _jit_conditional_call(condition, function, *args):
pass
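
A sketch of what the new lowering backs up at the RPython level (the classes
here are made up for illustration):

    from rpython.rlib.jit import record_exact_class

    class Base(object):
        pass

    class Concrete(Base):
        pass

    def f(x):
        # promise the JIT that x is exactly Concrete; with the change above
        # this is checked by ll_record_exact_class (x not None, type matches)
        # instead of being trusted silently
        record_exact_class(x, Concrete)
        return x
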
diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py
--- a/rpython/rlib/rvmprof/cintf.py
+++ b/rpython/rlib/rvmprof/cintf.py
@@ -40,24 +40,20 @@
**eci_kwds))
-    vmprof_init = rffi.llexternal("rpython_vmprof_init", [rffi.INT], rffi.CCHARP,
-                                  compilation_info=eci)
-    vmprof_enable = rffi.llexternal("rpython_vmprof_enable", [rffi.LONG], rffi.INT,
+ vmprof_init = rffi.llexternal("vmprof_init",
+ [rffi.INT, rffi.DOUBLE, rffi.CCHARP],
+ rffi.CCHARP, compilation_info=eci)
+ vmprof_enable = rffi.llexternal("vmprof_enable", [], rffi.INT,
compilation_info=eci,
save_err=rffi.RFFI_SAVE_ERRNO)
- vmprof_disable = rffi.llexternal("rpython_vmprof_disable", [], rffi.INT,
+ vmprof_disable = rffi.llexternal("vmprof_disable", [], rffi.INT,
compilation_info=eci,
save_err=rffi.RFFI_SAVE_ERRNO)
- vmprof_write_buf = rffi.llexternal("rpython_vmprof_write_buf",
- [rffi.CCHARP, rffi.LONG],
- lltype.Void, compilation_info=eci)
-
- ## vmprof_register_virtual_function = rffi.llexternal(
- ## "vmprof_register_virtual_function",
- ## [rffi.CCHARP, rffi.VOIDP, rffi.VOIDP], lltype.Void,
- ## compilation_info=eci, _nowrapper=True)
-
- vmprof_ignore_signals = rffi.llexternal("rpython_vmprof_ignore_signals",
+ vmprof_register_virtual_function = rffi.llexternal(
+ "vmprof_register_virtual_function",
+ [rffi.CCHARP, rffi.LONG, rffi.INT],
+ rffi.INT, compilation_info=eci)
+ vmprof_ignore_signals = rffi.llexternal("vmprof_ignore_signals",
[rffi.INT], lltype.Void,
compilation_info=eci)
return CInterface(locals())
diff --git a/rpython/rlib/rvmprof/rvmprof.py b/rpython/rlib/rvmprof/rvmprof.py
--- a/rpython/rlib/rvmprof/rvmprof.py
+++ b/rpython/rlib/rvmprof/rvmprof.py
@@ -1,14 +1,12 @@
import sys, os
from rpython.rlib.objectmodel import specialize, we_are_translated
-from rpython.rlib.rstring import StringBuilder
from rpython.rlib import jit, rgc, rposix
from rpython.rlib.rvmprof import cintf
from rpython.rtyper.annlowlevel import cast_instance_to_gcref
from rpython.rtyper.annlowlevel import cast_base_ptr_to_instance
from rpython.rtyper.lltypesystem import rffi
-MAX_CODES = 8000 - 255
-MAX_FUNC_NAME = 255
+MAX_FUNC_NAME = 1023
# ____________________________________________________________
@@ -34,8 +32,6 @@
def _cleanup_(self):
self.is_enabled = False
- self.fileno = -1
- self._current_codes = None
@specialize.argtype(1)
def register_code(self, code, full_name_func):
@@ -102,18 +98,13 @@
assert fileno >= 0
if self.is_enabled:
raise VMProfError("vmprof is already enabled")
- if not (1e-6 <= interval < 1.0):
- raise VMProfError("bad value for 'interval'")
- interval_usec = int(interval * 1000000.0)
- p_error = self.cintf.vmprof_init(fileno)
+ p_error = self.cintf.vmprof_init(fileno, interval, "pypy")
if p_error:
raise VMProfError(rffi.charp2str(p_error))
- self.fileno = fileno
- self._write_header(interval_usec)
self._gather_all_code_objs()
- res = self.cintf.vmprof_enable(interval_usec)
+ res = self.cintf.vmprof_enable()
if res < 0:
raise VMProfError(os.strerror(rposix.get_saved_errno()))
self.is_enabled = True
@@ -125,9 +116,6 @@
if not self.is_enabled:
raise VMProfError("vmprof is not enabled")
self.is_enabled = False
- if self._current_codes is not None:
- self._flush_codes()
- self.fileno = -1
res = self.cintf.vmprof_disable()
if res < 0:
raise VMProfError(os.strerror(rposix.get_saved_errno()))
@@ -136,48 +124,8 @@
assert name.count(':') == 3 and len(name) <= MAX_FUNC_NAME, (
"the name must be 'class:func_name:func_line:filename' "
"and at most %d characters; got '%s'" % (MAX_FUNC_NAME, name))
- b = self._current_codes
- if b is None:
- b = self._current_codes = StringBuilder()
- b.append('\x02')
- _write_long_to_string_builder(uid, b)
- _write_long_to_string_builder(len(name), b)
- b.append(name)
- if b.getlength() >= MAX_CODES:
- self._flush_codes()
-
- def _flush_codes(self):
- buf = self._current_codes.build()
- self._current_codes = None
- self.cintf.vmprof_write_buf(buf, len(buf))
- # NOTE: keep in mind that vmprof_write_buf() can only write
- # a maximum of 8184 bytes. This should be guaranteed here because:
- assert MAX_CODES + 17 + MAX_FUNC_NAME <= 8184
-
- def _write_header(self, interval_usec):
- b = StringBuilder()
- _write_long_to_string_builder(0, b)
- _write_long_to_string_builder(3, b)
- _write_long_to_string_builder(0, b)
- _write_long_to_string_builder(interval_usec, b)
- _write_long_to_string_builder(0, b)
- b.append('\x04') # interp name
- b.append(chr(len('pypy')))
- b.append('pypy')
- buf = b.build()
- self.cintf.vmprof_write_buf(buf, len(buf))
-
-
-def _write_long_to_string_builder(l, b):
- b.append(chr(l & 0xff))
- b.append(chr((l >> 8) & 0xff))
- b.append(chr((l >> 16) & 0xff))
- b.append(chr((l >> 24) & 0xff))
- if sys.maxint > 2147483647:
- b.append(chr((l >> 32) & 0xff))
- b.append(chr((l >> 40) & 0xff))
- b.append(chr((l >> 48) & 0xff))
- b.append(chr((l >> 56) & 0xff))
+ if self.cintf.vmprof_register_virtual_function(name, uid, 500000) < 0:
+ raise VMProfError("vmprof buffers full! disk full or too slow")
def vmprof_execute_code(name, get_code_fn, result_class=None):
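
For reference, the name format enforced by the assert above, with
illustrative values:

    # four colon-separated fields, at most MAX_FUNC_NAME (now 1023) characters
    name = "py:my_func:42:/path/to/module.py"
    assert name.count(':') == 3 and len(name) <= 1023
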
diff --git a/rpython/rlib/rvmprof/src/rvmprof.c b/rpython/rlib/rvmprof/src/rvmprof.c
--- a/rpython/rlib/rvmprof/src/rvmprof.c
+++ b/rpython/rlib/rvmprof/src/rvmprof.c
@@ -1,22 +1,3 @@
-/* VMPROF
- *
- * statistical sampling profiler specifically designed to profile programs
- * which run on a Virtual Machine and/or bytecode interpreter, such as Python,
- * etc.
- *
- * The logic to dump the C stack traces is partly stolen from the code in
- * gperftools.
- * The file "getpc.h" has been entirely copied from gperftools.
- *
- * Tested only on gcc, linux, x86_64.
- *
- * Copyright (C) 2014-2015
- * Antonio Cuni - [email protected]
- * Maciej Fijalkowski - [email protected]
- * Armin Rigo - [email protected]
- *
- */
-
#define _GNU_SOURCE 1
@@ -39,431 +20,4 @@
#endif
-#include <dlfcn.h>
-#include <assert.h>
-#include <pthread.h>
-#include <sys/time.h>
-#include <errno.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include "rvmprof_getpc.h"
-#include "rvmprof_unwind.h"
-#include "rvmprof_mt.h"
-
-
-/************************************************************/
-
-// functions copied from libunwind using dlopen
-
-static int (*unw_get_reg)(unw_cursor_t*, int, unw_word_t*) = NULL;
-static int (*unw_step)(unw_cursor_t*) = NULL;
-static int (*unw_init_local)(unw_cursor_t *, unw_context_t *) = NULL;
-static int (*unw_get_proc_info)(unw_cursor_t *, unw_proc_info_t *) = NULL;
-
-static int profile_file = -1;
-
-
-RPY_EXTERN
-char *rpython_vmprof_init(int fd)
-{
- if (!unw_get_reg) {
- void *libhandle;
-
- if (!(libhandle = dlopen("libunwind.so", RTLD_LAZY | RTLD_LOCAL)))
- goto error;
- if (!(unw_get_reg = dlsym(libhandle, "_ULx86_64_get_reg")))
- goto error;
- if (!(unw_get_proc_info = dlsym(libhandle, "_ULx86_64_get_proc_info")))
- goto error;
- if (!(unw_init_local = dlsym(libhandle, "_ULx86_64_init_local")))
- goto error;
- if (!(unw_step = dlsym(libhandle, "_ULx86_64_step")))
- goto error;
- }
- if (prepare_concurrent_bufs() < 0)
- return "out of memory";
-
- assert(fd >= 0);
- profile_file = fd;
- return NULL;
-
- error:
- return dlerror();
-}
-
-/************************************************************/
-
-/* value: last bit is 1 if signals must be ignored; all other bits
- are a counter for how many threads are currently in a signal handler */
-static long volatile signal_handler_value = 1;
-
-RPY_EXTERN
-void rpython_vmprof_ignore_signals(int ignored)
-{
- if (!ignored) {
- __sync_fetch_and_and(&signal_handler_value, ~1L);
- }
- else {
- /* set the last bit, and wait until concurrently-running signal
- handlers finish */
- while (__sync_or_and_fetch(&signal_handler_value, 1L) != 1L) {
- usleep(1);
- }
- }
-}
-
-
-/* *************************************************************
- * functions to write a profile file compatible with gperftools
- * *************************************************************
- */
-
-#define MAX_FUNC_NAME 128
-#define MAX_STACK_DEPTH \
- ((SINGLE_BUF_SIZE - sizeof(struct prof_stacktrace_s)) / sizeof(void *))
-
-#define MARKER_STACKTRACE '\x01'
-#define MARKER_VIRTUAL_IP '\x02'
-#define MARKER_TRAILER '\x03'
-
-struct prof_stacktrace_s {
- char padding[sizeof(long) - 1];
- char marker;
- long count, depth;
- void *stack[];
-};
-
-static long profile_interval_usec = 0;
-static char atfork_hook_installed = 0;
-
-
-/* ******************************************************
- * libunwind workaround for process JIT frames correctly
- * ******************************************************
- */
-
-#include "rvmprof_get_custom_offset.h"
-
-typedef struct {
- void* _unused1;
- void* _unused2;
- void* sp;
- void* ip;
- void* _unused3[sizeof(unw_cursor_t)/sizeof(void*) - 4];
-} vmprof_hacked_unw_cursor_t;
-
-static int vmprof_unw_step(unw_cursor_t *cp, int first_run)
-{
- void* ip;
- void* sp;
- ptrdiff_t sp_offset;
- unw_get_reg (cp, UNW_REG_IP, (unw_word_t*)&ip);
- unw_get_reg (cp, UNW_REG_SP, (unw_word_t*)&sp);
- if (!first_run) {
- // make sure we're pointing to the CALL and not to the first
- // instruction after. If the callee adjusts the stack for us
- // it's not safe to be at the instruction after
- ip -= 1;
- }
- sp_offset = vmprof_unw_get_custom_offset(ip, cp);
-
- if (sp_offset == -1) {
- // it means that the ip is NOT in JITted code, so we can use the
- // stardard unw_step
- return unw_step(cp);
- }
- else {
- // this is a horrible hack to manually walk the stack frame, by
- // setting the IP and SP in the cursor
- vmprof_hacked_unw_cursor_t *cp2 = (vmprof_hacked_unw_cursor_t*)cp;
- void* bp = (void*)sp + sp_offset;
- cp2->sp = bp;
- bp -= sizeof(void*);
- cp2->ip = ((void**)bp)[0];
- // the ret is on the top of the stack minus WORD
- return 1;
- }
-}
-
-
-/* *************************************************************
- * functions to dump the stack trace
- * *************************************************************
- */
-
-static int get_stack_trace(void** result, int max_depth, ucontext_t *ucontext)
-{
- void *ip;
- int n = 0;
- unw_cursor_t cursor;
- unw_context_t uc = *ucontext;
-
- int ret = unw_init_local(&cursor, &uc);
- assert(ret >= 0);
- (void)ret;
-
- while (n < max_depth) {
- if (unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *) &ip) < 0) {
- break;
- }
-
- unw_proc_info_t pip;
- unw_get_proc_info(&cursor, &pip);
-
- /* if n==0, it means that the signal handler interrupted us while we
- were in the trampoline, so we are not executing (yet) the real main
- loop function; just skip it */
- if (VMPROF_ADDR_OF_TRAMPOLINE((void*)pip.start_ip) && n > 0) {
- // found main loop stack frame
- void* sp;
- unw_get_reg(&cursor, UNW_REG_SP, (unw_word_t *) &sp);
- void *arg_addr = (char*)sp /* + mainloop_sp_offset */;
- void **arg_ptr = (void**)arg_addr;
- /* if (mainloop_get_virtual_ip) {
- ip = mainloop_get_virtual_ip(*arg_ptr);
- } else { */
- ip = *arg_ptr;
- }
-
- int first_run = (n == 0);
- result[n++] = ip;
- n = vmprof_write_header_for_jit_addr(result, n, ip, max_depth);
- if (vmprof_unw_step(&cursor, first_run) <= 0)
- break;
- }
- return n;
-}
-
-
-/* *************************************************************
- * the signal handler
- * *************************************************************
- */
-
-static void sigprof_handler(int sig_nr, siginfo_t* info, void *ucontext)
-{
- long val = __sync_fetch_and_add(&signal_handler_value, 2L);
-
- if ((val & 1) == 0) {
- int saved_errno = errno;
- int fd = profile_file;
- assert(fd >= 0);
-
- struct profbuf_s *p = reserve_buffer(fd);
- if (p == NULL) {
- /* ignore this signal: there are no free buffers right now */
- }
- else {
- int depth;
- struct prof_stacktrace_s *st = (struct prof_stacktrace_s *)p->data;
- st->marker = MARKER_STACKTRACE;
- st->count = 1;
- st->stack[0] = GetPC((ucontext_t*)ucontext);
- depth = get_stack_trace(st->stack+1, MAX_STACK_DEPTH-1, ucontext);
- depth++; // To account for pc value in stack[0];
- st->depth = depth;
- p->data_offset = offsetof(struct prof_stacktrace_s, marker);
- p->data_size = (depth * sizeof(void *) +
- sizeof(struct prof_stacktrace_s) -
- offsetof(struct prof_stacktrace_s, marker));
- commit_buffer(fd, p);
- }
-
- errno = saved_errno;
- }
-
- __sync_sub_and_fetch(&signal_handler_value, 2L);
-}
-
-
-/* *************************************************************
- * the setup and teardown functions
- * *************************************************************
- */
-
-static int install_sigprof_handler(void)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = sigprof_handler;
- sa.sa_flags = SA_RESTART | SA_SIGINFO;
- if (sigemptyset(&sa.sa_mask) == -1 ||
- sigaction(SIGPROF, &sa, NULL) == -1)
- return -1;
- return 0;
-}
-
-static int remove_sigprof_handler(void)
-{
- if (signal(SIGPROF, SIG_DFL) == SIG_ERR)
- return -1;
- return 0;
-}
-
-static int install_sigprof_timer(void)
-{
- static struct itimerval timer;
- timer.it_interval.tv_sec = 0;
- timer.it_interval.tv_usec = profile_interval_usec;
- timer.it_value = timer.it_interval;
- if (setitimer(ITIMER_PROF, &timer, NULL) != 0)
- return -1;
- return 0;
-}
-
-static int remove_sigprof_timer(void) {
- static struct itimerval timer;
- timer.it_interval.tv_sec = 0;
- timer.it_interval.tv_usec = 0;
- timer.it_value.tv_sec = 0;
- timer.it_value.tv_usec = 0;
- if (setitimer(ITIMER_PROF, &timer, NULL) != 0)
- return -1;
- return 0;
-}
-
-static void atfork_disable_timer(void) {
- if (profile_interval_usec > 0) {
- remove_sigprof_timer();
- }
-}
-
-static void atfork_enable_timer(void) {
- if (profile_interval_usec > 0) {
- install_sigprof_timer();
- }
-}
-
-static int install_pthread_atfork_hooks(void) {
- /* this is needed to prevent the problems described there:
- - http://code.google.com/p/gperftools/issues/detail?id=278
- - http://lists.debian.org/debian-glibc/2010/03/msg00161.html
-
- TL;DR: if the RSS of the process is large enough, the clone() syscall
- will be interrupted by the SIGPROF before it can complete, then
- retried, interrupted again and so on, in an endless loop. The
- solution is to disable the timer around the fork, and re-enable it
- only inside the parent.
- */
- if (atfork_hook_installed)
- return 0;
- int ret = pthread_atfork(atfork_disable_timer, atfork_enable_timer, NULL);
- if (ret != 0)
- return -1;
- atfork_hook_installed = 1;
- return 0;
-}
-
-RPY_EXTERN
-int rpython_vmprof_enable(long interval_usec)
-{
- assert(profile_file >= 0);
- assert(interval_usec > 0);
- profile_interval_usec = interval_usec;
-
- if (install_pthread_atfork_hooks() == -1)
- goto error;
- if (install_sigprof_handler() == -1)
- goto error;
- if (install_sigprof_timer() == -1)
- goto error;
- rpython_vmprof_ignore_signals(0);
- return 0;
-
- error:
- profile_file = -1;
- profile_interval_usec = 0;
- return -1;
-}
-
-static int _write_all(const void *buf, size_t bufsize)
-{
- while (bufsize > 0) {
- ssize_t count = write(profile_file, buf, bufsize);
- if (count <= 0)
- return -1; /* failed */
- buf += count;
- bufsize -= count;
- }
- return 0;
-}
-
-static int close_profile(void)
-{
- char buf[4096];
- ssize_t size;
- unsigned char marker = MARKER_TRAILER;
-
- if (_write_all(&marker, 1) < 0)
- return -1;
-
-#ifdef __linux__
- // copy /proc/self/maps to the end of the profile file
- int srcfd = open("/proc/self/maps", O_RDONLY);
- if (srcfd < 0)
- return -1;
-
- while ((size = read(srcfd, buf, sizeof buf)) > 0) {
- if (_write_all(buf, size) < 0) {
- close(srcfd);
- return -1;
- }
- }
- close(srcfd);
-#else
- // freebsd and mac
- sprintf(buf, "procstat -v %d", getpid());
- FILE *srcf = popen(buf, "r");
- if (!srcf)
- return -1;
-
- while ((size = fread(buf, 1, sizeof buf, src))) {
- if (_write_all(buf, size) < 0) {
- pclose(srcf);
- return -1;
- }
- }
- pclose(srcf);
-#endif
-
- /* don't close() the file descriptor from here */
- profile_file = -1;
- return 0;
-}
-
-RPY_EXTERN
-int rpython_vmprof_disable(void)
-{
- rpython_vmprof_ignore_signals(1);
- profile_interval_usec = 0;
-
- if (remove_sigprof_timer() == -1)
- return -1;
- if (remove_sigprof_handler() == -1)
- return -1;
- if (shutdown_concurrent_bufs(profile_file) < 0)
- return -1;
- return close_profile();
-}
-
-RPY_EXTERN
-void rpython_vmprof_write_buf(char *buf, long size)
-{
- struct profbuf_s *p;
-
- while ((p = reserve_buffer(profile_file)) == NULL) {
- /* spin loop waiting for a buffer to be ready; should almost never
- be the case */
- usleep(1);
- }
-
- if (size > SINGLE_BUF_SIZE)
- size = SINGLE_BUF_SIZE;
- memcpy(p->data, buf, size);
- p->data_size = size;
-
- commit_buffer(profile_file, p);
-}
+#include "vmprof_main.h"
diff --git a/rpython/rlib/rvmprof/src/rvmprof.h b/rpython/rlib/rvmprof/src/rvmprof.h
--- a/rpython/rlib/rvmprof/src/rvmprof.h
+++ b/rpython/rlib/rvmprof/src/rvmprof.h
@@ -1,6 +1,6 @@
-RPY_EXTERN char *rpython_vmprof_init(int);
-RPY_EXTERN void rpython_vmprof_ignore_signals(int);
-RPY_EXTERN int rpython_vmprof_enable(long);
-RPY_EXTERN int rpython_vmprof_disable(void);
-RPY_EXTERN void rpython_vmprof_write_buf(char *, long);
+RPY_EXTERN char *vmprof_init(int, double, char *);
+RPY_EXTERN void vmprof_ignore_signals(int);
+RPY_EXTERN int vmprof_enable(void);
+RPY_EXTERN int vmprof_disable(void);
+RPY_EXTERN int vmprof_register_virtual_function(char *, long, int);
diff --git a/rpython/rlib/rvmprof/src/rvmprof_get_custom_offset.h b/rpython/rlib/rvmprof/src/rvmprof_get_custom_offset.h
deleted file mode 100644
--- a/rpython/rlib/rvmprof/src/rvmprof_get_custom_offset.h
+++ /dev/null
@@ -1,63 +0,0 @@
-
-#ifdef PYPY_JIT_CODEMAP
-void *pypy_find_codemap_at_addr(long addr, long *start_addr);
-long pypy_yield_codemap_at_addr(void *codemap_raw, long addr,
- long *current_pos_addr);
-long pypy_jit_stack_depth_at_loc(long loc);
-#endif
-
-
-static ptrdiff_t vmprof_unw_get_custom_offset(void* ip, void *cp) {
-#ifdef PYPY_JIT_CODEMAP
- intptr_t ip_l = (intptr_t)ip;
- return pypy_jit_stack_depth_at_loc(ip_l);
-#else
- return -1;
-#endif
-}
-
-static long vmprof_write_header_for_jit_addr(void **result, long n,
- void *ip, int max_depth)
-{
-#ifdef PYPY_JIT_CODEMAP
- void *codemap;
- long current_pos = 0;
- intptr_t id;
- long start_addr = 0;
- intptr_t addr = (intptr_t)ip;
- int start, k;
- void *tmp;
-
- codemap = pypy_find_codemap_at_addr(addr, &start_addr);
- if (codemap == NULL)
- // not a jit code at all
- return n;
-
- // modify the last entry to point to start address and not the random one
- // in the middle
- result[n - 1] = (void*)start_addr;
- result[n] = (void*)2;
- n++;
- start = n;
- while (n < max_depth) {
-        id = pypy_yield_codemap_at_addr(codemap, addr, &current_pos);
- if (id == -1)
- // finish
- break;
- if (id == 0)
- continue; // not main codemap
- result[n++] = (void *)id;
- }
- k = 0;
- while (k < (n - start) / 2) {
- tmp = result[start + k];
- result[start + k] = result[n - k - 1];
- result[n - k - 1] = tmp;
- k++;
- }
- if (n < max_depth) {
- result[n++] = (void*)3;
- }
-#endif
- return n;
-}
diff --git a/rpython/rlib/rvmprof/src/rvmprof_mt.h b/rpython/rlib/rvmprof/src/rvmprof_mt.h
deleted file mode 100644
--- a/rpython/rlib/rvmprof/src/rvmprof_mt.h
+++ /dev/null
@@ -1,210 +0,0 @@
-/* Support for multithreaded write() operations */
-
-#include <sys/mman.h>
-#include <string.h>
-
-/* The idea is that we have MAX_NUM_BUFFERS available, all of size
- SINGLE_BUF_SIZE. Threads and signal handlers can ask to reserve a
- buffer, fill it, and finally "commit" it, at which point its
- content is written into the profile file. There is no hard
- guarantee about the order in which the committed blocks are
- actually written. We do this with two constrains:
-
- - write() calls should not overlap; only one thread can be
- currently calling it.
-
- - the code needs to be multithread-safe *and* signal-handler-safe,
- which means it must be written in a wait-free style: never have
- spin loops waiting for some lock to be released, from any of
- the functions that can be called from the signal handler! The
- code holding the lock could be running in the same thread,
- currently interrupted by the signal handler.
-
- The value of MAX_NUM_BUFFERS is a trade-off between too high
- (lots of unnecessary memory, lots of checking all of them)
- and too low (risk that there is none left).
-*/
-#define MAX_NUM_BUFFERS 20
-#define SINGLE_BUF_SIZE (8192 - 2 * sizeof(unsigned int))
-
-#if defined(__i386__) || defined(__amd64__)
- static inline void write_fence(void) { asm("" : : : "memory"); }
-#else
- static inline void write_fence(void) { __sync_synchronize(); }
-#endif
-
-
-#define PROFBUF_UNUSED 0
-#define PROFBUF_FILLING 1
-#define PROFBUF_READY 2
-
-
-struct profbuf_s {
- unsigned int data_size;
- unsigned int data_offset;
- char data[SINGLE_BUF_SIZE];
-};
-
-static char volatile profbuf_state[MAX_NUM_BUFFERS];
-static struct profbuf_s *profbuf_all_buffers = NULL;
-static int volatile profbuf_write_lock = 2;
-static long profbuf_pending_write;
-
-
-static void unprepare_concurrent_bufs(void)
-{
- if (profbuf_all_buffers != NULL) {
-        munmap(profbuf_all_buffers, sizeof(struct profbuf_s) * MAX_NUM_BUFFERS);
- profbuf_all_buffers = NULL;
- }
-}
-
-static int prepare_concurrent_bufs(void)
-{
- assert(sizeof(struct profbuf_s) == 8192);
-
- unprepare_concurrent_bufs();
-    profbuf_all_buffers = mmap(NULL, sizeof(struct profbuf_s) * MAX_NUM_BUFFERS,
- PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS,
- -1, 0);
- if (profbuf_all_buffers == MAP_FAILED) {
- profbuf_all_buffers = NULL;
- return -1;
- }
- memset((char *)profbuf_state, PROFBUF_UNUSED, sizeof(profbuf_state));
- profbuf_write_lock = 0;
- profbuf_pending_write = -1;
- return 0;
-}
-
-static int _write_single_ready_buffer(int fd, long i)
-{
- /* Try to write to disk the buffer number 'i'. This function must
- only be called while we hold the write lock. */
- assert(profbuf_write_lock != 0);
-
- if (profbuf_pending_write >= 0) {
- /* A partially written buffer is waiting. We'll write the
- rest of this buffer now, instead of 'i'. */
- i = profbuf_pending_write;
- assert(profbuf_state[i] == PROFBUF_READY);
- }
-
- if (profbuf_state[i] != PROFBUF_READY) {
- /* this used to be a race condition: the buffer was written by a
- different thread already, nothing to do now */
- return 0;
- }
-
- int err;
- struct profbuf_s *p = &profbuf_all_buffers[i];
- ssize_t count = write(fd, p->data + p->data_offset, p->data_size);
- if (count == p->data_size) {
- profbuf_state[i] = PROFBUF_UNUSED;
- profbuf_pending_write = -1;
- }
- else {
- if (count > 0) {
- p->data_offset += count;
- p->data_size -= count;
- }
- profbuf_pending_write = i;
- if (count < 0)
- return -1;
- }
- return 0;
-}
-
-static void _write_ready_buffers(int fd)
-{
- long i;
- int has_write_lock = 0;
-
- for (i = 0; i < MAX_NUM_BUFFERS; i++) {
- if (profbuf_state[i] == PROFBUF_READY) {
- if (!has_write_lock) {
- if (!__sync_bool_compare_and_swap(&profbuf_write_lock, 0, 1))
- return; /* can't acquire the write lock, give up */
- has_write_lock = 1;
- }
- if (_write_single_ready_buffer(fd, i) < 0)
- break;
- }
- }
- if (has_write_lock)
- profbuf_write_lock = 0;
-}
-
-static struct profbuf_s *reserve_buffer(int fd)
-{
- /* Tries to enter a region of code that fills one buffer. If
- successful, returns the profbuf_s. It fails only if the
- concurrent buffers are all busy (extreme multithreaded usage).
-
- This might call write() to emit the data sitting in
- previously-prepared buffers. In case of write() error, the
- error is ignored but unwritten data stays in the buffers.
- */
- long i;
-
- _write_ready_buffers(fd);
-
- for (i = 0; i < MAX_NUM_BUFFERS; i++) {
- if (profbuf_state[i] == PROFBUF_UNUSED &&
- __sync_bool_compare_and_swap(&profbuf_state[i], PROFBUF_UNUSED,
- PROFBUF_FILLING)) {
- struct profbuf_s *p = &profbuf_all_buffers[i];
- p->data_size = 0;
- p->data_offset = 0;
- return p;
- }
- }
- /* no unused buffer found */
- return NULL;
-}
-
-static void commit_buffer(int fd, struct profbuf_s *buf)
-{
- /* Leaves a region of code that filled 'buf'.
-
- This might call write() to emit the data now ready. In case of
- write() error, the error is ignored but unwritten data stays in
- the buffers.
- */
-
- /* Make sure every thread sees the full content of 'buf' */
- write_fence();
-
- /* Then set the 'ready' flag */
- long i = buf - profbuf_all_buffers;
- assert(profbuf_state[i] == PROFBUF_FILLING);
- profbuf_state[i] = PROFBUF_READY;
-
- if (!__sync_bool_compare_and_swap(&profbuf_write_lock, 0, 1)) {
- /* can't acquire the write lock, ignore */
- }
- else {
- _write_single_ready_buffer(fd, i);
- profbuf_write_lock = 0;
- }
-}
-
-static int shutdown_concurrent_bufs(int fd)
-{
- /* no signal handler can be running concurrently here, because we
- already did rpython_vmprof_ignore_signals(1) */
- assert(profbuf_write_lock == 0);
- profbuf_write_lock = 2;
-
- /* last attempt to flush buffers */
- int i;
- for (i = 0; i < MAX_NUM_BUFFERS; i++) {
- while (profbuf_state[i] == PROFBUF_READY) {
- if (_write_single_ready_buffer(fd, i) < 0)
- return -1;
- }
- }
- unprepare_concurrent_bufs();
- return 0;
-}
diff --git a/rpython/rlib/rvmprof/src/rvmprof_config.h b/rpython/rlib/rvmprof/src/vmprof_config.h
rename from rpython/rlib/rvmprof/src/rvmprof_config.h
rename to rpython/rlib/rvmprof/src/vmprof_config.h
diff --git a/rpython/rlib/rvmprof/src/vmprof_get_custom_offset.h b/rpython/rlib/rvmprof/src/vmprof_get_custom_offset.h
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/rvmprof/src/vmprof_get_custom_offset.h
@@ -0,0 +1,120 @@
+
+#ifdef PYPY_JIT_CODEMAP
+void *pypy_find_codemap_at_addr(long addr, long *start_addr);
+long pypy_yield_codemap_at_addr(void *codemap_raw, long addr,
+ long *current_pos_addr);
+long pypy_jit_stack_depth_at_loc(long loc);
+#endif
+
+
+#ifdef CPYTHON_GET_CUSTOM_OFFSET
+static void *tramp_start, *tramp_end;
+#endif
+
+
+static ptrdiff_t vmprof_unw_get_custom_offset(void* ip, void *cp) {
+
+#if defined(PYPY_JIT_CODEMAP)
+
+ intptr_t ip_l = (intptr_t)ip;
+ return pypy_jit_stack_depth_at_loc(ip_l);
+
+#elif defined(CPYTHON_GET_CUSTOM_OFFSET)
+
+ if (ip >= tramp_start && ip <= tramp_end) {
+ // XXX the return value is wrong for all the places before push and
+ // after pop, fix
+ void *bp;
+ void *sp;
+
+ /* This is a stage2 trampoline created by hotpatch:
+
+ push %rbx
+ push %rbp
+ mov %rsp,%rbp
+           and    $0xfffffffffffffff0,%rsp   // make sure the stack is aligned
+ movabs $0x7ffff687bb10,%rbx
+ callq *%rbx
+ leaveq
+ pop %rbx
+ retq
+
+ the stack layout is like this:
+
+ +-----------+ high addresses
+ | ret addr |
+ +-----------+
+ | saved rbx | start of the function frame
+ +-----------+
+ | saved rbp |
+ +-----------+
+ | ........ | <-- rbp
+ +-----------+ low addresses
+
+           So, the trampoline frame starts at rbp+16, and the return address
+           is at rbp+24. The vmprof API requires us to return the offset of
+ the frame relative to sp, hence we have this weird computation.
+
+ XXX (antocuni): I think we could change the API to return directly
+ the frame address instead of the offset; however, this require a
+ change in the PyPy code too
+ */
+
+ unw_get_reg (cp, UNW_REG_SP, (unw_word_t*)&sp);
+ unw_get_reg (cp, UNW_X86_64_RBP, (unw_word_t*)&bp);
+ return bp+16+8-sp;
+ }
+ return -1;
+
+#else
+
+ return -1;
+
+#endif
+}
+
+static long vmprof_write_header_for_jit_addr(void **result, long n,
+ void *ip, int max_depth)
+{
+#ifdef PYPY_JIT_CODEMAP
+ void *codemap;
+ long current_pos = 0;
+ intptr_t id;
+ long start_addr = 0;
+ intptr_t addr = (intptr_t)ip;
+ int start, k;
+ void *tmp;
+
+ codemap = pypy_find_codemap_at_addr(addr, &start_addr);
+ if (codemap == NULL)
+ // not a jit code at all
+ return n;
+
+ // modify the last entry to point to start address and not the random one
+ // in the middle
+ result[n - 1] = (void*)start_addr;
+ result[n] = (void*)2;
+ n++;
+ start = n;
+ while (n < max_depth) {
+        id = pypy_yield_codemap_at_addr(codemap, addr, &current_pos);
+ if (id == -1)
+ // finish
+ break;
+ if (id == 0)
+ continue; // not main codemap
+ result[n++] = (void *)id;
+ }
+ k = 0;
+ while (k < (n - start) / 2) {
+ tmp = result[start + k];
+ result[start + k] = result[n - k - 1];
+ result[n - k - 1] = tmp;
+ k++;
+ }
+ if (n < max_depth) {
+ result[n++] = (void*)3;
+ }
+#endif
+ return n;
+}
diff --git a/rpython/rlib/rvmprof/src/rvmprof_getpc.h b/rpython/rlib/rvmprof/src/vmprof_getpc.h
rename from rpython/rlib/rvmprof/src/rvmprof_getpc.h
rename to rpython/rlib/rvmprof/src/vmprof_getpc.h
--- a/rpython/rlib/rvmprof/src/rvmprof_getpc.h
+++ b/rpython/rlib/rvmprof/src/vmprof_getpc.h
@@ -44,7 +44,7 @@
#ifndef BASE_GETPC_H_
#define BASE_GETPC_H_
-#include "rvmprof_config.h"
+#include "vmprof_config.h"
// On many linux systems, we may need _GNU_SOURCE to get access to
// the defined constants that define the register we want to see (eg
diff --git a/rpython/rlib/rvmprof/src/vmprof_main.h b/rpython/rlib/rvmprof/src/vmprof_main.h
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/rvmprof/src/vmprof_main.h
@@ -0,0 +1,556 @@
+/* VMPROF
+ *
+ * statistical sampling profiler specifically designed to profile programs
+ * which run on a Virtual Machine and/or bytecode interpreter, such as Python,
+ * etc.
+ *
+ * The logic to dump the C stack traces is partly stolen from the code in
+ * gperftools.
+ * The file "getpc.h" has been entirely copied from gperftools.
+ *
+ * Tested only on gcc, linux, x86_64.
+ *
+ * Copyright (C) 2014-2015
+ * Antonio Cuni - [email protected]
+ * Maciej Fijalkowski - [email protected]
+ * Armin Rigo - [email protected]
+ *
+ */
+
+#define _GNU_SOURCE 1
+
+#include <dlfcn.h>
+#include <assert.h>
+#include <pthread.h>
+#include <sys/time.h>
+#include <errno.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include "vmprof_getpc.h"
+#include "vmprof_unwind.h"
+#include "vmprof_mt.h"
+
+
+/************************************************************/
+
+// functions copied from libunwind using dlopen
+
+static int (*unw_get_reg)(unw_cursor_t*, int, unw_word_t*) = NULL;
+static int (*unw_step)(unw_cursor_t*) = NULL;
+static int (*unw_init_local)(unw_cursor_t *, unw_context_t *) = NULL;
+static int (*unw_get_proc_info)(unw_cursor_t *, unw_proc_info_t *) = NULL;
+
+static int profile_file = -1;
+static long prepare_interval_usec;
+static struct profbuf_s *volatile current_codes;
+static void *(*mainloop_get_virtual_ip)(char *) = 0;
+
+static int opened_profile(char *interp_name);
+static void flush_codes(void);
+
+RPY_EXTERN
+char *vmprof_init(int fd, double interval, char *interp_name)
+{
+ if (interval < 1e-6 || interval >= 1.0)
+ return "bad value for 'interval'";
+ prepare_interval_usec = (int)(interval * 1000000.0);
+
+ if (!unw_get_reg) {
+ void *libhandle;
+
+ if (!(libhandle = dlopen("libunwind.so", RTLD_LAZY | RTLD_LOCAL)))
+ goto error;
+ if (!(unw_get_reg = dlsym(libhandle, "_ULx86_64_get_reg")))
+ goto error;
+ if (!(unw_get_proc_info = dlsym(libhandle, "_ULx86_64_get_proc_info")))
+ goto error;
+ if (!(unw_init_local = dlsym(libhandle, "_ULx86_64_init_local")))
+ goto error;
+ if (!(unw_step = dlsym(libhandle, "_ULx86_64_step")))
+ goto error;
+ }
+ if (prepare_concurrent_bufs() < 0)
+ return "out of memory";
+
+ assert(fd >= 0);
+ profile_file = fd;
+ if (opened_profile(interp_name) < 0) {
+ profile_file = -1;
+ return strerror(errno);
+ }
+ return NULL;
+
+ error:
+ return dlerror();
+}
+
+/************************************************************/
+
+/* value: last bit is 1 if signals must be ignored; all other bits
+ are a counter for how many threads are currently in a signal handler */
+static long volatile signal_handler_value = 1;
+
+RPY_EXTERN
+void vmprof_ignore_signals(int ignored)
+{
+ if (!ignored) {
+ __sync_fetch_and_and(&signal_handler_value, ~1L);
+ }
+ else {
+ /* set the last bit, and wait until concurrently-running signal
+ handlers finish */
+ while (__sync_or_and_fetch(&signal_handler_value, 1L) != 1L) {
+ usleep(1);
+ }
+ }
+}
+
+
+/* *************************************************************
+ * functions to write a profile file compatible with gperftools
+ * *************************************************************
+ */
+
+#define MAX_FUNC_NAME 128
+#define MAX_STACK_DEPTH \
+ ((SINGLE_BUF_SIZE - sizeof(struct prof_stacktrace_s)) / sizeof(void *))
+
+#define MARKER_STACKTRACE '\x01'
+#define MARKER_VIRTUAL_IP '\x02'
+#define MARKER_TRAILER '\x03'
+#define MARKER_INTERP_NAME '\x04' /* deprecated */
+#define MARKER_HEADER '\x05'
+
+#define VERSION_BASE '\x00'
+#define VERSION_THREAD_ID '\x01'
+
+struct prof_stacktrace_s {
+ char padding[sizeof(long) - 1];
+ char marker;
+ long count, depth;
+ void *stack[];
+};
+
+static long profile_interval_usec = 0;
+static char atfork_hook_installed = 0;
+
+
+/* ******************************************************
+ * libunwind workaround to process JIT frames correctly
+ * ******************************************************
+ */
+
+#include "vmprof_get_custom_offset.h"
+
+typedef struct {
+ void* _unused1;
+ void* _unused2;
+ void* sp;
+ void* ip;
+ void* _unused3[sizeof(unw_cursor_t)/sizeof(void*) - 4];
+} vmprof_hacked_unw_cursor_t;
+
+static int vmprof_unw_step(unw_cursor_t *cp, int first_run)
+{
+ void* ip;
+ void* sp;
+ ptrdiff_t sp_offset;
+ unw_get_reg (cp, UNW_REG_IP, (unw_word_t*)&ip);
+ unw_get_reg (cp, UNW_REG_SP, (unw_word_t*)&sp);
+ if (!first_run) {
+ // make sure we're pointing to the CALL and not to the first
+ // instruction after. If the callee adjusts the stack for us
+ // it's not safe to be at the instruction after
+ ip -= 1;
+ }
+ sp_offset = vmprof_unw_get_custom_offset(ip, cp);
+
+ if (sp_offset == -1) {
+ // it means that the ip is NOT in JITted code, so we can use the
+        // standard unw_step
+ return unw_step(cp);
+ }
+ else {
+ // this is a horrible hack to manually walk the stack frame, by
+ // setting the IP and SP in the cursor
+ vmprof_hacked_unw_cursor_t *cp2 = (vmprof_hacked_unw_cursor_t*)cp;
+ void* bp = (void*)sp + sp_offset;
+ cp2->sp = bp;
+ bp -= sizeof(void*);
+ cp2->ip = ((void**)bp)[0];
+ // the ret is on the top of the stack minus WORD
+ return 1;
+ }
+}
+
+
+/* *************************************************************
+ * functions to dump the stack trace
+ * *************************************************************
+ */
+
+static int get_stack_trace(void** result, int max_depth, ucontext_t *ucontext)
+{
+ void *ip;
+ int n = 0;
+ unw_cursor_t cursor;
+ unw_context_t uc = *ucontext;
+
+ int ret = unw_init_local(&cursor, &uc);
+ assert(ret >= 0);
+ (void)ret;
+
+ while (n < max_depth) {
+ if (unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *) &ip) < 0) {
+ break;
+ }
+
+ unw_proc_info_t pip;
+ unw_get_proc_info(&cursor, &pip);
+
+ /* if n==0, it means that the signal handler interrupted us while we
+ were in the trampoline, so we are not executing (yet) the real main
+ loop function; just skip it */
+ if (VMPROF_ADDR_OF_TRAMPOLINE((void*)pip.start_ip) && n > 0) {
+ // found main loop stack frame
+ void* sp;
+ unw_get_reg(&cursor, UNW_REG_SP, (unw_word_t *) &sp);
+ if (mainloop_get_virtual_ip)
+ ip = mainloop_get_virtual_ip((char *)sp);
+ else
+ ip = *(void **)sp;
+ }
+
+ int first_run = (n == 0);
+ result[n++] = ip;
+ n = vmprof_write_header_for_jit_addr(result, n, ip, max_depth);
+ if (vmprof_unw_step(&cursor, first_run) <= 0)
+ break;
+ }
+ return n;
+}
+
+static void *get_current_thread_id(void)
+{
+ /* xxx This function is a hack on two fronts:
+
+ - It assumes that pthread_self() is async-signal-safe. This
+ should be true on Linux. I hope it is also true elsewhere.
+
+ - It abuses pthread_self() by assuming it just returns an
+ integer. According to comments in CPython's source code, the
+ platforms where it is not the case are rare nowadays.
+
+ An alternative would be to try to look if the information is
+ available in the ucontext_t in the caller.
+ */
+ return (void *)pthread_self();
+}
+
+
+/* *************************************************************
+ * the signal handler
+ * *************************************************************
+ */
+
+static void sigprof_handler(int sig_nr, siginfo_t* info, void *ucontext)
+{
+ long val = __sync_fetch_and_add(&signal_handler_value, 2L);
+
+ if ((val & 1) == 0) {
+ int saved_errno = errno;
+ int fd = profile_file;
+ assert(fd >= 0);
+
+ struct profbuf_s *p = reserve_buffer(fd);
+ if (p == NULL) {
+ /* ignore this signal: there are no free buffers right now */
+ }
+ else {
+ int depth;
+ struct prof_stacktrace_s *st = (struct prof_stacktrace_s *)p->data;
+ st->marker = MARKER_STACKTRACE;
+ st->count = 1;
+ st->stack[0] = GetPC((ucontext_t*)ucontext);
+ depth = get_stack_trace(st->stack+1, MAX_STACK_DEPTH-2, ucontext);
+ depth++; // To account for pc value in stack[0];
+ st->depth = depth;
+ st->stack[depth++] = get_current_thread_id();
+ p->data_offset = offsetof(struct prof_stacktrace_s, marker);
+ p->data_size = (depth * sizeof(void *) +
+ sizeof(struct prof_stacktrace_s) -
+ offsetof(struct prof_stacktrace_s, marker));
+ commit_buffer(fd, p);
+ }
+
+ errno = saved_errno;
+ }
+
+ __sync_sub_and_fetch(&signal_handler_value, 2L);
+}
+
+
+/* *************************************************************
+ * the setup and teardown functions
+ * *************************************************************
+ */
+
+static int install_sigprof_handler(void)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = sigprof_handler;
+ sa.sa_flags = SA_RESTART | SA_SIGINFO;
+ if (sigemptyset(&sa.sa_mask) == -1 ||
+ sigaction(SIGPROF, &sa, NULL) == -1)
+ return -1;
+ return 0;
+}
+
+static int remove_sigprof_handler(void)
+{
+ if (signal(SIGPROF, SIG_DFL) == SIG_ERR)
+ return -1;
+ return 0;
+}
+
+static int install_sigprof_timer(void)
+{
+ static struct itimerval timer;
+ timer.it_interval.tv_sec = 0;
+ timer.it_interval.tv_usec = profile_interval_usec;
+ timer.it_value = timer.it_interval;
+ if (setitimer(ITIMER_PROF, &timer, NULL) != 0)
+ return -1;
+ return 0;
+}
+
+static int remove_sigprof_timer(void) {
+ static struct itimerval timer;
+ timer.it_interval.tv_sec = 0;
+ timer.it_interval.tv_usec = 0;
+ timer.it_value.tv_sec = 0;
+ timer.it_value.tv_usec = 0;
+ if (setitimer(ITIMER_PROF, &timer, NULL) != 0)
+ return -1;
+ return 0;
+}
+
+static void atfork_disable_timer(void) {
+ if (profile_interval_usec > 0) {
+ remove_sigprof_timer();
+ }
+}
+
+static void atfork_enable_timer(void) {
+ if (profile_interval_usec > 0) {
+ install_sigprof_timer();
+ }
+}
+
+static int install_pthread_atfork_hooks(void) {
+ /* this is needed to prevent the problems described there:
+ - http://code.google.com/p/gperftools/issues/detail?id=278
+ - http://lists.debian.org/debian-glibc/2010/03/msg00161.html
+
+ TL;DR: if the RSS of the process is large enough, the clone() syscall
+ will be interrupted by the SIGPROF before it can complete, then
+ retried, interrupted again and so on, in an endless loop. The
+ solution is to disable the timer around the fork, and re-enable it
+ only inside the parent.
+ */
+ if (atfork_hook_installed)
+ return 0;
+ int ret = pthread_atfork(atfork_disable_timer, atfork_enable_timer, NULL);
+ if (ret != 0)
+ return -1;
+ atfork_hook_installed = 1;
+ return 0;
+}
+
+RPY_EXTERN
+int vmprof_enable(void)
+{
+ assert(profile_file >= 0);
+ assert(prepare_interval_usec > 0);
+ profile_interval_usec = prepare_interval_usec;
+
+ if (install_pthread_atfork_hooks() == -1)
+ goto error;
+ if (install_sigprof_handler() == -1)
+ goto error;
+ if (install_sigprof_timer() == -1)
+ goto error;
+ vmprof_ignore_signals(0);
+ return 0;
+
+ error:
+ profile_file = -1;
+ profile_interval_usec = 0;
+ return -1;
+}
+
+static int _write_all(const void *buf, size_t bufsize)
+{
+ while (bufsize > 0) {
+ ssize_t count = write(profile_file, buf, bufsize);
+ if (count <= 0)
+ return -1; /* failed */
+ buf += count;
+ bufsize -= count;
+ }
+ return 0;
+}
+
+static int opened_profile(char *interp_name)
+{
+ struct {
+ long hdr[5];
+ char interp_name[259];
+ } header;
+
+ size_t namelen = strnlen(interp_name, 255);
+ current_codes = NULL;
+
+ header.hdr[0] = 0;
+ header.hdr[1] = 3;
+ header.hdr[2] = 0;
+ header.hdr[3] = prepare_interval_usec;
+ header.hdr[4] = 0;
+ header.interp_name[0] = MARKER_HEADER;
+ header.interp_name[1] = '\x00';
+ header.interp_name[2] = VERSION_THREAD_ID;
+ header.interp_name[3] = namelen;
+ memcpy(&header.interp_name[4], interp_name, namelen);
+ return _write_all(&header, 5 * sizeof(long) + 4 + namelen);
+}
+
+static int close_profile(void)
+{
+ char buf[4096];
+ ssize_t size;
+ unsigned char marker = MARKER_TRAILER;
+
+ if (_write_all(&marker, 1) < 0)
+ return -1;
+
+#ifdef __linux__
+ // copy /proc/self/maps to the end of the profile file
+ int srcfd = open("/proc/self/maps", O_RDONLY);
+ if (srcfd < 0)
+ return -1;
+
+ while ((size = read(srcfd, buf, sizeof buf)) > 0) {
+ if (_write_all(buf, size) < 0) {
+ close(srcfd);
+ return -1;
+ }
+ }
+ close(srcfd);
+#else
+ // freebsd and mac
+ sprintf(buf, "procstat -v %d", getpid());
+ FILE *srcf = popen(buf, "r");
+ if (!srcf)
+ return -1;
+
+    while ((size = fread(buf, 1, sizeof buf, srcf))) {
+ if (_write_all(buf, size) < 0) {
+ pclose(srcf);
+ return -1;
+ }
+ }
+ pclose(srcf);
+#endif
+
+ /* don't close() the file descriptor from here */
+ profile_file = -1;
+ return 0;
+}
+
+RPY_EXTERN
+int vmprof_disable(void)
+{
+ vmprof_ignore_signals(1);
+ profile_interval_usec = 0;
+
+ if (remove_sigprof_timer() == -1)
+ return -1;
+ if (remove_sigprof_handler() == -1)
+ return -1;
+ flush_codes();
+ if (shutdown_concurrent_bufs(profile_file) < 0)
+ return -1;
+ return close_profile();
+}
+
+RPY_EXTERN
+int vmprof_register_virtual_function(char *code_name, long code_uid,
+ int auto_retry)
+{
+ long namelen = strnlen(code_name, 1023);
+ long blocklen = 1 + 2 * sizeof(long) + namelen;
+ struct profbuf_s *p;
+ char *t;
+
+ retry:
+ p = current_codes;
+ if (p != NULL) {
+        if (__sync_bool_compare_and_swap(&current_codes, p, NULL)) {
+ /* grabbed 'current_codes': we will append the current block
+ to it if it contains enough room */
+ size_t freesize = SINGLE_BUF_SIZE - p->data_size;
+ if (freesize < blocklen) {
+ /* full: flush it */
+ commit_buffer(profile_file, p);
+ p = NULL;
+ }
+ }
+ else {
+ /* compare-and-swap failed, don't try again */
+ p = NULL;
+ }
+ }
+
+ if (p == NULL) {
+ p = reserve_buffer(profile_file);
+ if (p == NULL) {
+ /* can't get a free block; should almost never be the
+ case. Spin loop if allowed, or return a failure code
+ if not (e.g. we're in a signal handler) */
+ if (auto_retry > 0) {
+ auto_retry--;
+ usleep(1);
+ goto retry;
+ }
+ return -1;
+ }
+ }
+
+ t = p->data + p->data_size;
+ p->data_size += blocklen;
+ assert(p->data_size <= SINGLE_BUF_SIZE);
+ *t++ = MARKER_VIRTUAL_IP;
+ memcpy(t, &code_uid, sizeof(long)); t += sizeof(long);
+ memcpy(t, &namelen, sizeof(long)); t += sizeof(long);
+ memcpy(t, code_name, namelen);
+
+ /* try to reattach 'p' to 'current_codes' */
+    if (!__sync_bool_compare_and_swap(&current_codes, NULL, p)) {
+ /* failed, flush it */
+ commit_buffer(profile_file, p);
+ }
+ return 0;
+}
+
+static void flush_codes(void)
+{
+ struct profbuf_s *p = current_codes;
+ if (p != NULL) {
+ current_codes = NULL;
+ commit_buffer(profile_file, p);
+ }
+}
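
Putting vmprof_main.h together, the stream it writes can be walked roughly
like this (a hedged sketch: hypothetical helper, 64-bit little-endian longs
assumed; after the trailer the file continues with the /proc/self/maps dump):

    import struct

    def walk_profile(f, WORD=8):
        f.read(5 * WORD)                     # hdr[5]: 0, 3, 0, interval_usec, 0
        assert f.read(3) == '\x05\x00\x01'   # MARKER_HEADER, 0, VERSION_THREAD_ID
        f.read(ord(f.read(1)))               # interpreter name, e.g. 'pypy'
        while True:
            marker = f.read(1)
            if marker in ('', '\x03'):       # EOF or MARKER_TRAILER
                break
            elif marker == '\x01':           # MARKER_STACKTRACE
                count, depth = struct.unpack('<2q', f.read(2 * WORD))
                f.read((depth + 1) * WORD)   # depth addresses plus the thread id
            elif marker == '\x02':           # MARKER_VIRTUAL_IP
                uid, namelen = struct.unpack('<2q', f.read(2 * WORD))
                f.read(namelen)              # 'class:func_name:func_line:filename'
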
diff --git a/rpython/rlib/rvmprof/src/vmprof_mt.h b/rpython/rlib/rvmprof/src/vmprof_mt.h
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/rvmprof/src/vmprof_mt.h
@@ -0,0 +1,217 @@
+/* Support for multithreaded write() operations */
+
+#include <sys/mman.h>
+#include <string.h>
+
+/* The idea is that we have MAX_NUM_BUFFERS available, all of size
+ SINGLE_BUF_SIZE. Threads and signal handlers can ask to reserve a
+ buffer, fill it, and finally "commit" it, at which point its
+ content is written into the profile file. There is no hard
+ guarantee about the order in which the committed blocks are
+   actually written. We do this with two constraints:
+
+ - write() calls should not overlap; only one thread can be
+ currently calling it.
+
+ - the code needs to be multithread-safe *and* signal-handler-safe,
+ which means it must be written in a wait-free style: never have
+ spin loops waiting for some lock to be released, from any of
+ the functions that can be called from the signal handler! The
+ code holding the lock could be running in the same thread,
+ currently interrupted by the signal handler.
+
+ The value of MAX_NUM_BUFFERS is a trade-off between too high
+ (lots of unnecessary memory, lots of checking all of them)
+ and too low (risk that there is none left).
+*/
+#define MAX_NUM_BUFFERS 20
+#define SINGLE_BUF_SIZE (8192 - 2 * sizeof(unsigned int))
+
+#if defined(__i386__) || defined(__amd64__)
+ static inline void write_fence(void) { asm("" : : : "memory"); }
+#else
+ static inline void write_fence(void) { __sync_synchronize(); }
+#endif
+
+
+#define PROFBUF_UNUSED 0
+#define PROFBUF_FILLING 1
+#define PROFBUF_READY 2
+
+
+struct profbuf_s {
+ unsigned int data_size;
+ unsigned int data_offset;
+ char data[SINGLE_BUF_SIZE];
+};
+
+static char volatile profbuf_state[MAX_NUM_BUFFERS];
+static struct profbuf_s *profbuf_all_buffers = NULL;
+static int volatile profbuf_write_lock = 2;
+static long profbuf_pending_write;
+
+
+static void unprepare_concurrent_bufs(void)
+{
+ if (profbuf_all_buffers != NULL) {
+        munmap(profbuf_all_buffers, sizeof(struct profbuf_s) * MAX_NUM_BUFFERS);
+ profbuf_all_buffers = NULL;
+ }
+}
+
+static int prepare_concurrent_bufs(void)
+{
+ assert(sizeof(struct profbuf_s) == 8192);
+
+ unprepare_concurrent_bufs();
+    profbuf_all_buffers = mmap(NULL, sizeof(struct profbuf_s) * MAX_NUM_BUFFERS,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ -1, 0);
+ if (profbuf_all_buffers == MAP_FAILED) {
+ profbuf_all_buffers = NULL;
+ return -1;
+ }
+ memset((char *)profbuf_state, PROFBUF_UNUSED, sizeof(profbuf_state));
+ profbuf_write_lock = 0;
+ profbuf_pending_write = -1;
+ return 0;
+}
+
+static int _write_single_ready_buffer(int fd, long i)
+{
+ /* Try to write to disk the buffer number 'i'. This function must
+ only be called while we hold the write lock. */
+ assert(profbuf_write_lock != 0);
+
+ if (profbuf_pending_write >= 0) {
+ /* A partially written buffer is waiting. We'll write the
+ rest of this buffer now, instead of 'i'. */