Author: Carl Friedrich Bolz <cfb...@gmx.de> Branch: value-profiling Changeset: r78896:c6fd1f04a9e0 Date: 2015-08-11 15:27 +0200 http://bitbucket.org/pypy/pypy/changeset/c6fd1f04a9e0/
Log: merge diff too long, truncating to 2000 out of 5220 lines diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -55,3 +55,11 @@ .. branch: nditer-revisited Implement nditer 'buffered' flag and fix some edge cases + +.. branch: ufunc-reduce + +Allow multiple axes in ufunc.reduce() + +.. branch: fix-tinylang-goals + +Update tinylang goals to match current rpython diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -11,7 +11,7 @@ INT_MIN, INT_MAX, UINT_MAX, USHRT_MAX from pypy.interpreter.executioncontext import (ExecutionContext, ActionFlag, - UserDelAction, CodeUniqueIds) + UserDelAction) from pypy.interpreter.error import OperationError, new_exception_class, oefmt from pypy.interpreter.argument import Arguments from pypy.interpreter.miscutils import ThreadLocals, make_weak_value_dictionary @@ -391,7 +391,6 @@ self.actionflag = ActionFlag() # changed by the signal module self.check_signal_action = None # changed by the signal module self.user_del_action = UserDelAction(self) - self.code_unique_ids = CodeUniqueIds() self._code_of_sys_exc_info = None # can be overridden to a subclass @@ -670,16 +669,6 @@ assert ec is not None return ec - def register_code_callback(self, callback): - cui = self.code_unique_ids - cui.code_callback = callback - - def register_code_object(self, pycode): - cui = self.code_unique_ids - if cui.code_callback is None: - return - cui.code_callback(self, pycode) - def _freeze_(self): return True diff --git a/pypy/interpreter/executioncontext.py b/pypy/interpreter/executioncontext.py --- a/pypy/interpreter/executioncontext.py +++ b/pypy/interpreter/executioncontext.py @@ -590,11 +590,3 @@ # there is no list of length n: if n is large, then the GC # will run several times while walking the list, but it will # see lower and lower memory usage, with no 
lower bound of n. - -class CodeUniqueIds(object): - def __init__(self): - if sys.maxint == 2147483647: - self.code_unique_id = 0 # XXX this is wrong, it won't work on 32bit - else: - self.code_unique_id = 0x7000000000000000 - self.code_callback = None diff --git a/pypy/interpreter/pycode.py b/pypy/interpreter/pycode.py --- a/pypy/interpreter/pycode.py +++ b/pypy/interpreter/pycode.py @@ -94,7 +94,7 @@ self.magic = magic self._signature = cpython_code_signature(self) self._initialize() - space.register_code_object(self) + self._init_ready() self.vprofs = [ValueProf() for i in range(self.co_nlocals)] def _initialize(self): @@ -137,14 +137,8 @@ from pypy.objspace.std.mapdict import init_mapdict_cache init_mapdict_cache(self) - cui = self.space.code_unique_ids - self._unique_id = cui.code_unique_id - cui.code_unique_id += 4 # so we have two bits that we can mark stuff - # with - - def _get_full_name(self): - return "py:%s:%d:%s" % (self.co_name, self.co_firstlineno, - self.co_filename) + def _init_ready(self): + "This is a hook for the vmprof module, which overrides this method." 
def _cleanup_(self): if (self.magic == cpython_magic and diff --git a/pypy/module/__pypy__/__init__.py b/pypy/module/__pypy__/__init__.py --- a/pypy/module/__pypy__/__init__.py +++ b/pypy/module/__pypy__/__init__.py @@ -62,6 +62,7 @@ } interpleveldefs = { + 'attach_gdb' : 'interp_magic.attach_gdb', 'internal_repr' : 'interp_magic.internal_repr', 'bytebuffer' : 'bytebuffer.bytebuffer', 'identity_dict' : 'interp_identitydict.W_IdentityDict', @@ -100,8 +101,6 @@ def setup_after_space_initialization(self): """NOT_RPYTHON""" - if not self.space.config.translating: - self.extra_interpdef('interp_pdb', 'interp_magic.interp_pdb') if self.space.config.objspace.std.withmethodcachecounter: self.extra_interpdef('method_cache_counter', 'interp_magic.method_cache_counter') diff --git a/pypy/module/__pypy__/interp_magic.py b/pypy/module/__pypy__/interp_magic.py --- a/pypy/module/__pypy__/interp_magic.py +++ b/pypy/module/__pypy__/interp_magic.py @@ -15,12 +15,10 @@ return space.wrap('%r' % (w_object,)) -def interp_pdb(space): - """Run an interp-level pdb. - This is not available in translated versions of PyPy.""" - assert not we_are_translated() - import pdb - pdb.set_trace() +def attach_gdb(space): + """Run an interp-level gdb (or pdb when untranslated)""" + from rpython.rlib.debug import attach_gdb + attach_gdb() @unwrap_spec(name=str) diff --git a/pypy/module/_file/interp_file.py b/pypy/module/_file/interp_file.py --- a/pypy/module/_file/interp_file.py +++ b/pypy/module/_file/interp_file.py @@ -209,7 +209,7 @@ # EAGAIN after already some data was received, return it. # Note that we can get EAGAIN while there is buffered data # waiting; read that too. 
- if is_wouldblock_error(e): + if is_wouldblock_error(e.errno): m = stream.count_buffered_bytes() if m > 0: result.append(stream.read(min(n, m))) @@ -321,6 +321,10 @@ self.getstream() # check if the file is still open return os.isatty(self.fd) + def direct_readinto(self, w_rwbuffer): + from pypy.module._file.readinto import direct_readinto + return direct_readinto(self, w_rwbuffer) + # ____________________________________________________________ # # The 'file_' methods are the one exposed to app-level. @@ -413,6 +417,9 @@ Notice that when in non-blocking mode, less data than what was requested may be returned, even if no size parameter was given.""") + _decl(locals(), "readinto", + """readinto(buf) -> length. Read into the given read-write buffer.""") + _decl(locals(), "readline", """readline([size]) -> next line from the file, as a string. @@ -508,16 +515,6 @@ for w_line in lines: self.file_write(w_line) - def file_readinto(self, w_rwbuffer): - """readinto() -> Undocumented. Don't use this; it may go away.""" - # XXX not the most efficient solution as it doesn't avoid the copying - space = self.space - rwbuffer = space.writebuf_w(w_rwbuffer) - w_data = self.file_read(rwbuffer.getlength()) - data = space.str_w(w_data) - rwbuffer.setslice(0, data) - return space.wrap(len(data)) - # ____________________________________________________________ @@ -603,7 +600,6 @@ cls=W_File, doc="Support for 'print'."), __repr__ = interp2app(W_File.file__repr__), - readinto = interp2app(W_File.file_readinto), writelines = interp2app(W_File.file_writelines), __exit__ = interp2app(W_File.file__exit__), __weakref__ = make_weakref_descr(W_File), @@ -632,10 +628,10 @@ MAYBE_EAGAIN = getattr(errno, 'EAGAIN', None) MAYBE_EWOULDBLOCK = getattr(errno, 'EWOULDBLOCK', None) -def is_wouldblock_error(e): - if MAYBE_EAGAIN is not None and e.errno == MAYBE_EAGAIN: +def is_wouldblock_error(errno): + if MAYBE_EAGAIN is not None and errno == MAYBE_EAGAIN: return True - if MAYBE_EWOULDBLOCK is not None 
and e.errno == MAYBE_EWOULDBLOCK: + if MAYBE_EWOULDBLOCK is not None and errno == MAYBE_EWOULDBLOCK: return True return False diff --git a/pypy/module/_file/readinto.py b/pypy/module/_file/readinto.py new file mode 100644 --- /dev/null +++ b/pypy/module/_file/readinto.py @@ -0,0 +1,81 @@ +import sys, errno +from rpython.rlib import rposix +from rpython.rlib.objectmodel import keepalive_until_here +from rpython.rtyper.lltypesystem import lltype, rffi +from pypy.module._file.interp_file import is_wouldblock_error, signal_checker + +_WIN32 = sys.platform.startswith('win') +UNDERSCORE_ON_WIN32 = '_' if _WIN32 else '' + +os_read = rffi.llexternal(UNDERSCORE_ON_WIN32 + 'read', + [rffi.INT, rffi.CCHARP, rffi.SIZE_T], + rffi.SIZE_T, save_err=rffi.RFFI_SAVE_ERRNO) + + +def direct_readinto(self, w_rwbuffer): + rwbuffer = self.space.writebuf_w(w_rwbuffer) + stream = self.getstream() + size = rwbuffer.getlength() + target_address = lltype.nullptr(rffi.CCHARP.TO) + fd = -1 + target_pos = 0 + + if size > 64: + try: + target_address = rwbuffer.get_raw_address() + except ValueError: + pass + else: + fd = stream.try_to_find_file_descriptor() + + if fd < 0 or not target_address: + # fall-back + MAX_PART = 1024 * 1024 # 1 MB + while size > MAX_PART: + data = self.direct_read(MAX_PART) + rwbuffer.setslice(target_pos, data) + target_pos += len(data) + size -= len(data) + if len(data) != MAX_PART: + break + else: + data = self.direct_read(size) + rwbuffer.setslice(target_pos, data) + target_pos += len(data) + + else: + # optimized case: reading more than 64 bytes into a rwbuffer + # with a valid raw address + self.check_readable() + + # first "read" the part that is already sitting in buffers, if any + initial_size = min(size, stream.count_buffered_bytes()) + if initial_size > 0: + data = stream.read(initial_size) + rwbuffer.setslice(target_pos, data) + target_pos += len(data) + size -= len(data) + + # then call os_read() to get the rest + if size > 0: + stream.flush() + while True: + 
got = os_read(fd, rffi.ptradd(target_address, target_pos), size) + if got > 0: + target_pos += got + size -= got + if size <= 0: + break + elif got == 0: + break + else: + err = rposix.get_saved_errno() + if err == errno.EINTR: + signal_checker(self.space)() + continue + if is_wouldblock_error(err) and target_pos > 0: + break + raise OSError(err, "read error") + keepalive_until_here(rwbuffer) + + return self.space.wrap(target_pos) diff --git a/pypy/module/_vmprof/__init__.py b/pypy/module/_vmprof/__init__.py --- a/pypy/module/_vmprof/__init__.py +++ b/pypy/module/_vmprof/__init__.py @@ -2,7 +2,7 @@ class Module(MixedModule): """ - Write me :) + VMProf for PyPy: a statistical profiler """ appleveldefs = { } @@ -10,9 +10,13 @@ interpleveldefs = { 'enable': 'interp_vmprof.enable', 'disable': 'interp_vmprof.disable', + 'VMProfError': 'space.fromcache(interp_vmprof.Cache).w_VMProfError', } - def setup_after_space_initialization(self): - # force the __extend__ hacks to occur early - from pypy.module._vmprof.interp_vmprof import VMProf - self.vmprof = VMProf() + +# Force the __extend__ hacks and method replacements to occur +# early. Without this, for example, 'PyCode._init_ready' was +# already found by the annotator to be the original empty +# method, and the annotator doesn't notice that interp_vmprof.py +# (loaded later) replaces this method. 
+import pypy.module._vmprof.interp_vmprof diff --git a/pypy/module/_vmprof/interp_vmprof.py b/pypy/module/_vmprof/interp_vmprof.py --- a/pypy/module/_vmprof/interp_vmprof.py +++ b/pypy/module/_vmprof/interp_vmprof.py @@ -1,252 +1,74 @@ -import py, os, sys -from rpython.rtyper.lltypesystem import lltype, rffi, llmemory -from rpython.translator.tool.cbuild import ExternalCompilationInfo -from rpython.rtyper.annlowlevel import cast_instance_to_gcref, cast_base_ptr_to_instance -from rpython.rlib.objectmodel import we_are_translated -from rpython.rlib import jit, rposix, rgc -from rpython.rlib.rarithmetic import ovfcheck_float_to_int -from rpython.rtyper.tool import rffi_platform as platform -from rpython.rlib.rstring import StringBuilder -from pypy.interpreter.baseobjspace import W_Root -from pypy.interpreter.error import oefmt, wrap_oserror, OperationError +from pypy.interpreter.error import OperationError from pypy.interpreter.gateway import unwrap_spec from pypy.interpreter.pyframe import PyFrame from pypy.interpreter.pycode import PyCode +from pypy.interpreter.baseobjspace import W_Root +from rpython.rlib import rvmprof -ROOT = py.path.local(__file__).join('..') -SRC = ROOT.join('src') +# ____________________________________________________________ -# by default, we statically link vmprof.c into pypy; however, if you set -# DYNAMIC_VMPROF to True, it will be dynamically linked to the libvmprof.so -# which is expected to be inside pypy/module/_vmprof/src: this is very useful -# during development. 
Note that you have to manually build libvmprof by -# running make inside the src dir -DYNAMIC_VMPROF = False -if sys.platform.startswith('linux'): - libs = ['dl'] -else: - libs = [] +_get_code = lambda frame, w_inputvalue, operr: frame.pycode +_decorator = rvmprof.vmprof_execute_code("pypy", _get_code, W_Root) +my_execute_frame = _decorator(PyFrame.execute_frame) -eci_kwds = dict( - include_dirs = [SRC], - includes = ['vmprof.h', 'trampoline.h'], - separate_module_files = [SRC.join('trampoline.vmprof.s')], - libraries = libs, - - post_include_bits=[""" - int pypy_vmprof_init(void); - """], - - separate_module_sources=[""" - int pypy_vmprof_init(void) { - return vmprof_set_mainloop(pypy_execute_frame_trampoline, 0, - NULL); - } - """], - ) - - -if DYNAMIC_VMPROF: - eci_kwds['libraries'] += ['vmprof'] - eci_kwds['link_extra'] = ['-Wl,-rpath,%s' % SRC, '-L%s' % SRC] -else: - eci_kwds['separate_module_files'] += [SRC.join('vmprof.c')] - -eci = ExternalCompilationInfo(**eci_kwds) - -check_eci = eci.merge(ExternalCompilationInfo(separate_module_files=[ - SRC.join('fake_pypy_api.c')])) - -platform.verify_eci(check_eci) - -pypy_execute_frame_trampoline = rffi.llexternal( - "pypy_execute_frame_trampoline", - [llmemory.GCREF, llmemory.GCREF, llmemory.GCREF, lltype.Signed], - llmemory.GCREF, - compilation_info=eci, - _nowrapper=True, sandboxsafe=True, - random_effects_on_gcobjs=True) - -pypy_vmprof_init = rffi.llexternal("pypy_vmprof_init", [], rffi.INT, - compilation_info=eci) -vmprof_enable = rffi.llexternal("vmprof_enable", - [rffi.INT, rffi.LONG, rffi.INT, - rffi.CCHARP, rffi.INT], - rffi.INT, compilation_info=eci, - save_err=rffi.RFFI_SAVE_ERRNO) -vmprof_disable = rffi.llexternal("vmprof_disable", [], rffi.INT, - compilation_info=eci, - save_err=rffi.RFFI_SAVE_ERRNO) -vmprof_get_error = rffi.llexternal("vmprof_get_error", [], rffi.CCHARP, - compilation_info=eci, - save_err=rffi.RFFI_SAVE_ERRNO) - -vmprof_register_virtual_function = rffi.llexternal( - 
"vmprof_register_virtual_function", - [rffi.CCHARP, rffi.VOIDP, rffi.VOIDP], lltype.Void, - compilation_info=eci, _nowrapper=True) - -original_execute_frame = PyFrame.execute_frame.im_func -original_execute_frame.c_name = 'pypy_pyframe_execute_frame' -original_execute_frame._dont_inline_ = True class __extend__(PyFrame): - def execute_frame(frame, w_inputvalue=None, operr=None): - # go through the asm trampoline ONLY if we are translated but not being JITted. - # - # If we are not translated, we obviously don't want to go through the - # trampoline because there is no C function it can call. - # - # If we are being JITted, we want to skip the trampoline, else the JIT - # cannot see throug it - if we_are_translated() and not jit.we_are_jitted(): - # if we are translated, call the trampoline - gc_frame = cast_instance_to_gcref(frame) - gc_inputvalue = cast_instance_to_gcref(w_inputvalue) - gc_operr = cast_instance_to_gcref(operr) - unique_id = frame.pycode._unique_id - gc_result = pypy_execute_frame_trampoline(gc_frame, gc_inputvalue, - gc_operr, unique_id) - return cast_base_ptr_to_instance(W_Root, gc_result) - else: - return original_execute_frame(frame, w_inputvalue, operr) + def execute_frame(self, w_inputvalue=None, operr=None): + # indirection for the optional arguments + return my_execute_frame(self, w_inputvalue, operr) +def _safe(s): + if len(s) > 110: + s = s[:107] + '...' 
+ return s.replace(':', ';') -def write_long_to_string_builder(l, b): - if sys.maxint == 2147483647: - b.append(chr(l & 0xff)) - b.append(chr((l >> 8) & 0xff)) - b.append(chr((l >> 16) & 0xff)) - b.append(chr((l >> 24) & 0xff)) - else: - b.append(chr(l & 0xff)) - b.append(chr((l >> 8) & 0xff)) - b.append(chr((l >> 16) & 0xff)) - b.append(chr((l >> 24) & 0xff)) - b.append(chr((l >> 32) & 0xff)) - b.append(chr((l >> 40) & 0xff)) - b.append(chr((l >> 48) & 0xff)) - b.append(chr((l >> 56) & 0xff)) +def _get_full_name(pycode): + # careful, must not have extraneous ':' or be longer than 255 chars + return "py:%s:%d:%s" % (_safe(pycode.co_name), pycode.co_firstlineno, + _safe(pycode.co_filename)) -def try_cast_to_pycode(gcref): - return rgc.try_cast_gcref_to_instance(PyCode, gcref) +rvmprof.register_code_object_class(PyCode, _get_full_name) -MAX_CODES = 1000 -class VMProf(object): - def __init__(self): - self.is_enabled = False - self.ever_enabled = False - self.fileno = -1 - self.current_codes = [] +def _init_ready(pycode): + rvmprof.register_code(pycode, _get_full_name) - def enable(self, space, fileno, period_usec): - if self.is_enabled: - raise oefmt(space.w_ValueError, "_vmprof already enabled") - self.fileno = fileno - self.is_enabled = True - self.write_header(fileno, period_usec) - if not self.ever_enabled: - if we_are_translated(): - res = pypy_vmprof_init() - if res: - raise OperationError( - space.w_IOError, - space.wrap(rffi.charp2str(vmprof_get_error()))) - self.ever_enabled = True - self.gather_all_code_objs(space) - space.register_code_callback(vmprof_register_code) - if we_are_translated(): - # does not work untranslated - res = vmprof_enable(fileno, period_usec, 0, - lltype.nullptr(rffi.CCHARP.TO), 0) - else: - res = 0 - if res == -1: - raise wrap_oserror(space, OSError(rposix.get_saved_errno(), - "_vmprof.enable")) +PyCode._init_ready = _init_ready - def gather_all_code_objs(self, space): - all_code_objs = rgc.do_get_objects(try_cast_to_pycode) - for 
code in all_code_objs: - self.register_code(space, code) - def write_header(self, fileno, period_usec): - assert period_usec > 0 - b = StringBuilder() - write_long_to_string_builder(0, b) - write_long_to_string_builder(3, b) - write_long_to_string_builder(0, b) - write_long_to_string_builder(period_usec, b) - write_long_to_string_builder(0, b) - b.append('\x04') # interp name - b.append(chr(len('pypy'))) - b.append('pypy') - os.write(fileno, b.build()) +# ____________________________________________________________ - def register_code(self, space, code): - if self.fileno == -1: - raise OperationError(space.w_RuntimeError, - space.wrap("vmprof not running")) - self.current_codes.append(code) - if len(self.current_codes) >= MAX_CODES: - self._flush_codes(space) - def _flush_codes(self, space): - b = StringBuilder() - for code in self.current_codes: - name = code._get_full_name() - b.append('\x02') - write_long_to_string_builder(code._unique_id, b) - write_long_to_string_builder(len(name), b) - b.append(name) - os.write(self.fileno, b.build()) - self.current_codes = [] +class Cache: + def __init__(self, space): + self.w_VMProfError = space.new_exception_class("_vmprof.VMProfError") - def disable(self, space): - if not self.is_enabled: - raise oefmt(space.w_ValueError, "_vmprof not enabled") - self.is_enabled = False - space.register_code_callback(None) - self._flush_codes(space) - self.fileno = -1 - if we_are_translated(): - # does not work untranslated - res = vmprof_disable() - else: - res = 0 - if res == -1: - raise wrap_oserror(space, OSError(rposix.get_saved_errno(), - "_vmprof.disable")) +def VMProfError(space, e): + w_VMProfError = space.fromcache(Cache).w_VMProfError + return OperationError(w_VMProfError, space.wrap(e.msg)) -def vmprof_register_code(space, code): - from pypy.module._vmprof import Module - mod_vmprof = space.getbuiltinmodule('_vmprof') - assert isinstance(mod_vmprof, Module) - mod_vmprof.vmprof.register_code(space, code) 
@unwrap_spec(fileno=int, period=float) -def enable(space, fileno, period=0.01): # default 100 Hz - from pypy.module._vmprof import Module - mod_vmprof = space.getbuiltinmodule('_vmprof') - assert isinstance(mod_vmprof, Module) - # +def enable(space, fileno, period): + """Enable vmprof. Writes go to the given 'fileno', a file descriptor + opened for writing. *The file descriptor must remain open at least + until disable() is called.* + + 'interval' is a float representing the sampling interval, in seconds. + Must be smaller than 1.0 + """ try: - period_usec = ovfcheck_float_to_int(period * 1000000.0 + 0.5) - if period_usec <= 0 or period_usec >= 1e6: - # we don't want seconds here at all - raise ValueError - except (ValueError, OverflowError): - raise OperationError(space.w_ValueError, - space.wrap("'period' too large or non positive")) - # - mod_vmprof.vmprof.enable(space, fileno, period_usec) + rvmprof.enable(fileno, period) + except rvmprof.VMProfError, e: + raise VMProfError(space, e) def disable(space): - from pypy.module._vmprof import Module - mod_vmprof = space.getbuiltinmodule('_vmprof') - assert isinstance(mod_vmprof, Module) - mod_vmprof.vmprof.disable(space) - + """Disable vmprof. Remember to close the file descriptor afterwards + if necessary. 
+ """ + try: + rvmprof.disable() + except rvmprof.VMProfError, e: + raise VMProfError(space, e) diff --git a/pypy/module/_vmprof/src/config.h b/pypy/module/_vmprof/src/config.h deleted file mode 100644 --- a/pypy/module/_vmprof/src/config.h +++ /dev/null @@ -1,6 +0,0 @@ -#define HAVE_SYS_UCONTEXT_H -#if defined(__FreeBSD__) || defined(__APPLE__) -#define PC_FROM_UCONTEXT uc_mcontext.mc_rip -#else -#define PC_FROM_UCONTEXT uc_mcontext.gregs[REG_RIP] -#endif diff --git a/pypy/module/_vmprof/src/fake_pypy_api.c b/pypy/module/_vmprof/src/fake_pypy_api.c deleted file mode 100644 --- a/pypy/module/_vmprof/src/fake_pypy_api.c +++ /dev/null @@ -1,4 +0,0 @@ - -void pypy_pyframe_execute_frame(void) -{ -} diff --git a/pypy/module/_vmprof/src/get_custom_offset.c b/pypy/module/_vmprof/src/get_custom_offset.c deleted file mode 100644 --- a/pypy/module/_vmprof/src/get_custom_offset.c +++ /dev/null @@ -1,80 +0,0 @@ - -#ifdef PYPY_JIT_CODEMAP - -extern volatile int pypy_codemap_currently_invalid; - -void *pypy_find_codemap_at_addr(long addr, long *start_addr); -long pypy_yield_codemap_at_addr(void *codemap_raw, long addr, - long *current_pos_addr); -long pypy_jit_stack_depth_at_loc(long loc); - -#endif - - -void vmprof_set_tramp_range(void* start, void* end) -{ -} - -int custom_sanity_check() -{ -#ifdef PYPY_JIT_CODEMAP - return !pypy_codemap_currently_invalid; -#else - return 1; -#endif -} - -static ptrdiff_t vmprof_unw_get_custom_offset(void* ip, void *cp) { -#ifdef PYPY_JIT_CODEMAP - intptr_t ip_l = (intptr_t)ip; - return pypy_jit_stack_depth_at_loc(ip_l); -#else - return 0; -#endif -} - -static long vmprof_write_header_for_jit_addr(void **result, long n, - void *ip, int max_depth) -{ -#ifdef PYPY_JIT_CODEMAP - void *codemap; - long current_pos = 0; - intptr_t id; - long start_addr = 0; - intptr_t addr = (intptr_t)ip; - int start, k; - void *tmp; - - codemap = pypy_find_codemap_at_addr(addr, &start_addr); - if (codemap == NULL) - // not a jit code at all - return n; - - // 
modify the last entry to point to start address and not the random one - // in the middle - result[n - 1] = (void*)start_addr; - result[n] = (void*)2; - n++; - start = n; - while (n < max_depth) { - id = pypy_yield_codemap_at_addr(codemap, addr, &current_pos); - if (id == -1) - // finish - break; - if (id == 0) - continue; // not main codemap - result[n++] = (void *)id; - } - k = 0; - while (k < (n - start) / 2) { - tmp = result[start + k]; - result[start + k] = result[n - k - 1]; - result[n - k - 1] = tmp; - k++; - } - if (n < max_depth) { - result[n++] = (void*)3; - } -#endif - return n; -} diff --git a/pypy/module/_vmprof/src/trampoline.h b/pypy/module/_vmprof/src/trampoline.h deleted file mode 100644 --- a/pypy/module/_vmprof/src/trampoline.h +++ /dev/null @@ -1,1 +0,0 @@ -void* pypy_execute_frame_trampoline(void*, void*, void*, long); diff --git a/pypy/module/_vmprof/src/trampoline.vmprof.s b/pypy/module/_vmprof/src/trampoline.vmprof.s deleted file mode 100644 --- a/pypy/module/_vmprof/src/trampoline.vmprof.s +++ /dev/null @@ -1,15 +0,0 @@ -// NOTE: you need to use TABs, not spaces! - - .text - .globl pypy_execute_frame_trampoline - .type pypy_execute_frame_trampoline, @function -pypy_execute_frame_trampoline: - .cfi_startproc - pushq %rcx - .cfi_def_cfa_offset 16 - call pypy_pyframe_execute_frame@PLT - popq %rcx - .cfi_def_cfa_offset 8 - ret - .cfi_endproc - .size pypy_execute_frame_trampoline, .-pypy_execute_frame_trampoline diff --git a/pypy/module/_vmprof/src/vmprof.c b/pypy/module/_vmprof/src/vmprof.c deleted file mode 100644 --- a/pypy/module/_vmprof/src/vmprof.c +++ /dev/null @@ -1,463 +0,0 @@ -/* VMPROF - * - * statistical sampling profiler specifically designed to profile programs - * which run on a Virtual Machine and/or bytecode interpreter, such as Python, - * etc. - * - * The logic to dump the C stack traces is partly stolen from the code in gperftools. - * The file "getpc.h" has been entirely copied from gperftools.
- * - * Tested only on gcc, linux, x86_64. - * - * Copyright (C) 2014-2015 - * Antonio Cuni - anto.c...@gmail.com - * Maciej Fijalkowski - fij...@gmail.com - * - */ - - -#include "getpc.h" // should be first to get the _GNU_SOURCE dfn -#include <signal.h> -#include <stdio.h> -#include <string.h> -#include <stddef.h> -#include <assert.h> -#include <unistd.h> -#include <sys/time.h> -#include <sys/types.h> -#include <errno.h> -#include <pthread.h> -#include <dlfcn.h> - -//#define UNW_LOCAL_ONLY -//#include <libunwind.h> - -#include "vmprof.h" -#if defined(__FreeBSD__) || defined(__APPLE__) -#define sighandler_t sig_t -#endif - -#define _unused(x) ((void)x) - -#define MAX_FUNC_NAME 128 -#define MAX_STACK_DEPTH 1024 -#define BUFFER_SIZE 8192 - - -static int profile_file = 0; -static char profile_write_buffer[BUFFER_SIZE]; -static int profile_buffer_position = 0; -void* vmprof_mainloop_func; -char* vmprof_error = NULL; -static ptrdiff_t mainloop_sp_offset; -static vmprof_get_virtual_ip_t mainloop_get_virtual_ip; -static long last_period_usec = 0; -static int atfork_hook_installed = 0; - - -/* ************************************************************* - * functions to write a profile file compatible with gperftools - * ************************************************************* - */ - -#define MARKER_STACKTRACE '\x01' -#define MARKER_VIRTUAL_IP '\x02' -#define MARKER_TRAILER '\x03' - -int (*unw_get_reg)(unw_cursor_t*, int, unw_word_t*) = NULL; -int (*unw_step)(unw_cursor_t*) = NULL; -int (*unw_init_local)(unw_cursor_t *, unw_context_t *) = NULL; -int (*unw_get_proc_info)(unw_cursor_t *, unw_proc_info_t *) = NULL; - -static void prof_word(long x) { - ((long*)(profile_write_buffer + profile_buffer_position))[0] = x; - profile_buffer_position += sizeof(long); -} - -static void prof_header(long period_usec) { - // XXX never used here? 
- prof_word(0); - prof_word(3); - prof_word(0); - prof_word(period_usec); - prof_word(0); - write(profile_file, profile_write_buffer, profile_buffer_position); - profile_buffer_position = 0; -} - -static void prof_write_stacktrace(void** stack, int depth, int count) { - int i; - char marker = MARKER_STACKTRACE; - - profile_write_buffer[profile_buffer_position++] = MARKER_STACKTRACE; - prof_word(count); - prof_word(depth); - for(i=0; i<depth; i++) - prof_word((long)stack[i]); - write(profile_file, profile_write_buffer, profile_buffer_position); - profile_buffer_position = 0; -} - - -/* ****************************************************** - * libunwind workaround for process JIT frames correctly - * ****************************************************** - */ - -#include "get_custom_offset.c" - -typedef struct { - void* _unused1; - void* _unused2; - void* sp; - void* ip; - void* _unused3[sizeof(unw_cursor_t)/sizeof(void*) - 4]; -} vmprof_hacked_unw_cursor_t; - -static int vmprof_unw_step(unw_cursor_t *cp, int first_run) { - void* ip; - void* sp; - ptrdiff_t sp_offset; - unw_get_reg (cp, UNW_REG_IP, (unw_word_t*)&ip); - unw_get_reg (cp, UNW_REG_SP, (unw_word_t*)&sp); - if (!first_run) - // make sure we're pointing to the CALL and not to the first - // instruction after. 
If the callee adjusts the stack for us - // it's not safe to be at the instruction after - ip -= 1; - sp_offset = vmprof_unw_get_custom_offset(ip, cp); - - if (sp_offset == -1) { - // it means that the ip is NOT in JITted code, so we can use the - // stardard unw_step - return unw_step(cp); - } - else { - // this is a horrible hack to manually walk the stack frame, by - // setting the IP and SP in the cursor - vmprof_hacked_unw_cursor_t *cp2 = (vmprof_hacked_unw_cursor_t*)cp; - void* bp = (void*)sp + sp_offset; - cp2->sp = bp; - bp -= sizeof(void*); - cp2->ip = ((void**)bp)[0]; - // the ret is on the top of the stack minus WORD - return 1; - } -} - - -/* ************************************************************* - * functions to dump the stack trace - * ************************************************************* - */ - -// The original code here has a comment, "stolen from pprof", -// about a "__thread int recursive". But general __thread -// variables are not really supposed to be accessed from a -// signal handler. Moreover, we are using SIGPROF, which -// should not be recursively called on the same thread. 
-//static __thread int recursive; - -int get_stack_trace(void** result, int max_depth, ucontext_t *ucontext) { - void *ip; - int n = 0; - unw_cursor_t cursor; - unw_context_t uc = *ucontext; - //if (recursive) { - // return 0; - //} - if (!custom_sanity_check()) { - return 0; - } - //++recursive; - - int ret = unw_init_local(&cursor, &uc); - assert(ret >= 0); - _unused(ret); - int first_run = 1; - - while (n < max_depth) { - if (unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *) &ip) < 0) { - break; - } - - unw_proc_info_t pip; - unw_get_proc_info(&cursor, &pip); - - /* char funcname[4096]; */ - /* unw_word_t offset; */ - /* unw_get_proc_name(&cursor, funcname, 4096, &offset); */ - /* printf("%s+%#lx <%p>\n", funcname, offset, ip); */ - - /* if n==0, it means that the signal handler interrupted us while we - were in the trampoline, so we are not executing (yet) the real main - loop function; just skip it */ - if (vmprof_mainloop_func && - (void*)pip.start_ip == (void*)vmprof_mainloop_func && - n > 0) { - // found main loop stack frame - void* sp; - unw_get_reg(&cursor, UNW_REG_SP, (unw_word_t *) &sp); - void *arg_addr = (char*)sp + mainloop_sp_offset; - void **arg_ptr = (void**)arg_addr; - // fprintf(stderr, "stacktrace mainloop: rsp %p &f2 %p offset %ld\n", - // sp, arg_addr, mainloop_sp_offset); - if (mainloop_get_virtual_ip) { - ip = mainloop_get_virtual_ip(*arg_ptr); - } else { - ip = *arg_ptr; - } - } - - result[n++] = ip; - n = vmprof_write_header_for_jit_addr(result, n, ip, max_depth); - if (vmprof_unw_step(&cursor, first_run) <= 0) { - break; - } - first_run = 0; - } - //--recursive; - return n; -} - - -static int __attribute__((noinline)) frame_forcer(int rv) { - return rv; -} - -static void sigprof_handler(int sig_nr, siginfo_t* info, void *ucontext) { - void* stack[MAX_STACK_DEPTH]; - int saved_errno = errno; - stack[0] = GetPC((ucontext_t*)ucontext); - int depth = frame_forcer(get_stack_trace(stack+1, MAX_STACK_DEPTH-1, ucontext)); - depth++; // To account 
for pc value in stack[0]; - prof_write_stacktrace(stack, depth, 1); - errno = saved_errno; -} - -/* ************************************************************* - * functions to enable/disable the profiler - * ************************************************************* - */ - -static int open_profile(int fd, long period_usec, int write_header, char *s, - int slen) { - if ((fd = dup(fd)) == -1) { - return -1; - } - profile_buffer_position = 0; - profile_file = fd; - if (write_header) - prof_header(period_usec); - if (s) - write(profile_file, s, slen); - return 0; -} - -static int close_profile(void) { - // XXX all of this can happily fail - FILE* src; - char buf[BUFSIZ]; - size_t size; - int marker = MARKER_TRAILER; - write(profile_file, &marker, 1); - -#ifdef __linux__ - // copy /proc/PID/maps to the end of the profile file - sprintf(buf, "/proc/%d/maps", getpid()); - src = fopen(buf, "r"); - if (!src) { - vmprof_error = "error opening proc maps"; - return -1; - } - while ((size = fread(buf, 1, BUFSIZ, src))) { - write(profile_file, buf, size); - } - fclose(src); -#else - // freebsd and mac - sprintf(buf, "procstat -v %d", getpid()); - src = popen(buf, "r"); - if (!src) { - vmprof_error = "error calling procstat"; - return -1; - } - while ((size = fread(buf, 1, BUFSIZ, src))) { - write(profile_file, buf, size); - } - pclose(src); -#endif - close(profile_file); - return 0; -} - - -static int install_sigprof_handler(void) { - struct sigaction sa; - memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = sigprof_handler; - sa.sa_flags = SA_RESTART | SA_SIGINFO; - if (sigemptyset(&sa.sa_mask) == -1 || - sigaction(SIGPROF, &sa, NULL) == -1) { - return -1; - } - return 0; -} - -static int remove_sigprof_handler(void) { - sighandler_t res = signal(SIGPROF, SIG_DFL); - if (res == SIG_ERR) { - return -1; - } - return 0; -}; - -static int install_sigprof_timer(long period_usec) { - static struct itimerval timer; - last_period_usec = period_usec; - timer.it_interval.tv_sec = 0; - 
timer.it_interval.tv_usec = period_usec; - timer.it_value = timer.it_interval; - if (setitimer(ITIMER_PROF, &timer, NULL) != 0) { - return -1; - } - return 0; -} - -static int remove_sigprof_timer(void) { - static struct itimerval timer; - timer.it_interval.tv_sec = 0; - timer.it_interval.tv_usec = 0; - timer.it_value.tv_sec = 0; - timer.it_value.tv_usec = 0; - if (setitimer(ITIMER_PROF, &timer, NULL) != 0) { - return -1; - } - return 0; -} - -static void atfork_disable_timer(void) { - if (last_period_usec) { - remove_sigprof_timer(); - } -} - -static void atfork_enable_timer(void) { - if (last_period_usec) { - install_sigprof_timer(last_period_usec); - } -} - -static int install_pthread_atfork_hooks(void) { - /* this is needed to prevent the problems described there: - - http://code.google.com/p/gperftools/issues/detail?id=278 - - http://lists.debian.org/debian-glibc/2010/03/msg00161.html - - TL;DR: if the RSS of the process is large enough, the clone() syscall - will be interrupted by the SIGPROF before it can complete, then - retried, interrupted again and so on, in an endless loop. The - solution is to disable the timer around the fork, and re-enable it - only inside the parent. 
- */ - if (atfork_hook_installed) - return 0; - int ret = pthread_atfork(atfork_disable_timer, atfork_enable_timer, NULL); - if (ret != 0) - return -1; - atfork_hook_installed = 1; - return 0; -} - -/* ************************************************************* - * public API - * ************************************************************* - */ - -int vmprof_set_mainloop(void* func, ptrdiff_t sp_offset, - vmprof_get_virtual_ip_t get_virtual_ip) { - void *libhandle; - - mainloop_sp_offset = sp_offset; - mainloop_get_virtual_ip = get_virtual_ip; - vmprof_mainloop_func = func; - if (!unw_get_reg) { - if (!(libhandle = dlopen("libunwind.so", RTLD_LAZY | RTLD_LOCAL))) { - vmprof_error = dlerror(); - return -1; - } - if (!(unw_get_reg = dlsym(libhandle, "_ULx86_64_get_reg"))) { - vmprof_error = dlerror(); - return -1; - } - if (!(unw_get_proc_info = dlsym(libhandle, "_ULx86_64_get_proc_info"))){ - vmprof_error = dlerror(); - return -1; - } - if (!(unw_init_local = dlsym(libhandle, "_ULx86_64_init_local"))) { - vmprof_error = dlerror(); - return -1; - } - if (!(unw_step = dlsym(libhandle, "_ULx86_64_step"))) { - vmprof_error = dlerror(); - return -1; - } - } - return 0; -} - -char* vmprof_get_error() -{ - char* res; - res = vmprof_error; - vmprof_error = NULL; - return res; -} - -int vmprof_enable(int fd, long period_usec, int write_header, char *s, - int slen) -{ - assert(period_usec > 0); - if (open_profile(fd, period_usec, write_header, s, slen) == -1) { - return -1; - } - if (install_sigprof_handler() == -1) { - return -1; - } - if (install_sigprof_timer(period_usec) == -1) { - return -1; - } - if (install_pthread_atfork_hooks() == -1) { - return -1; - } - return 0; -} - -int vmprof_disable(void) { - if (remove_sigprof_timer() == -1) { - return -1; - } - last_period_usec = 0; - if (remove_sigprof_handler() == -1) { - return -1; - } - if (close_profile() == -1) { - return -1; - } - return 0; -} - -void vmprof_register_virtual_function(const char* name, void* start, 
void* end) { - // XXX unused by pypy - // for now *end is simply ignored - char buf[1024]; - int lgt = strlen(name) + 2 * sizeof(long) + 1; - - if (lgt > 1024) { - lgt = 1024; - } - buf[0] = MARKER_VIRTUAL_IP; - ((void **)(((void*)buf) + 1))[0] = start; - ((long *)(((void*)buf) + 1 + sizeof(long)))[0] = lgt - 2 * sizeof(long) - 1; - strncpy(buf + 2 * sizeof(long) + 1, name, 1024 - 2 * sizeof(long) - 1); - write(profile_file, buf, lgt); -} diff --git a/pypy/module/_vmprof/src/vmprof.h b/pypy/module/_vmprof/src/vmprof.h deleted file mode 100644 --- a/pypy/module/_vmprof/src/vmprof.h +++ /dev/null @@ -1,121 +0,0 @@ -#ifndef VMPROF_VMPROF_H_ -#define VMPROF_VMPROF_H_ - -#include <stddef.h> -#include <stdint.h> -#include <ucontext.h> - -// copied from libunwind.h - -typedef enum - { - UNW_X86_64_RAX, - UNW_X86_64_RDX, - UNW_X86_64_RCX, - UNW_X86_64_RBX, - UNW_X86_64_RSI, - UNW_X86_64_RDI, - UNW_X86_64_RBP, - UNW_X86_64_RSP, - UNW_X86_64_R8, - UNW_X86_64_R9, - UNW_X86_64_R10, - UNW_X86_64_R11, - UNW_X86_64_R12, - UNW_X86_64_R13, - UNW_X86_64_R14, - UNW_X86_64_R15, - UNW_X86_64_RIP, -#ifdef CONFIG_MSABI_SUPPORT - UNW_X86_64_XMM0, - UNW_X86_64_XMM1, - UNW_X86_64_XMM2, - UNW_X86_64_XMM3, - UNW_X86_64_XMM4, - UNW_X86_64_XMM5, - UNW_X86_64_XMM6, - UNW_X86_64_XMM7, - UNW_X86_64_XMM8, - UNW_X86_64_XMM9, - UNW_X86_64_XMM10, - UNW_X86_64_XMM11, - UNW_X86_64_XMM12, - UNW_X86_64_XMM13, - UNW_X86_64_XMM14, - UNW_X86_64_XMM15, - UNW_TDEP_LAST_REG = UNW_X86_64_XMM15, -#else - UNW_TDEP_LAST_REG = UNW_X86_64_RIP, -#endif - - /* XXX Add other regs here */ - - /* frame info (read-only) */ - UNW_X86_64_CFA, - - UNW_TDEP_IP = UNW_X86_64_RIP, - UNW_TDEP_SP = UNW_X86_64_RSP, - UNW_TDEP_BP = UNW_X86_64_RBP, - UNW_TDEP_EH = UNW_X86_64_RAX - } -x86_64_regnum_t; - -typedef uint64_t unw_word_t; - -#define UNW_TDEP_CURSOR_LEN 127 - -typedef struct unw_cursor - { - unw_word_t opaque[UNW_TDEP_CURSOR_LEN]; - } -unw_cursor_t; - -#define UNW_REG_IP UNW_X86_64_RIP -#define UNW_REG_SP UNW_X86_64_RSP - 
-typedef ucontext_t unw_context_t; - -typedef struct unw_proc_info - { - unw_word_t start_ip; /* first IP covered by this procedure */ - unw_word_t end_ip; /* first IP NOT covered by this procedure */ - unw_word_t lsda; /* address of lang.-spec. data area (if any) */ - unw_word_t handler; /* optional personality routine */ - unw_word_t gp; /* global-pointer value for this procedure */ - unw_word_t flags; /* misc. flags */ - - int format; /* unwind-info format (arch-specific) */ - int unwind_info_size; /* size of the information (if applicable) */ - void *unwind_info; /* unwind-info (arch-specific) */ - } -unw_proc_info_t; - -// functions copied from libunwind using dlopen - -extern int (*unw_get_reg)(unw_cursor_t*, int, unw_word_t*); -extern int (*unw_step)(unw_cursor_t*); -extern int (*unw_init_local)(unw_cursor_t *, unw_context_t *); -extern int (*unw_get_proc_info)(unw_cursor_t *, unw_proc_info_t *); - -// end of copy - -extern char* vmprof_error; - -typedef void* (*vmprof_get_virtual_ip_t)(void*); -char* vmprof_get_error(); - -extern void* vmprof_mainloop_func; -int vmprof_set_mainloop(void* func, ptrdiff_t sp_offset, - vmprof_get_virtual_ip_t get_virtual_ip); - -void vmprof_register_virtual_function(const char* name, void* start, void* end); - - -int vmprof_enable(int fd, long period_usec, int write_header, char* vips, - int vips_len); -int vmprof_disable(void); - -// XXX: this should be part of _vmprof (the CPython extension), not vmprof (the library) -void vmprof_set_tramp_range(void* start, void* end); - -#endif diff --git a/pypy/module/_vmprof/test/test__vmprof.py b/pypy/module/_vmprof/test/test__vmprof.py --- a/pypy/module/_vmprof/test/test__vmprof.py +++ b/pypy/module/_vmprof/test/test__vmprof.py @@ -1,14 +1,14 @@ -import tempfile +from rpython.tool.udir import udir from pypy.tool.pytest.objspace import gettestobjspace class AppTestVMProf(object): def setup_class(cls): cls.space = gettestobjspace(usemodules=['_vmprof', 'struct']) - cls.tmpfile = 
tempfile.NamedTemporaryFile() + cls.tmpfile = udir.join('test__vmprof.1').open('wb') cls.w_tmpfileno = cls.space.wrap(cls.tmpfile.fileno()) cls.w_tmpfilename = cls.space.wrap(cls.tmpfile.name) - cls.tmpfile2 = tempfile.NamedTemporaryFile() + cls.tmpfile2 = udir.join('test__vmprof.2').open('wb') cls.w_tmpfileno2 = cls.space.wrap(cls.tmpfile2.fileno()) cls.w_tmpfilename2 = cls.space.wrap(cls.tmpfile2.name) @@ -29,19 +29,23 @@ while i < len(s): if s[i] == '\x03': break - if s[i] == '\x01': - xxx - assert s[i] == '\x02' - i += 1 - _, size = struct.unpack("ll", s[i:i + 2 * WORD]) - count += 1 - i += 2 * WORD + size + elif s[i] == '\x01': + i += 1 + _, size = struct.unpack("ll", s[i:i + 2 * WORD]) + i += 2 * WORD + size * struct.calcsize("P") + elif s[i] == '\x02': + i += 1 + _, size = struct.unpack("ll", s[i:i + 2 * WORD]) + count += 1 + i += 2 * WORD + size + else: + raise AssertionError(ord(s[i])) return count import _vmprof - _vmprof.enable(self.tmpfileno) + _vmprof.enable(self.tmpfileno, 0.01) _vmprof.disable() - s = open(self.tmpfilename).read() + s = open(self.tmpfilename, 'rb').read() no_of_codes = count(s) assert no_of_codes > 10 d = {} @@ -50,14 +54,14 @@ pass """ in d - _vmprof.enable(self.tmpfileno2) + _vmprof.enable(self.tmpfileno2, 0.01) exec """def foo2(): pass """ in d _vmprof.disable() - s = open(self.tmpfilename2).read() + s = open(self.tmpfilename2, 'rb').read() no_of_codes2 = count(s) assert "py:foo:" in s assert "py:foo2:" in s @@ -65,8 +69,9 @@ def test_enable_ovf(self): import _vmprof - raises(ValueError, _vmprof.enable, 999, 0) - raises(ValueError, _vmprof.enable, 999, -2.5) - raises(ValueError, _vmprof.enable, 999, 1e300) - raises(ValueError, _vmprof.enable, 999, 1e300 * 1e300) - raises(ValueError, _vmprof.enable, 999, (1e300*1e300) / (1e300*1e300)) + raises(_vmprof.VMProfError, _vmprof.enable, 999, 0) + raises(_vmprof.VMProfError, _vmprof.enable, 999, -2.5) + raises(_vmprof.VMProfError, _vmprof.enable, 999, 1e300) + raises(_vmprof.VMProfError, 
_vmprof.enable, 999, 1e300 * 1e300) + NaN = (1e300*1e300) / (1e300*1e300) + raises(_vmprof.VMProfError, _vmprof.enable, 999, NaN) diff --git a/pypy/module/_vmprof/test/test_direct.py b/pypy/module/_vmprof/test/test_direct.py --- a/pypy/module/_vmprof/test/test_direct.py +++ b/pypy/module/_vmprof/test/test_direct.py @@ -5,7 +5,8 @@ except ImportError: py.test.skip('cffi required') -srcdir = py.path.local(__file__).join("..", "..", "src") +from rpython.rlib import rvmprof +srcdir = py.path.local(rvmprof.__file__).join("..", "src") ffi = cffi.FFI() ffi.cdef(""" @@ -17,6 +18,8 @@ """) lib = ffi.verify(""" +#define PYPY_JIT_CODEMAP + volatile int pypy_codemap_currently_invalid = 0; long buffer[] = {0, 0, 0, 0, 0}; @@ -39,7 +42,7 @@ } -""" + open(str(srcdir.join("get_custom_offset.c"))).read()) +""" + open(str(srcdir.join("rvmprof_get_custom_offset.h"))).read()) class TestDirect(object): def test_infrastructure(self): diff --git a/pypy/module/cpyext/TODO b/pypy/module/cpyext/TODO deleted file mode 100644 --- a/pypy/module/cpyext/TODO +++ /dev/null @@ -1,26 +0,0 @@ - - Complete the PyTypeObject initialization code. (see XXX in the code) - - Implement further method callers. - - Copy the slots from the base. - - Those tasks are necessary to be able to call slots from C code correctly. - - Additionally, implement further slot wrappers. This is necessary to call - slots of PTOs defined in C. - - Complete the Members support. - - - Use a WeakKeyDictionary to count how often a PyObject is allocated for - a given wrapped object and use this to assess whether optimizations are - useful - - - replace @cpython_api(external=False) by another explicit name: all - it does is a lltype function pointer, no C code involved. - - - Fix GIL handling (e.g. after releasing the GIL, GC operations might occur in savethreads). - - - refactor management of py_objects_r2w and py_objects_w2r, this can - probably be expressed in terms of _PyObject_GC_TRACK macros. 
- - - PyWeakref_GetObject() returns a borrowed reference, but this turns the - WeakRef into a strong reference! - - - sort out pypy's buffer protocol. PyPy's buffer right now don't support - raw memory (except array which supports it in a hackish way), which - should be fixed in order to make it nicely work with cpyext. diff --git a/pypy/module/micronumpy/casting.py b/pypy/module/micronumpy/casting.py --- a/pypy/module/micronumpy/casting.py +++ b/pypy/module/micronumpy/casting.py @@ -145,23 +145,32 @@ # equivalent to PyArray_CanCastTypeTo if origin == target: return True - if origin.is_record() or target.is_record(): - return can_cast_record(space, origin, target, casting) + if casting == 'unsafe': + return True + elif casting == 'no': + return origin.eq(space, target) + if origin.num == target.num: + if origin.is_record(): + return (target.is_record() and + can_cast_record(space, origin, target, casting)) + else: + if casting == 'equiv': + return origin.elsize == target.elsize + elif casting == 'safe': + return origin.elsize <= target.elsize + else: + return True - if casting == 'no': - return origin.eq(space, target) - elif casting == 'equiv': - return origin.num == target.num and origin.elsize == target.elsize - elif casting == 'unsafe': - return True elif casting == 'same_kind': if can_cast_to(origin, target): return True if origin.kind in kind_ordering and target.kind in kind_ordering: return kind_ordering[origin.kind] <= kind_ordering[target.kind] return False - else: # 'safe' + elif casting == 'safe': return can_cast_to(origin, target) + else: # 'equiv' + return origin.num == target.num and origin.elsize == target.elsize def can_cast_record(space, origin, target, casting): if origin is target: diff --git a/pypy/module/micronumpy/descriptor.py b/pypy/module/micronumpy/descriptor.py --- a/pypy/module/micronumpy/descriptor.py +++ b/pypy/module/micronumpy/descriptor.py @@ -101,6 +101,9 @@ @specialize.argtype(1) def box(self, value): + if self.is_record(): + 
raise oefmt(self.itemtype.space.w_NotImplementedError, + "cannot box a value into a 'record' dtype, this is a bug please report it") return self.itemtype.box(value) @specialize.argtype(1, 2) @@ -1028,6 +1031,11 @@ elif space.isinstance_w(w_dtype, space.w_tuple): w_dtype0 = space.getitem(w_dtype, space.wrap(0)) w_dtype1 = space.getitem(w_dtype, space.wrap(1)) + if space.isinstance_w(w_dtype0, space.w_type) and \ + space.isinstance_w(w_dtype1, space.w_list): + #obscure api - (subclass, spec). Ignore the subclass + return make_new_dtype(space, w_subtype, w_dtype1, alignment, + copy=copy, w_shape=w_shape, w_metadata=w_metadata) subdtype = make_new_dtype(space, w_subtype, w_dtype0, alignment, copy) assert isinstance(subdtype, W_Dtype) if subdtype.elsize == 0: diff --git a/pypy/module/micronumpy/iterators.py b/pypy/module/micronumpy/iterators.py --- a/pypy/module/micronumpy/iterators.py +++ b/pypy/module/micronumpy/iterators.py @@ -204,17 +204,16 @@ self.array.setitem(state.offset, elem) -def AxisIter(array, shape, axis, cumulative): +def AxisIter(array, shape, axis): strides = array.get_strides() backstrides = array.get_backstrides() - if not cumulative: - if len(shape) == len(strides): - # keepdims = True - strides = strides[:axis] + [0] + strides[axis + 1:] - backstrides = backstrides[:axis] + [0] + backstrides[axis + 1:] - else: - strides = strides[:axis] + [0] + strides[axis:] - backstrides = backstrides[:axis] + [0] + backstrides[axis:] + if len(shape) == len(strides): + # keepdims = True + strides = strides[:axis] + [0] + strides[axis + 1:] + backstrides = backstrides[:axis] + [0] + backstrides[axis + 1:] + else: + strides = strides[:axis] + [0] + strides[axis:] + backstrides = backstrides[:axis] + [0] + backstrides[axis:] return ArrayIter(array, support.product(shape), shape, strides, backstrides) diff --git a/pypy/module/micronumpy/loop.py b/pypy/module/micronumpy/loop.py --- a/pypy/module/micronumpy/loop.py +++ b/pypy/module/micronumpy/loop.py @@ -9,7 +9,7 @@ 
from pypy.module.micronumpy import support, constants as NPY from pypy.module.micronumpy.base import W_NDimArray, convert_to_array from pypy.module.micronumpy.iterators import PureShapeIter, AxisIter, \ - AllButAxisIter + AllButAxisIter, ArrayIter from pypy.interpreter.argument import Arguments @@ -190,23 +190,64 @@ source_state = source_iter.next(source_state) return target -reduce_driver = jit.JitDriver(name='numpy_reduce', - greens = ['shapelen', 'func', 'done_func', - 'calc_dtype'], - reds = 'auto') -def compute_reduce(space, obj, calc_dtype, func, done_func, identity): - obj_iter, obj_state = obj.create_iter() +def split_iter(arr, axis_flags): + """Prepare 2 iterators for nested iteration over `arr`. + + Arguments: + arr: instance of BaseConcreteArray + axis_flags: list of bools, one for each dimension of `arr`.The inner + iterator operates over the dimensions for which the flag is True + """ + shape = arr.get_shape() + strides = arr.get_strides() + backstrides = arr.get_backstrides() + shapelen = len(shape) + assert len(axis_flags) == shapelen + inner_shape = [-1] * shapelen + inner_strides = [-1] * shapelen + inner_backstrides = [-1] * shapelen + outer_shape = [-1] * shapelen + outer_strides = [-1] * shapelen + outer_backstrides = [-1] * shapelen + for i in range(len(shape)): + if axis_flags[i]: + inner_shape[i] = shape[i] + inner_strides[i] = strides[i] + inner_backstrides[i] = backstrides[i] + outer_shape[i] = 1 + outer_strides[i] = 0 + outer_backstrides[i] = 0 + else: + outer_shape[i] = shape[i] + outer_strides[i] = strides[i] + outer_backstrides[i] = backstrides[i] + inner_shape[i] = 1 + inner_strides[i] = 0 + inner_backstrides[i] = 0 + inner_iter = ArrayIter(arr, support.product(inner_shape), + inner_shape, inner_strides, inner_backstrides) + outer_iter = ArrayIter(arr, support.product(outer_shape), + outer_shape, outer_strides, outer_backstrides) + return inner_iter, outer_iter + + +reduce_flat_driver = jit.JitDriver( + name='numpy_reduce_flat', + 
greens = ['shapelen', 'func', 'done_func', 'calc_dtype'], reds = 'auto') + +def reduce_flat(space, func, w_arr, calc_dtype, done_func, identity): + obj_iter, obj_state = w_arr.create_iter() if identity is None: cur_value = obj_iter.getitem(obj_state).convert_to(space, calc_dtype) obj_state = obj_iter.next(obj_state) else: cur_value = identity.convert_to(space, calc_dtype) - shapelen = len(obj.get_shape()) + shapelen = len(w_arr.get_shape()) while not obj_iter.done(obj_state): - reduce_driver.jit_merge_point(shapelen=shapelen, func=func, - done_func=done_func, - calc_dtype=calc_dtype) + reduce_flat_driver.jit_merge_point( + shapelen=shapelen, func=func, + done_func=done_func, calc_dtype=calc_dtype) rval = obj_iter.getitem(obj_state).convert_to(space, calc_dtype) if done_func is not None and done_func(calc_dtype, rval): return rval @@ -214,33 +255,105 @@ obj_state = obj_iter.next(obj_state) return cur_value -reduce_cum_driver = jit.JitDriver( - name='numpy_reduce_cum_driver', + +reduce_driver = jit.JitDriver( + name='numpy_reduce', + greens=['shapelen', 'func', 'dtype'], reds='auto') + +def reduce(space, func, w_arr, axis_flags, dtype, out, identity): + out_iter, out_state = out.create_iter() + out_iter.track_index = False + shape = w_arr.get_shape() + shapelen = len(shape) + inner_iter, outer_iter = split_iter(w_arr.implementation, axis_flags) + assert outer_iter.size == out_iter.size + + if identity is not None: + identity = identity.convert_to(space, dtype) + outer_state = outer_iter.reset() + while not outer_iter.done(outer_state): + inner_state = inner_iter.reset() + inner_state.offset = outer_state.offset + if identity is not None: + w_val = identity + else: + w_val = inner_iter.getitem(inner_state).convert_to(space, dtype) + inner_state = inner_iter.next(inner_state) + while not inner_iter.done(inner_state): + reduce_driver.jit_merge_point( + shapelen=shapelen, func=func, dtype=dtype) + w_item = inner_iter.getitem(inner_state).convert_to(space, dtype) + w_val 
= func(dtype, w_item, w_val) + inner_state = inner_iter.next(inner_state) + out_iter.setitem(out_state, w_val) + out_state = out_iter.next(out_state) + outer_state = outer_iter.next(outer_state) + return out + +accumulate_flat_driver = jit.JitDriver( + name='numpy_accumulate_flat', greens=['shapelen', 'func', 'dtype', 'out_dtype'], reds='auto') -def compute_reduce_cumulative(space, obj, out, calc_dtype, func, identity): - obj_iter, obj_state = obj.create_iter() - out_iter, out_state = out.create_iter() +def accumulate_flat(space, func, w_arr, calc_dtype, w_out, identity): + arr_iter, arr_state = w_arr.create_iter() + out_iter, out_state = w_out.create_iter() out_iter.track_index = False if identity is None: - cur_value = obj_iter.getitem(obj_state).convert_to(space, calc_dtype) + cur_value = arr_iter.getitem(arr_state).convert_to(space, calc_dtype) out_iter.setitem(out_state, cur_value) out_state = out_iter.next(out_state) - obj_state = obj_iter.next(obj_state) + arr_state = arr_iter.next(arr_state) else: cur_value = identity.convert_to(space, calc_dtype) - shapelen = len(obj.get_shape()) - out_dtype = out.get_dtype() - while not obj_iter.done(obj_state): - reduce_cum_driver.jit_merge_point( - shapelen=shapelen, func=func, - dtype=calc_dtype, out_dtype=out_dtype) - rval = obj_iter.getitem(obj_state).convert_to(space, calc_dtype) - cur_value = func(calc_dtype, cur_value, rval) + shapelen = len(w_arr.get_shape()) + out_dtype = w_out.get_dtype() + while not arr_iter.done(arr_state): + accumulate_flat_driver.jit_merge_point( + shapelen=shapelen, func=func, dtype=calc_dtype, + out_dtype=out_dtype) + w_item = arr_iter.getitem(arr_state).convert_to(space, calc_dtype) + cur_value = func(calc_dtype, cur_value, w_item) out_iter.setitem(out_state, out_dtype.coerce(space, cur_value)) out_state = out_iter.next(out_state) - obj_state = obj_iter.next(obj_state) + arr_state = arr_iter.next(arr_state) + +accumulate_driver = jit.JitDriver( + name='numpy_accumulate', + 
greens=['shapelen', 'func', 'calc_dtype'], reds='auto') + + +def accumulate(space, func, w_arr, axis, calc_dtype, w_out, identity): + out_iter, out_state = w_out.create_iter() + arr_shape = w_arr.get_shape() + temp_shape = arr_shape[:axis] + arr_shape[axis + 1:] + temp = W_NDimArray.from_shape(space, temp_shape, calc_dtype, w_instance=w_arr) + temp_iter = AxisIter(temp.implementation, w_arr.get_shape(), axis) + temp_state = temp_iter.reset() + arr_iter, arr_state = w_arr.create_iter() + arr_iter.track_index = False + if identity is not None: + identity = identity.convert_to(space, calc_dtype) + shapelen = len(arr_shape) + while not out_iter.done(out_state): + accumulate_driver.jit_merge_point(shapelen=shapelen, func=func, + calc_dtype=calc_dtype) + w_item = arr_iter.getitem(arr_state).convert_to(space, calc_dtype) + arr_state = arr_iter.next(arr_state) + + out_indices = out_iter.indices(out_state) + if out_indices[axis] == 0: + if identity is not None: + w_item = func(calc_dtype, identity, w_item) + else: + cur_value = temp_iter.getitem(temp_state) + w_item = func(calc_dtype, cur_value, w_item) + + out_iter.setitem(out_state, w_item) + out_state = out_iter.next(out_state) + temp_iter.setitem(temp_state, w_item) + temp_state = temp_iter.next(temp_state) + return w_out def fill(arr, box): arr_iter, arr_state = arr.create_iter() @@ -298,64 +411,56 @@ state = x_state return out -axis_reduce_driver = jit.JitDriver(name='numpy_axis_reduce', - greens=['shapelen', 'func', 'dtype'], - reds='auto') - -def do_axis_reduce(space, shape, func, arr, dtype, axis, out, identity, cumulative, - temp): - out_iter = AxisIter(out.implementation, arr.get_shape(), axis, cumulative) - out_state = out_iter.reset() - if cumulative: - temp_iter = AxisIter(temp.implementation, arr.get_shape(), axis, False) - temp_state = temp_iter.reset() - else: - temp_iter = out_iter # hack - temp_state = out_state - arr_iter, arr_state = arr.create_iter() - arr_iter.track_index = False - if identity is not 
None: - identity = identity.convert_to(space, dtype) - shapelen = len(shape) - while not out_iter.done(out_state): - axis_reduce_driver.jit_merge_point(shapelen=shapelen, func=func, - dtype=dtype) - w_val = arr_iter.getitem(arr_state).convert_to(space, dtype) - arr_state = arr_iter.next(arr_state) - - out_indices = out_iter.indices(out_state) - if out_indices[axis] == 0: - if identity is not None: - w_val = func(dtype, identity, w_val) - else: - cur = temp_iter.getitem(temp_state) - w_val = func(dtype, cur, w_val) - - out_iter.setitem(out_state, w_val) - out_state = out_iter.next(out_state) - if cumulative: - temp_iter.setitem(temp_state, w_val) - temp_state = temp_iter.next(temp_state) - else: - temp_state = out_state - return out - def _new_argmin_argmax(op_name): arg_driver = jit.JitDriver(name='numpy_' + op_name, greens = ['shapelen', 'dtype'], reds = 'auto') + arg_flat_driver = jit.JitDriver(name='numpy_flat_' + op_name, + greens = ['shapelen', 'dtype'], + reds = 'auto') - def argmin_argmax(arr): + def argmin_argmax(space, w_arr, w_out, axis): + from pypy.module.micronumpy.descriptor import get_dtype_cache + dtype = w_arr.get_dtype() + shapelen = len(w_arr.get_shape()) + axis_flags = [False] * shapelen + axis_flags[axis] = True + inner_iter, outer_iter = split_iter(w_arr.implementation, axis_flags) + outer_state = outer_iter.reset() + out_iter, out_state = w_out.create_iter() + while not outer_iter.done(outer_state): + inner_state = inner_iter.reset() + inner_state.offset = outer_state.offset + cur_best = inner_iter.getitem(inner_state) + inner_state = inner_iter.next(inner_state) + result = 0 + idx = 1 + while not inner_iter.done(inner_state): + arg_driver.jit_merge_point(shapelen=shapelen, dtype=dtype) + w_val = inner_iter.getitem(inner_state) + new_best = getattr(dtype.itemtype, op_name)(cur_best, w_val) + if dtype.itemtype.ne(new_best, cur_best): + result = idx + cur_best = new_best + inner_state = inner_iter.next(inner_state) + idx += 1 + result = 
get_dtype_cache(space).w_longdtype.box(result) + out_iter.setitem(out_state, result) + out_state = out_iter.next(out_state) + outer_state = outer_iter.next(outer_state) + return w_out + + def argmin_argmax_flat(w_arr): result = 0 idx = 1 - dtype = arr.get_dtype() - iter, state = arr.create_iter() + dtype = w_arr.get_dtype() + iter, state = w_arr.create_iter() cur_best = iter.getitem(state) state = iter.next(state) - shapelen = len(arr.get_shape()) + shapelen = len(w_arr.get_shape()) while not iter.done(state): - arg_driver.jit_merge_point(shapelen=shapelen, dtype=dtype) + arg_flat_driver.jit_merge_point(shapelen=shapelen, dtype=dtype) w_val = iter.getitem(state) new_best = getattr(dtype.itemtype, op_name)(cur_best, w_val) if dtype.itemtype.ne(new_best, cur_best): @@ -364,9 +469,10 @@ state = iter.next(state) idx += 1 return result - return argmin_argmax -argmin = _new_argmin_argmax('min') -argmax = _new_argmin_argmax('max') + + return argmin_argmax, argmin_argmax_flat +argmin, argmin_flat = _new_argmin_argmax('min') +argmax, argmax_flat = _new_argmin_argmax('max') dot_driver = jit.JitDriver(name = 'numpy_dot', greens = ['dtype'], diff --git a/pypy/module/micronumpy/ndarray.py b/pypy/module/micronumpy/ndarray.py --- a/pypy/module/micronumpy/ndarray.py +++ b/pypy/module/micronumpy/ndarray.py @@ -23,6 +23,8 @@ get_shape_from_iterable, shape_agreement, shape_agreement_multiple, is_c_contiguous, is_f_contiguous, calc_strides, new_view) from pypy.module.micronumpy.casting import can_cast_array +from pypy.module.micronumpy.descriptor import get_dtype_cache + def _match_dot_shapes(space, left, right): @@ -484,7 +486,7 @@ return self.implementation.swapaxes(space, self, axis1, axis2) def descr_nonzero(self, space): - index_type = descriptor.get_dtype_cache(space).w_int64dtype + index_type = get_dtype_cache(space).w_int64dtype return self.implementation.nonzero(space, index_type) def descr_tolist(self, space): @@ -544,8 +546,10 @@ def descr_set_flatiter(self, space, w_obj): 
iter, state = self.create_iter() dtype = self.get_dtype() - arr = convert_to_array(space, w_obj) - loop.flatiter_setitem(space, dtype, arr, iter, state, 1, iter.size) + w_arr = convert_to_array(space, w_obj) + if dtype.is_record(): + return self.implementation.setslice(space, w_arr) + loop.flatiter_setitem(space, dtype, w_arr, iter, state, 1, iter.size) def descr_get_flatiter(self, space): from .flatiter import W_FlatIterator @@ -810,7 +814,7 @@ if self.get_dtype().is_bool(): # numpy promotes bool.round() to float16. Go figure. w_out = W_NDimArray.from_shape(space, self.get_shape(), - descriptor.get_dtype_cache(space).w_float16dtype) + get_dtype_cache(space).w_float16dtype) else: w_out = None elif not isinstance(w_out, W_NDimArray): @@ -818,7 +822,7 @@ "return arrays must be of ArrayType")) out = descriptor.dtype_agreement(space, [self], self.get_shape(), w_out) if out.get_dtype().is_bool() and self.get_dtype().is_bool(): - calc_dtype = descriptor.get_dtype_cache(space).w_longdtype + calc_dtype = get_dtype_cache(space).w_longdtype else: calc_dtype = out.get_dtype() @@ -837,7 +841,7 @@ raise oefmt(space.w_ValueError, "a must be a 1-d array") v = convert_to_array(space, w_v) ret = W_NDimArray.from_shape( - space, v.get_shape(), descriptor.get_dtype_cache(space).w_longdtype) + space, v.get_shape(), get_dtype_cache(space).w_longdtype) if side == NPY.SEARCHLEFT: binsearch = loop.binsearch_left else: @@ -1145,35 +1149,46 @@ # ----------------------- reduce ------------------------------- - def _reduce_ufunc_impl(ufunc_name, cumulative=False, bool_result=False): + def _reduce_ufunc_impl(ufunc_name, name, bool_result=False): @unwrap_spec(keepdims=bool) def impl(self, space, w_axis=None, w_dtype=None, w_out=None, keepdims=False): out = out_converter(space, w_out) if bool_result: - w_dtype = descriptor.get_dtype_cache(space).w_booldtype + w_dtype = get_dtype_cache(space).w_booldtype return getattr(ufuncs.get(space), ufunc_name).reduce( 
_______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit