Author: Armin Rigo <ar...@tunes.org> Branch: py3.5 Changeset: r87055:50677989e503 Date: 2016-09-12 20:40 +0100 http://bitbucket.org/pypy/pypy/changeset/50677989e503/
Log: hg merge py3k diff too long, truncating to 2000 out of 2301 lines diff --git a/pypy/doc/release-pypy2.7-v5.4.1.rst b/pypy/doc/release-pypy2.7-v5.4.1.rst --- a/pypy/doc/release-pypy2.7-v5.4.1.rst +++ b/pypy/doc/release-pypy2.7-v5.4.1.rst @@ -9,16 +9,16 @@ this was unfortunately left out of 5.4.0. My apologies to the new contributors - * Allow tests run with `-A` to find `libm.so` even if it is a script not a + * Allow tests run with ``-A`` to find ``libm.so`` even if it is a script not a dynamically loadable file - * Bump `sys.setrecursionlimit()` when translating PyPy, for translating with CPython + * Bump ``sys.setrecursionlimit()`` when translating PyPy, for translating with CPython - * Tweak a float comparison with 0 in `backendopt.inline` to avoid rounding errors + * Tweak a float comparison with 0 in ``backendopt.inline`` to avoid rounding errors - * Fix for an issue where os.access() accepted a float for mode + * Fix for an issue for translating the sandbox - * Fix for and issue where `unicode.decode('utf8', 'custom_replace')` messed up + * Fix for and issue where ``unicode.decode('utf8', 'custom_replace')`` messed up the last byte of a unicode string sometimes * Update built-in cffi_ to version 1.8.1 diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -12,4 +12,7 @@ Implement PyObject_GetBuffer, PyMemoryView_GET_BUFFER, and handles memoryviews in numpypy - +.. branch: force-virtual-state +Improve merging of virtual states in the JIT in order to avoid jumping to the +preamble. Accomplished by allocating virtual objects where non-virtuals are +expected. diff --git a/pypy/module/__pypy__/test/test_bytebuffer.py b/pypy/module/__pypy__/test/test_bytebuffer.py --- a/pypy/module/__pypy__/test/test_bytebuffer.py +++ b/pypy/module/__pypy__/test/test_bytebuffer.py @@ -14,12 +14,9 @@ assert b[-3] == ord(b'+') exc = raises(ValueError, "b[3:5] = b'abc'") assert str(exc.value) == "cannot modify size of memoryview object" - raises(NotImplementedError, "b[3:7:2] = b'abc'") b = bytebuffer(10) b[1:3] = b'xy' assert bytes(b) == b"\x00xy" + b"\x00" * 7 - # XXX: supported in 3.3 - raises(NotImplementedError, "b[4:8:2] = b'zw'") - #b[4:8:2] = b'zw' - #assert bytes(b) == b"\x00xy\x00z\x00w" + b"\x00" * 3 + b[4:8:2] = b'zw' + assert bytes(b) == b"\x00xy\x00z\x00w" + b"\x00" * 3 diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py --- a/pypy/module/cpyext/api.py +++ b/pypy/module/cpyext/api.py @@ -118,7 +118,7 @@ constant_names = """ Py_TPFLAGS_READY Py_TPFLAGS_READYING Py_TPFLAGS_HAVE_GETCHARBUFFER -METH_COEXIST METH_STATIC METH_CLASS Py_TPFLAGS_BASETYPE +METH_COEXIST METH_STATIC METH_CLASS Py_TPFLAGS_BASETYPE Py_MAX_FMT METH_NOARGS METH_VARARGS METH_KEYWORDS METH_O Py_TPFLAGS_HAVE_INPLACEOPS Py_TPFLAGS_HEAPTYPE Py_TPFLAGS_HAVE_CLASS Py_TPFLAGS_HAVE_NEWBUFFER Py_LT Py_LE Py_EQ Py_NE Py_GT Py_GE Py_TPFLAGS_CHECKTYPES Py_MAX_NDIMS @@ -647,7 +647,7 @@ ('format', rffi.CCHARP), ('shape', Py_ssize_tP), ('strides', Py_ssize_tP), - ('_format', rffi.UCHAR), + ('_format', rffi.CFixedArray(rffi.UCHAR, Py_MAX_FMT)), ('_shape', rffi.CFixedArray(Py_ssize_t, Py_MAX_NDIMS)), ('_strides', rffi.CFixedArray(Py_ssize_t, Py_MAX_NDIMS)), ('suboffsets', Py_ssize_tP), diff --git a/pypy/module/cpyext/buffer.py b/pypy/module/cpyext/buffer.py --- a/pypy/module/cpyext/buffer.py +++ b/pypy/module/cpyext/buffer.py @@ -1,66 +1,8 @@ -from pypy.interpreter.error import oefmt -from rpython.rtyper.lltypesystem import rffi, lltype +from rpython.rtyper.lltypesystem import rffi from rpython.rlib import buffer -from rpython.rlib.rarithmetic import widen from pypy.module.cpyext.api import ( - cpython_api, CANNOT_FAIL, Py_buffer) -from pypy.module.cpyext.pyobject import PyObject, Py_DecRef - -def _IsFortranContiguous(view): - ndim = widen(view.c_ndim) - if ndim == 0: - return 1 - if not view.c_strides: - return ndim == 1 - sd = view.c_itemsize - if ndim == 1: - return view.c_shape[0] == 1 or sd == view.c_strides[0] - for i in range(view.c_ndim): - dim = view.c_shape[i] - if dim == 0: - return 1 - if view.c_strides[i] != sd: - return 0 - sd *= dim - return 1 - -def _IsCContiguous(view): - ndim = widen(view.c_ndim) - if ndim == 0: - return 1 - if not view.c_strides: - return ndim == 1 - sd = view.c_itemsize - if ndim == 1: - return view.c_shape[0] == 1 or sd == view.c_strides[0] - for i in range(ndim - 1, -1, -1): - dim = view.c_shape[i] - if dim == 0: - return 1 - if view.c_strides[i] != sd: - return 0 - sd *= dim - return 1 - - -@cpython_api([lltype.Ptr(Py_buffer), lltype.Char], rffi.INT_real, error=CANNOT_FAIL) -def PyBuffer_IsContiguous(space, view, fort): - """Return 1 if the memory defined by the view is C-style (fortran is - 'C') or Fortran-style (fortran is 'F') contiguous or either one - (fortran is 'A'). Return 0 otherwise.""" - # traverse the strides, checking for consistent stride increases from - # right-to-left (c) or left-to-right (fortran). Copied from cpython - if not view.c_suboffsets: - return 0 - if (fort == 'C'): - return _IsCContiguous(view) - elif (fort == 'F'): - return _IsFortranContiguous(view) - elif (fort == 'A'): - return (_IsCContiguous(view) or _IsFortranContiguous(view)) - return 0 - - + cpython_api, CANNOT_FAIL, Py_TPFLAGS_HAVE_NEWBUFFER) +from pypy.module.cpyext.pyobject import PyObject class CBuffer(buffer.Buffer): diff --git a/pypy/module/cpyext/include/object.h b/pypy/module/cpyext/include/object.h --- a/pypy/module/cpyext/include/object.h +++ b/pypy/module/cpyext/include/object.h @@ -133,6 +133,7 @@ /* Py3k buffer interface, adapted for PyPy */ #define Py_MAX_NDIMS 32 +#define Py_MAX_FMT 5 typedef struct bufferinfo { void *buf; PyObject *obj; /* owned reference */ @@ -147,7 +148,7 @@ Py_ssize_t *shape; Py_ssize_t *strides; Py_ssize_t *suboffsets; /* alway NULL for app-level objects*/ - unsigned char _format; + unsigned char _format[Py_MAX_FMT]; Py_ssize_t _strides[Py_MAX_NDIMS]; Py_ssize_t _shape[Py_MAX_NDIMS]; /* static store for shape and strides of diff --git a/pypy/module/cpyext/memoryobject.py b/pypy/module/cpyext/memoryobject.py --- a/pypy/module/cpyext/memoryobject.py +++ b/pypy/module/cpyext/memoryobject.py @@ -1,7 +1,8 @@ from pypy.module.cpyext.api import (cpython_api, Py_buffer, CANNOT_FAIL, - Py_MAX_NDIMS, build_type_checkers, Py_ssize_tP) + Py_MAX_FMT, Py_MAX_NDIMS, build_type_checkers, Py_ssize_tP) from pypy.module.cpyext.pyobject import PyObject, make_ref, incref from rpython.rtyper.lltypesystem import lltype, rffi +from rpython.rlib.rarithmetic import widen from pypy.objspace.std.memoryobject import W_MemoryView from pypy.interpreter.error import oefmt @@ -10,6 +11,90 @@ from pypy.objspace.std.memoryobject import W_MemoryView PyMemoryView_Check, PyMemoryView_CheckExact = build_type_checkers("MemoryView", "w_memoryview") +def fill_Py_buffer(space, buf, view): + # c_buf, c_obj have been filled in + ndim = buf.getndim() + view.c_len = buf.getlength() + view.c_itemsize = buf.getitemsize() + rffi.setintfield(view, 'c_ndim', ndim) + view.c_format = rffi.cast(rffi.CCHARP, view.c__format) + view.c_shape = rffi.cast(Py_ssize_tP, view.c__shape) + view.c_strides = rffi.cast(Py_ssize_tP, view.c__strides) + fmt = buf.getformat() + n = Py_MAX_FMT - 1 # NULL terminated buffer + if len(fmt) > n: + ### WARN? + pass + else: + n = len(fmt) + for i in range(n): + if ord(fmt[i]) > 255: + view.c_format[i] = '*' + else: + view.c_format[i] = fmt[i] + view.c_format[n] = '\x00' + shape = buf.getshape() + strides = buf.getstrides() + for i in range(ndim): + view.c_shape[i] = shape[i] + view.c_strides[i] = strides[i] + view.c_suboffsets = lltype.nullptr(Py_ssize_tP.TO) + view.c_internal = lltype.nullptr(rffi.VOIDP.TO) + return 0 + +def _IsFortranContiguous(view): + ndim = widen(view.c_ndim) + if ndim == 0: + return 1 + if not view.c_strides: + return ndim == 1 + sd = view.c_itemsize + if ndim == 1: + return view.c_shape[0] == 1 or sd == view.c_strides[0] + for i in range(view.c_ndim): + dim = view.c_shape[i] + if dim == 0: + return 1 + if view.c_strides[i] != sd: + return 0 + sd *= dim + return 1 + +def _IsCContiguous(view): + ndim = widen(view.c_ndim) + if ndim == 0: + return 1 + if not view.c_strides: + return ndim == 1 + sd = view.c_itemsize + if ndim == 1: + return view.c_shape[0] == 1 or sd == view.c_strides[0] + for i in range(ndim - 1, -1, -1): + dim = view.c_shape[i] + if dim == 0: + return 1 + if view.c_strides[i] != sd: + return 0 + sd *= dim + return 1 + +@cpython_api([lltype.Ptr(Py_buffer), lltype.Char], rffi.INT_real, error=CANNOT_FAIL) +def PyBuffer_IsContiguous(space, view, fort): + """Return 1 if the memory defined by the view is C-style (fortran is + 'C') or Fortran-style (fortran is 'F') contiguous or either one + (fortran is 'A'). Return 0 otherwise.""" + # traverse the strides, checking for consistent stride increases from + # right-to-left (c) or left-to-right (fortran). Copied from cpython + if not view.c_suboffsets: + return 0 + if (fort == 'C'): + return _IsCContiguous(view) + elif (fort == 'F'): + return _IsFortranContiguous(view) + elif (fort == 'A'): + return (_IsCContiguous(view) or _IsFortranContiguous(view)) + return 0 + @cpython_api([PyObject], PyObject) def PyMemoryView_FromObject(space, w_obj): return space.call_method(space.builtin, "memoryview", w_obj) @@ -42,20 +127,7 @@ view.c_obj = make_ref(space, w_s) rffi.setintfield(view, 'c_readonly', 1) isstr = True - view.c_len = w_obj.getlength() - view.c_itemsize = w_obj.buf.getitemsize() - rffi.setintfield(view, 'c_ndim', ndim) - view.c__format = rffi.cast(rffi.UCHAR, w_obj.buf.getformat()) - view.c_format = rffi.cast(rffi.CCHARP, view.c__format) - view.c_shape = rffi.cast(Py_ssize_tP, view.c__shape) - view.c_strides = rffi.cast(Py_ssize_tP, view.c__strides) - shape = w_obj.buf.getshape() - strides = w_obj.buf.getstrides() - for i in range(ndim): - view.c_shape[i] = shape[i] - view.c_strides[i] = strides[i] - view.c_suboffsets = lltype.nullptr(Py_ssize_tP.TO) - view.c_internal = lltype.nullptr(rffi.VOIDP.TO) + fill_Py_buffer(space, w_obj.buf, view) return view @cpython_api([lltype.Ptr(Py_buffer)], PyObject) diff --git a/pypy/module/cpyext/sequence.py b/pypy/module/cpyext/sequence.py --- a/pypy/module/cpyext/sequence.py +++ b/pypy/module/cpyext/sequence.py @@ -43,16 +43,20 @@ def PySequence_Fast(space, w_obj, m): """Returns the sequence o as a tuple, unless it is already a tuple or list, in which case o is returned. Use PySequence_Fast_GET_ITEM() to access the - members of the result. Returns NULL on failure. If the object is not a - sequence, raises TypeError with m as the message text.""" + members of the result. Returns NULL on failure. If the object cannot be + converted to a sequence, and raises a TypeError, raise a new TypeError with + m as the message text. If the conversion otherwise, fails, reraise the + original exception""" if isinstance(w_obj, W_ListObject): # make sure we can return a borrowed obj from PySequence_Fast_GET_ITEM w_obj.convert_to_cpy_strategy(space) return w_obj try: return W_ListObject.newlist_cpyext(space, space.listview(w_obj)) - except OperationError: - raise OperationError(space.w_TypeError, space.wrap(rffi.charp2str(m))) + except OperationError as e: + if e.match(space, space.w_TypeError): + raise OperationError(space.w_TypeError, space.wrap(rffi.charp2str(m))) + raise e @cpython_api([rffi.VOIDP, Py_ssize_t], PyObject, result_borrowed=True) def PySequence_Fast_GET_ITEM(space, w_obj, index): diff --git a/pypy/module/cpyext/test/buffer_test.c b/pypy/module/cpyext/test/buffer_test.c --- a/pypy/module/cpyext/test/buffer_test.c +++ b/pypy/module/cpyext/test/buffer_test.c @@ -192,10 +192,10 @@ PyObject* obj = PyTuple_GetItem(args, 0); PyObject* memoryview = PyMemoryView_FromObject(obj); if (memoryview == NULL) - return PyInt_FromLong(-1); + return PyLong_FromLong(-1); view = PyMemoryView_GET_BUFFER(memoryview); Py_DECREF(memoryview); - return PyInt_FromLong(view->len); + return PyLong_FromLong(view->len); } /* Copied from numpy tests */ diff --git a/pypy/module/cpyext/test/test_api.py b/pypy/module/cpyext/test/test_api.py --- a/pypy/module/cpyext/test/test_api.py +++ b/pypy/module/cpyext/test/test_api.py @@ -1,5 +1,5 @@ import py, pytest -from rpython.rtyper.lltypesystem import rffi, lltype +from rpython.rtyper.lltypesystem import lltype from pypy.interpreter.baseobjspace import W_Root from pypy.module.cpyext.state import State from pypy.module.cpyext import api diff --git a/pypy/module/cpyext/test/test_cpyext.py b/pypy/module/cpyext/test/test_cpyext.py --- a/pypy/module/cpyext/test/test_cpyext.py +++ b/pypy/module/cpyext/test/test_cpyext.py @@ -18,6 +18,8 @@ from .support import c_compile +only_pypy ="config.option.runappdirect and '__pypy__' not in sys.builtin_module_names" + @api.cpython_api([], api.PyObject) def PyPy_Crash1(space): 1/0 @@ -53,7 +55,48 @@ libraries=libraries) return soname -def compile_extension_module(space, modname, include_dirs=[], +class SystemCompilationInfo(object): + """Bundles all the generic information required to compile extensions. + + Note: here, 'system' means OS + target interpreter + test config + ... + """ + def __init__(self, include_extra=None, compile_extra=None, link_extra=None, + extra_libs=None, ext=None): + self.include_extra = include_extra or [] + self.compile_extra = compile_extra + self.link_extra = link_extra + self.extra_libs = extra_libs + self.ext = ext + +def get_cpyext_info(space): + from pypy.module.imp.importing import get_so_extension + state = space.fromcache(State) + api_library = state.api_lib + if sys.platform == 'win32': + libraries = [api_library] + # '%s' undefined; assuming extern returning int + compile_extra = ["/we4013"] + # prevent linking with PythonXX.lib + w_maj, w_min = space.fixedview(space.sys.get('version_info'), 5)[:2] + link_extra = ["/NODEFAULTLIB:Python%d%d.lib" % + (space.int_w(w_maj), space.int_w(w_min))] + else: + libraries = [] + if sys.platform.startswith('linux'): + compile_extra = [ + "-Werror", "-g", "-O0", "-Wp,-U_FORTIFY_SOURCE", "-fPIC"] + link_extra = ["-g"] + else: + compile_extra = link_extra = None + return SystemCompilationInfo( + include_extra=api.include_dirs, + compile_extra=compile_extra, + link_extra=link_extra, + extra_libs=libraries, + ext=get_so_extension(space)) + + +def compile_extension_module(sys_info, modname, include_dirs=[], source_files=None, source_strings=None): """ Build an extension module and return the filename of the resulting native @@ -65,35 +108,15 @@ Any extra keyword arguments are passed on to ExternalCompilationInfo to build the module (so specify your source with one of those). """ - state = space.fromcache(State) - api_library = state.api_lib - if sys.platform == 'win32': - libraries = [api_library] - # '%s' undefined; assuming extern returning int - compile_extra = ["/we4013"] - # prevent linking with PythonXX.lib - w_maj, w_min = space.fixedview(space.sys.get('version_info'), 5)[:2] - link_extra = ["/NODEFAULTLIB:Python%d%d.lib" % - (space.int_w(w_maj), space.int_w(w_min))] - else: - libraries = [] - if sys.platform.startswith('linux'): - compile_extra = ["-Werror", "-g", "-O0", "-Wp,-U_FORTIFY_SOURCE", "-fPIC"] - link_extra = ["-g"] - else: - compile_extra = link_extra = None - modname = modname.split('.')[-1] soname = create_so(modname, - include_dirs=api.include_dirs + include_dirs, - source_files=source_files, - source_strings=source_strings, - compile_extra=compile_extra, - link_extra=link_extra, - libraries=libraries) - from pypy.module.imp.importing import get_so_extension - ext = get_so_extension(space) - pydname = soname.new(purebasename=modname, ext=ext) + include_dirs=sys_info.include_extra + include_dirs, + source_files=source_files, + source_strings=source_strings, + compile_extra=sys_info.compile_extra, + link_extra=sys_info.link_extra, + libraries=sys_info.extra_libs) + pydname = soname.new(purebasename=modname, ext=sys_info.ext) soname.rename(pydname) return str(pydname) @@ -106,18 +129,8 @@ raise RuntimeError("This interpreter does not define a filename " "suffix for C extensions!") -def compile_extension_module_applevel(space, modname, include_dirs=[], - source_files=None, source_strings=None): - """ - Build an extension module and return the filename of the resulting native - code file. - - modname is the name of the module, possibly including dots if it is a module - inside a package. - - Any extra keyword arguments are passed on to ExternalCompilationInfo to - build the module (so specify your source with one of those). - """ +def get_sys_info_app(): + from distutils.sysconfig import get_python_inc if sys.platform == 'win32': compile_extra = ["/we4013"] link_extra = ["/LIBPATH:" + os.path.join(sys.exec_prefix, 'libs')] @@ -128,18 +141,13 @@ compile_extra = [ "-O0", "-g", "-Werror=implicit-function-declaration", "-fPIC"] link_extra = None + ext = get_so_suffix() + return SystemCompilationInfo( + include_extra=[get_python_inc()], + compile_extra=compile_extra, + link_extra=link_extra, + ext=get_so_suffix()) - modname = modname.split('.')[-1] - soname = create_so(modname, - include_dirs=[space.include_dir] + include_dirs, - source_files=source_files, - source_strings=source_strings, - compile_extra=compile_extra, - link_extra=link_extra) - ext = get_so_suffix() - pydname = soname.new(purebasename=modname, ext=ext) - soname.rename(pydname) - return str(pydname) def freeze_refcnts(self): rawrefcount._dont_free_any_more() @@ -154,9 +162,7 @@ class FakeSpace(object): """Like TinyObjSpace, but different""" def __init__(self, config): - from distutils.sysconfig import get_python_inc self.config = config - self.include_dir = get_python_inc() def passthrough(self, arg): return arg @@ -249,9 +255,9 @@ # enabled automatically by pypy.conftest. return leaking +@pytest.mark.xfail(reason="Skipped until other tests in this file are unskipped") class AppTestApi(LeakCheckingTest): def setup_class(cls): - skip("Skipped until other tests in this file are unskipped") from rpython.rlib.clibffi import get_libc_name if cls.runappdirect: cls.libc = get_libc_name() @@ -273,11 +279,11 @@ "the test actually passed in the first place; if it failed " "it is likely to reach this place.") - @pytest.mark.skipif('__pypy__' not in sys.builtin_module_names, reason='pypy only test') + @pytest.mark.skipif(only_pypy, reason='pypy only test') def test_only_import(self): import cpyext - @pytest.mark.skipif('__pypy__' not in sys.builtin_module_names, reason='pypy only test') + @pytest.mark.skipif(only_pypy, reason='pypy only test') def test_load_error(self): import cpyext raises(ImportError, cpyext.load_module, "missing.file", "foo") @@ -305,26 +311,23 @@ def setup_method(self, func): @gateway.unwrap_spec(name=str) def compile_module(space, name, - w_separate_module_files=None, - w_separate_module_sources=None): + w_source_files=None, + w_source_strings=None): """ Build an extension module linked against the cpyext api library. """ - if not space.is_none(w_separate_module_files): - separate_module_files = space.unwrap(w_separate_module_files) - assert separate_module_files is not None + if not space.is_none(w_source_files): + source_files = space.unwrap(w_source_files) else: - separate_module_files = [] - if not space.is_none(w_separate_module_sources): - separate_module_sources = space.listview_bytes( - w_separate_module_sources) - assert separate_module_sources is not None + source_files = None + if not space.is_none(w_source_strings): + source_strings = space.listview_bytes(w_source_strings) else: - separate_module_sources = [] - pydname = self.compile_extension_module( - space, name, - source_files=separate_module_files, - source_strings=separate_module_sources) + source_strings = None + pydname = compile_extension_module( + self.sys_info, name, + source_files=source_files, + source_strings=source_strings) return space.wrap(pydname) @gateway.unwrap_spec(name=str, init='str_or_None', body=str, @@ -375,7 +378,7 @@ filename = py.path.local(pypydir) / 'module' \ / 'cpyext'/ 'test' / (filename + ".c") kwds = dict(source_files=[filename]) - mod = self.compile_extension_module(space, name, + mod = compile_extension_module(self.sys_info, name, include_dirs=include_dirs, **kwds) if load_it: @@ -475,11 +478,11 @@ return run def wrap(func): return func - self.compile_extension_module = compile_extension_module_applevel + self.sys_info = get_sys_info_app() else: interp2app = gateway.interp2app wrap = self.space.wrap - self.compile_extension_module = compile_extension_module + self.sys_info = get_cpyext_info(self.space) self.w_compile_module = wrap(interp2app(compile_module)) self.w_import_module = wrap(interp2app(import_module)) self.w_reimport_module = wrap(interp2app(reimport_module)) @@ -629,10 +632,10 @@ skip('record_imported_module not supported in runappdirect mode') # Build the extensions. banana = self.compile_module( - "apple.banana", separate_module_files=[self.here + 'banana.c']) + "apple.banana", source_files=[self.here + 'banana.c']) self.record_imported_module("apple.banana") date = self.compile_module( - "cherry.date", separate_module_files=[self.here + 'date.c']) + "cherry.date", source_files=[self.here + 'date.c']) self.record_imported_module("cherry.date") # Set up some package state so that the extensions can actually be @@ -901,7 +904,7 @@ ]) raises(SystemError, mod.newexc, "name", Exception, {}) - @pytest.mark.skipif('__pypy__' not in sys.builtin_module_names, reason='pypy specific test') + @pytest.mark.skipif(only_pypy, reason='pypy specific test') def test_hash_pointer(self): mod = self.import_extension('foo', [ ('get_hash', 'METH_NOARGS', @@ -952,7 +955,7 @@ print(p) assert 'py' in p - @pytest.mark.skipif('__pypy__' not in sys.builtin_module_names, reason='pypy only test') + @pytest.mark.skipif(only_pypy, reason='pypy only test') def test_get_version(self): mod = self.import_extension('foo', [ ('get_version', 'METH_NOARGS', diff --git a/pypy/module/cpyext/test/test_memoryobject.py b/pypy/module/cpyext/test/test_memoryobject.py --- a/pypy/module/cpyext/test/test_memoryobject.py +++ b/pypy/module/cpyext/test/test_memoryobject.py @@ -1,6 +1,9 @@ +import pytest +from rpython.rtyper.lltypesystem import rffi from pypy.module.cpyext.test.test_api import BaseApiTest from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase from rpython.rlib.buffer import StringBuffer + class TestMemoryViewObject(BaseApiTest): def test_fromobject(self, space, api): w_hello = space.newbytes("hello") @@ -15,8 +18,12 @@ w_buf = space.newbuffer(StringBuffer("hello")) w_memoryview = api.PyMemoryView_FromObject(w_buf) w_view = api.PyMemoryView_GET_BUFFER(w_memoryview) - ndim = w_view.c_ndim - assert ndim == 1 + assert w_view.c_ndim == 1 + f = rffi.charp2str(w_view.c_format) + assert f == 'B' + assert w_view.c_shape[0] == 5 + assert w_view.c_strides[0] == 1 + assert w_view.c_len == 5 class AppTestPyBuffer_FillInfo(AppTestCpythonExtensionBase): def test_fillWithObject(self): @@ -76,18 +83,15 @@ viewlen = module.test_buffer(arr) assert viewlen == y.itemsize * len(y) + @pytest.mark.skipif(True, reason="no _numpypy on py3k") def test_buffer_info(self): from _numpypy import multiarray as np module = self.import_module(name='buffer_test') get_buffer_info = module.get_buffer_info - # test_export_flags from numpy test_multiarray raises(ValueError, get_buffer_info, np.arange(5)[::2], ('SIMPLE',)) - # test_relaxed_strides from numpy test_multiarray - arr = np.zeros((1, 10)) - if arr.flags.f_contiguous: - shape, strides = get_buffer_info(arr, ['F_CONTIGUOUS']) - assert strides[0] == 8 - arr = np.ones((10, 1), order='F') - shape, strides = get_buffer_info(arr, ['C_CONTIGUOUS']) - assert strides[-1] == 8 - + arr = np.zeros((1, 10), order='F') + shape, strides = get_buffer_info(arr, ['F_CONTIGUOUS']) + assert strides[0] == 8 + arr = np.zeros((10, 1), order='C') + shape, strides = get_buffer_info(arr, ['C_CONTIGUOUS']) + assert strides[-1] == 8 diff --git a/pypy/module/cpyext/test/test_sequence.py b/pypy/module/cpyext/test/test_sequence.py --- a/pypy/module/cpyext/test/test_sequence.py +++ b/pypy/module/cpyext/test/test_sequence.py @@ -267,3 +267,31 @@ assert module.test_fast_sequence(s[0:-1]) assert module.test_fast_sequence(s[::-1]) + def test_fast_keyerror(self): + module = self.import_extension('foo', [ + ("test_fast_sequence", "METH_VARARGS", + """ + PyObject *foo; + PyObject * seq = PyTuple_GetItem(args, 0); + if (seq == NULL) + Py_RETURN_NONE; + foo = PySequence_Fast(seq, "Could not convert object to sequence"); + if (foo != NULL) + { + return foo; + } + if (PyErr_ExceptionMatches(PyExc_KeyError)) { + PyErr_Clear(); + return PyBool_FromLong(1); + } + return NULL; + """)]) + class Map(object): + def __len__(self): + return 1 + + def __getitem__(self, index): + raise KeyError() + + assert module.test_fast_sequence(Map()) is True + diff --git a/pypy/module/cpyext/test/test_thread.py b/pypy/module/cpyext/test/test_thread.py --- a/pypy/module/cpyext/test/test_thread.py +++ b/pypy/module/cpyext/test/test_thread.py @@ -1,12 +1,13 @@ import sys -import py, pytest +import pytest from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase +only_pypy ="config.option.runappdirect and '__pypy__' not in sys.builtin_module_names" class AppTestThread(AppTestCpythonExtensionBase): - @pytest.mark.skipif('__pypy__' not in sys.builtin_module_names, reason='pypy only test') + @pytest.mark.skipif(only_pypy, reason='pypy only test') def test_get_thread_ident(self): module = self.import_extension('foo', [ ("get_thread_ident", "METH_NOARGS", @@ -33,7 +34,7 @@ assert results[0][0] != results[1][0] - @pytest.mark.skipif('__pypy__' not in sys.builtin_module_names, reason='pypy only test') + @pytest.mark.skipif(only_pypy, reason='pypy only test') def test_acquire_lock(self): module = self.import_extension('foo', [ ("test_acquire_lock", "METH_NOARGS", @@ -57,7 +58,7 @@ ]) module.test_acquire_lock() - @pytest.mark.skipif('__pypy__' not in sys.builtin_module_names, reason='pypy only test') + @pytest.mark.skipif(only_pypy, reason='pypy only test') def test_release_lock(self): module = self.import_extension('foo', [ ("test_release_lock", "METH_NOARGS", @@ -79,7 +80,7 @@ ]) module.test_release_lock() - @pytest.mark.skipif('__pypy__' not in sys.builtin_module_names, reason='pypy only test') + @pytest.mark.skipif(only_pypy, reason='pypy only test') def test_tls(self): module = self.import_extension('foo', [ ("create_key", "METH_NOARGS", diff --git a/pypy/module/cpyext/test/test_version.py b/pypy/module/cpyext/test/test_version.py --- a/pypy/module/cpyext/test/test_version.py +++ b/pypy/module/cpyext/test/test_version.py @@ -3,6 +3,7 @@ import py, pytest from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase +only_pypy ="config.option.runappdirect and '__pypy__' not in sys.builtin_module_names" def test_pragma_version(): from pypy.module.sys.version import CPYTHON_VERSION @@ -41,11 +42,9 @@ assert module.py_minor_version == sys.version_info.minor assert module.py_micro_version == sys.version_info.micro - #@pytest.mark.skipif('__pypy__' not in sys.builtin_module_names, reason='pypy only test') + @pytest.mark.skipif(only_pypy, reason='pypy only test') def test_pypy_versions(self): import sys - if '__pypy__' not in sys.builtin_module_names: - py.test.skip("pypy only test") init = """ if (Py_IsInitialized()) { PyObject *m = Py_InitModule("foo", NULL); diff --git a/pypy/module/imp/test/test_import.py b/pypy/module/imp/test/test_import.py --- a/pypy/module/imp/test/test_import.py +++ b/pypy/module/imp/test/test_import.py @@ -95,8 +95,9 @@ ) setuppkg("pkg.pkg2", a='', b='') setuppkg("pkg.withall", - __init__ = "__all__ = ['foobar']", - foobar = "found = 123") + __init__ = "__all__ = ['foobar', 'barbaz']", + foobar = "found = 123", + barbaz = "other = 543") setuppkg("pkg.withoutall", __init__ = "", foobar = "found = 123") @@ -724,6 +725,7 @@ d = {} exec("from pkg.withall import *", d) assert d["foobar"].found == 123 + assert d["barbaz"].other == 543 def test_import_star_does_not_find_submodules_without___all__(self): for case in ["not-imported-yet", "already-imported"]: diff --git a/pypy/module/micronumpy/ufuncs.py b/pypy/module/micronumpy/ufuncs.py --- a/pypy/module/micronumpy/ufuncs.py +++ b/pypy/module/micronumpy/ufuncs.py @@ -392,31 +392,39 @@ extobj_w = space.newlist([space.wrap(8192), space.wrap(0), space.w_None]) return extobj_w + +_reflected_ops = { + 'add': 'radd', + 'subtract': 'rsub', + 'multiply': 'rmul', + 'divide': 'rdiv', + 'true_divide': 'rtruediv', + 'floor_divide': 'rfloordiv', + 'remainder': 'rmod', + 'power': 'rpow', + 'left_shift': 'rlshift', + 'right_shift': 'rrshift', + 'bitwise_and': 'rand', + 'bitwise_xor': 'rxor', + 'bitwise_or': 'ror', + #/* Comparisons */ + 'equal': 'eq', + 'not_equal': 'ne', + 'greater': 'lt', + 'less': 'gt', + 'greater_equal': 'le', + 'less_equal': 'ge', +} + +for key, value in _reflected_ops.items(): + _reflected_ops[key] = "__" + value + "__" +del key +del value + def _has_reflected_op(space, w_obj, op): - refops ={ 'add': 'radd', - 'subtract': 'rsub', - 'multiply': 'rmul', - 'divide': 'rdiv', - 'true_divide': 'rtruediv', - 'floor_divide': 'rfloordiv', - 'remainder': 'rmod', - 'power': 'rpow', - 'left_shift': 'rlshift', - 'right_shift': 'rrshift', - 'bitwise_and': 'rand', - 'bitwise_xor': 'rxor', - 'bitwise_or': 'ror', - #/* Comparisons */ - 'equal': 'eq', - 'not_equal': 'ne', - 'greater': 'lt', - 'less': 'gt', - 'greater_equal': 'le', - 'less_equal': 'ge', - } - if op not in refops: + if op not in _reflected_ops: return False - return space.getattr(w_obj, space.wrap('__' + refops[op] + '__')) is not None + return space.getattr(w_obj, space.wrap(_reflected_ops[op])) is not None def safe_casting_mode(casting): assert casting is not None diff --git a/pypy/module/pypyjit/test_pypy_c/test_containers.py b/pypy/module/pypyjit/test_pypy_c/test_containers.py --- a/pypy/module/pypyjit/test_pypy_c/test_containers.py +++ b/pypy/module/pypyjit/test_pypy_c/test_containers.py @@ -67,7 +67,7 @@ p10 = call_r(ConstClass(ll_str__IntegerR_SignedConst_Signed), i5, descr=<Callr . i EF=3>) guard_no_exception(descr=...) guard_nonnull(p10, descr=...) - i12 = call_i(ConstClass(ll_strhash), p10, descr=<Calli . r EF=0>) + i12 = call_i(ConstClass(_ll_strhash__rpy_stringPtr), p10, descr=<Calli . r EF=0>) p13 = new(descr=...) p15 = new_array_clear(16, descr=<ArrayU 1>) {{{ diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -7,6 +7,7 @@ from rpython.rlib.runicode import ( make_unicode_escape_function, str_decode_ascii, str_decode_utf_8, unicode_encode_ascii, unicode_encode_utf_8, fast_str_decode_ascii) +from rpython.rlib import jit from pypy.interpreter import unicodehelper from pypy.interpreter.baseobjspace import W_Root @@ -25,7 +26,7 @@ class W_UnicodeObject(W_Root): import_from_mixin(StringMethods) - _immutable_fields_ = ['_value', '_utf8?'] + _immutable_fields_ = ['_value'] def __init__(self, unistr): assert isinstance(unistr, unicode) @@ -76,6 +77,8 @@ def unicode_w(self, space): return self._value + @jit.elidable + @jit.call_shortcut def identifier_w(self, space): identifier = self._utf8 if identifier is not None: diff --git a/rpython/jit/backend/arm/regalloc.py b/rpython/jit/backend/arm/regalloc.py --- a/rpython/jit/backend/arm/regalloc.py +++ b/rpython/jit/backend/arm/regalloc.py @@ -1002,6 +1002,9 @@ prepare_op_cond_call_gc_wb_array = prepare_op_cond_call_gc_wb def prepare_op_cond_call(self, op, fcond): + # XXX don't force the arguments to be loaded in specific + # locations before knowing if we can take the fast path + # XXX add cond_call_value support assert 2 <= op.numargs() <= 4 + 2 tmpreg = self.get_scratch_reg(INT, selected_reg=r.r4) v = op.getarg(1) diff --git a/rpython/jit/backend/llgraph/runner.py b/rpython/jit/backend/llgraph/runner.py --- a/rpython/jit/backend/llgraph/runner.py +++ b/rpython/jit/backend/llgraph/runner.py @@ -325,6 +325,7 @@ supports_longlong = r_uint is not r_ulonglong supports_singlefloats = True supports_guard_gc_type = True + supports_cond_call_value = True translate_support_code = False is_llgraph = True vector_extension = True @@ -1334,6 +1335,16 @@ # cond_call can't have a return value self.execute_call_n(calldescr, func, *args) + def execute_cond_call_value_i(self, calldescr, value, func, *args): + if not value: + value = self.execute_call_i(calldescr, func, *args) + return value + + def execute_cond_call_value_r(self, calldescr, value, func, *args): + if not value: + value = self.execute_call_r(calldescr, func, *args) + return value + def _execute_call(self, calldescr, func, *args): effectinfo = calldescr.get_extra_info() if effectinfo is not None and hasattr(effectinfo, 'oopspecindex'): diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py --- a/rpython/jit/backend/llsupport/regalloc.py +++ b/rpython/jit/backend/llsupport/regalloc.py @@ -759,6 +759,8 @@ if (opnum != rop.GUARD_TRUE and opnum != rop.GUARD_FALSE and opnum != rop.COND_CALL): return False + # NB: don't list COND_CALL_VALUE_I/R here, these two variants + # of COND_CALL don't accept a cc as input if next_op.getarg(0) is not op: return False if self.longevity[op][1] > i + 1: diff --git a/rpython/jit/backend/llsupport/rewrite.py b/rpython/jit/backend/llsupport/rewrite.py --- a/rpython/jit/backend/llsupport/rewrite.py +++ b/rpython/jit/backend/llsupport/rewrite.py @@ -11,7 +11,7 @@ from rpython.jit.backend.llsupport.symbolic import (WORD, get_array_token) from rpython.jit.backend.llsupport.descr import SizeDescr, ArrayDescr,\ - FLAG_POINTER + FLAG_POINTER, CallDescr from rpython.jit.metainterp.history import JitCellToken from rpython.jit.backend.llsupport.descr import (unpack_arraydescr, unpack_fielddescr, unpack_interiorfielddescr) @@ -370,7 +370,9 @@ self.consider_setfield_gc(op) elif op.getopnum() == rop.SETARRAYITEM_GC: self.consider_setarrayitem_gc(op) - # ---------- call assembler ----------- + # ---------- calls ----------- + if OpHelpers.is_plain_call(op.getopnum()): + self.expand_call_shortcut(op) if OpHelpers.is_call_assembler(op.getopnum()): self.handle_call_assembler(op) continue @@ -616,6 +618,30 @@ self.emit_gc_store_or_indexed(None, ptr, ConstInt(0), value, size, 1, ofs) + def expand_call_shortcut(self, op): + if not self.cpu.supports_cond_call_value: + return + descr = op.getdescr() + if descr is None: + return + assert isinstance(descr, CallDescr) + effectinfo = descr.get_extra_info() + if effectinfo is None or effectinfo.call_shortcut is None: + return + if op.type == 'r': + cond_call_opnum = rop.COND_CALL_VALUE_R + elif op.type == 'i': + cond_call_opnum = rop.COND_CALL_VALUE_I + else: + return + cs = effectinfo.call_shortcut + ptr_box = op.getarg(1 + cs.argnum) + value_box = self.emit_getfield(ptr_box, descr=cs.fielddescr, + raw=(ptr_box.type == 'i')) + self.replace_op_with(op, ResOperation(cond_call_opnum, + [value_box] + op.getarglist(), + descr=descr)) + def handle_call_assembler(self, op): descrs = self.gc_ll_descr.getframedescrs(self.cpu) loop_token = op.getdescr() diff --git a/rpython/jit/backend/llsupport/test/test_rewrite.py b/rpython/jit/backend/llsupport/test/test_rewrite.py --- a/rpython/jit/backend/llsupport/test/test_rewrite.py +++ b/rpython/jit/backend/llsupport/test/test_rewrite.py @@ -1,7 +1,8 @@ import py from rpython.jit.backend.llsupport.descr import get_size_descr,\ get_field_descr, get_array_descr, ArrayDescr, FieldDescr,\ - SizeDescr, get_interiorfield_descr + SizeDescr, get_interiorfield_descr, get_call_descr +from rpython.jit.codewriter.effectinfo import EffectInfo, CallShortcut from rpython.jit.backend.llsupport.gc import GcLLDescr_boehm,\ GcLLDescr_framework from rpython.jit.backend.llsupport import jitframe @@ -80,6 +81,14 @@ lltype.malloc(T, zero=True)) self.myT = myT # + call_shortcut = CallShortcut(0, tzdescr) + effectinfo = EffectInfo(None, None, None, None, None, None, + EffectInfo.EF_RANDOM_EFFECTS, + call_shortcut=call_shortcut) + call_shortcut_descr = get_call_descr(self.gc_ll_descr, + [lltype.Ptr(T)], lltype.Signed, + effectinfo) + # A = lltype.GcArray(lltype.Signed) adescr = get_array_descr(self.gc_ll_descr, A) adescr.tid = 4321 @@ -200,6 +209,7 @@ load_constant_offset = True load_supported_factors = (1,2,4,8) + supports_cond_call_value = True translate_support_code = None @@ -1429,3 +1439,15 @@ jump() """) assert len(self.gcrefs) == 2 + + def test_handle_call_shortcut(self): + self.check_rewrite(""" + [p0] + i1 = call_i(123, p0, descr=call_shortcut_descr) + jump(i1) + """, """ + [p0] + i2 = gc_load_i(p0, %(tzdescr.offset)s, %(tzdescr.field_size)s) + i1 = cond_call_value_i(i2, 123, p0, descr=call_shortcut_descr) + jump(i1) + """) diff --git a/rpython/jit/backend/model.py b/rpython/jit/backend/model.py --- a/rpython/jit/backend/model.py +++ b/rpython/jit/backend/model.py @@ -16,6 +16,7 @@ # Boxes and Consts are BoxFloats and ConstFloats. supports_singlefloats = False supports_guard_gc_type = False + supports_cond_call_value = False propagate_exception_descr = None diff --git a/rpython/jit/backend/test/runner_test.py b/rpython/jit/backend/test/runner_test.py --- a/rpython/jit/backend/test/runner_test.py +++ b/rpython/jit/backend/test/runner_test.py @@ -2389,7 +2389,7 @@ f2 = longlong.getfloatstorage(3.4) frame = self.cpu.execute_token(looptoken, 1, 0, 1, 2, 3, 4, 5, f1, f2) assert not called - for j in range(5): + for j in range(6): assert self.cpu.get_int_value(frame, j) == j assert longlong.getrealfloat(self.cpu.get_float_value(frame, 6)) == 1.2 assert longlong.getrealfloat(self.cpu.get_float_value(frame, 7)) == 3.4 @@ -2447,6 +2447,54 @@ 67, 89) assert called == [(67, 89)] + def test_cond_call_value(self): + if not self.cpu.supports_cond_call_value: + py.test.skip("missing supports_cond_call_value") + + def func_int(*args): + called.append(args) + return len(args) * 100 + 1000 + + for i in range(5): + called = [] + + FUNC = self.FuncType([lltype.Signed] * i, lltype.Signed) + func_ptr = llhelper(lltype.Ptr(FUNC), func_int) + calldescr = self.cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT, + EffectInfo.MOST_GENERAL) + + ops = ''' + [i0, i1, i2, i3, i4, i5, i6, f0, f1] + i15 = cond_call_value_i(i1, ConstClass(func_ptr), %s) + guard_false(i0, descr=faildescr) [i1,i2,i3,i4,i5,i6,i15, f0,f1] + finish(i15) + ''' % ', '.join(['i%d' % (j + 2) for j in range(i)] + + ["descr=calldescr"]) + loop = parse(ops, namespace={'faildescr': BasicFailDescr(), + 'func_ptr': func_ptr, + 'calldescr': calldescr}) + looptoken = JitCellToken() + self.cpu.compile_loop(loop.inputargs, loop.operations, looptoken) + f1 = longlong.getfloatstorage(1.2) + f2 = longlong.getfloatstorage(3.4) + frame = self.cpu.execute_token(looptoken, 1, 50, 1, 2, 3, 4, 5, + f1, f2) + assert not called + assert [self.cpu.get_int_value(frame, j) for j in range(7)] == [ + 50, 1, 2, 3, 4, 5, 50] + assert longlong.getrealfloat( + self.cpu.get_float_value(frame, 7)) == 1.2 + assert longlong.getrealfloat( + self.cpu.get_float_value(frame, 8)) == 3.4 + # + frame = self.cpu.execute_token(looptoken, 1, 0, 1, 2, 3, 4, 5, + f1, f2) + assert called == [(1, 2, 3, 4)[:i]] + assert [self.cpu.get_int_value(frame, j) for j in range(7)] == [ + 0, 1, 2, 3, 4, 5, i * 100 + 1000] + assert longlong.getrealfloat(self.cpu.get_float_value(frame, 7)) == 1.2 + assert longlong.getrealfloat(self.cpu.get_float_value(frame, 8)) == 3.4 + def test_force_operations_returning_void(self): values = [] def maybe_force(token, flag): diff --git a/rpython/jit/backend/test/test_ll_random.py b/rpython/jit/backend/test/test_ll_random.py --- a/rpython/jit/backend/test/test_ll_random.py +++ b/rpython/jit/backend/test/test_ll_random.py @@ -594,7 +594,7 @@ return subset, d['f'], vtableptr def getresulttype(self): - if self.opnum == rop.CALL_I: + if self.opnum == rop.CALL_I or self.opnum == rop.COND_CALL_VALUE_I: return lltype.Signed elif self.opnum == rop.CALL_F: return lltype.Float @@ -712,7 +712,12 @@ class CondCallOperation(BaseCallOperation): def produce_into(self, builder, r): fail_subset = builder.subset_of_intvars(r) - v_cond = builder.get_bool_var(r) + if self.opnum == rop.COND_CALL: + RESULT_TYPE = lltype.Void + v_cond = builder.get_bool_var(r) + else: + RESULT_TYPE = lltype.Signed + v_cond = r.choice(builder.intvars) subset = builder.subset_of_intvars(r)[:4] for i in range(len(subset)): if r.random() < 0.35: @@ -724,8 +729,10 @@ seen.append(args) else: assert seen[0] == args + if RESULT_TYPE is lltype.Signed: + return len(args) - 42000 # - TP = lltype.FuncType([lltype.Signed] * len(subset), lltype.Void) + TP = lltype.FuncType([lltype.Signed] * len(subset), RESULT_TYPE) ptr = llhelper(lltype.Ptr(TP), call_me) c_addr = ConstAddr(llmemory.cast_ptr_to_adr(ptr), builder.cpu) args = [v_cond, c_addr] + subset @@ -769,6 +776,7 @@ for i in range(2): OPERATIONS.append(GuardClassOperation(rop.GUARD_CLASS)) OPERATIONS.append(CondCallOperation(rop.COND_CALL)) + OPERATIONS.append(CondCallOperation(rop.COND_CALL_VALUE_I)) OPERATIONS.append(RaisingCallOperation(rop.CALL_N)) OPERATIONS.append(RaisingCallOperationGuardNoException(rop.CALL_N)) OPERATIONS.append(RaisingCallOperationWrongGuardException(rop.CALL_N)) diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py --- a/rpython/jit/backend/x86/assembler.py +++ b/rpython/jit/backend/x86/assembler.py @@ -174,8 +174,8 @@ # copy registers to the frame, with the exception of the # 'cond_call_register_arguments' and eax, because these have already # been saved by the caller. Note that this is not symmetrical: - # these 5 registers are saved by the caller but restored here at - # the end of this function. + # these 5 registers are saved by the caller but 4 of them are + # restored here at the end of this function. self._push_all_regs_to_frame(mc, cond_call_register_arguments + [eax], supports_floats, callee_only) # the caller already did push_gcmap(store=True) @@ -198,7 +198,7 @@ mc.ADD(esp, imm(WORD * 7)) self.set_extra_stack_depth(mc, 0) self.pop_gcmap(mc) # cancel the push_gcmap(store=True) in the caller - self._pop_all_regs_from_frame(mc, [], supports_floats, callee_only) + self._pop_all_regs_from_frame(mc, [eax], supports_floats, callee_only) mc.RET() return mc.materialize(self.cpu, []) @@ -1703,7 +1703,8 @@ self.implement_guard(guard_token) # If the previous operation was a COND_CALL, overwrite its conditional # jump to jump over this GUARD_NO_EXCEPTION as well, if we can - if self._find_nearby_operation(-1).getopnum() == rop.COND_CALL: + if self._find_nearby_operation(-1).getopnum() in ( + rop.COND_CALL, rop.COND_CALL_VALUE_I, rop.COND_CALL_VALUE_R): jmp_adr = self.previous_cond_call_jcond offset = self.mc.get_relative_pos() - jmp_adr if offset <= 127: @@ -2381,7 +2382,7 @@ def label(self): self._check_frame_depth_debug(self.mc) - def cond_call(self, op, gcmap, imm_func, arglocs): + def cond_call(self, gcmap, imm_func, arglocs, resloc=None): assert self.guard_success_cc >= 0 self.mc.J_il8(rx86.invert_condition(self.guard_success_cc), 0) # patched later @@ -2394,11 +2395,14 @@ # plus the register 'eax' base_ofs = self.cpu.get_baseofs_of_frame_field() should_be_saved = self._regalloc.rm.reg_bindings.values() + restore_eax = False for gpr in cond_call_register_arguments + [eax]: - if gpr not in should_be_saved: + if gpr not in should_be_saved or gpr is resloc: continue v = gpr_reg_mgr_cls.all_reg_indexes[gpr.value] self.mc.MOV_br(v * WORD + base_ofs, gpr.value) + if gpr is eax: + restore_eax = True # # load the 0-to-4 arguments into these registers from rpython.jit.backend.x86.jump import remap_frame_layout @@ -2422,8 +2426,16 @@ floats = True cond_call_adr = self.cond_call_slowpath[floats * 2 + callee_only] self.mc.CALL(imm(follow_jump(cond_call_adr))) + # if this is a COND_CALL_VALUE, we need to move the result in place + if resloc is not None and resloc is not eax: + self.mc.MOV(resloc, eax) # restoring the registers saved above, and doing pop_gcmap(), is left - # to the cond_call_slowpath helper. We never have any result value. + # to the cond_call_slowpath helper. We must only restore eax, if + # needed. + if restore_eax: + v = gpr_reg_mgr_cls.all_reg_indexes[eax.value] + self.mc.MOV_rb(eax.value, v * WORD + base_ofs) + # offset = self.mc.get_relative_pos() - jmp_adr assert 0 < offset <= 127 self.mc.overwrite(jmp_adr-1, chr(offset)) diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py --- a/rpython/jit/backend/x86/regalloc.py +++ b/rpython/jit/backend/x86/regalloc.py @@ -938,16 +938,45 @@ self.rm.force_spill_var(box) assert box not in self.rm.reg_bindings # - assert op.type == 'v' args = op.getarglist() assert 2 <= len(args) <= 4 + 2 # maximum 4 arguments - v = args[1] - assert isinstance(v, Const) - imm_func = self.rm.convert_to_imm(v) + v_func = args[1] + assert isinstance(v_func, Const) + imm_func = self.rm.convert_to_imm(v_func) + + # Delicate ordering here. First get the argument's locations. + # If this also contains args[0], this returns the current + # location too. arglocs = [self.loc(args[i]) for i in range(2, len(args))] gcmap = self.get_gcmap() - self.load_condition_into_cc(op.getarg(0)) - self.assembler.cond_call(op, gcmap, imm_func, arglocs) + + if op.type == 'v': + # a plain COND_CALL. Calls the function when args[0] is + # true. Often used just after a comparison operation. + self.load_condition_into_cc(op.getarg(0)) + resloc = None + else: + # COND_CALL_VALUE_I/R. Calls the function when args[0] + # is equal to 0 or NULL. Returns the result from the + # function call if done, or args[0] if it was not 0/NULL. + # Implemented by forcing the result to live in the same + # register as args[0], and overwriting it if we really do + # the call. + + # Load the register for the result. Possibly reuse 'args[0]'. + # But the old value of args[0], if it survives, is first + # spilled away. We can't overwrite any of op.args[2:] here. + resloc = self.rm.force_result_in_reg(op, args[0], + forbidden_vars=args[2:]) + + # Test the register for the result. + self.assembler.test_location(resloc) + self.assembler.guard_success_cc = rx86.Conditions['Z'] + + self.assembler.cond_call(gcmap, imm_func, arglocs, resloc) + + consider_cond_call_value_i = consider_cond_call + consider_cond_call_value_r = consider_cond_call def consider_call_malloc_nursery(self, op): size_box = op.getarg(0) diff --git a/rpython/jit/backend/x86/runner.py b/rpython/jit/backend/x86/runner.py --- a/rpython/jit/backend/x86/runner.py +++ b/rpython/jit/backend/x86/runner.py @@ -15,6 +15,7 @@ debug = True supports_floats = True supports_singlefloats = True + supports_cond_call_value = True dont_keepalive_stuff = False # for tests with_threads = False diff --git a/rpython/jit/codewriter/call.py b/rpython/jit/codewriter/call.py --- a/rpython/jit/codewriter/call.py +++ b/rpython/jit/codewriter/call.py @@ -7,9 +7,10 @@ from rpython.jit.codewriter.jitcode import JitCode from rpython.jit.codewriter.effectinfo import (VirtualizableAnalyzer, QuasiImmutAnalyzer, RandomEffectsAnalyzer, effectinfo_from_writeanalyze, - EffectInfo, CallInfoCollection) + EffectInfo, CallInfoCollection, CallShortcut) from rpython.rtyper.lltypesystem import lltype, llmemory from rpython.rtyper.lltypesystem.lltype import getfunctionptr +from rpython.flowspace.model import Constant, Variable from rpython.rlib import rposix from rpython.translator.backendopt.canraise import RaiseAnalyzer from rpython.translator.backendopt.writeanalyze import ReadWriteAnalyzer @@ -214,6 +215,7 @@ elidable = False loopinvariant = False call_release_gil_target = EffectInfo._NO_CALL_RELEASE_GIL_TARGET + call_shortcut = None if op.opname == "direct_call": funcobj = op.args[0].value._obj assert getattr(funcobj, 'calling_conv', 'c') == 'c', ( @@ -228,6 +230,12 @@ tgt_func, tgt_saveerr = func._call_aroundstate_target_ tgt_func = llmemory.cast_ptr_to_adr(tgt_func) call_release_gil_target = (tgt_func, tgt_saveerr) + if hasattr(funcobj, 'graph'): + call_shortcut = self.find_call_shortcut(funcobj.graph) + if getattr(func, "_call_shortcut_", False): + assert call_shortcut is not None, ( + "%r: marked as @jit.call_shortcut but shortcut not found" + % (func,)) elif op.opname == 'indirect_call': # check that we're not trying to call indirectly some # function with the special flags @@ -298,6 +306,7 @@ self.readwrite_analyzer.analyze(op, self.seen_rw), self.cpu, extraeffect, oopspecindex, can_invalidate, call_release_gil_target, extradescr, self.collect_analyzer.analyze(op, self.seen_gc), + call_shortcut, ) # assert effectinfo is not None @@ -368,3 +377,65 @@ if GTYPE_fieldname in jd.greenfield_info.green_fields: return True return False + + def find_call_shortcut(self, graph): + """Identifies graphs that start like this: + + def graph(x, y, z): def graph(x, y, z): + if y.field: r = y.field + return y.field if r: return r + """ + block = graph.startblock + if len(block.operations) == 0: + return + op = block.operations[0] + if op.opname != 'getfield': + return + [v_inst, c_fieldname] = op.args + if not isinstance(v_inst, Variable): + return + v_result = op.result + if v_result.concretetype != graph.getreturnvar().concretetype: + return + if v_result.concretetype == lltype.Void: + return + argnum = i = 0 + while block.inputargs[i] is not v_inst: + if block.inputargs[i].concretetype != lltype.Void: + argnum += 1 + i += 1 + PSTRUCT = v_inst.concretetype + v_check = v_result + fastcase = True + for op in block.operations[1:]: + if (op.opname in ('int_is_true', 'ptr_nonzero', 'same_as') + and v_check is op.args[0]): + v_check = op.result + elif op.opname == 'ptr_iszero' and v_check is op.args[0]: + v_check = op.result + fastcase = not fastcase + elif (op.opname in ('int_eq', 'int_ne') + and v_check is op.args[0] + and isinstance(op.args[1], Constant) + and op.args[1].value == 0): + v_check = op.result + if op.opname == 'int_eq': + fastcase = not fastcase + else: + return + if v_check.concretetype is not lltype.Bool: + return + if block.exitswitch is not v_check: + return + + links = [link for link in block.exits if link.exitcase == fastcase] + if len(links) != 1: + return + [link] = links + if link.args != [v_result]: + return + if not link.target.is_final_block(): + return + + fielddescr = self.cpu.fielddescrof(PSTRUCT.TO, c_fieldname.value) + return CallShortcut(argnum, fielddescr) diff --git a/rpython/jit/codewriter/effectinfo.py b/rpython/jit/codewriter/effectinfo.py --- a/rpython/jit/codewriter/effectinfo.py +++ b/rpython/jit/codewriter/effectinfo.py @@ -117,7 +117,8 @@ can_invalidate=False, call_release_gil_target=_NO_CALL_RELEASE_GIL_TARGET, extradescrs=None, - can_collect=True): + can_collect=True, + call_shortcut=None): readonly_descrs_fields = frozenset_or_none(readonly_descrs_fields) readonly_descrs_arrays = frozenset_or_none(readonly_descrs_arrays) readonly_descrs_interiorfields = frozenset_or_none( @@ -135,7 +136,8 @@ extraeffect, oopspecindex, can_invalidate, - can_collect) + can_collect, + call_shortcut) tgt_func, tgt_saveerr = call_release_gil_target if tgt_func: key += (object(),) # don't care about caching in this case @@ -190,6 +192,7 @@ result.oopspecindex = oopspecindex result.extradescrs = extradescrs result.call_release_gil_target = call_release_gil_target + result.call_shortcut = call_shortcut if result.check_can_raise(ignore_memoryerror=True): assert oopspecindex in cls._OS_CANRAISE @@ -275,7 +278,8 @@ call_release_gil_target= EffectInfo._NO_CALL_RELEASE_GIL_TARGET, extradescr=None, - can_collect=True): + can_collect=True, + call_shortcut=None): from rpython.translator.backendopt.writeanalyze import top_set if effects is top_set or extraeffect == EffectInfo.EF_RANDOM_EFFECTS: readonly_descrs_fields = None @@ -364,7 +368,8 @@ can_invalidate, call_release_gil_target, extradescr, - can_collect) + can_collect, + call_shortcut) def consider_struct(TYPE, fieldname): if fieldType(TYPE, fieldname) is lltype.Void: @@ -387,6 +392,24 @@ # ____________________________________________________________ + +class CallShortcut(object): + def __init__(self, argnum, fielddescr): + self.argnum = argnum + self.fielddescr = fielddescr + + def __eq__(self, other): + return (isinstance(other, CallShortcut) and + self.argnum == other.argnum and + self.fielddescr == other.fielddescr) + def __ne__(self, other): + return not (self == other) + def __hash__(self): + return hash((self.argnum, self.fielddescr)) + +# ____________________________________________________________ + + class VirtualizableAnalyzer(BoolGraphAnalyzer): def analyze_simple_operation(self, op, graphinfo): return op.opname in ('jit_force_virtualizable', diff --git a/rpython/jit/codewriter/jtransform.py b/rpython/jit/codewriter/jtransform.py --- a/rpython/jit/codewriter/jtransform.py +++ b/rpython/jit/codewriter/jtransform.py @@ -364,7 +364,7 @@ return getattr(self, 'handle_%s_indirect_call' % kind)(op) def rewrite_call(self, op, namebase, initialargs, args=None, - calldescr=None): + calldescr=None, force_ir=False): """Turn 'i0 = direct_call(fn, i1, i2, ref1, ref2)' into 'i0 = xxx_call_ir_i(fn, descr, [i1,i2], [ref1,ref2])'. The name is one of '{residual,direct}_call_{r,ir,irf}_{i,r,f,v}'.""" @@ -374,8 +374,9 @@ lst_i, lst_r, lst_f = self.make_three_lists(args) reskind = getkind(op.result.concretetype)[0] if lst_f or reskind == 'f': kinds = 'irf' - elif lst_i: kinds = 'ir' + elif lst_i or force_ir: kinds = 'ir' else: kinds = 'r' + if force_ir: assert kinds == 'ir' # no 'f' sublists = [] if 'i' in kinds: sublists.append(lst_i) if 'r' in kinds: sublists.append(lst_r) @@ -1577,7 +1578,7 @@ assert not calldescr.get_extra_info().check_forces_virtual_or_virtualizable() op1 = self.rewrite_call(op, 'conditional_call', op.args[:2], args=op.args[2:], - calldescr=calldescr) + calldescr=calldescr, force_ir=True) if self.callcontrol.calldescr_canraise(calldescr): op1 = [op1, SpaceOperation('-live-', [], None)] return op1 diff --git a/rpython/jit/codewriter/test/test_call.py b/rpython/jit/codewriter/test/test_call.py --- a/rpython/jit/codewriter/test/test_call.py +++ b/rpython/jit/codewriter/test/test_call.py @@ -6,7 +6,7 @@ from rpython.rlib import jit from rpython.jit.codewriter import support, call from rpython.jit.codewriter.call import CallControl -from rpython.jit.codewriter.effectinfo import EffectInfo +from rpython.jit.codewriter.effectinfo import EffectInfo, CallShortcut class FakePolicy: @@ -368,3 +368,100 @@ assert call_op.opname == 'direct_call' call_descr = cc.getcalldescr(call_op) assert call_descr.extrainfo.check_can_collect() == expected + +def test_find_call_shortcut(): + class FakeCPU: + def fielddescrof(self, TYPE, fieldname): + if isinstance(TYPE, lltype.GcStruct): + if fieldname == 'inst_foobar': + return 'foobardescr' + if fieldname == 'inst_fooref': + return 'foorefdescr' + if TYPE == RAW and fieldname == 'x': + return 'xdescr' + assert False, (TYPE, fieldname) + cc = CallControl(FakeCPU()) + + class B(object): + foobar = 0 + fooref = None + + def f1(a, b, c): + if b.foobar: + return b.foobar + b.foobar = a + c + return b.foobar + + def f2(x, y, z, b): + r = b.fooref + if r is not None: + return r + r = b.fooref = B() + return r + + class Space(object): + def _freeze_(self): + return True + space = Space() + + def f3(space, b): + r = b.foobar + if not r: + r = b.foobar = 123 + return r + + def f4(raw): + r = raw.x + if r != 0: + return r + raw.x = 123 + return 123 + RAW = lltype.Struct('RAW', ('x', lltype.Signed)) + + def f5(b): + r = b.foobar + if r == 0: + r = b.foobar = 123 + return r + + def f(a, c): + b = B() + f1(a, b, c) + f2(a, c, a, b) + f3(space, b) + r = lltype.malloc(RAW, flavor='raw') + f4(r) + f5(b) + + rtyper = support.annotate(f, [10, 20]) + f1_graph = rtyper.annotator.translator._graphof(f1) + assert cc.find_call_shortcut(f1_graph) == CallShortcut(1, "foobardescr") + f2_graph = rtyper.annotator.translator._graphof(f2) + assert cc.find_call_shortcut(f2_graph) == CallShortcut(3, "foorefdescr") + f3_graph = rtyper.annotator.translator._graphof(f3) + assert cc.find_call_shortcut(f3_graph) == CallShortcut(0, "foobardescr") + f4_graph = rtyper.annotator.translator._graphof(f4) + assert cc.find_call_shortcut(f4_graph) == CallShortcut(0, "xdescr") + f5_graph = rtyper.annotator.translator._graphof(f5) + assert cc.find_call_shortcut(f5_graph) == CallShortcut(0, "foobardescr") + +def test_cant_find_call_shortcut(): + from rpython.jit.backend.llgraph.runner import LLGraphCPU + + @jit.dont_look_inside + @jit.call_shortcut + def f1(n): + return n + 17 # no call shortcut found + + def f(n): + return f1(n) + + rtyper = support.annotate(f, [1]) + jitdriver_sd = FakeJitDriverSD(rtyper.annotator.translator.graphs[0]) + cc = CallControl(LLGraphCPU(rtyper), jitdrivers_sd=[jitdriver_sd]) + res = cc.find_all_graphs(FakePolicy()) + [f_graph] = [x for x in res if x.func is f] + call_op = f_graph.startblock.operations[0] + assert call_op.opname == 'direct_call' + e = py.test.raises(AssertionError, cc.getcalldescr, call_op) + assert "shortcut not found" in str(e.value) diff --git a/rpython/jit/metainterp/blackhole.py b/rpython/jit/metainterp/blackhole.py --- a/rpython/jit/metainterp/blackhole.py +++ b/rpython/jit/metainterp/blackhole.py @@ -1200,28 +1200,12 @@ return cpu.bh_call_v(func, args_i, args_r, args_f, calldescr) # conditional calls - note that they cannot return stuff - @arguments("cpu", "i", "i", "I", "d") - def bhimpl_conditional_call_i_v(cpu, condition, func, args_i, calldescr): - if condition: - cpu.bh_call_v(func, args_i, None, None, calldescr) - - @arguments("cpu", "i", "i", "R", "d") - def bhimpl_conditional_call_r_v(cpu, condition, func, args_r, calldescr): - if condition: - cpu.bh_call_v(func, None, args_r, None, calldescr) - @arguments("cpu", "i", "i", "I", "R", "d") def bhimpl_conditional_call_ir_v(cpu, condition, func, args_i, args_r, calldescr): if condition: cpu.bh_call_v(func, args_i, args_r, None, calldescr) - @arguments("cpu", "i", "i", "I", "R", "F", "d") - def bhimpl_conditional_call_irf_v(cpu, condition, func, args_i, args_r, - args_f, calldescr): - if condition: - cpu.bh_call_v(func, args_i, args_r, args_f, calldescr) - @arguments("cpu", "j", "R", returns="i") def bhimpl_inline_call_r_i(cpu, jitcode, args_r): return cpu.bh_call_i(jitcode.get_fnaddr_as_int(), diff --git a/rpython/jit/metainterp/executor.py b/rpython/jit/metainterp/executor.py --- a/rpython/jit/metainterp/executor.py +++ b/rpython/jit/metainterp/executor.py @@ -101,6 +101,18 @@ if condbox.getint(): do_call_n(cpu, metainterp, argboxes[1:], descr) +def do_cond_call_value_i(cpu, metainterp, argboxes, descr): + value = argboxes[0].getint() + if value == 0: + value = do_call_i(cpu, metainterp, argboxes[1:], descr) + return value + +def do_cond_call_value_r(cpu, metainterp, argboxes, descr): + value = argboxes[0].getref_base() + if not value: + value = do_call_r(cpu, metainterp, argboxes[1:], descr) + return value + def do_getarrayitem_gc_i(cpu, _, arraybox, indexbox, arraydescr): array = arraybox.getref_base() index = indexbox.getint() @@ -366,6 +378,8 @@ rop.CALL_ASSEMBLER_I, rop.CALL_ASSEMBLER_N, rop.INCREMENT_DEBUG_COUNTER, + rop.COND_CALL_VALUE_R, + rop.COND_CALL_VALUE_I, rop.COND_CALL_GC_WB, rop.COND_CALL_GC_WB_ARRAY, rop.ZERO_ARRAY, diff --git a/rpython/jit/metainterp/optimizeopt/info.py b/rpython/jit/metainterp/optimizeopt/info.py --- a/rpython/jit/metainterp/optimizeopt/info.py +++ b/rpython/jit/metainterp/optimizeopt/info.py @@ -365,6 +365,13 @@ def visitor_dispatch_virtual_type(self, visitor): raise NotImplementedError("abstract") + def make_guards(self, op, short, optimizer): + from rpython.jit.metainterp.optimizeopt.optimizer import CONST_0 + op = ResOperation(rop.INT_EQ, [op, CONST_0]) + short.append(op) + op = ResOperation(rop.GUARD_FALSE, [op]) + short.append(op) + class RawBufferPtrInfo(AbstractRawPtrInfo): buffer = None diff --git a/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py b/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py --- a/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_optimizeopt.py @@ -7592,7 +7592,7 @@ ops = """ [i0] p1 = new_with_vtable(descr=nodesize) - cond_call(1, 123, p1, descr=clear_vable) + cond_call(i0, 123, p1, descr=clear_vable) jump(i0) """ expected = """ diff --git a/rpython/jit/metainterp/optimizeopt/unroll.py b/rpython/jit/metainterp/optimizeopt/unroll.py --- a/rpython/jit/metainterp/optimizeopt/unroll.py +++ b/rpython/jit/metainterp/optimizeopt/unroll.py @@ -16,7 +16,7 @@ from rpython.rlib.debug import debug_print, debug_start, debug_stop,\ have_debug_prints -class UnrollableOptimizer(Optimizer): +class UnrollableOptimizer(Optimizer): def force_op_from_preamble(self, preamble_op): if isinstance(preamble_op, PreambleOp): if self.optunroll.short_preamble_producer is None: @@ -120,7 +120,8 @@ assert op.get_forwarded() is None if check_newops: assert not self.optimizer._newoperations - + + def optimize_preamble(self, trace, runtime_boxes, call_pure_results, memo): info, newops = self.optimizer.propagate_all_forward( trace.get_iter(), call_pure_results, flush=False) @@ -156,7 +157,7 @@ current_vs = self.get_virtual_state(end_jump.getarglist()) # pick the vs we want to jump to assert isinstance(celltoken, JitCellToken) - + target_virtual_state = self.pick_virtual_state(current_vs, state.virtual_state, celltoken.target_tokens) @@ -180,17 +181,27 @@ self.jump_to_preamble(celltoken, end_jump, info) return (UnrollInfo(target_token, label_op, extra_same_as, self.optimizer.quasi_immutable_deps), - self.optimizer._newoperations) + self.optimizer._newoperations) try: - new_virtual_state = self.jump_to_existing_trace(end_jump, label_op, - state.runtime_boxes) + new_virtual_state = self.jump_to_existing_trace( + end_jump, label_op, state.runtime_boxes, force_boxes=False) except InvalidLoop: # inlining short preamble failed, jump to preamble self.jump_to_preamble(celltoken, end_jump, info) return (UnrollInfo(target_token, label_op, extra_same_as, self.optimizer.quasi_immutable_deps), self.optimizer._newoperations) + + if new_virtual_state is not None: + # Attempt to force virtual boxes in order to avoid jumping + # to the preamble. + try: + new_virtual_state = self.jump_to_existing_trace( + end_jump, label_op, state.runtime_boxes, force_boxes=True) + except InvalidLoop: + pass + if new_virtual_state is not None: self.jump_to_preamble(celltoken, end_jump, info) return (UnrollInfo(target_token, label_op, extra_same_as, @@ -199,7 +210,7 @@ self.disable_retracing_if_max_retrace_guards( self.optimizer._newoperations, target_token) - + return (UnrollInfo(target_token, label_op, extra_same_as, self.optimizer.quasi_immutable_deps), self.optimizer._newoperations) @@ -241,7 +252,8 @@ for a in jump_op.getarglist(): self.optimizer.force_box_for_end_of_preamble(a) try: - vs = self.jump_to_existing_trace(jump_op, None, runtime_boxes) + vs = self.jump_to_existing_trace(jump_op, None, runtime_boxes, + force_boxes=False) except InvalidLoop: return self.jump_to_preamble(cell_token, jump_op, info) if vs is None: @@ -252,6 +264,14 @@ cell_token.retraced_count += 1 debug_print('Retracing (%d/%d)' % (cell_token.retraced_count, limit)) else: + # Try forcing boxes to avoid jumping to the preamble + try: + vs = self.jump_to_existing_trace(jump_op, None, runtime_boxes, + force_boxes=True) + except InvalidLoop: + pass + if vs is None: + return info, self.optimizer._newoperations[:] debug_print("Retrace count reached, jumping to preamble") return self.jump_to_preamble(cell_token, jump_op, info) exported_state = self.export_state(info.jump_op.getarglist(), @@ -288,7 +308,7 @@ return info, self.optimizer._newoperations[:] - def jump_to_existing_trace(self, jump_op, label_op, runtime_boxes): + def jump_to_existing_trace(self, jump_op, label_op, runtime_boxes, force_boxes=False): jitcelltoken = jump_op.getdescr() assert isinstance(jitcelltoken, JitCellToken) virtual_state = self.get_virtual_state(jump_op.getarglist()) @@ -299,7 +319,8 @@ continue try: extra_guards = target_virtual_state.generate_guards( - virtual_state, args, runtime_boxes, self.optimizer) + virtual_state, args, runtime_boxes, self.optimizer, + force_boxes=force_boxes) patchguardop = self.optimizer.patchguardop for guard in extra_guards.extra_guards: if isinstance(guard, GuardResOp): @@ -308,8 +329,18 @@ self.send_extra_operation(guard) except VirtualStatesCantMatch: continue - args, virtuals = target_virtual_state.make_inputargs_and_virtuals( - args, self.optimizer) + + # When force_boxes == True, creating the virtual args can fail when + # components of the virtual state alias. If this occurs, we must + # recompute the virtual state as boxes will have been forced. + try: + args, virtuals = target_virtual_state.make_inputargs_and_virtuals( + args, self.optimizer, force_boxes=force_boxes) + except VirtualStatesCantMatch: + assert force_boxes + virtual_state = self.get_virtual_state(args) + continue + short_preamble = target_token.short_preamble try: extra = self.inline_short_preamble(args + virtuals, args, @@ -452,7 +483,7 @@ # by short preamble label_args = exported_state.virtual_state.make_inputargs( targetargs, self.optimizer) - + self.short_preamble_producer = ShortPreambleBuilder( label_args, exported_state.short_boxes, exported_state.short_inputargs, exported_state.exported_infos, @@ -497,7 +528,7 @@ * runtime_boxes - runtime values for boxes, necessary when generating guards to jump to """ - + def __init__(self, end_args, next_iteration_args, virtual_state, exported_infos, short_boxes, renamed_inputargs, short_inputargs, runtime_boxes, memo): diff --git a/rpython/jit/metainterp/optimizeopt/virtualstate.py b/rpython/jit/metainterp/optimizeopt/virtualstate.py --- a/rpython/jit/metainterp/optimizeopt/virtualstate.py +++ b/rpython/jit/metainterp/optimizeopt/virtualstate.py @@ -4,7 +4,7 @@ ArrayStructInfo, AbstractStructPtrInfo from rpython.jit.metainterp.optimizeopt.intutils import \ MININT, MAXINT, IntBound, IntLowerBound -from rpython.jit.metainterp.resoperation import rop, ResOperation,\ +from rpython.jit.metainterp.resoperation import rop, ResOperation, \ InputArgInt, InputArgRef, InputArgFloat from rpython.rlib.debug import debug_print @@ -20,7 +20,7 @@ class GenerateGuardState(object): - def __init__(self, optimizer=None, guards=None, renum=None, bad=None): + def __init__(self, optimizer=None, guards=None, renum=None, bad=None, force_boxes=False): self.optimizer = optimizer self.cpu = optimizer.cpu if guards is None: @@ -32,6 +32,7 @@ if bad is None: bad = {} self.bad = bad + self.force_boxes = force_boxes def get_runtime_item(self, box, descr, i): array = box.getref_base() @@ -303,7 +304,7 @@ opinfo = state.optimizer.getptrinfo(box) assert isinstance(opinfo, ArrayPtrInfo) else: - opinfo = None + opinfo = None for i in range(self.length): for descr in self.fielddescrs: index = i * len(self.fielddescrs) + descr.get_index() @@ -514,6 +515,8 @@ NotVirtualStateInfo.__init__(self, cpu, type, info) def _generate_guards(self, other, box, runtime_box, state): + if state.force_boxes and isinstance(other, VirtualStateInfo): + return self._generate_virtual_guards(other, box, runtime_box, state) if not isinstance(other, NotVirtualStateInfoPtr): raise VirtualStatesCantMatch( 'The VirtualStates does not match as a ' + @@ -545,6 +548,23 @@ # to an existing compiled loop or retracing the loop. Both alternatives # will always generate correct behaviour, but performance will differ. + def _generate_virtual_guards(self, other, box, runtime_box, state): + """ + Generate the guards and add state information for unifying a virtual + object with a non-virtual. This involves forcing the object in the + event that unification can succeed. Since virtual objects cannot be null, + this method need only check that the virtual object has the expected type. + """ + assert state.force_boxes and isinstance(other, VirtualStateInfo) + + if self.level == LEVEL_CONSTANT: + raise VirtualStatesCantMatch( + "cannot unify a constant value with a virtual object") + + if self.level == LEVEL_KNOWNCLASS: + if not self.known_class.same_constant(other.known_class): + raise VirtualStatesCantMatch("classes don't match") + def _generate_guards_nonnull(self, other, box, runtime_box, extra_guards, state): if not isinstance(other, NotVirtualStateInfoPtr): @@ -617,10 +637,10 @@ return False return True - def generate_guards(self, other, boxes, runtime_boxes, optimizer): + def generate_guards(self, other, boxes, runtime_boxes, optimizer, force_boxes=False): assert (len(self.state) == len(other.state) == len(boxes) == len(runtime_boxes)) - state = GenerateGuardState(optimizer) + state = GenerateGuardState(optimizer, force_boxes=force_boxes) _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit