Author: Matti Picus <matti.pi...@gmail.com> Branch: py3.6 Changeset: r96084:56db4a646df6 Date: 2019-02-19 09:43 +0200 http://bitbucket.org/pypy/pypy/changeset/56db4a646df6/
Log: merge heads diff too long, truncating to 2000 out of 29000 lines diff --git a/.hgtags b/.hgtags --- a/.hgtags +++ b/.hgtags @@ -58,3 +58,12 @@ 3f6eaa010fce78cc7973bdc1dfdb95970f08fed2 release-pypy3.5-v5.10.1 ab0b9caf307db6592905a80b8faffd69b39005b8 release-pypy2.7-v6.0.0 fdd60ed87e941677e8ea11acf9f1819466521bf2 release-pypy3.5-v6.0.0 +9112c8071614108b1042bfef0713915107004d62 release-pypy2.7-v7.0.0 +1f86f25937b6ae6c8b25236c35228fac587678bf release-pypy3.5-v7.0.0 +dab365a465140aa79a5f3ba4db784c4af4d5c195 release-pypy3.6-v7.0.0 +9112c8071614108b1042bfef0713915107004d62 release-pypy2.7-v7.0.0 +c8805ee6d7846ca2722b106eeaa2f128c699aba3 release-pypy2.7-v7.0.0 +1f86f25937b6ae6c8b25236c35228fac587678bf release-pypy3.5-v7.0.0 +928a4f70d3de7d17449456946154c5da6e600162 release-pypy3.5-v7.0.0 +dab365a465140aa79a5f3ba4db784c4af4d5c195 release-pypy3.6-v7.0.0 +fb40f7a5524c77b80e6c468e087d621610137261 release-pypy3.6-v7.0.0 diff --git a/TODO b/TODO new file mode 100644 --- /dev/null +++ b/TODO @@ -0,0 +1,20 @@ +* find a better way to run "find" without creating the index storage, if one + if one is not already readily available (understand cost now, improve after merge) +* improve performance of splitlines (CF) +* think about cost of utf8 list strategy (CF) +* revisit why runicode import str_decode_utf_8_impl needed instead of runicode + import str_decode_utf_8 +* revisit remaining places in win32 where we do utf8.decode('utf-8'), they should work + directly with utf8 (can be converted via runicode.str_decode_utf_8 as well) + - rutf8.utf8_encode_mbcs + - unicodehelper.fsencode + - _winreg.interp_winreg +* remove 'assert not isinstance(*, unicode) +* add a flag that prevents support for unicode in rpython and enable it in PyPy (CF, Armin) +* convert all realunicode_w to unicode_w after we flush out all old uses of + unicode_w +* review all uses of W_Unicode.text_w, right now it is exactly W_Unicode.utf8_w. 
+ It shoud only return valid utf8 (see 0be26dc39a59 which broke translation on + win32 and failed tests on linux64). Then we can use it in places like + _socket.interp_func.getaddrinfo instead of space.encode_unicode_object(w_port, + 'utf-8', 'strict') diff --git a/extra_tests/cffi_tests/cffi0/test_ownlib.py b/extra_tests/cffi_tests/cffi0/test_ownlib.py --- a/extra_tests/cffi_tests/cffi0/test_ownlib.py +++ b/extra_tests/cffi_tests/cffi0/test_ownlib.py @@ -352,6 +352,8 @@ def test_modify_struct_value(self): if self.module is None: py.test.skip("fix the auto-generation of the tiny test lib") + if self.Backend is CTypesBackend: + py.test.skip("fails with the ctypes backend on some architectures") ffi = FFI(backend=self.Backend()) ffi.cdef(""" typedef struct { diff --git a/extra_tests/cffi_tests/embedding/thread3-test.c b/extra_tests/cffi_tests/embedding/thread3-test.c --- a/extra_tests/cffi_tests/embedding/thread3-test.c +++ b/extra_tests/cffi_tests/embedding/thread3-test.c @@ -52,5 +52,6 @@ assert(status == 0); } printf("done\n"); + fflush(stdout); /* this is occasionally needed on Windows */ return 0; } diff --git a/lib-python/3/idlelib/calltips.py b/lib-python/3/idlelib/calltips.py new file mode 100644 --- /dev/null +++ b/lib-python/3/idlelib/calltips.py @@ -0,0 +1,175 @@ +"""calltips.py - An IDLE Extension to Jog Your Memory + +Call Tips are floating windows which display function, class, and method +parameter and docstring information when you type an opening parenthesis, and +which disappear when you type a closing parenthesis. 
+ +""" +import inspect +import re +import sys +import textwrap +import types + +from idlelib import calltip_w +from idlelib.hyperparser import HyperParser +import __main__ + +class CallTips: + + menudefs = [ + ('edit', [ + ("Show call tip", "<<force-open-calltip>>"), + ]) + ] + + def __init__(self, editwin=None): + if editwin is None: # subprocess and test + self.editwin = None + else: + self.editwin = editwin + self.text = editwin.text + self.active_calltip = None + self._calltip_window = self._make_tk_calltip_window + + def close(self): + self._calltip_window = None + + def _make_tk_calltip_window(self): + # See __init__ for usage + return calltip_w.CallTip(self.text) + + def _remove_calltip_window(self, event=None): + if self.active_calltip: + self.active_calltip.hidetip() + self.active_calltip = None + + def force_open_calltip_event(self, event): + "The user selected the menu entry or hotkey, open the tip." + self.open_calltip(True) + + def try_open_calltip_event(self, event): + """Happens when it would be nice to open a CallTip, but not really + necessary, for example after an opening bracket, so function calls + won't be made. + """ + self.open_calltip(False) + + def refresh_calltip_event(self, event): + if self.active_calltip and self.active_calltip.is_active(): + self.open_calltip(False) + + def open_calltip(self, evalfuncs): + self._remove_calltip_window() + + hp = HyperParser(self.editwin, "insert") + sur_paren = hp.get_surrounding_brackets('(') + if not sur_paren: + return + hp.set_index(sur_paren[0]) + expression = hp.get_expression() + if not expression: + return + if not evalfuncs and (expression.find('(') != -1): + return + argspec = self.fetch_tip(expression) + if not argspec: + return + self.active_calltip = self._calltip_window() + self.active_calltip.showtip(argspec, sur_paren[0], sur_paren[1]) + + def fetch_tip(self, expression): + """Return the argument list and docstring of a function or class. 
+ + If there is a Python subprocess, get the calltip there. Otherwise, + either this fetch_tip() is running in the subprocess or it was + called in an IDLE running without the subprocess. + + The subprocess environment is that of the most recently run script. If + two unrelated modules are being edited some calltips in the current + module may be inoperative if the module was not the last to run. + + To find methods, fetch_tip must be fed a fully qualified name. + + """ + try: + rpcclt = self.editwin.flist.pyshell.interp.rpcclt + except AttributeError: + rpcclt = None + if rpcclt: + return rpcclt.remotecall("exec", "get_the_calltip", + (expression,), {}) + else: + return get_argspec(get_entity(expression)) + +def get_entity(expression): + """Return the object corresponding to expression evaluated + in a namespace spanning sys.modules and __main.dict__. + """ + if expression: + namespace = sys.modules.copy() + namespace.update(__main__.__dict__) + try: + return eval(expression, namespace) + except BaseException: + # An uncaught exception closes idle, and eval can raise any + # exception, especially if user classes are involved. + return None + +# The following are used in get_argspec and some in tests +_MAX_COLS = 85 +_MAX_LINES = 5 # enough for bytes +_INDENT = ' '*4 # for wrapped signatures +_first_param = re.compile(r'(?<=\()\w*\,?\s*') +_default_callable_argspec = "See source or doc" + + +def get_argspec(ob): + '''Return a string describing the signature of a callable object, or ''. + + For Python-coded functions and methods, the first line is introspected. + Delete 'self' parameter for classes (.__init__) and bound methods. + The next lines are the first lines of the doc string up to the first + empty line or _MAX_LINES. For builtins, this typically includes + the arguments in addition to the return value. 
+ ''' + argspec = "" + try: + ob_call = ob.__call__ + except BaseException: + return argspec + if isinstance(ob, type): + fob = ob.__init__ + elif isinstance(ob_call, types.MethodType): + fob = ob_call + else: + fob = ob + if isinstance(fob, (types.FunctionType, types.MethodType)): + argspec = inspect.formatargspec(*inspect.getfullargspec(fob)) + if (isinstance(ob, (type, types.MethodType)) or + isinstance(ob_call, types.MethodType)): + argspec = _first_param.sub("", argspec) + + lines = (textwrap.wrap(argspec, _MAX_COLS, subsequent_indent=_INDENT) + if len(argspec) > _MAX_COLS else [argspec] if argspec else []) + + if isinstance(ob_call, types.MethodType): + doc = ob_call.__doc__ + else: + doc = getattr(ob, "__doc__", "") + if doc: + for line in doc.split('\n', _MAX_LINES)[:_MAX_LINES]: + line = line.strip() + if not line: + break + if len(line) > _MAX_COLS: + line = line[: _MAX_COLS - 3] + '...' + lines.append(line) + argspec = '\n'.join(lines) + if not argspec: + argspec = _default_callable_argspec + return argspec + +if __name__ == '__main__': + from unittest import main + main('idlelib.idle_test.test_calltips', verbosity=2) diff --git a/lib_pypy/cffi.egg-info/PKG-INFO b/lib_pypy/cffi.egg-info/PKG-INFO --- a/lib_pypy/cffi.egg-info/PKG-INFO +++ b/lib_pypy/cffi.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: cffi -Version: 1.12.0 +Version: 1.12.1 Summary: Foreign Function Interface for Python calling C code. 
Home-page: http://cffi.readthedocs.org Author: Armin Rigo, Maciej Fijalkowski diff --git a/lib_pypy/cffi/__init__.py b/lib_pypy/cffi/__init__.py --- a/lib_pypy/cffi/__init__.py +++ b/lib_pypy/cffi/__init__.py @@ -5,8 +5,8 @@ from .error import CDefError, FFIError, VerificationError, VerificationMissing from .error import PkgConfigError -__version__ = "1.12.0" -__version_info__ = (1, 12, 0) +__version__ = "1.12.1" +__version_info__ = (1, 12, 1) # The verifier module file names are based on the CRC32 of a string that # contains the following version number. It may be older than __version__ diff --git a/lib_pypy/cffi/_cffi_include.h b/lib_pypy/cffi/_cffi_include.h --- a/lib_pypy/cffi/_cffi_include.h +++ b/lib_pypy/cffi/_cffi_include.h @@ -8,43 +8,20 @@ the same works for the other two macros. Py_DEBUG implies them, but not the other way around. - The implementation is messy (issue #350): on Windows, with _MSC_VER, - we have to define Py_LIMITED_API even before including pyconfig.h. - In that case, we guess what pyconfig.h will do to the macros above, - and check our guess after the #include. - - Note that on Windows, with CPython 3.x, you need virtualenv version - >= 16.0.0. Older versions don't copy PYTHON3.DLL. As a workaround - you can remove the definition of Py_LIMITED_API here. - - See also 'py_limited_api' in cffi/setuptools_ext.py. + Issue #350 is still open: on Windows, the code here causes it to link + with PYTHON36.DLL (for example) instead of PYTHON3.DLL. A fix was + attempted in 164e526a5515 and 14ce6985e1c3, but reverted: virtualenv + does not make PYTHON3.DLL available, and so the "correctly" compiled + version would not run inside a virtualenv. We will re-apply the fix + after virtualenv has been fixed for some time. For explanation, see + issue #355. For a workaround if you want PYTHON3.DLL and don't worry + about virtualenv, see issue #350. See also 'py_limited_api' in + setuptools_ext.py. 
*/ #if !defined(_CFFI_USE_EMBEDDING) && !defined(Py_LIMITED_API) -# ifdef _MSC_VER -# if !defined(_DEBUG) && !defined(Py_DEBUG) && !defined(Py_TRACE_REFS) && !defined(Py_REF_DEBUG) -# define Py_LIMITED_API -# endif -# include <pyconfig.h> - /* sanity-check: Py_LIMITED_API will cause crashes if any of these - are also defined. Normally, the Python file PC/pyconfig.h does not - cause any of these to be defined, with the exception that _DEBUG - causes Py_DEBUG. Double-check that. */ -# ifdef Py_LIMITED_API -# if defined(Py_DEBUG) -# error "pyconfig.h unexpectedly defines Py_DEBUG, but Py_LIMITED_API is set" -# endif -# if defined(Py_TRACE_REFS) -# error "pyconfig.h unexpectedly defines Py_TRACE_REFS, but Py_LIMITED_API is set" -# endif -# if defined(Py_REF_DEBUG) -# error "pyconfig.h unexpectedly defines Py_REF_DEBUG, but Py_LIMITED_API is set" -# endif -# endif -# else -# include <pyconfig.h> -# if !defined(Py_DEBUG) && !defined(Py_TRACE_REFS) && !defined(Py_REF_DEBUG) -# define Py_LIMITED_API -# endif +# include <pyconfig.h> +# if !defined(Py_DEBUG) && !defined(Py_TRACE_REFS) && !defined(Py_REF_DEBUG) +# define Py_LIMITED_API # endif #endif diff --git a/lib_pypy/cffi/_embedding.h b/lib_pypy/cffi/_embedding.h --- a/lib_pypy/cffi/_embedding.h +++ b/lib_pypy/cffi/_embedding.h @@ -221,7 +221,7 @@ if (f != NULL && f != Py_None) { PyFile_WriteString("\nFrom: " _CFFI_MODULE_NAME - "\ncompiled with cffi version: 1.12.0" + "\ncompiled with cffi version: 1.12.1" "\n_cffi_backend module: ", f); modules = PyImport_GetModuleDict(); mod = PyDict_GetItemString(modules, "_cffi_backend"); diff --git a/lib_pypy/cffi/setuptools_ext.py b/lib_pypy/cffi/setuptools_ext.py --- a/lib_pypy/cffi/setuptools_ext.py +++ b/lib_pypy/cffi/setuptools_ext.py @@ -81,8 +81,14 @@ it doesn't so far, creating troubles. That's why we check for "not hasattr(sys, 'gettotalrefcount')" (the 2.7 compatible equivalent of 'd' not in sys.abiflags). 
(http://bugs.python.org/issue28401) + + On Windows, with CPython <= 3.4, it's better not to use py_limited_api + because virtualenv *still* doesn't copy PYTHON3.DLL on these versions. + For now we'll skip py_limited_api on all Windows versions to avoid an + inconsistent mess. """ - if 'py_limited_api' not in kwds and not hasattr(sys, 'gettotalrefcount'): + if ('py_limited_api' not in kwds and not hasattr(sys, 'gettotalrefcount') + and sys.platform != 'win32'): import setuptools try: setuptools_major_version = int(setuptools.__version__.partition('.')[0]) diff --git a/pypy/TODO b/pypy/TODO --- a/pypy/TODO +++ b/pypy/TODO @@ -1,6 +1,3 @@ -... - - antocuni's older TODO: * run coverage against the parser/astbuilder/astcompiler: it's probably full of @@ -11,3 +8,5 @@ * re-enable BUILD_LIST_FROM_ARG: see the comment in astcompiler/codegen.py in ast.ListComp.build_container + +* review use of std_decode_utf8, we probably do not want to be using it diff --git a/pypy/doc/extending.rst b/pypy/doc/extending.rst --- a/pypy/doc/extending.rst +++ b/pypy/doc/extending.rst @@ -45,16 +45,13 @@ with the `CPython ctypes`_ version. It works for large examples, such as pyglet. PyPy's implementation is not strictly 100% compatible with CPython, but close enough for most cases. - -We also used to provide ``ctypes-configure`` for some API-level access. -This is now viewed as a precursor of CFFI, which you should use instead. More (but older) information is available :doc:`here <discussion/ctypes-implementation>`. Also, ctypes' performance is not as good as CFFI's. .. _CPython ctypes: http://docs.python.org/library/ctypes.html PyPy implements ctypes as pure Python code around two built-in modules -called ``_ffi`` and ``_rawffi``, which give a very low-level binding to +called ``_rawffi`` and ``_rawffi.alt``, which give a very low-level binding to the C library libffi_. Nowadays it is not recommended to use directly these two modules. 
diff --git a/pypy/doc/index.rst b/pypy/doc/index.rst --- a/pypy/doc/index.rst +++ b/pypy/doc/index.rst @@ -103,7 +103,7 @@ the `development mailing list`_. .. _#pypy on irc.freenode.net: irc://irc.freenode.net/pypy -.. _here: https://botbot.me/freenode/pypy/ +.. _here: https://quodlibet.duckdns.org/irc/pypy/latest.log.html#irc-end .. _Development mailing list: http://mail.python.org/mailman/listinfo/pypy-dev .. _Commit mailing list: http://mail.python.org/mailman/listinfo/pypy-commit .. _Development bug/feature tracker: https://bitbucket.org/pypy/pypy/issues diff --git a/pypy/doc/release-v7.0.0.rst b/pypy/doc/release-v7.0.0.rst --- a/pypy/doc/release-v7.0.0.rst +++ b/pypy/doc/release-v7.0.0.rst @@ -19,11 +19,12 @@ Until we can work with downstream providers to distribute builds with PyPy, we have made packages for some common packages `available as wheels`_. -The GC `hooks`_ , which can be used to gain more insights into its +The `GC hooks`_ , which can be used to gain more insights into its performance, has been improved and it is now possible to manually manage the GC by using a combination of ``gc.disable`` and ``gc.collect_step``. See the `GC blog post`_. +.. _`GC hooks`: http://doc.pypy.org/en/latest/gc_info.html#semi-manual-gc-management We updated the `cffi`_ module included in PyPy to version 1.12, and the `cppyy`_ backend to 1.4. Please use these to wrap your C and C++ code, @@ -39,7 +40,7 @@ The utf8 branch that changes internal representation of unicode to utf8 did not make it into the release, so there is still more goodness coming. -You can download the v6.0 releases here: +You can download the v7.0 releases here: http://pypy.org/download.html @@ -49,7 +50,7 @@ We would also like to thank our contributors and encourage new people to join the project. 
PyPy has many layers and we need help with all of them: `PyPy`_ -and `RPython`_ documentation improvements, tweaking popular `modules`_ to run +and `RPython`_ documentation improvements, tweaking popular modules to run on pypy, or general `help`_ with making RPython's JIT even better. .. _`PyPy`: index.html diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -4,3 +4,31 @@ .. this is a revision shortly after release-pypy-7.0.0 .. startrev: 481c69f7d81f + +.. branch: zlib-copying-third-time-a-charm + +Make sure zlib decompressobjs have their streams deallocated immediately +on flush. + +.. branch: zlib-copying-redux + +Fix calling copy on already-flushed compressobjs. + + + +.. branch: math-improvements + +Improve performance of long operations where one of the operands fits into +an int. + +.. branch: regalloc-playground + +Improve register allocation in the JIT. + +.. branch: promote-unicode + +Implement rlib.jit.promote_unicode to complement promote_string + +.. branch: unicode-utf8 + +Use utf8 internally to represent unicode, with the goal of never using rpython-level unicode diff --git a/pypy/doc/whatsnew-pypy2-5.10.0.rst b/pypy/doc/whatsnew-pypy2-5.10.0.rst --- a/pypy/doc/whatsnew-pypy2-5.10.0.rst +++ b/pypy/doc/whatsnew-pypy2-5.10.0.rst @@ -1,42 +1,42 @@ -========================== -What's new in PyPy2.7 5.10 -========================== - -.. this is a revision shortly after release-pypy2.7-v5.9.0 -.. startrev:d56dadcef996 - - -.. branch: cppyy-packaging - -Cleanup and improve cppyy packaging - -.. branch: docs-osx-brew-openssl - -.. branch: keep-debug-symbols - -Add a smartstrip tool, which can optionally keep the debug symbols in a -separate file, instead of just stripping them away. Use it in packaging - -.. branch: bsd-patches - -Fix failures on FreeBSD, contributed by David Naylor as patches on the issue -tracker (issues 2694, 2695, 2696, 2697) - -.. 
branch: run-extra-tests - -Run extra_tests/ in buildbot - -.. branch: vmprof-0.4.10 - -Upgrade the _vmprof backend to vmprof 0.4.10 - -.. branch: fix-vmprof-stacklet-switch -.. branch: fix-vmprof-stacklet-switch-2 - -Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) - -.. branch: win32-vcvars - -.. branch: rdict-fast-hash - -Make it possible to declare that the hash function of an r_dict is fast in RPython. +========================== +What's new in PyPy2.7 5.10 +========================== + +.. this is a revision shortly after release-pypy2.7-v5.9.0 +.. startrev:d56dadcef996 + + +.. branch: cppyy-packaging + +Cleanup and improve cppyy packaging + +.. branch: docs-osx-brew-openssl + +.. branch: keep-debug-symbols + +Add a smartstrip tool, which can optionally keep the debug symbols in a +separate file, instead of just stripping them away. Use it in packaging + +.. branch: bsd-patches + +Fix failures on FreeBSD, contributed by David Naylor as patches on the issue +tracker (issues 2694, 2695, 2696, 2697) + +.. branch: run-extra-tests + +Run extra_tests/ in buildbot + +.. branch: vmprof-0.4.10 + +Upgrade the _vmprof backend to vmprof 0.4.10 + +.. branch: fix-vmprof-stacklet-switch +.. branch: fix-vmprof-stacklet-switch-2 + +Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) + +.. branch: win32-vcvars + +.. branch: rdict-fast-hash + +Make it possible to declare that the hash function of an r_dict is fast in RPython. diff --git a/pypy/doc/whatsnew-pypy2-6.0.0.rst b/pypy/doc/whatsnew-pypy2-6.0.0.rst --- a/pypy/doc/whatsnew-pypy2-6.0.0.rst +++ b/pypy/doc/whatsnew-pypy2-6.0.0.rst @@ -1,132 +1,128 @@ -=========================== -What's new in PyPy2.7 5.10+ -=========================== - -.. this is a revision shortly after release-pypy2.7-v5.10.0 -.. startrev: 6b024edd9d12 - -.. 
branch: cpyext-avoid-roundtrip - -Big refactoring of some cpyext code, which avoids a lot of nonsense when -calling C from Python and vice-versa: the result is a big speedup in -function/method calls, up to 6 times faster. - -.. branch: cpyext-datetime2 - -Support ``tzinfo`` field on C-API datetime objects, fixes latest pandas HEAD - - -.. branch: mapdict-size-limit - -Fix a corner case of mapdict: When an instance is used like a dict (using -``setattr`` and ``getattr``, or ``.__dict__``) and a lot of attributes are -added, then the performance using mapdict is linear in the number of -attributes. This is now fixed (by switching to a regular dict after 80 -attributes). - - -.. branch: cpyext-faster-arg-passing - -When using cpyext, improve the speed of passing certain objects from PyPy to C -code, most notably None, True, False, types, all instances of C-defined types. -Before, a dict lookup was needed every time such an object crossed over, now it -is just a field read. - - -.. branch: 2634_datetime_timedelta_performance - -Improve datetime + timedelta performance. - -.. branch: memory-accounting - -Improve way to describe memory - -.. branch: msvc14 - -Allow compilaiton with Visual Studio 2017 compiler suite on windows - -.. branch: winapi - -Update _winapi and internal _winbase_cffi (via _winbase_build) for python 3 - -.. branch: refactor-slots - -Refactor cpyext slots. - - -.. branch: call-loopinvariant-into-bridges - -Speed up branchy code that does a lot of function inlining by saving one call -to read the TLS in most bridges. - -.. branch: rpython-sprint - -Refactor in rpython signatures - -.. branch: cpyext-tls-operror2 - -Store error state thread-locally in executioncontext, fixes issue #2764 - -.. branch: cpyext-fast-typecheck - -Optimize `Py*_Check` for `Bool`, `Float`, `Set`. 
Also refactor and simplify -`W_PyCWrapperObject` which is used to call slots from the C-API, greatly -improving microbenchmarks in https://github.com/antocuni/cpyext-benchmarks - - -.. branch: fix-sre-problems - -Fix two (unrelated) JIT bugs manifesting in the re module: - -- green fields are broken and were thus disabled, plus their usage removed from - the _sre implementation - -- in rare "trace is too long" situations, the JIT could break behaviour - arbitrarily. - -.. branch: jit-hooks-can-be-disabled - -Be more efficient about JIT hooks. Make it possible for the frontend to declare -that jit hooks are currently not enabled at all. in that case, the list of ops -does not have to be created in the case of the on_abort hook (which is -expensive). - - -.. branch: pyparser-improvements - -Improve speed of Python parser, improve ParseError messages slightly. - -.. branch: ioctl-arg-size - -Work around possible bugs in upstream ioctl users, like CPython allocate at -least 1024 bytes for the arg in calls to ``ioctl(fd, request, arg)``. Fixes -issue #2776 - -.. branch: cpyext-subclass-setattr - -Fix for python-level classes that inherit from C-API types, previously the -`w_obj` was not necessarily preserved throughout the lifetime of the `pyobj` -which led to cases where instance attributes were lost. Fixes issue #2793 - - -.. branch: pyparser-improvements-2 - -Improve line offsets that are reported by SyntaxError. Improve error messages -for a few situations, including mismatched parenthesis. - -.. branch: issue2752 - -Fix a rare GC bug that was introduced more than one year ago, but was -not diagnosed before issue #2752. - -.. branch: gc-hooks - -Introduce GC hooks, as documented in doc/gc_info.rst - -.. branch: gc-hook-better-timestamp - -Improve GC hooks - -.. branch: cppyy-packaging - -Update backend to 0.6.0 and support exceptions through wrappers +=========================== +What's new in PyPy2.7 5.10+ +=========================== + +.. 
this is a revision shortly after release-pypy2.7-v5.10.0 +.. startrev: 6b024edd9d12 + +.. branch: cpyext-avoid-roundtrip + +Big refactoring of some cpyext code, which avoids a lot of nonsense when +calling C from Python and vice-versa: the result is a big speedup in +function/method calls, up to 6 times faster. + +.. branch: cpyext-datetime2 + +Support ``tzinfo`` field on C-API datetime objects, fixes latest pandas HEAD + + +.. branch: mapdict-size-limit + +Fix a corner case of mapdict: When an instance is used like a dict (using +``setattr`` and ``getattr``, or ``.__dict__``) and a lot of attributes are +added, then the performance using mapdict is linear in the number of +attributes. This is now fixed (by switching to a regular dict after 80 +attributes). + + +.. branch: cpyext-faster-arg-passing + +When using cpyext, improve the speed of passing certain objects from PyPy to C +code, most notably None, True, False, types, all instances of C-defined types. +Before, a dict lookup was needed every time such an object crossed over, now it +is just a field read. + + +.. branch: 2634_datetime_timedelta_performance + +Improve datetime + timedelta performance. + +.. branch: memory-accounting + +Improve way to describe memory + +.. branch: msvc14 + +Allow compilaiton with Visual Studio 2017 compiler suite on windows + +.. branch: refactor-slots + +Refactor cpyext slots. + + +.. branch: call-loopinvariant-into-bridges + +Speed up branchy code that does a lot of function inlining by saving one call +to read the TLS in most bridges. + +.. branch: rpython-sprint + +Refactor in rpython signatures + +.. branch: cpyext-tls-operror2 + +Store error state thread-locally in executioncontext, fixes issue #2764 + +.. branch: cpyext-fast-typecheck + +Optimize `Py*_Check` for `Bool`, `Float`, `Set`. Also refactor and simplify +`W_PyCWrapperObject` which is used to call slots from the C-API, greatly +improving microbenchmarks in https://github.com/antocuni/cpyext-benchmarks + + +.. 
branch: fix-sre-problems + +Fix two (unrelated) JIT bugs manifesting in the re module: + +- green fields are broken and were thus disabled, plus their usage removed from + the _sre implementation + +- in rare "trace is too long" situations, the JIT could break behaviour + arbitrarily. + +.. branch: jit-hooks-can-be-disabled + +Be more efficient about JIT hooks. Make it possible for the frontend to declare +that jit hooks are currently not enabled at all. in that case, the list of ops +does not have to be created in the case of the on_abort hook (which is +expensive). + + +.. branch: pyparser-improvements + +Improve speed of Python parser, improve ParseError messages slightly. + +.. branch: ioctl-arg-size + +Work around possible bugs in upstream ioctl users, like CPython allocate at +least 1024 bytes for the arg in calls to ``ioctl(fd, request, arg)``. Fixes +issue #2776 + +.. branch: cpyext-subclass-setattr + +Fix for python-level classes that inherit from C-API types, previously the +`w_obj` was not necessarily preserved throughout the lifetime of the `pyobj` +which led to cases where instance attributes were lost. Fixes issue #2793 + + +.. branch: pyparser-improvements-2 + +Improve line offsets that are reported by SyntaxError. Improve error messages +for a few situations, including mismatched parenthesis. + +.. branch: issue2752 + +Fix a rare GC bug that was introduced more than one year ago, but was +not diagnosed before issue #2752. + +.. branch: gc-hooks + +Introduce GC hooks, as documented in doc/gc_info.rst + +.. branch: gc-hook-better-timestamp + +Improve GC hooks + +.. branch: cppyy-packaging + +Update backend to 0.6.0 and support exceptions through wrappers diff --git a/pypy/doc/whatsnew-pypy2-7.0.0.rst b/pypy/doc/whatsnew-pypy2-7.0.0.rst --- a/pypy/doc/whatsnew-pypy2-7.0.0.rst +++ b/pypy/doc/whatsnew-pypy2-7.0.0.rst @@ -1,73 +1,73 @@ -========================== -What's new in PyPy2.7 6.0+ -========================== - -.. 
this is a revision shortly after release-pypy-6.0.0 -.. startrev: e50e11af23f1 - -.. branch: cppyy-packaging - -Main items: vastly better template resolution and improved performance. In -detail: upgrade to backend 1.4, improved handling of templated methods and -functions (in particular automatic deduction of types), improved pythonization -interface, range of compatibility fixes for Python3, free functions now take -fast libffi path when possible, moves for strings (incl. from Python str), -easier/faster handling of std::vector by numpy, improved and faster object -identity preservation - -.. branch: socket_default_timeout_blockingness - -Make sure 'blocking-ness' of socket is set along with default timeout - -.. branch: crypt_h - -Include crypt.h for crypt() on Linux - -.. branch: gc-more-logging - -Log additional gc-minor and gc-collect-step info in the PYPYLOG - -.. branch: reverse-debugger - -The reverse-debugger branch has been merged. For more information, see -https://bitbucket.org/pypy/revdb - - -.. branch: pyparser-improvements-3 - -Small refactorings in the Python parser. - -.. branch: fix-readme-typo - -.. branch: py3.6-wordcode - -implement new wordcode instruction encoding on the 3.6 branch - -.. branch: socket_default_timeout_blockingness - -Backport CPython fix for possible shell injection issue in `distutils.spawn`, -https://bugs.python.org/issue34540 - -.. branch: cffi_dlopen_unicode - -Enable use of unicode file names in `dlopen` - -.. branch: rlock-in-rpython - -Backport CPython fix for `thread.RLock` - - -.. branch: expose-gc-time - -Make GC hooks measure time in seconds (as opposed to an opaque unit). - -.. branch: cleanup-test_lib_pypy - -Update most test_lib_pypy/ tests and move them to extra_tests/. - -.. branch: gc-disable - -Make it possible to manually manage the GC by using a combination of -gc.disable() and gc.collect_step(). 
Make sure to write a proper release -announcement in which we explain that existing programs could leak memory if -they run for too much time between a gc.disable()/gc.enable() +========================== +What's new in PyPy2.7 6.0+ +========================== + +.. this is a revision shortly after release-pypy-6.0.0 +.. startrev: e50e11af23f1 + +.. branch: cppyy-packaging + +Main items: vastly better template resolution and improved performance. In +detail: upgrade to backend 1.4, improved handling of templated methods and +functions (in particular automatic deduction of types), improved pythonization +interface, range of compatibility fixes for Python3, free functions now take +fast libffi path when possible, moves for strings (incl. from Python str), +easier/faster handling of std::vector by numpy, improved and faster object +identity preservation + +.. branch: socket_default_timeout_blockingness + +Make sure 'blocking-ness' of socket is set along with default timeout + +.. branch: crypt_h + +Include crypt.h for crypt() on Linux + +.. branch: gc-more-logging + +Log additional gc-minor and gc-collect-step info in the PYPYLOG + +.. branch: reverse-debugger + +The reverse-debugger branch has been merged. For more information, see +https://bitbucket.org/pypy/revdb + + +.. branch: pyparser-improvements-3 + +Small refactorings in the Python parser. + +.. branch: fix-readme-typo + +.. branch: py3.6-wordcode + +implement new wordcode instruction encoding on the 3.6 branch + +.. branch: socket_default_timeout_blockingness + +Backport CPython fix for possible shell injection issue in `distutils.spawn`, +https://bugs.python.org/issue34540 + +.. branch: cffi_dlopen_unicode + +Enable use of unicode file names in `dlopen` + +.. branch: rlock-in-rpython + +Backport CPython fix for `thread.RLock` + + +.. branch: expose-gc-time + +Make GC hooks measure time in seconds (as opposed to an opaque unit). + +.. 
branch: cleanup-test_lib_pypy + +Update most test_lib_pypy/ tests and move them to extra_tests/. + +.. branch: gc-disable + +Make it possible to manually manage the GC by using a combination of +gc.disable() and gc.collect_step(). Make sure to write a proper release +announcement in which we explain that existing programs could leak memory if +they run for too much time between a gc.disable()/gc.enable() diff --git a/pypy/doc/whatsnew-pypy3-5.10.0.rst b/pypy/doc/whatsnew-pypy3-5.10.0.rst --- a/pypy/doc/whatsnew-pypy3-5.10.0.rst +++ b/pypy/doc/whatsnew-pypy3-5.10.0.rst @@ -1,21 +1,7 @@ -========================= -What's new in PyPy3 5.9+ -========================= - -.. this is the revision after release-pypy3.5-5.9 -.. startrev: be41e3ac0a29 - -.. branch: sched_yield -Add sched_yield posix attribute - -.. branch: py3.5-appexec -Raise if space.is_true(space.appexec()) used in app level tests, fix tests -that did this - -.. branch: py3.5-mac-embedding -Download and patch dependencies when building cffi-based stdlib modules - -.. branch: os_lockf - -.. branch: py3.5-xattr -Add posix.*attr() functions +======================== +What's new in PyPy3 7.0+ +======================== + +.. this is the revision after release-pypy3.5-v7.0 +.. startrev: 9d2fa7c63b7c + diff --git a/pypy/doc/whatsnew-pypy3-6.0.0.rst b/pypy/doc/whatsnew-pypy3-6.0.0.rst --- a/pypy/doc/whatsnew-pypy3-6.0.0.rst +++ b/pypy/doc/whatsnew-pypy3-6.0.0.rst @@ -1,28 +1,7 @@ -========================= -What's new in PyPy3 5.10+ -========================= +======================== +What's new in PyPy3 7.0+ +======================== -.. this is the revision after release-pypy3.5-v5.10 -.. startrev: 34c63fba0bba +.. this is the revision after release-pypy3.5-v7.0 +.. startrev: 9d2fa7c63b7c -.. branch: hroncok/fix-typeerror-str-does-not-support-the-b-1514414905375 - -Fix for bytestrings in console repl - -.. branch: py3-winreg - -Update winreg module to use unicode, wide-strings - -.. 
branch: cpyext-py3-instancemethod-attributes - -Add missing ``__doc__``, ``__module__``, ``__name__`` attributes to -``instancemethod`` - -.. branch: winapi - -Update support for _winapi cffi module for python3 - -.. branch: py3.5-refactor-slots - -Refactor cpyext slots. - diff --git a/pypy/doc/whatsnew-pypy3-7.0.0.rst b/pypy/doc/whatsnew-pypy3-7.0.0.rst --- a/pypy/doc/whatsnew-pypy3-7.0.0.rst +++ b/pypy/doc/whatsnew-pypy3-7.0.0.rst @@ -5,21 +5,16 @@ .. this is the revision after release-pypy3.5-v6.0 .. startrev: 580e3e26cd32 -.. branch: hroncok/fix-multiprocessing-regression-on-newer--1524656522151 +.. branch: unicode-utf8 -Fix multiprocessing regression on newer glibcs +Use utf-8 internally to represent unicode strings -.. branch: py3.5-user-site-impl +.. branch: unicode-utf8-py3 -Use implementation-specific site directories in sysconfig like in Python2 +Use utf-8 internally to represent unicode strings .. branch: alex_gaynor/remove-an-unneeded-call-into-openssl-th-1526429141011 Remove an unneeded call into OpenSSL, from cpython https://github.com/python/cpython/pull/6887 - -.. branch: py3.5-reverse-debugger - -The reverse-debugger branch has been merged. For more information, see -https://bitbucket.org/pypy/revdb diff --git a/pypy/doc/whatsnew-pypy3-head.rst b/pypy/doc/whatsnew-pypy3-head.rst --- a/pypy/doc/whatsnew-pypy3-head.rst +++ b/pypy/doc/whatsnew-pypy3-head.rst @@ -4,3 +4,7 @@ .. this is the revision after release-pypy3.5-v7.0 .. startrev: 9d2fa7c63b7c + +.. 
branch: unicode-utf8-py3 + +Use utf8 instead of rpython-level unicode \ No newline at end of file diff --git a/pypy/goal/targetpypystandalone.py b/pypy/goal/targetpypystandalone.py --- a/pypy/goal/targetpypystandalone.py +++ b/pypy/goal/targetpypystandalone.py @@ -83,7 +83,7 @@ ## con.interact() except OperationError as e: debug("OperationError:") - debug(" operror-type: " + e.w_type.getname(space).encode('utf-8')) + debug(" operror-type: " + e.w_type.getname(space)) debug(" operror-value: " + space.text_w(space.str(e.get_w_value(space)))) return 1 finally: @@ -91,7 +91,7 @@ space.finish() except OperationError as e: debug("OperationError:") - debug(" operror-type: " + e.w_type.getname(space).encode('utf-8')) + debug(" operror-type: " + e.w_type.getname(space)) debug(" operror-value: " + space.text_w(space.str(e.get_w_value(space)))) return 1 return exitcode @@ -148,7 +148,7 @@ except OperationError as e: if verbose: debug("OperationError:") - debug(" operror-type: " + e.w_type.getname(space).encode('utf-8')) + debug(" operror-type: " + e.w_type.getname(space)) debug(" operror-value: " + space.text_w(space.str(e.get_w_value(space)))) return rffi.cast(rffi.INT, -1) finally: @@ -202,7 +202,7 @@ """) except OperationError as e: debug("OperationError:") - debug(" operror-type: " + e.w_type.getname(space).encode('utf-8')) + debug(" operror-type: " + e.w_type.getname(space)) debug(" operror-value: " + space.text_w(space.str(e.get_w_value(space)))) return -1 return 0 diff --git a/pypy/interpreter/argument.py b/pypy/interpreter/argument.py --- a/pypy/interpreter/argument.py +++ b/pypy/interpreter/argument.py @@ -596,6 +596,10 @@ except IndexError: name = '?' 
else: + w_enc = space.newtext(space.sys.defaultencoding) + w_err = space.newtext("replace") + w_name = space.call_method(w_name, "encode", w_enc, + w_err) name = space.text_w(w_name) break self.kwd_name = name diff --git a/pypy/interpreter/astcompiler/astbuilder.py b/pypy/interpreter/astcompiler/astbuilder.py --- a/pypy/interpreter/astcompiler/astbuilder.py +++ b/pypy/interpreter/astcompiler/astbuilder.py @@ -58,6 +58,7 @@ self.space = space self.compile_info = compile_info self.root_node = n + # used in f-strings self.recursive_parser = recursive_parser def build_ast(self): diff --git a/pypy/interpreter/astcompiler/fstring.py b/pypy/interpreter/astcompiler/fstring.py --- a/pypy/interpreter/astcompiler/fstring.py +++ b/pypy/interpreter/astcompiler/fstring.py @@ -3,6 +3,7 @@ from pypy.interpreter import error from pypy.interpreter import unicodehelper from rpython.rlib.rstring import StringBuilder +from rpython.rlib.rutf8 import codepoints_in_utf8 def add_constant_string(astbuilder, joined_pieces, w_string, atom_node): @@ -21,10 +22,8 @@ joined_pieces.append(node(w_string, atom_node.get_lineno(), atom_node.get_column())) -def f_constant_string(astbuilder, joined_pieces, u, atom_node): - space = astbuilder.space - add_constant_string(astbuilder, joined_pieces, space.newunicode(u), - atom_node) +def f_constant_string(astbuilder, joined_pieces, w_u, atom_node): + add_constant_string(astbuilder, joined_pieces, w_u, atom_node) def f_string_compile(astbuilder, source, atom_node): # Note: a f-string is kept as a single literal up to here. 
@@ -259,20 +258,20 @@ i += 1 fstr.current_index = i + space = astbuilder.space literal = builder.build() + lgt = codepoints_in_utf8(literal) if not fstr.raw_mode and '\\' in literal: - space = astbuilder.space literal = parsestring.decode_unicode_utf8(space, literal, 0, len(literal)) - return unicodehelper.decode_unicode_escape(space, literal) - else: - return literal.decode('utf-8') + literal, lgt, pos = unicodehelper.decode_unicode_escape(space, literal) + return space.newtext(literal, lgt) def fstring_find_literal_and_expr(astbuilder, fstr, atom_node, rec): - # Return a tuple with the next literal part, and optionally the + # Return a tuple with the next literal part as a W_Unicode, and optionally the # following expression node. Updates the current index inside 'fstr'. - literal = fstring_find_literal(astbuilder, fstr, atom_node, rec) + w_u = fstring_find_literal(astbuilder, fstr, atom_node, rec) s = fstr.unparsed i = fstr.current_index @@ -284,7 +283,7 @@ # We must now be the start of an expression, on a '{'. assert s[i] == '{' expr = fstring_find_expr(astbuilder, fstr, atom_node, rec) - return literal, expr + return w_u, expr def parse_f_string(astbuilder, joined_pieces, fstr, atom_node, rec=0): @@ -303,11 +302,11 @@ "really the case", atom_node) while True: - literal, expr = fstring_find_literal_and_expr(astbuilder, fstr, + w_u, expr = fstring_find_literal_and_expr(astbuilder, fstr, atom_node, rec) # add the literal part - f_constant_string(astbuilder, joined_pieces, literal, atom_node) + f_constant_string(astbuilder, joined_pieces, w_u, atom_node) if expr is None: break # We're done with this f-string. 
diff --git a/pypy/interpreter/astcompiler/misc.py b/pypy/interpreter/astcompiler/misc.py --- a/pypy/interpreter/astcompiler/misc.py +++ b/pypy/interpreter/astcompiler/misc.py @@ -116,7 +116,7 @@ # only intern identifier-like strings from pypy.objspace.std.unicodeobject import _isidentifier if (space.is_w(space.type(w_const), space.w_unicode) and - _isidentifier(space.unicode_w(w_const))): + _isidentifier(space.utf8_w(w_const))): return space.new_interned_w_str(w_const) return w_const diff --git a/pypy/interpreter/astcompiler/optimize.py b/pypy/interpreter/astcompiler/optimize.py --- a/pypy/interpreter/astcompiler/optimize.py +++ b/pypy/interpreter/astcompiler/optimize.py @@ -5,7 +5,7 @@ from pypy.tool import stdlib_opcode as ops from pypy.interpreter.error import OperationError from rpython.rlib.unroll import unrolling_iterable -from rpython.rlib.runicode import MAXUNICODE +from rpython.rlib.rutf8 import MAXUNICODE from rpython.rlib.objectmodel import specialize @@ -329,7 +329,7 @@ # produce compatible pycs. 
if (self.space.isinstance_w(w_obj, self.space.w_unicode) and self.space.isinstance_w(w_const, self.space.w_unicode)): - #unistr = self.space.unicode_w(w_const) + #unistr = self.space.utf8_w(w_const) #if len(unistr) == 1: # ch = ord(unistr[0]) #else: diff --git a/pypy/interpreter/astcompiler/test/test_compiler.py b/pypy/interpreter/astcompiler/test/test_compiler.py --- a/pypy/interpreter/astcompiler/test/test_compiler.py +++ b/pypy/interpreter/astcompiler/test/test_compiler.py @@ -1,5 +1,6 @@ from __future__ import division import py, sys +from pytest import raises from pypy.interpreter.astcompiler import codegen, astbuilder, symtable, optimize from pypy.interpreter.pyparser import pyparse from pypy.interpreter.pyparser.test import expressions @@ -75,7 +76,7 @@ space = self.space pyco_expr = space.createcompiler().compile(evalexpr, '<evalexpr>', 'eval', 0) w_res = space.exec_(pyco_expr, w_dict, w_dict) - res = space.str_w(space.repr(w_res)) + res = space.text_w(space.repr(w_res)) expected_repr = self.get_py3_repr(expected) if isinstance(expected, float): # Float representation can vary a bit between interpreter @@ -1255,7 +1256,6 @@ def test_revdb_metavar(self): from pypy.interpreter.reverse_debugging import dbstate, setup_revdb - self.space.config.translation.reverse_debugger = True self.space.reverse_debugging = True try: setup_revdb(self.space) @@ -1270,9 +1270,6 @@ class AppTestCompiler: - def setup_class(cls): - cls.w_maxunicode = cls.space.wrap(sys.maxunicode) - def test_docstring_not_loaded(self): import io, dis, sys ns = {} @@ -1442,7 +1439,7 @@ ''', d) return d['f'](5) """) - assert 'generator' in space.str_w(space.repr(w_generator)) + assert 'generator' in space.text_w(space.repr(w_generator)) def test_folding_of_list_constants(self): for source in ( diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -3,7 +3,7 @@ from rpython.rlib.cache import 
Cache from rpython.tool.uid import HUGEVAL_BYTES -from rpython.rlib import jit, types +from rpython.rlib import jit, types, rutf8 from rpython.rlib.debug import make_sure_not_resized from rpython.rlib.objectmodel import (we_are_translated, newlist_hint, compute_unique_id, specialize, not_rpython) @@ -80,10 +80,10 @@ def getname(self, space): try: - return space.unicode_w(space.getattr(self, space.newtext('__name__'))) + return space.utf8_w(space.getattr(self, space.newtext('__name__'))) except OperationError as e: if e.match(space, space.w_TypeError) or e.match(space, space.w_AttributeError): - return u'?' + return '?' raise def getaddrstring(self, space): @@ -105,9 +105,9 @@ w_id = space.rshift(w_id, w_4) return ''.join(addrstring) - def getrepr(self, space, info, moreinfo=u''): - addrstring = unicode(self.getaddrstring(space)) - return space.newunicode(u"<%s at 0x%s%s>" % (info, addrstring, moreinfo)) + def getrepr(self, space, info, moreinfo=''): + addrstring = self.getaddrstring(space) + return space.newtext("<%s at 0x%s%s>" % (info, addrstring, moreinfo)) def getslotvalue(self, index): raise NotImplementedError @@ -245,11 +245,14 @@ def bytes_w(self, space): self._typed_unwrap_error(space, "bytes") - def unicode_w(self, space): - self._typed_unwrap_error(space, "string") + def text_w(self, space): + self._typed_unwrap_error(space, "unicode") - def text_w(self, space): - self._typed_unwrap_error(space, "string") + def utf8_w(self, space): + self._typed_unwrap_error(space, "unicode") + + def convert_to_w_unicode(self, space): + self._typed_unwrap_error(space, "unicode") def bytearray_list_of_chars_w(self, space): self._typed_unwrap_error(space, "bytearray") @@ -423,7 +426,7 @@ self.builtin_modules = {} self.reloading_modules = {} - self.interned_strings = make_weak_value_dictionary(self, unicode, W_Root) + self.interned_strings = make_weak_value_dictionary(self, str, W_Root) self.actionflag = ActionFlag() # changed by the signal module self.check_signal_action = 
None # changed by the signal module make_finalizer_queue(W_Root, self) @@ -784,12 +787,12 @@ def setitem_str(self, w_obj, key, w_value): # key is a "text", i.e. a byte string (in python3 it - # represents a utf-8-encoded unicode) + # represents a valid utf-8-encoded unicode) return self.setitem(w_obj, self.newtext(key), w_value) def finditem_str(self, w_obj, key): # key is a "text", i.e. a byte string (in python3 it - # represents a utf-8-encoded unicode) + # represents a valid utf-8-encoded unicode) return self.finditem(w_obj, self.newtext(key)) def finditem(self, w_obj, w_key): @@ -823,9 +826,9 @@ def new_interned_w_str(self, w_u): assert isinstance(w_u, W_Root) # and is not None - u = self.unicode_w(w_u) + u = self.utf8_w(w_u) if not we_are_translated(): - assert type(u) is unicode + assert type(u) is str w_u1 = self.interned_strings.get(u) if w_u1 is None: w_u1 = w_u @@ -838,12 +841,11 @@ # returns a "text" object (ie str in python2 and unicode in python3) if not we_are_translated(): assert type(s) is str - u = s.decode('utf-8') - w_s1 = self.interned_strings.get(u) + w_s1 = self.interned_strings.get(s) if w_s1 is None: - w_s1 = self.newunicode(u) + w_s1 = self.newtext(s) if self._side_effects_ok(): - self.interned_strings.set(u, w_s1) + self.interned_strings.set(s, w_s1) return w_s1 def _revdb_startup(self): @@ -882,11 +884,7 @@ # interface for marshal_impl if not we_are_translated(): assert type(s) is str - try: - u = s.decode('utf-8') - except UnicodeDecodeError: - return None - return self.interned_strings.get(u) # may be None + return self.interned_strings.get(s) # may be None @specialize.arg(1) def descr_self_interp_w(self, RequiredClass, w_obj): @@ -1069,7 +1067,7 @@ """ return None - def listview_unicode(self, w_list): + def listview_utf8(self, w_list): """ Return a list of unwrapped unicode out of a list of unicode. If the argument is not a list or does not contain only unicode, return None. May return None anyway. 
@@ -1099,8 +1097,15 @@ def newlist_bytes(self, list_s): return self.newlist([self.newbytes(s) for s in list_s]) - def newlist_unicode(self, list_u): - return self.newlist([self.newunicode(u) for u in list_u]) + def newlist_utf8(self, list_u, is_ascii): + l_w = [None] * len(list_u) + for i, item in enumerate(list_u): + if not is_ascii: + length = rutf8.check_utf8(item, True) + else: + length = len(item) + l_w[i] = self.newutf8(item, length) + return self.newlist(l_w) def newlist_int(self, list_i): return self.newlist([self.newint(i) for i in list_i]) @@ -1598,6 +1603,8 @@ else: assert False + if self.isinstance_w(w_obj, self.w_unicode): + return w_obj.charbuf_w(self) def text_or_none_w(self, w_obj): return None if self.is_none(w_obj) else self.text_w(w_obj) @@ -1620,18 +1627,22 @@ an utf-8 encoded rpython string. """ assert w_obj is not None + if not self.isinstance_w(w_obj, self.w_unicode): + w_obj._typed_unwrap_error(self, "unicode") return w_obj.text_w(self) @not_rpython # tests only; should be replaced with bytes_w or text_w def str_w(self, w_obj): """ - if w_obj is unicode, call text_w() (i.e., return the UTF-8-nosg + if w_obj is unicode, call utf8_w() (i.e., return the UTF-8-nosg encoded string). Else, call bytes_w(). We should kill str_w completely and manually substitute it with text_w/bytes_w at all call sites. It remains for now for tests only. """ + XXX # deprecated, leaving in place for clear errors if self.isinstance_w(w_obj, self.w_unicode): + # XXX lo text_w, but better to deprecate str_w than to fix this return w_obj.text_w(self) else: return w_obj.bytes_w(self) @@ -1714,23 +1725,38 @@ assert w_obj is not None return w_obj.float_w(self, allow_conversion) - @specialize.argtype(1) - def unicode_w(self, w_obj): - assert w_obj is not None - return w_obj.unicode_w(self) + def utf8_w(self, w_obj): + return w_obj.utf8_w(self) - def unicode0_w(self, w_obj): - "Like unicode_w, but rejects strings with NUL bytes." 
+ def utf8_0_w(self, w_obj): + "Like utf8_w, but rejects strings with NUL bytes." from rpython.rlib import rstring - result = w_obj.unicode_w(self) - if u'\x00' in result: + result = w_obj.utf8_w(self) + if '\x00' in result: + raise oefmt(self.w_TypeError, + "argument must be a string without NUL " + "characters") + return rstring.assert_str0(result) + + def convert_to_w_unicode(self, w_obj): + return w_obj.convert_to_w_unicode(self) + + def realunicode_w(self, w_obj): + from pypy.interpreter.unicodehelper import decode_utf8sp + utf8 = self.utf8_w(w_obj) + return decode_utf8sp(self, utf8)[0].decode('utf8') + + def utf8_0_w(self, w_obj): + "Like utf8_w, but rejects strings with NUL bytes." + from rpython.rlib import rstring + result = w_obj.utf8_w(self) + if '\x00' in result: raise oefmt(self.w_ValueError, - "argument must be a unicode string without NUL " + "argument must be a utf8 string without NUL " "characters") return rstring.assert_str0(result) realtext_w = text_w # Python 2 compatibility - realunicode_w = unicode_w def fsencode(space, w_obj): from pypy.interpreter.unicodehelper import fsencode @@ -1755,6 +1781,27 @@ w_obj = self.fsencode(w_obj) return self.bytesbuf0_w(w_obj) + def convert_arg_to_w_unicode(self, w_obj, strict=None): + # XXX why convert_to_w_unicode does something slightly different? + from pypy.objspace.std.unicodeobject import W_UnicodeObject + # for z_translation tests + if hasattr(self, 'is_fake_objspace'): return self.newtext("foobar") + return W_UnicodeObject.convert_arg_to_w_unicode(self, w_obj, strict) + + def utf8_len_w(self, w_obj): + w_obj = self.convert_arg_to_w_unicode(w_obj) + return w_obj._utf8, w_obj._len() + + def realutf8_w(self, w_obj): + # Like utf8_w(), but only works if w_obj is really of type + # 'unicode'. On Python 3 this is the same as utf8_w(). 
+ from pypy.objspace.std.unicodeobject import W_UnicodeObject + # for z_translation tests + if hasattr(self, 'is_fake_objspace'): return self.newtext("foobar") + if not isinstance(w_obj, W_UnicodeObject): + raise oefmt(self.w_TypeError, "argument must be a unicode") + return self.utf8_w(w_obj) + def bytesbuf0_w(self, w_obj): # Like bytes0_w(), but also accept a read-only buffer. from rpython.rlib import rstring @@ -1777,7 +1824,7 @@ w_obj = fspath(self, w_obj) else: w_obj = self.fsdecode(w_obj) - return self.unicode0_w(w_obj) + return self.utf8_w(w_obj) def bool_w(self, w_obj): # Unwraps a bool, also accepting an int for compatibility. @@ -2105,7 +2152,7 @@ 'float_w', 'uint_w', 'bigint_w', - 'unicode_w', + 'utf8_w', 'unwrap', 'is_true', 'is_w', diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py --- a/pypy/interpreter/error.py +++ b/pypy/interpreter/error.py @@ -9,8 +9,7 @@ from rpython.rlib.objectmodel import we_are_translated, specialize from rpython.rlib.objectmodel import dont_inline, not_rpython from rpython.rlib import rstack, rstackovf -from rpython.rlib import rwin32 -from rpython.rlib import runicode +from rpython.rlib import rwin32, rutf8 from pypy.interpreter import debug @@ -21,7 +20,8 @@ def strerror(errno): """Translate an error code to a unicode message string.""" from pypy.module._codecs.locale import str_decode_locale_surrogateescape - return str_decode_locale_surrogateescape(os.strerror(errno)) + utf8, lgt = str_decode_locale_surrogateescape(os.strerror(errno)) + return utf8, lgt class OperationError(Exception): """Interpreter-level exception that signals an exception that should be @@ -72,7 +72,7 @@ space = getattr(self.w_type, 'space', None) if space is not None: if self.__class__ is not OperationError and s is None: - s = self._compute_value(space) + s, lgt = self._compute_value(space) try: s = space.text_w(s) except Exception: @@ -306,8 +306,8 @@ def get_w_value(self, space): w_value = self._w_value if w_value is None: - value = 
self._compute_value(space) - self._w_value = w_value = space.newunicode(value) + value, lgt = self._compute_value(space) + self._w_value = w_value = space.newtext(value, lgt) return w_value def _compute_value(self, space): @@ -478,16 +478,7 @@ assert len(formats) > 0, "unsupported: no % command found" return tuple(parts), tuple(formats) -def _decode_utf8(string): - # when building the error message, don't crash if the byte string - # provided is not valid UTF-8 - assert isinstance(string, str) - result, consumed = runicode.str_decode_utf_8( - string, len(string), "replace", final=True) - return result - def get_operrcls2(valuefmt): - valuefmt = valuefmt.decode('ascii') strings, formats = decompose_valuefmt(valuefmt) assert len(strings) == len(formats) + 1 try: @@ -507,30 +498,51 @@ def _compute_value(self, space): lst = [None] * (len(formats) + len(formats) + 1) + lgt = 0 for i, fmt, attr in entries: lst[i + i] = self.xstrings[i] + lgt += len(self.xstrings[i]) value = getattr(self, attr) if fmt == 'd': - result = str(value).decode('ascii') + result = str(value) + lgt += len(result) elif fmt == 'R': - result = space.unicode_w(space.repr(value)) + s = space.repr(value) + result = space.utf8_w(s) + lgt += space.len_w(s) elif fmt == 'S': - result = space.unicode_w(space.str(value)) + s = space.str(value) + result = space.utf8_w(s) + lgt += space.len_w(s) elif fmt == 'T': - result = _decode_utf8(space.type(value).name) + result = space.type(value).name + lgt += rutf8.codepoints_in_utf8(result) elif fmt == 'N': result = value.getname(space) + lgt += len(result) elif fmt == '8': - result = _decode_utf8(value) + # u'str\uxxxx' -> 'str\xXX\xXX' -> u"'str\xXX\xXX'" + from pypy.interpreter import unicodehelper + result, _lgt, pos = unicodehelper.str_decode_utf8( + value, 'replace', True, + unicodehelper.decode_never_raise, True) + lgt += _lgt + elif isinstance(value, unicode): + # 's' + result = str(value.encode('utf-8')) + lgt += len(value) else: - if isinstance(value, 
unicode): - result = value - else: - result = _decode_utf8(str(value)) + result = str(value) + try: + lgt += rutf8.check_utf8(result, True) + except rutf8.CheckError as e: + lgt -= e.pos lst[i + i + 1] = result lst[-1] = self.xstrings[-1] - return u''.join(lst) - # + lgt += len(self.xstrings[-1]) + retval = ''.join(lst) + return retval, lgt + _fmtcache2[formats] = OpErrFmt return OpErrFmt, strings @@ -540,7 +552,7 @@ self.setup(w_type) def _compute_value(self, space): - return self._value.decode('utf-8') + return self._value, len(self._value) def async(self, space): # also matches a RuntimeError("maximum rec.") if the stack is @@ -571,8 +583,8 @@ %8 - The result of arg.decode('utf-8') %N - The result of w_arg.getname(space) - %R - The result of space.unicode_w(space.repr(w_arg)) - %S - The result of space.unicode_w(space.str(w_arg)) + %R - The result of space.utf8_w(space.repr(w_arg)) + %S - The result of space.utf8_w(space.str(w_arg)) %T - The result of space.type(w_arg).name """ @@ -627,12 +639,13 @@ if rwin32.WIN32 and isinstance(e, WindowsError): winerror = e.winerror try: - msg = rwin32.FormatErrorW(winerror) + msg, lgt = rwin32.FormatErrorW(winerror) except ValueError: - msg = u'Windows Error %d' % winerror + msg = 'Windows Error %d' % winerror + lgt = len(msg) w_errno = space.w_None w_winerror = space.newint(winerror) - w_msg = space.newunicode(msg) + w_msg = space.newtext(msg, lgt) else: errno = e.errno if errno == EINTR: @@ -641,12 +654,13 @@ return None try: - msg = strerror(errno) + msg, lgt = strerror(errno) except ValueError: - msg = u'error %d' % errno + msg = 'error %d' % errno + lgt = len(msg) w_errno = space.newint(errno) w_winerror = space.w_None - w_msg = space.newunicode(msg) + w_msg = space.newtext(msg, lgt) if w_filename is None: w_filename = space.w_None @@ -676,9 +690,9 @@ eintr_retry=eintr_retry) def exception_from_errno(space, w_type, errno): - msg = strerror(errno) + msg, lgt = strerror(errno) w_error = space.call_function(w_type, 
space.newint(errno), - space.newunicode(msg)) + space.newtext(msg, lgt)) return OperationError(w_type, w_error) def exception_from_saved_errno(space, w_type): diff --git a/pypy/interpreter/function.py b/pypy/interpreter/function.py --- a/pypy/interpreter/function.py +++ b/pypy/interpreter/function.py @@ -45,7 +45,8 @@ closure=None, w_ann=None, forcename=None, qualname=None): self.space = space self.name = forcename or code.co_name - self.qualname = qualname or self.name.decode('utf-8') + self.qualname = qualname or self.name + assert isinstance(self.qualname, str) self.w_doc = None # lazily read from code.getdocstring() self.code = code # Code instance self.w_func_globals = w_globals # the globals dictionary @@ -255,7 +256,7 @@ return self.call_args(__args__) def descr_function_repr(self): - return self.getrepr(self.space, u'function %s' % self.qualname) + return self.getrepr(self.space, 'function %s' % self.qualname) def _cleanup_(self): @@ -313,7 +314,7 @@ tup_base = [] tup_state = [ space.newtext(self.name), - space.newunicode(self.qualname), + space.newtext(self.qualname), w_doc, self.code, w_func_globals, @@ -337,7 +338,7 @@ self.space = space self.name = space.text_w(w_name) - self.qualname = space.unicode_w(w_qualname) + self.qualname = space.utf8_w(w_qualname) self.code = space.interp_w(Code, w_code) if not space.is_w(w_closure, space.w_None): from pypy.interpreter.nestedscope import Cell @@ -430,11 +431,11 @@ "__name__ must be set to a string object") def fget_func_qualname(self, space): - return space.newunicode(self.qualname) + return space.newtext(self.qualname) def fset_func_qualname(self, space, w_name): try: - self.qualname = space.unicode_w(w_name) + self.qualname = space.realutf8_w(w_name) except OperationError as e: if e.match(space, space.w_TypeError): raise oefmt(space.w_TypeError, @@ -549,14 +550,14 @@ name = self.w_function.getname(self.space) else: try: - name = space.unicode_w(w_name) + name = space.utf8_w(w_name) except OperationError as e: 
if not e.match(space, space.w_TypeError): raise - name = u'?' - objrepr = space.unicode_w(space.repr(self.w_instance)) - s = u'<bound method %s of %s>' % (name, objrepr) - return space.newunicode(s) + name = '?' + objrepr = space.utf8_w(space.repr(self.w_instance)) + s = b'<bound method %s of %s>' % (name, objrepr) + return space.newtext(s) def descr_method_getattribute(self, w_attr): space = self.space @@ -598,7 +599,7 @@ else: w_builtins = space.getbuiltinmodule('builtins') new_inst = space.getattr(w_builtins, space.newtext('getattr')) - tup = [w_instance, space.newunicode(w_function.getname(space))] + tup = [w_instance, space.newtext(w_function.getname(space))] return space.newtuple([new_inst, space.newtuple(tup)]) @@ -699,7 +700,7 @@ return self.space.newtext('<built-in function %s>' % (self.name,)) def descr__reduce__(self, space): - return space.newunicode(self.qualname) + return space.newtext(self.qualname) def is_builtin_code(w_func): from pypy.interpreter.gateway import BuiltinCode diff --git a/pypy/interpreter/gateway.py b/pypy/interpreter/gateway.py --- a/pypy/interpreter/gateway.py +++ b/pypy/interpreter/gateway.py @@ -174,6 +174,9 @@ def visit_unicode(self, el, app_sig): self.checked_space_method(el, app_sig) + def visit_utf8(self, el, app_sig): + self.checked_space_method(el, app_sig) + def visit_fsencode(self, el, app_sig): self.checked_space_method(el, app_sig) @@ -324,7 +327,10 @@ self.run_args.append("space.text0_w(%s)" % (self.scopenext(),)) def visit_unicode(self, typ): - self.run_args.append("space.unicode_w(%s)" % (self.scopenext(),)) + self.run_args.append("space.realunicode_w(%s)" % (self.scopenext(),)) + + def visit_utf8(self, typ): + self.run_args.append("space.utf8_w(%s)" % (self.scopenext(),)) def visit_fsencode(self, typ): self.run_args.append("space.fsencode_w(%s)" % (self.scopenext(),)) @@ -492,11 +498,14 @@ self.unwrap.append("space.text_w(%s)" % (self.nextarg(),)) def visit_unicode(self, typ): - 
self.unwrap.append("space.unicode_w(%s)" % (self.nextarg(),)) + self.unwrap.append("space.realunicode_w(%s)" % (self.nextarg(),)) def visit_text0(self, typ): self.unwrap.append("space.text0_w(%s)" % (self.nextarg(),)) + def visit_utf8(self, typ): + self.unwrap.append("space.utf8_w(%s)" % (self.nextarg(),)) + def visit_fsencode(self, typ): self.unwrap.append("space.fsencode_w(%s)" % (self.nextarg(),)) @@ -567,8 +576,10 @@ assert typ in (int, str, float, unicode, r_longlong, r_uint, r_ulonglong, bool) if typ is r_int is r_longlong: return 'gateway_r_longlong_w' - elif typ in (str, unicode): - return typ.__name__ + '_w' + elif typ is str: + return 'utf8_w' + elif typ is unicode: + return 'realunicode_w' elif typ is bool: # For argument clinic's "bool" specifier: accept any object, and # convert it to a boolean value. If you don't want this @@ -1113,7 +1124,7 @@ kw_defs_w = [] for name, w_def in sorted(alldefs_w.items()): assert name in sig.kwonlyargnames - w_name = space.newunicode(name.decode('utf-8')) + w_name = space.newtext(name) kw_defs_w.append((w_name, w_def)) return defs_w, kw_defs_w diff --git a/pypy/interpreter/generator.py b/pypy/interpreter/generator.py --- a/pypy/interpreter/generator.py +++ b/pypy/interpreter/generator.py @@ -38,14 +38,12 @@ # 'qualname' is a unicode string if self._qualname is not None: return self._qualname - return self.get_name().decode('utf-8') + return self.get_name() def descr__repr__(self, space): addrstring = self.getaddrstring(space) - return space.newunicode(u"<%s object %s at 0x%s>" % - (self.KIND_U, - self.get_qualname(), - unicode(addrstring))) + return space.newtext("<%s object %s at 0x%s>" % + (self.KIND, self.get_qualname(), addrstring)) def descr_send(self, w_arg): """send(arg) -> send 'arg' into generator/coroutine, @@ -229,7 +227,7 @@ e2.chain_exceptions_from_cause(space, e) raise e2 else: - space.warn(space.newunicode(u"generator '%s' raised StopIteration" + space.warn(space.newtext("generator '%s' raised 
StopIteration" % self.get_qualname()), space.w_DeprecationWarning) @@ -329,11 +327,11 @@ "__name__ must be set to a string object") def descr__qualname__(self, space): - return space.newunicode(self.get_qualname()) + return space.newtext(self.get_qualname()) def descr_set__qualname__(self, space, w_name): try: - self._qualname = space.unicode_w(w_name) + self._qualname = space.utf8_w(w_name) except OperationError as e: if e.match(space, space.w_TypeError): raise oefmt(space.w_TypeError, @@ -422,8 +420,8 @@ self.frame is not None and \ self.frame.last_instr == -1: space = self.space - msg = u"coroutine '%s' was never awaited" % self.get_qualname() - space.warn(space.newunicode(msg), space.w_RuntimeWarning) + msg = "coroutine '%s' was never awaited" % self.get_qualname() + space.warn(space.newtext(msg), space.w_RuntimeWarning) GeneratorOrCoroutine._finalize_(self) diff --git a/pypy/interpreter/mixedmodule.py b/pypy/interpreter/mixedmodule.py --- a/pypy/interpreter/mixedmodule.py +++ b/pypy/interpreter/mixedmodule.py @@ -130,7 +130,7 @@ bltin.w_module = self.w_name func._builtinversion_ = bltin bltin.name = name - bltin.qualname = bltin.name.decode('utf-8') + bltin.qualname = bltin.name w_value = bltin space.setitem(self.w_dict, w_name, w_value) return w_value @@ -197,6 +197,17 @@ def get__doc__(cls, space): return space.newtext_or_none(cls.__doc__) + def setdictvalue_dont_introduce_cell(self, name, w_value): + """ unofficial interface on MixedModules to override an existing value _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit