Author: Armin Rigo <ar...@tunes.org> Branch: gc-forkfriendly-2 Changeset: r90336:7baaab0977a5 Date: 2017-02-24 08:40 +0100 http://bitbucket.org/pypy/pypy/changeset/7baaab0977a5/
Log: hg merge default diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -151,6 +151,8 @@ not on PyPy 3.x. The latter is used to get an app-level unicode string by decoding the RPython string, assumed to be utf-8. +.. branch: space-wrap + .. branch: fix_bool_restype Fix for ``ctypes.c_bool``-returning ctypes functions diff --git a/pypy/interpreter/astcompiler/ast.py b/pypy/interpreter/astcompiler/ast.py --- a/pypy/interpreter/astcompiler/ast.py +++ b/pypy/interpreter/astcompiler/ast.py @@ -420,7 +420,7 @@ w_decorator_list = get_field(space, w_node, 'decorator_list', False) w_lineno = get_field(space, w_node, 'lineno', False) w_col_offset = get_field(space, w_node, 'col_offset', False) - _name = space.realstr_w(w_name) + _name = space.realtext_w(w_name) if _name is None: raise_required_value(space, w_node, 'name') _args = arguments.from_object(space, w_args) @@ -497,7 +497,7 @@ w_decorator_list = get_field(space, w_node, 'decorator_list', False) w_lineno = get_field(space, w_node, 'lineno', False) w_col_offset = get_field(space, w_node, 'col_offset', False) - _name = space.realstr_w(w_name) + _name = space.realtext_w(w_name) if _name is None: raise_required_value(space, w_node, 'name') bases_w = space.unpackiterable(w_bases) @@ -1318,7 +1318,7 @@ w_level = get_field(space, w_node, 'level', True) w_lineno = get_field(space, w_node, 'lineno', False) w_col_offset = get_field(space, w_node, 'col_offset', False) - _module = space.str_or_None_w(w_module) + _module = space.realtext_w(w_module) if not space.is_none(w_module) else None names_w = space.unpackiterable(w_names) _names = [alias.from_object(space, w_item) for w_item in names_w] _level = space.int_w(w_level) @@ -1413,7 +1413,7 @@ w_lineno = get_field(space, w_node, 'lineno', False) w_col_offset = get_field(space, w_node, 'col_offset', False) names_w = space.unpackiterable(w_names) - _names = [space.realstr_w(w_item) for w_item in names_w] + _names = [space.realtext_w(w_item) for w_item in names_w] _lineno = space.int_w(w_lineno) _col_offset = space.int_w(w_col_offset) return Global(_names, _lineno, _col_offset) @@ -2495,7 +2495,7 @@ _value = expr.from_object(space, w_value) if _value is None: raise_required_value(space, w_node, 'value') - _attr = space.realstr_w(w_attr) + _attr = space.realtext_w(w_attr) if _attr is None: raise_required_value(space, w_node, 'attr') _ctx = expr_context.from_object(space, w_ctx) @@ -2592,7 +2592,7 @@ w_ctx = get_field(space, w_node, 'ctx', False) w_lineno = get_field(space, w_node, 'lineno', False) w_col_offset = get_field(space, w_node, 'col_offset', False) - _id = space.realstr_w(w_id) + _id = space.realtext_w(w_id) if _id is None: raise_required_value(space, w_node, 'id') _ctx = expr_context.from_object(space, w_ctx) @@ -3415,8 +3415,8 @@ w_defaults = get_field(space, w_node, 'defaults', False) args_w = space.unpackiterable(w_args) _args = [expr.from_object(space, w_item) for w_item in args_w] - _vararg = space.str_or_None_w(w_vararg) - _kwarg = space.str_or_None_w(w_kwarg) + _vararg = space.realtext_w(w_vararg) if not space.is_none(w_vararg) else None + _kwarg = space.realtext_w(w_kwarg) if not space.is_none(w_kwarg) else None defaults_w = space.unpackiterable(w_defaults) _defaults = [expr.from_object(space, w_item) for w_item in defaults_w] return arguments(_args, _vararg, _kwarg, _defaults) @@ -3448,7 +3448,7 @@ def from_object(space, w_node): w_arg = get_field(space, w_node, 'arg', False) w_value = get_field(space, w_node, 'value', False) - _arg = space.realstr_w(w_arg) + _arg = space.realtext_w(w_arg) if _arg is None: raise_required_value(space, w_node, 'arg') _value = expr.from_object(space, w_value) @@ -3482,10 +3482,10 @@ def from_object(space, w_node): w_name = get_field(space, w_node, 'name', False) w_asname = get_field(space, w_node, 'asname', True) - _name = space.realstr_w(w_name) + _name = space.realtext_w(w_name) if _name is None: raise_required_value(space, w_node, 'name') - _asname = space.str_or_None_w(w_asname) + _asname = space.realtext_w(w_asname) if not space.is_none(w_asname) else None return alias(_name, _asname) State.ast_type('alias', 'AST', ['name', 'asname']) diff --git a/pypy/interpreter/astcompiler/tools/asdl_py.py b/pypy/interpreter/astcompiler/tools/asdl_py.py --- a/pypy/interpreter/astcompiler/tools/asdl_py.py +++ b/pypy/interpreter/astcompiler/tools/asdl_py.py @@ -150,8 +150,9 @@ return "check_string(space, %s)" % (value,) elif field.type in ("identifier",): if field.opt: - return "space.str_or_None_w(%s)" % (value,) - return "space.realstr_w(%s)" % (value,) + return ("space.realtext_w(%s) if not space.is_none(%s) " + "else None" % (value, value)) + return "space.realtext_w(%s)" % (value,) elif field.type in ("int",): return "space.int_w(%s)" % (value,) elif field.type in ("bool",): diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -1604,9 +1604,9 @@ else: return buf.as_str() - def str_or_None_w(self, w_obj): - # YYY rename - return None if self.is_none(w_obj) else self.bytes_w(w_obj) + def text_or_none_w(self, w_obj): + # return text_w(w_obj) or None + return None if self.is_none(w_obj) else self.text_w(w_obj) def bytes_w(self, w_obj): "Takes a bytes object and returns an unwrapped RPython bytestring." @@ -1617,15 +1617,11 @@ unwrapped RPython bytestring.""" return w_obj.str_w(self) - #@not_rpython BACKCOMPAT: should be replaced with bytes_w or text_w + @not_rpython # tests only; should be replaced with bytes_w or text_w def str_w(self, w_obj): """For tests only.""" return self.bytes_w(w_obj) - #@not_rpython BACKCOMPAT - def str0_w(self, w_obj): - return self.bytes0_w(w_obj) - def bytes0_w(self, w_obj): "Like bytes_w, but rejects strings with NUL bytes." from rpython.rlib import rstring @@ -1647,6 +1643,9 @@ getfilesystemencoding(self)) return self.bytes0_w(w_obj) + def fsencode_or_none_w(self, w_obj): + return None if self.is_none(w_obj) else self.fsencode_w(w_obj) + def int_w(self, w_obj, allow_conversion=True): """ Unwrap an app-level int object into an interpret-level int. @@ -1681,9 +1680,9 @@ """ return w_obj.float_w(self, allow_conversion) - def realstr_w(self, w_obj): - # YYY rename - # Like bytes_w, but only works if w_obj is really of type 'str'. + def realtext_w(self, w_obj): + # Like bytes_w(), but only works if w_obj is really of type 'str'. + # On Python 3 this is the same as text_w(). if not self.isinstance_w(w_obj, self.w_bytes): raise oefmt(self.w_TypeError, "argument must be a string") return self.bytes_w(w_obj) @@ -1702,8 +1701,8 @@ return rstring.assert_str0(result) def realunicode_w(self, w_obj): - # Like unicode_w, but only works if w_obj is really of type - # 'unicode'. + # Like unicode_w(), but only works if w_obj is really of type + # 'unicode'. On Python 3 this is the same as unicode_w(). if not self.isinstance_w(w_obj, self.w_unicode): raise oefmt(self.w_TypeError, "argument must be a unicode") return self.unicode_w(w_obj) diff --git a/pypy/interpreter/gateway.py b/pypy/interpreter/gateway.py --- a/pypy/interpreter/gateway.py +++ b/pypy/interpreter/gateway.py @@ -145,10 +145,7 @@ def visit_bufferstr(self, el, app_sig): self.checked_space_method(el, app_sig) - def visit_str_or_None(self, el, app_sig): - self.checked_space_method(el, app_sig) - - def visit_str0(self, el, app_sig): + def visit_text_or_none(self, el, app_sig): self.checked_space_method(el, app_sig) def visit_bytes(self, el, app_sig): @@ -166,6 +163,9 @@ def visit_fsencode(self, el, app_sig): self.checked_space_method(el, app_sig) + def visit_fsencode_or_none(self, el, app_sig): + self.checked_space_method(el, app_sig) + def visit_nonnegint(self, el, app_sig): self.checked_space_method(el, app_sig) @@ -289,11 +289,8 @@ def visit_bufferstr(self, typ): self.run_args.append("space.bufferstr_w(%s)" % (self.scopenext(),)) - def visit_str_or_None(self, typ): - self.run_args.append("space.str_or_None_w(%s)" % (self.scopenext(),)) - - def visit_str0(self, typ): - self.run_args.append("space.str0_w(%s)" % (self.scopenext(),)) + def visit_text_or_none(self, typ): + self.run_args.append("space.text_or_none_w(%s)" % (self.scopenext(),)) def visit_bytes(self, typ): self.run_args.append("space.bytes_w(%s)" % (self.scopenext(),)) @@ -310,6 +307,9 @@ def visit_fsencode(self, typ): self.run_args.append("space.fsencode_w(%s)" % (self.scopenext(),)) + def visit_fsencode_or_none(self, typ): + self.run_args.append("space.fsencode_or_none_w(%s)" % (self.scopenext(),)) + def visit_nonnegint(self, typ): self.run_args.append("space.gateway_nonnegint_w(%s)" % ( self.scopenext(),)) @@ -454,11 +454,8 @@ def visit_bufferstr(self, typ): self.unwrap.append("space.bufferstr_w(%s)" % (self.nextarg(),)) - def visit_str_or_None(self, typ): - self.unwrap.append("space.str_or_None_w(%s)" % (self.nextarg(),)) - - def visit_str0(self, typ): - self.unwrap.append("space.str0_w(%s)" % (self.nextarg(),)) + def visit_text_or_none(self, typ): + self.unwrap.append("space.text_or_none_w(%s)" % (self.nextarg(),)) def visit_bytes(self, typ): self.unwrap.append("space.bytes_w(%s)" % (self.nextarg(),)) @@ -475,6 +472,9 @@ def visit_fsencode(self, typ): self.unwrap.append("space.fsencode_w(%s)" % (self.nextarg(),)) + def visit_fsencode_or_none(self, typ): + self.unwrap.append("space.fsencode_or_none_w(%s)" % (self.nextarg(),)) + def visit_nonnegint(self, typ): self.unwrap.append("space.gateway_nonnegint_w(%s)" % (self.nextarg(),)) @@ -606,6 +606,8 @@ "the name of an argument of the following " "function" % (name,)) + assert str not in unwrap_spec # use 'text' or 'bytes' instead of str + return unwrap_spec diff --git a/pypy/interpreter/main.py b/pypy/interpreter/main.py --- a/pypy/interpreter/main.py +++ b/pypy/interpreter/main.py @@ -18,7 +18,8 @@ def compilecode(space, source, filename, cmd='exec'): w_code = space.builtin.call( - 'compile', space.wrap(source), space.wrap(filename), space.wrap(cmd), space.newint(0), space.newint(0)) + 'compile', space.newtext(source), space.newtext(filename), + space.newtext(cmd), space.newint(0), space.newint(0)) pycode = space.interp_w(eval.Code, w_code) return pycode @@ -85,10 +86,11 @@ argv.extend(args) space.setitem(space.sys.w_dict, space.newtext('argv'), space.wrap(argv)) w_import = space.builtin.get('__import__') - runpy = space.call_function(w_import, space.wrap('runpy')) - w_run_module = space.getitem(runpy.w_dict, space.wrap('run_module')) - return space.call_function(w_run_module, space.wrap(module_name), space.w_None, - space.wrap('__main__'), space.w_True) + runpy = space.call_function(w_import, space.newtext('runpy')) + w_run_module = space.getitem(runpy.w_dict, space.newtext('run_module')) + return space.call_function(w_run_module, space.newtext(module_name), + space.w_None, space.newtext('__main__'), + space.w_True) def run_toplevel(space, f, verbose=False): diff --git a/pypy/interpreter/mixedmodule.py b/pypy/interpreter/mixedmodule.py --- a/pypy/interpreter/mixedmodule.py +++ b/pypy/interpreter/mixedmodule.py @@ -48,7 +48,7 @@ space.call_method(self.w_dict, 'update', self.w_initialdict) for w_submodule in self.submodules_w: - name = space.str0_w(w_submodule.w_name) + name = space.text0_w(w_submodule.w_name) space.setitem(self.w_dict, space.newtext(name.split(".")[-1]), w_submodule) space.getbuiltinmodule(name) diff --git a/pypy/interpreter/module.py b/pypy/interpreter/module.py --- a/pypy/interpreter/module.py +++ b/pypy/interpreter/module.py @@ -42,7 +42,7 @@ def install(self): """NOT_RPYTHON: installs this module into space.builtin_modules""" - modulename = self.space.str0_w(self.w_name) + modulename = self.space.text0_w(self.w_name) self.space.builtin_modules[modulename] = self def setup_after_space_initialization(self): diff --git a/pypy/interpreter/pycode.py b/pypy/interpreter/pycode.py --- a/pypy/interpreter/pycode.py +++ b/pypy/interpreter/pycode.py @@ -25,7 +25,7 @@ # helper -def unpack_text_tuple(space,w_str_tuple): +def unpack_text_tuple(space, w_str_tuple): return [space.text_w(w_el) for w_el in space.unpackiterable(w_str_tuple)] diff --git a/pypy/interpreter/test/test_gateway.py b/pypy/interpreter/test/test_gateway.py --- a/pypy/interpreter/test/test_gateway.py +++ b/pypy/interpreter/test/test_gateway.py @@ -377,7 +377,7 @@ return space.wrap(s0+s1) app_g3_ss = gateway.interp2app_temp(g3_ss, unwrap_spec=[gateway.ObjSpace, - str, 'str_or_None']) + 'text', 'text_or_none']) w_app_g3_ss = space.wrap(app_g3_ss) assert self.space.eq_w( space.call(w_app_g3_ss, @@ -512,7 +512,7 @@ app_g3_s = gateway.interp2app_temp(g3_id, unwrap_spec=[gateway.ObjSpace, - str]) + 'text']) w_app_g3_s = space.wrap(app_g3_s) assert space.eq_w(space.call_function(w_app_g3_s,w("foo")),w("foo")) raises(gateway.OperationError,space.call_function,w_app_g3_s,w(None)) diff --git a/pypy/interpreter/test/test_objspace.py b/pypy/interpreter/test/test_objspace.py --- a/pypy/interpreter/test/test_objspace.py +++ b/pypy/interpreter/test/test_objspace.py @@ -212,11 +212,11 @@ res = self.space.interp_w(Function, w(None), can_be_None=True) assert res is None - def test_str0_w(self): + def test_text0_w(self): space = self.space w = space.wrap - assert space.str0_w(w("123")) == "123" - exc = space.raises_w(space.w_TypeError, space.str0_w, w("123\x004")) + assert space.text0_w(w("123")) == "123" + exc = space.raises_w(space.w_TypeError, space.text0_w, w("123\x004")) assert space.unicode0_w(w(u"123")) == u"123" exc = space.raises_w(space.w_TypeError, space.unicode0_w, w(u"123\x004")) diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -16,17 +16,18 @@ space.newtext(msg)])) return raise_unicode_exception_decode -class RUnicodeEncodeError(Exception): - def __init__(self, encoding, object, start, end, reason): - self.encoding = encoding - self.object = object - self.start = start - self.end = end - self.reason = reason - -def raise_unicode_exception_encode(errors, encoding, msg, u, - startingpos, endingpos): - raise RUnicodeEncodeError(encoding, u, startingpos, endingpos, msg) +@specialize.memo() +def encode_error_handler(space): + # Fast version of the "strict" errors handler. + def raise_unicode_exception_encode(errors, encoding, msg, u, + startingpos, endingpos): + raise OperationError(space.w_UnicodeEncodeError, + space.newtuple([space.newtext(encoding), + space.newunicode(u), + space.newint(startingpos), + space.newint(endingpos), + space.newtext(msg)])) + return raise_unicode_exception_encode # ____________________________________________________________ @@ -68,5 +69,5 @@ # it stands for. These are the Python2 rules; Python3 differs. return runicode.unicode_encode_utf_8( uni, len(uni), "strict", - errorhandler=raise_unicode_exception_encode, + errorhandler=None, allow_surrogates=True) diff --git a/pypy/module/__pypy__/interp_os.py b/pypy/module/__pypy__/interp_os.py --- a/pypy/module/__pypy__/interp_os.py +++ b/pypy/module/__pypy__/interp_os.py @@ -3,7 +3,7 @@ from pypy.interpreter.gateway import unwrap_spec -@unwrap_spec(name='str0') +@unwrap_spec(name='text0') def real_getenv(space, name): """Get an OS environment value skipping Python cache""" return space.newtext_or_none(os.environ.get(name)) diff --git a/pypy/module/_cffi_backend/ffi_obj.py b/pypy/module/_cffi_backend/ffi_obj.py --- a/pypy/module/_cffi_backend/ffi_obj.py +++ b/pypy/module/_cffi_backend/ffi_obj.py @@ -572,7 +572,7 @@ return self.ffi_type(w_arg, ACCEPT_STRING | ACCEPT_CDATA) - @unwrap_spec(filename="str_or_None", flags=int) + @unwrap_spec(filename="fsencode_or_none", flags=int) def descr_dlopen(self, filename, flags=0): """\ Load and return a dynamic library identified by 'name'. The standard diff --git a/pypy/module/_cffi_backend/libraryobj.py b/pypy/module/_cffi_backend/libraryobj.py --- a/pypy/module/_cffi_backend/libraryobj.py +++ b/pypy/module/_cffi_backend/libraryobj.py @@ -91,7 +91,7 @@ W_Library.typedef.acceptable_as_base_class = False -@unwrap_spec(filename="str_or_None", flags=int) +@unwrap_spec(filename="fsencode_or_none", flags=int) def load_library(space, filename, flags=0): lib = W_Library(space, filename, flags) return lib diff --git a/pypy/module/_cffi_backend/test/test_recompiler.py b/pypy/module/_cffi_backend/test/test_recompiler.py --- a/pypy/module/_cffi_backend/test/test_recompiler.py +++ b/pypy/module/_cffi_backend/test/test_recompiler.py @@ -6,7 +6,7 @@ import pypy.module.cpyext.api # side-effect of pre-importing it -@unwrap_spec(cdef=str, module_name=str, source=str, packed=int) +@unwrap_spec(cdef='text', module_name='text', source='text', packed=int) def prepare(space, cdef, module_name, source, w_includes=None, w_extra_source=None, w_min_version=None, packed=False): try: diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -314,12 +314,12 @@ w_res = space.call_function(w_encoder, w_obj, space.newtext(errors)) return space.getitem(w_res, space.newint(0)) -@unwrap_spec(errors='str_or_None') +@unwrap_spec(errors='text_or_none') def readbuffer_encode(space, w_data, errors='strict'): s = space.getarg_w('s#', w_data) return space.newtuple([space.newbytes(s), space.newint(len(s))]) -@unwrap_spec(errors='str_or_None') +@unwrap_spec(errors='text_or_none') def charbuffer_encode(space, w_data, errors='strict'): s = space.getarg_w('t#', w_data) return space.newtuple([space.newbytes(s), space.newint(len(s))]) @@ -373,7 +373,7 @@ def make_encoder_wrapper(name): rname = "unicode_encode_%s" % (name.replace("_encode", ""), ) assert hasattr(runicode, rname) - @unwrap_spec(uni=unicode, errors='str_or_None') + @unwrap_spec(uni=unicode, errors='text_or_none') def wrap_encoder(space, uni, errors="strict"): if errors is None: errors = 'strict' @@ -387,7 +387,7 @@ def make_decoder_wrapper(name): rname = "str_decode_%s" % (name.replace("_decode", ""), ) assert hasattr(runicode, rname) - @unwrap_spec(string='bufferstr', errors='str_or_None', + @unwrap_spec(string='bufferstr', errors='text_or_none', w_final=WrappedDefault(False)) def wrap_decoder(space, string, errors="strict", w_final=None): if errors is None: @@ -437,7 +437,7 @@ # utf-8 functions are not regular, because we have to pass # "allow_surrogates=True" -@unwrap_spec(uni=unicode, errors='str_or_None') +@unwrap_spec(uni=unicode, errors='text_or_none') def utf_8_encode(space, uni, errors="strict"): if errors is None: errors = 'strict' @@ -450,7 +450,7 @@ allow_surrogates=True) return space.newtuple([space.newbytes(result), space.newint(len(uni))]) -@unwrap_spec(string='bufferstr', errors='str_or_None', +@unwrap_spec(string='bufferstr', errors='text_or_none', w_final = WrappedDefault(False)) def utf_8_decode(space, string, errors="strict", w_final=None): if errors is None: @@ -466,7 +466,7 @@ allow_surrogates=True) return space.newtuple([space.newunicode(result), space.newint(consumed)]) -@unwrap_spec(data='bufferstr', errors='str_or_None', byteorder=int, +@unwrap_spec(data='bufferstr', errors='text_or_none', byteorder=int, w_final=WrappedDefault(False)) def utf_16_ex_decode(space, data, errors='strict', byteorder=0, w_final=None): if errors is None: @@ -487,7 +487,7 @@ return space.newtuple([space.newunicode(res), space.newint(consumed), space.newint(byteorder)]) -@unwrap_spec(data='bufferstr', errors='str_or_None', byteorder=int, +@unwrap_spec(data='bufferstr', errors='text_or_none', byteorder=int, w_final=WrappedDefault(False)) def utf_32_ex_decode(space, data, errors='strict', byteorder=0, w_final=None): final = space.is_true(w_final) @@ -585,7 +585,7 @@ "character mapping must return integer, None or str") -@unwrap_spec(string='bufferstr', errors='str_or_None') +@unwrap_spec(string='bufferstr', errors='text_or_none') def charmap_decode(space, string, errors="strict", w_mapping=None): if errors is None: errors = 'strict' @@ -604,7 +604,7 @@ final, state.decode_error_handler, mapping) return space.newtuple([space.newunicode(result), space.newint(consumed)]) -@unwrap_spec(uni=unicode, errors='str_or_None') +@unwrap_spec(uni=unicode, errors='text_or_none') def charmap_encode(space, uni, errors="strict", w_mapping=None): if errors is None: errors = 'strict' @@ -647,7 +647,7 @@ return -1 return space.int_w(w_code) -@unwrap_spec(string='bufferstr', errors='str_or_None', +@unwrap_spec(string='bufferstr', errors='text_or_none', w_final=WrappedDefault(False)) def unicode_escape_decode(space, string, errors="strict", w_final=None): if errors is None: @@ -667,7 +667,7 @@ # ____________________________________________________________ # Unicode-internal -@unwrap_spec(errors='str_or_None') +@unwrap_spec(errors='text_or_none') def unicode_internal_decode(space, w_string, errors="strict"): if errors is None: errors = 'strict' @@ -691,7 +691,7 @@ # support for the "string escape" codec # This is a bytes-to bytes transformation -@unwrap_spec(data='bytes', errors='str_or_None') +@unwrap_spec(data='bytes', errors='text_or_none') def escape_encode(space, data, errors='strict'): from pypy.objspace.std.bytesobject import string_escape_encode result = string_escape_encode(data, quote="'") @@ -701,7 +701,7 @@ w_result = space.newbytes(result[start:end]) return space.newtuple([w_result, space.newint(len(data))]) -@unwrap_spec(data='bytes', errors='str_or_None') +@unwrap_spec(data='bytes', errors='text_or_none') def escape_decode(space, data, errors='strict'): from pypy.interpreter.pyparser.parsestring import PyString_DecodeEscape result = PyString_DecodeEscape(space, data, errors, None) diff --git a/pypy/module/_file/interp_file.py b/pypy/module/_file/interp_file.py --- a/pypy/module/_file/interp_file.py +++ b/pypy/module/_file/interp_file.py @@ -666,7 +666,7 @@ return False -@unwrap_spec(w_file=W_File, encoding="str_or_None", errors="str_or_None") +@unwrap_spec(w_file=W_File, encoding="text_or_none", errors="text_or_none") def set_file_encoding(space, w_file, encoding=None, errors=None): w_file.encoding = encoding w_file.errors = errors diff --git a/pypy/module/_io/interp_io.py b/pypy/module/_io/interp_io.py --- a/pypy/module/_io/interp_io.py +++ b/pypy/module/_io/interp_io.py @@ -41,8 +41,8 @@ DEFAULT_BUFFER_SIZE = 8 * 1024 @unwrap_spec(mode='text', buffering=int, - encoding="str_or_None", errors="str_or_None", - newline="str_or_None", closefd=bool) + encoding="text_or_none", errors="text_or_none", + newline="text_or_none", closefd=bool) def open(space, w_file, mode="r", buffering=-1, encoding=None, errors=None, newline=None, closefd=True): from pypy.module._io.interp_bufferedio import (W_BufferedRandom, diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -359,7 +359,7 @@ # of the stream self.snapshot = None - @unwrap_spec(encoding="str_or_None", line_buffering=int) + @unwrap_spec(encoding="text_or_none", line_buffering=int) def descr_init(self, space, w_buffer, encoding=None, w_errors=None, w_newline=None, line_buffering=0): self.state = STATE_ZERO diff --git a/pypy/module/_multibytecodec/interp_incremental.py b/pypy/module/_multibytecodec/interp_incremental.py --- a/pypy/module/_multibytecodec/interp_incremental.py +++ b/pypy/module/_multibytecodec/interp_incremental.py @@ -68,7 +68,7 @@ return space.newunicode(output) -@unwrap_spec(errors="str_or_None") +@unwrap_spec(errors="text_or_none") def mbidecoder_new(space, w_subtype, errors=None): r = space.allocate_instance(MultibyteIncrementalDecoder, w_subtype) r.__init__(space, errors) @@ -116,7 +116,7 @@ return space.newbytes(output) -@unwrap_spec(errors="str_or_None") +@unwrap_spec(errors="text_or_none") def mbiencoder_new(space, w_subtype, errors=None): r = space.allocate_instance(MultibyteIncrementalEncoder, w_subtype) r.__init__(space, errors) diff --git a/pypy/module/_multibytecodec/interp_multibytecodec.py b/pypy/module/_multibytecodec/interp_multibytecodec.py --- a/pypy/module/_multibytecodec/interp_multibytecodec.py +++ b/pypy/module/_multibytecodec/interp_multibytecodec.py @@ -11,7 +11,7 @@ self.name = name self.codec = codec - @unwrap_spec(input='bytes', errors="str_or_None") + @unwrap_spec(input='bytes', errors="text_or_none") def decode(self, space, input, errors=None): if errors is None: errors = 'strict' @@ -27,7 +27,7 @@ return space.newtuple([space.newunicode(output), space.newint(len(input))]) - @unwrap_spec(input=unicode, errors="str_or_None") + @unwrap_spec(input=unicode, errors="text_or_none") def encode(self, space, input, errors=None): if errors is None: errors = 'strict' diff --git a/pypy/module/_multiprocessing/interp_win32.py b/pypy/module/_multiprocessing/interp_win32.py --- a/pypy/module/_multiprocessing/interp_win32.py +++ b/pypy/module/_multiprocessing/interp_win32.py @@ -114,7 +114,7 @@ # __________________________________________________________ # functions for the "win32" namespace -@unwrap_spec(name=str, openmode=r_uint, pipemode=r_uint, maxinstances=r_uint, +@unwrap_spec(name='text', openmode=r_uint, pipemode=r_uint, maxinstances=r_uint, outputsize=r_uint, inputsize=r_uint, timeout=r_uint) def CreateNamedPipe(space, name, openmode, pipemode, maxinstances, outputsize, inputsize, timeout, w_security): @@ -161,13 +161,13 @@ lltype.free(state, flavor='raw') lltype.free(statep, flavor='raw') -@unwrap_spec(name=str, timeout=r_uint) +@unwrap_spec(name='text', timeout=r_uint) def WaitNamedPipe(space, name, timeout): # Careful: zero means "default value specified by CreateNamedPipe()" if not _WaitNamedPipe(name, timeout): raise wrap_windowserror(space, rwin32.lastSavedWindowsError()) -@unwrap_spec(filename=str, access=r_uint, share=r_uint, +@unwrap_spec(filename='fsencode', access=r_uint, share=r_uint, disposition=r_uint, flags=r_uint) def CreateFile(space, filename, access, share, w_security, disposition, flags, w_templatefile): diff --git a/pypy/module/_rawffi/alt/interp_funcptr.py b/pypy/module/_rawffi/alt/interp_funcptr.py --- a/pypy/module/_rawffi/alt/interp_funcptr.py +++ b/pypy/module/_rawffi/alt/interp_funcptr.py @@ -344,7 +344,7 @@ def getidentifier(self, space): return space.newint(self.cdll.getidentifier()) -@unwrap_spec(name='str_or_None', mode=int) +@unwrap_spec(name='fsencode_or_none', mode=int) def descr_new_cdll(space, w_type, name, mode=-1): return W_CDLL(space, name, mode) @@ -363,7 +363,7 @@ W_CDLL.__init__(self, space, name, mode) self.flags = libffi.FUNCFLAG_STDCALL -@unwrap_spec(name='str_or_None', mode=int) +@unwrap_spec(name='fsencode_or_none', mode=int) def descr_new_windll(space, w_type, name, mode=-1): return W_WinDLL(space, name, mode) diff --git a/pypy/module/_rawffi/alt/test/test_struct.py b/pypy/module/_rawffi/alt/test/test_struct.py --- a/pypy/module/_rawffi/alt/test/test_struct.py +++ b/pypy/module/_rawffi/alt/test/test_struct.py @@ -43,7 +43,7 @@ def setup_class(cls): BaseAppTestFFI.setup_class.im_func(cls) - @unwrap_spec(addr=int, typename=str, length=int) + @unwrap_spec(addr=int, typename='text', length=int) def read_raw_mem(space, addr, typename, length): import ctypes addr = ctypes.cast(addr, ctypes.c_void_p) diff --git a/pypy/module/_rawffi/interp_rawffi.py b/pypy/module/_rawffi/interp_rawffi.py --- a/pypy/module/_rawffi/interp_rawffi.py +++ b/pypy/module/_rawffi/interp_rawffi.py @@ -235,7 +235,7 @@ except OSError as e: raise wrap_oserror(space, e) -@unwrap_spec(name='str_or_None') +@unwrap_spec(name='fsencode_or_none') def descr_new_cdll(space, w_type, name): cdll = open_cdll(space, name) return W_CDLL(space, name, cdll) diff --git a/pypy/module/_ssl/interp_win32.py b/pypy/module/_ssl/interp_win32.py --- a/pypy/module/_ssl/interp_win32.py +++ b/pypy/module/_ssl/interp_win32.py @@ -99,7 +99,7 @@ usage.c_rgpszUsageIdentifier[i])) return space.newset(result_w) -@unwrap_spec(store_name=str) +@unwrap_spec(store_name='text') def enum_certificates_w(space, store_name): """enum_certificates(store_name) -> [] @@ -142,7 +142,7 @@ return space.newlist(result_w) -@unwrap_spec(store_name=str) +@unwrap_spec(store_name='text') def enum_crls_w(space, store_name): """enum_crls(store_name) -> [] diff --git a/pypy/module/_winreg/interp_winreg.py b/pypy/module/_winreg/interp_winreg.py --- a/pypy/module/_winreg/interp_winreg.py +++ b/pypy/module/_winreg/interp_winreg.py @@ -689,7 +689,7 @@ The return value is the handle of the opened key. If the function fails, an EnvironmentError exception is raised.""" - machine = space.str_or_None_w(w_machine) + machine = space.text_or_none_w(w_machine) hkey = hkey_w(w_hkey, space) with lltype.scoped_alloc(rwinreg.PHKEY.TO, 1) as rethkey: ret = rwinreg.RegConnectRegistry(machine, hkey, rethkey) diff --git a/pypy/module/bz2/interp_bz2.py b/pypy/module/bz2/interp_bz2.py --- a/pypy/module/bz2/interp_bz2.py +++ b/pypy/module/bz2/interp_bz2.py @@ -328,7 +328,7 @@ raise oefmt(space.w_ValueError, "cannot open in read-write mode") if basemode == "a": raise oefmt(space.w_ValueError, "cannot append to bz2 file") - stream = open_path_helper(space.str0_w(w_path), os_flags, False) + stream = open_path_helper(space.fsencode_w(w_path), os_flags, False) if reading: bz2stream = ReadBZ2Filter(space, stream, buffering) buffering = 0 # by construction, the ReadBZ2Filter acts like diff --git a/pypy/module/bz2/test/test_bz2_file.py b/pypy/module/bz2/test/test_bz2_file.py --- a/pypy/module/bz2/test/test_bz2_file.py +++ b/pypy/module/bz2/test/test_bz2_file.py @@ -28,7 +28,7 @@ data = DATA[:100] f.write(data, 'wb') - @unwrap_spec(data=str) + @unwrap_spec(data='bytes') def decompress(space, data): import popen2 import bz2 diff --git a/pypy/module/cppyy/test/test_crossing.py b/pypy/module/cppyy/test/test_crossing.py --- a/pypy/module/cppyy/test/test_crossing.py +++ b/pypy/module/cppyy/test/test_crossing.py @@ -78,7 +78,7 @@ import ctypes, cppyy""") # prevents leak-checking complaints on ctypes' statics def setup_method(self, func): - @unwrap_spec(name=str, init=str, body=str) + @unwrap_spec(name='text', init='text', body='text') def create_cdll(space, name, init, body): # the following is loosely from test_cpyext.py import_module; it # is copied here to be able to tweak the call to diff --git a/pypy/module/cpyext/memoryobject.py b/pypy/module/cpyext/memoryobject.py --- a/pypy/module/cpyext/memoryobject.py +++ b/pypy/module/cpyext/memoryobject.py @@ -119,7 +119,7 @@ try: view.c_buf = rffi.cast(rffi.VOIDP, buf.get_raw_address()) except ValueError: - if not space.isinstance_w(w_obj, space.w_str): + if not space.isinstance_w(w_obj, space.w_bytes): # XXX Python 3? raise BufferError("could not create buffer from object") view.c_buf = rffi.cast(rffi.VOIDP, rffi.str2charp(space.bytes_w(w_obj), track_allocation=False)) diff --git a/pypy/module/cpyext/slotdefs.py b/pypy/module/cpyext/slotdefs.py --- a/pypy/module/cpyext/slotdefs.py +++ b/pypy/module/cpyext/slotdefs.py @@ -237,7 +237,10 @@ def wrap_lenfunc(space, w_self, w_args, func): func_len = rffi.cast(lenfunc, func) check_num_args(space, w_args, 0) - return space.newint(generic_cpy_call(space, func_len, w_self)) + res = generic_cpy_call(space, func_len, w_self) + if widen(res) == -1: + space.fromcache(State).check_and_raise_exception(always=True) + return space.newint(res) def wrap_sq_item(space, w_self, w_args, func): func_target = rffi.cast(ssizeargfunc, func) diff --git a/pypy/module/cpyext/test/test_cpyext.py b/pypy/module/cpyext/test/test_cpyext.py --- a/pypy/module/cpyext/test/test_cpyext.py +++ b/pypy/module/cpyext/test/test_cpyext.py @@ -287,7 +287,7 @@ if self.runappdirect: return - @unwrap_spec(name=str) + @unwrap_spec(name='text') def compile_module(space, name, w_source_files=None, w_source_strings=None): @@ -313,8 +313,8 @@ return space.wrap(pydname) - @unwrap_spec(name=str, init='str_or_None', body=str, - filename='str_or_None', PY_SSIZE_T_CLEAN=bool) + @unwrap_spec(name='text', init='text_or_none', body='text', + filename='fsencode_or_none', PY_SSIZE_T_CLEAN=bool) def import_module(space, name, init=None, body='', filename=None, w_include_dirs=None, PY_SSIZE_T_CLEAN=False): @@ -325,12 +325,12 @@ return w_result - @unwrap_spec(mod=str, name=str) + @unwrap_spec(mod='text', name='text') def load_module(space, mod, name): return self.sys_info.load_module(mod, name) - @unwrap_spec(modname=str, prologue=str, - more_init=str, PY_SSIZE_T_CLEAN=bool) + @unwrap_spec(modname='text', prologue='text', + more_init='text', PY_SSIZE_T_CLEAN=bool) def import_extension(space, modname, w_functions, prologue="", w_include_dirs=None, more_init="", PY_SSIZE_T_CLEAN=False): functions = space.unwrap(w_functions) diff --git a/pypy/module/cpyext/test/test_pystate.py b/pypy/module/cpyext/test/test_pystate.py --- a/pypy/module/cpyext/test/test_pystate.py +++ b/pypy/module/cpyext/test/test_pystate.py @@ -178,7 +178,6 @@ ("bounce", "METH_NOARGS", """ PyGILState_STATE gilstate; - PyThreadState *tstate; PyObject *dict; if (PyEval_ThreadsInitialized() == 0) diff --git a/pypy/module/exceptions/interp_exceptions.py b/pypy/module/exceptions/interp_exceptions.py --- a/pypy/module/exceptions/interp_exceptions.py +++ b/pypy/module/exceptions/interp_exceptions.py @@ -288,7 +288,7 @@ space.realunicode_w(w_object) space.int_w(w_start) space.int_w(w_end) - space.realstr_w(w_reason) + space.realtext_w(w_reason) # assign attributes self.w_object = w_object self.w_start = w_start @@ -628,11 +628,11 @@ def descr_init(self, space, w_encoding, w_object, w_start, w_end, w_reason): # typechecking - space.realstr_w(w_encoding) - space.realstr_w(w_object) + space.realtext_w(w_encoding) + space.realtext_w(w_object) space.int_w(w_start) space.int_w(w_end) - space.realstr_w(w_reason) + space.realtext_w(w_reason) # assign attributes self.w_encoding = w_encoding self.w_object = w_object @@ -718,11 +718,11 @@ def descr_init(self, space, w_encoding, w_object, w_start, w_end, w_reason): # typechecking - space.realstr_w(w_encoding) + space.realtext_w(w_encoding) space.realunicode_w(w_object) space.int_w(w_start) space.int_w(w_end) - space.realstr_w(w_reason) + space.realtext_w(w_reason) # assign attributes self.w_encoding = w_encoding self.w_object = w_object diff --git a/pypy/module/gc/interp_gc.py b/pypy/module/gc/interp_gc.py --- a/pypy/module/gc/interp_gc.py +++ b/pypy/module/gc/interp_gc.py @@ -79,7 +79,7 @@ # ____________________________________________________________ -@unwrap_spec(filename='str0') +@unwrap_spec(filename='fsencode') def dump_heap_stats(space, filename): tb = rgc._heap_stats() if not tb: diff --git a/pypy/module/imp/importing.py b/pypy/module/imp/importing.py --- a/pypy/module/imp/importing.py +++ b/pypy/module/imp/importing.py @@ -364,7 +364,7 @@ length = space.len_w(w_fromlist) for i in range(length): w_name = space.getitem(w_fromlist, space.newint(i)) - if not space.isinstance_w(w_name, space.w_str): + if not space.isinstance_w(w_name, space.w_text): raise oefmt(space.w_TypeError, "'fromlist' items must be str, not %T", w_name) if w_path is not None: @@ -491,7 +491,7 @@ def __init__(self, space): pass - @unwrap_spec(path='str0') + @unwrap_spec(path='fsencode') def descr_init(self, space, path): if not path: raise oefmt(space.w_ImportError, "empty pathname") @@ -570,7 +570,7 @@ if w_loader: return FindInfo.fromLoader(w_loader) - path = space.str0_w(w_pathitem) + path = space.fsencode_w(w_pathitem) filepart = os.path.join(path, partname) log_pyverbose(space, 2, "# trying %s\n" % (filepart,)) if os.path.isdir(filepart) and case_ok(filepart): diff --git a/pypy/module/imp/interp_imp.py b/pypy/module/imp/interp_imp.py --- a/pypy/module/imp/interp_imp.py +++ b/pypy/module/imp/interp_imp.py @@ -78,7 +78,7 @@ def load_module(space, w_name, w_file, w_filename, w_info): w_suffix, w_filemode, w_modtype = space.unpackiterable(w_info, 3) - filename = space.str0_w(w_filename) + filename = space.fsencode_w(w_filename) filemode = space.text_w(w_filemode) if space.is_w(w_file, space.w_None): stream = None @@ -95,7 +95,7 @@ space, w_name, find_info, reuse=True) def load_source(space, w_modulename, w_filename, w_file=None): - filename = space.str0_w(w_filename) + filename = space.fsencode_w(w_filename) stream = get_file(space, w_file, filename, 'U') @@ -109,7 +109,7 @@ stream.close() return w_mod -@unwrap_spec(filename='str0', check_afterwards=int) +@unwrap_spec(filename='fsencode', check_afterwards=int) def _run_compiled_module(space, w_modulename, filename, w_file, w_module, check_afterwards=False): # the function 'imp._run_compiled_module' is a pypy-only extension @@ -125,14 +125,14 @@ stream.close() return w_mod -@unwrap_spec(filename='str0') +@unwrap_spec(filename='fsencode') def load_compiled(space, w_modulename, filename, w_file=None): w_mod = Module(space, w_modulename) importing._prepare_module(space, w_mod, filename, None) return _run_compiled_module(space, w_modulename, filename, w_file, w_mod, check_afterwards=True) -@unwrap_spec(filename='text') +@unwrap_spec(filename='fsencode') def load_dynamic(space, w_modulename, filename, w_file=None): if not importing.has_so_extension(space): raise oefmt(space.w_ImportError, "Not implemented") diff --git a/pypy/module/mmap/interp_mmap.py b/pypy/module/mmap/interp_mmap.py --- a/pypy/module/mmap/interp_mmap.py +++ b/pypy/module/mmap/interp_mmap.py @@ -183,7 +183,7 @@ def descr_setitem(self, w_index, w_value): space = self.space - value = space.realstr_w(w_value) + value = space.realtext_w(w_value) self.check_valid() self.check_writeable() @@ -238,7 +238,7 @@ if not space.isinstance_w(w_item, space.w_bytes): raise oefmt(space.w_IndexError, "mmap slice assignment must be a string") - value = space.realstr_w(w_item) + value = space.realtext_w(w_item) if len(value) != (j - i): raise oefmt(space.w_IndexError, "mmap slice assignment is wrong size") diff --git a/pypy/module/sys/initpath.py b/pypy/module/sys/initpath.py --- a/pypy/module/sys/initpath.py +++ b/pypy/module/sys/initpath.py @@ -147,17 +147,17 @@ return None -@unwrap_spec(executable='str0') +@unwrap_spec(executable='fsencode') def pypy_find_executable(space, executable): return space.newtext(find_executable(executable)) -@unwrap_spec(filename='str0') +@unwrap_spec(filename='fsencode') def pypy_resolvedirof(space, filename): return space.newtext(resolvedirof(filename)) -@unwrap_spec(executable='str0') +@unwrap_spec(executable='fsencode') def pypy_find_stdlib(space, executable): path, prefix = None, None if executable != '*': diff --git a/pypy/module/test_lib_pypy/test_md5_extra.py b/pypy/module/test_lib_pypy/test_md5_extra.py --- a/pypy/module/test_lib_pypy/test_md5_extra.py +++ b/pypy/module/test_lib_pypy/test_md5_extra.py @@ -94,7 +94,7 @@ # interp2app doesn't work in appdirect mode cls.w_compare_host = staticmethod(compare_host) else: - compare_host.unwrap_spec = [str, str, str] + compare_host.unwrap_spec = ['bytes', 'bytes', 'text'] cls.w_compare_host = space.wrap(gateway.interp2app(compare_host)) def w_compare(self, message): diff --git a/pypy/module/zipimport/interp_zipimport.py b/pypy/module/zipimport/interp_zipimport.py --- a/pypy/module/zipimport/interp_zipimport.py +++ b/pypy/module/zipimport/interp_zipimport.py @@ -350,7 +350,7 @@ space = self.space return space.newtext(self.filename) -@unwrap_spec(name='str0') +@unwrap_spec(name='text0') def descr_new_zipimporter(space, w_type, name): ok = False parts_ends = [i for i in range(0, len(name)) diff --git a/pypy/objspace/fake/objspace.py b/pypy/objspace/fake/objspace.py --- a/pypy/objspace/fake/objspace.py +++ b/pypy/objspace/fake/objspace.py @@ -45,7 +45,6 @@ def str_w(self, space): return NonConstant("foobar") - identifier_w = bytes_w = str_w def unicode_w(self, space): return NonConstant(u"foobar") diff --git a/pypy/objspace/std/bytearrayobject.py b/pypy/objspace/std/bytearrayobject.py --- a/pypy/objspace/std/bytearrayobject.py +++ b/pypy/objspace/std/bytearrayobject.py @@ -181,7 +181,7 @@ # we ignore w_type and always return a bytearray return new_bytearray(space, space.w_bytearray, data) - @unwrap_spec(encoding='str_or_None', errors='str_or_None') + @unwrap_spec(encoding='text_or_none', errors='text_or_none') def descr_init(self, space, w_source=None, encoding=None, errors=None): if w_source is None: w_source = space.newbytes('') diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -89,12 +89,15 @@ for typedef, cls in builtin_type_classes.items(): w_type = self.gettypeobject(typedef) self.builtin_types[typedef.name] = w_type - if 1: # typedef.name != "str": BACKCOMPAT - setattr(self, 'w_' + typedef.name, w_type) - if typedef.name == "str": - self.w_bytes = w_type + name = typedef.name + # we don't expose 'space.w_str' at all, to avoid confusion + # with Python 3. Instead, in Python 2, it becomes + # space.w_bytes (or space.w_text). + if name == 'str': + name = 'bytes' + setattr(self, 'w_' + name, w_type) self._interplevel_classes[w_type] = cls - self.w_text = self.w_bytes # this is w_unicode on Py3 + self.w_text = self.w_bytes # 'space.w_text' is w_unicode on Py3 self.w_dict.flag_map_or_seq = 'M' self.builtin_types["NotImplemented"] = self.w_NotImplemented self.builtin_types["Ellipsis"] = self.w_Ellipsis diff --git a/pypy/objspace/std/test/test_mapdict.py b/pypy/objspace/std/test/test_mapdict.py --- a/pypy/objspace/std/test/test_mapdict.py +++ b/pypy/objspace/std/test/test_mapdict.py @@ -899,7 +899,7 @@ successes = entry.success_counter globalfailures = INVALID_CACHE_ENTRY.failure_counter return space.wrap((failures, successes, globalfailures)) - check.unwrap_spec = [gateway.ObjSpace, gateway.W_Root, str] + check.unwrap_spec = [gateway.ObjSpace, gateway.W_Root, 'text'] cls.w_check = cls.space.wrap(gateway.interp2app(check)) def test_simple(self): diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -462,26 +462,17 @@ w_encoder = space.sys.get_w_default_encoder() else: if errors is None or errors == 'strict': - try: - if encoding == 'ascii': - u = space.unicode_w(w_object) - eh = unicodehelper.raise_unicode_exception_encode - return space.newbytes(unicode_encode_ascii( - u, len(u), None, errorhandler=eh)) - if encoding == 'utf-8': - u = space.unicode_w(w_object) - eh = unicodehelper.raise_unicode_exception_encode - return space.newbytes(unicode_encode_utf_8( - u, len(u), None, errorhandler=eh, - allow_surrogates=True)) - except unicodehelper.RUnicodeEncodeError as ue: - raise OperationError(space.w_UnicodeEncodeError, - space.newtuple([ - space.newtext(ue.encoding), - space.newunicode(ue.object), - space.newint(ue.start), - space.newint(ue.end), - space.newtext(ue.reason)])) + if encoding == 'ascii': + u = space.unicode_w(w_object) + eh = unicodehelper.encode_error_handler(space) + return space.newbytes(unicode_encode_ascii( + u, len(u), None, errorhandler=eh)) + if encoding == 'utf-8': + u = space.unicode_w(w_object) + eh = unicodehelper.encode_error_handler(space) + return space.newbytes(unicode_encode_utf_8( + u, len(u), None, errorhandler=eh, + allow_surrogates=True)) from pypy.module._codecs.interp_codecs import lookup_codec w_encoder = space.getitem(lookup_codec(space, encoding), space.newint(0)) if errors is None: diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py --- a/rpython/rlib/runicode.py +++ b/rpython/rlib/runicode.py @@ -5,7 +5,7 @@ from rpython.rlib.unicodedata import unicodedb from rpython.tool.sourcetools import func_with_new_name from rpython.rtyper.lltypesystem import lltype, rffi -from rpython.rlib import jit +from rpython.rlib import jit, nonconst if rffi.sizeof(lltype.UniChar) == 4: @@ -133,6 +133,26 @@ def _invalid_cont_byte(ordch): return ordch>>6 != 0x2 # 0b10 +_invalid_byte_2_of_2 = _invalid_cont_byte +_invalid_byte_3_of_3 = _invalid_cont_byte +_invalid_byte_3_of_4 = _invalid_cont_byte +_invalid_byte_4_of_4 = _invalid_cont_byte + +@enforceargs(allow_surrogates=bool) +def _invalid_byte_2_of_3(ordch1, ordch2, allow_surrogates): + return (ordch2>>6 != 0x2 or # 0b10 + (ordch1 == 0xe0 and ordch2 < 0xa0) + # surrogates shouldn't be valid UTF-8! + or (ordch1 == 0xed and ordch2 > 0x9f and not allow_surrogates)) + +def _invalid_byte_2_of_4(ordch1, ordch2): + return (ordch2>>6 != 0x2 or # 0b10 + (ordch1 == 0xf0 and ordch2 < 0x90) or + (ordch1 == 0xf4 and ordch2 > 0x8f)) + +# NOTE: this is a slightly fixed algorithm when compared with +# CPython2's. It is closer to CPython3's. See comments in +# test_invalid_cb_for_3bytes_seq(). def str_decode_utf_8_impl(s, size, errors, final, errorhandler, allow_surrogates): if size == 0: @@ -153,20 +173,60 @@ if pos + n > size: if not final: break + # argh, this obscure block of code is mostly a copy of + # what follows :-( charsleft = size - pos - 1 # either 0, 1, 2 # note: when we get the 'unexpected end of data' we need # to care about the pos returned; it can be lower than size, # in case we need to continue running this loop - endpos = pos + 1 - if charsleft >= 1 and not _invalid_cont_byte(ord(s[pos+1])): - endpos = pos + 2 - if charsleft >= 2 and not _invalid_cont_byte(ord(s[pos+2])): - endpos = pos + 3 - r, pos = errorhandler(errors, 'utf8', - 'unexpected end of data', - s, pos, endpos) - result.append(r) - continue + if not charsleft: + # there's only the start byte and nothing else + r, pos = errorhandler(errors, 'utf8', + 'unexpected end of data', + s, pos, pos+1) + result.append(r) + continue + ordch2 = ord(s[pos+1]) + if n == 3: + # 3-bytes seq with only a continuation byte + if _invalid_byte_2_of_3(ordch1, ordch2, allow_surrogates): + # second byte invalid, take the first and continue + r, pos = errorhandler(errors, 'utf8', + 'invalid continuation byte', + s, pos, pos+1) + result.append(r) + continue + else: + # second byte valid, but third byte missing + r, pos = errorhandler(errors, 'utf8', + 'unexpected end of data', + s, pos, pos+2) + result.append(r) + continue + elif n == 4: + # 4-bytes seq with 1 or 2 continuation bytes + if _invalid_byte_2_of_4(ordch1, ordch2): + # second byte invalid, take the first and continue + r, pos = errorhandler(errors, 'utf8', + 'invalid continuation byte', + s, pos, pos+1) + result.append(r) + continue + elif charsleft == 2 and _invalid_byte_3_of_4(ord(s[pos+2])): + # third byte invalid, take the first two and continue + r, pos = errorhandler(errors, 'utf8', + 'invalid continuation byte', + s, pos, pos+2) + result.append(r) + continue + else: + # there's only 1 or 2 valid cb, but the others are missing + r, pos = errorhandler(errors, 'utf8', + 'unexpected end of data', + s, pos, pos+charsleft+1) + result.append(r) + continue + raise AssertionError("unreachable") if n == 0: r, pos = errorhandler(errors, 'utf8', @@ -179,7 +239,7 @@ elif n == 2: ordch2 = ord(s[pos+1]) - if _invalid_cont_byte(ordch2): + if _invalid_byte_2_of_2(ordch2): r, pos = errorhandler(errors, 'utf8', 'invalid continuation byte', s, pos, pos+1) @@ -193,48 +253,41 @@ elif n == 3: ordch2 = ord(s[pos+1]) ordch3 = ord(s[pos+2]) - if _invalid_cont_byte(ordch2): + if _invalid_byte_2_of_3(ordch1, ordch2, allow_surrogates): r, pos = errorhandler(errors, 'utf8', 'invalid continuation byte', s, pos, pos+1) result.append(r) continue - elif _invalid_cont_byte(ordch3): + elif _invalid_byte_3_of_3(ordch3): r, pos = errorhandler(errors, 'utf8', 'invalid continuation byte', s, pos, pos+2) result.append(r) continue # 1110xxxx 10yyyyyy 10zzzzzz -> 00000000 xxxxyyyy yyzzzzzz - c = (((ordch1 & 0x0F) << 12) + # 0b00001111 - ((ordch2 & 0x3F) << 6) + # 0b00111111 - (ordch3 & 0x3F)) # 0b00111111 - if c < 2048 or (0xd800 <= c <= 0xdfff and not allow_surrogates): - r, pos = errorhandler(errors, 'utf8', - 'invalid continuation byte', - s, pos, pos+2) - result.append(r) - continue - result.append(unichr(c)) + result.append(unichr(((ordch1 & 0x0F) << 12) + # 0b00001111 + ((ordch2 & 0x3F) << 6) + # 0b00111111 + (ordch3 & 0x3F))) # 0b00111111 pos += 3 elif n == 4: ordch2 = ord(s[pos+1]) ordch3 = ord(s[pos+2]) ordch4 = ord(s[pos+3]) - if _invalid_cont_byte(ordch2): + if _invalid_byte_2_of_4(ordch1, ordch2): r, pos = errorhandler(errors, 'utf8', 'invalid continuation byte', s, pos, pos+1) result.append(r) continue - elif _invalid_cont_byte(ordch3): + elif _invalid_byte_3_of_4(ordch3): r, pos = errorhandler(errors, 'utf8', 'invalid continuation byte', s, pos, pos+2) result.append(r) continue - elif _invalid_cont_byte(ordch4): + elif _invalid_byte_4_of_4(ordch4): r, pos = errorhandler(errors, 'utf8', 'invalid continuation byte', s, pos, pos+3) @@ -245,12 +298,6 @@ ((ordch2 & 0x3F) << 12) + # 0b00111111 ((ordch3 & 0x3F) << 6) + # 0b00111111 (ordch4 & 0x3F)) # 0b00111111 - if c <= 65535 or c > 0x10ffff: - r, pos = errorhandler(errors, 'utf8', - 'invalid continuation byte', - s, pos, pos+3) - result.append(r) - continue if c <= MAXUNICODE: result.append(UNICHR(c)) else: @@ -326,7 +373,12 @@ pos += 1 _encodeUCS4(result, ch3) continue - if not allow_surrogates: + # note: if the program only ever calls this with + # allow_surrogates=True, then we'll never annotate + # the following block of code, and errorhandler() + # will never be called. This causes RPython + # problems. Avoid it with the nonconst hack. + if not allow_surrogates or nonconst.NonConstant(False): ru, rs, pos = errorhandler(errors, 'utf8', 'surrogates not allowed', s, pos-1, pos) diff --git a/rpython/rlib/test/test_runicode.py b/rpython/rlib/test/test_runicode.py --- a/rpython/rlib/test/test_runicode.py +++ b/rpython/rlib/test/test_runicode.py @@ -700,27 +700,6 @@ assert decoder(seq, len(seq), 'ignore', final=True ) == (res, len(seq)) - @settings(max_examples=10000) - @given(strategies.binary()) - def test_str_check_utf8(self, s): - try: - u = s.decode("utf8") - valid = True - except UnicodeDecodeError as e: - valid = False - try: - result, length = runicode.str_decode_utf_8(s, len(s), None, - errorhandler=None, final=True, allow_surrogates=True) - except UnicodeDecodeError as a: - assert not valid - assert a.start == e.start - assert a.end == e.end - assert str(a) == str(e) - else: - assert valid - assert result == u - assert length == len(s) - class TestEncoding(UnicodeTests): def test_all_ascii(self): diff --git a/rpython/translator/c/test/test_newgc.py b/rpython/translator/c/test/test_newgc.py --- a/rpython/translator/c/test/test_newgc.py +++ b/rpython/translator/c/test/test_newgc.py @@ -1733,7 +1733,11 @@ (ulimitv, ' '.join(args),)] popen = subprocess.Popen(args1, stderr=subprocess.PIPE) _, child_stderr = popen.communicate() - assert popen.wait() == 134 # aborted + assert popen.wait() in (-6, 134) # aborted + # note: it seems that on some systems we get 134 and on + # others we get -6. Bash is supposed to translate the + # SIGABRT (signal 6) from the subprocess into the exit + # code 128+6, but I guess it may not always do so. assert 'out of memory:' in child_stderr return '42' # _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit