Author: Armin Rigo <ar...@tunes.org>
Branch: gc-forkfriendly-2
Changeset: r90336:7baaab0977a5
Date: 2017-02-24 08:40 +0100
http://bitbucket.org/pypy/pypy/changeset/7baaab0977a5/

Log:    hg merge default

diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -151,6 +151,8 @@
 not on PyPy 3.x.  The latter is used to get an app-level unicode string
 by decoding the RPython string, assumed to be utf-8.
 
+.. branch: space-wrap
+
 .. branch: fix_bool_restype
 
 Fix for ``ctypes.c_bool``-returning ctypes functions
diff --git a/pypy/interpreter/astcompiler/ast.py b/pypy/interpreter/astcompiler/ast.py
--- a/pypy/interpreter/astcompiler/ast.py
+++ b/pypy/interpreter/astcompiler/ast.py
@@ -420,7 +420,7 @@
         w_decorator_list = get_field(space, w_node, 'decorator_list', False)
         w_lineno = get_field(space, w_node, 'lineno', False)
         w_col_offset = get_field(space, w_node, 'col_offset', False)
-        _name = space.realstr_w(w_name)
+        _name = space.realtext_w(w_name)
         if _name is None:
             raise_required_value(space, w_node, 'name')
         _args = arguments.from_object(space, w_args)
@@ -497,7 +497,7 @@
         w_decorator_list = get_field(space, w_node, 'decorator_list', False)
         w_lineno = get_field(space, w_node, 'lineno', False)
         w_col_offset = get_field(space, w_node, 'col_offset', False)
-        _name = space.realstr_w(w_name)
+        _name = space.realtext_w(w_name)
         if _name is None:
             raise_required_value(space, w_node, 'name')
         bases_w = space.unpackiterable(w_bases)
@@ -1318,7 +1318,7 @@
         w_level = get_field(space, w_node, 'level', True)
         w_lineno = get_field(space, w_node, 'lineno', False)
         w_col_offset = get_field(space, w_node, 'col_offset', False)
-        _module = space.str_or_None_w(w_module)
+        _module = space.realtext_w(w_module) if not space.is_none(w_module) else None
         names_w = space.unpackiterable(w_names)
         _names = [alias.from_object(space, w_item) for w_item in names_w]
         _level = space.int_w(w_level)
@@ -1413,7 +1413,7 @@
         w_lineno = get_field(space, w_node, 'lineno', False)
         w_col_offset = get_field(space, w_node, 'col_offset', False)
         names_w = space.unpackiterable(w_names)
-        _names = [space.realstr_w(w_item) for w_item in names_w]
+        _names = [space.realtext_w(w_item) for w_item in names_w]
         _lineno = space.int_w(w_lineno)
         _col_offset = space.int_w(w_col_offset)
         return Global(_names, _lineno, _col_offset)
@@ -2495,7 +2495,7 @@
         _value = expr.from_object(space, w_value)
         if _value is None:
             raise_required_value(space, w_node, 'value')
-        _attr = space.realstr_w(w_attr)
+        _attr = space.realtext_w(w_attr)
         if _attr is None:
             raise_required_value(space, w_node, 'attr')
         _ctx = expr_context.from_object(space, w_ctx)
@@ -2592,7 +2592,7 @@
         w_ctx = get_field(space, w_node, 'ctx', False)
         w_lineno = get_field(space, w_node, 'lineno', False)
         w_col_offset = get_field(space, w_node, 'col_offset', False)
-        _id = space.realstr_w(w_id)
+        _id = space.realtext_w(w_id)
         if _id is None:
             raise_required_value(space, w_node, 'id')
         _ctx = expr_context.from_object(space, w_ctx)
@@ -3415,8 +3415,8 @@
         w_defaults = get_field(space, w_node, 'defaults', False)
         args_w = space.unpackiterable(w_args)
         _args = [expr.from_object(space, w_item) for w_item in args_w]
-        _vararg = space.str_or_None_w(w_vararg)
-        _kwarg = space.str_or_None_w(w_kwarg)
+        _vararg = space.realtext_w(w_vararg) if not space.is_none(w_vararg) else None
+        _kwarg = space.realtext_w(w_kwarg) if not space.is_none(w_kwarg) else None
         defaults_w = space.unpackiterable(w_defaults)
         _defaults = [expr.from_object(space, w_item) for w_item in defaults_w]
         return arguments(_args, _vararg, _kwarg, _defaults)
@@ -3448,7 +3448,7 @@
     def from_object(space, w_node):
         w_arg = get_field(space, w_node, 'arg', False)
         w_value = get_field(space, w_node, 'value', False)
-        _arg = space.realstr_w(w_arg)
+        _arg = space.realtext_w(w_arg)
         if _arg is None:
             raise_required_value(space, w_node, 'arg')
         _value = expr.from_object(space, w_value)
@@ -3482,10 +3482,10 @@
     def from_object(space, w_node):
         w_name = get_field(space, w_node, 'name', False)
         w_asname = get_field(space, w_node, 'asname', True)
-        _name = space.realstr_w(w_name)
+        _name = space.realtext_w(w_name)
         if _name is None:
             raise_required_value(space, w_node, 'name')
-        _asname = space.str_or_None_w(w_asname)
+        _asname = space.realtext_w(w_asname) if not space.is_none(w_asname) else None
         return alias(_name, _asname)
 
 State.ast_type('alias', 'AST', ['name', 'asname'])
diff --git a/pypy/interpreter/astcompiler/tools/asdl_py.py b/pypy/interpreter/astcompiler/tools/asdl_py.py
--- a/pypy/interpreter/astcompiler/tools/asdl_py.py
+++ b/pypy/interpreter/astcompiler/tools/asdl_py.py
@@ -150,8 +150,9 @@
             return "check_string(space, %s)" % (value,)
         elif field.type in ("identifier",):
             if field.opt:
-                return "space.str_or_None_w(%s)" % (value,)
-            return "space.realstr_w(%s)" % (value,)
+                return ("space.realtext_w(%s) if not space.is_none(%s) "
+                        "else None" % (value, value))
+            return "space.realtext_w(%s)" % (value,)
         elif field.type in ("int",):
             return "space.int_w(%s)" % (value,)
         elif field.type in ("bool",):
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -1604,9 +1604,9 @@
         else:
             return buf.as_str()
 
-    def str_or_None_w(self, w_obj):
-        # YYY rename
-        return None if self.is_none(w_obj) else self.bytes_w(w_obj)
+    def text_or_none_w(self, w_obj):
+        # return text_w(w_obj) or None
+        return None if self.is_none(w_obj) else self.text_w(w_obj)
 
     def bytes_w(self, w_obj):
         "Takes a bytes object and returns an unwrapped RPython bytestring."
@@ -1617,15 +1617,11 @@
         unwrapped RPython bytestring."""
         return w_obj.str_w(self)
 
-    #@not_rpython    BACKCOMPAT: should be replaced with bytes_w or text_w
+    @not_rpython    # tests only; should be replaced with bytes_w or text_w
     def str_w(self, w_obj):
         """For tests only."""
         return self.bytes_w(w_obj)
 
-    #@not_rpython    BACKCOMPAT
-    def str0_w(self, w_obj):
-        return self.bytes0_w(w_obj)
-
     def bytes0_w(self, w_obj):
         "Like bytes_w, but rejects strings with NUL bytes."
         from rpython.rlib import rstring
@@ -1647,6 +1643,9 @@
                                      getfilesystemencoding(self))
         return self.bytes0_w(w_obj)
 
+    def fsencode_or_none_w(self, w_obj):
+        return None if self.is_none(w_obj) else self.fsencode_w(w_obj)
+
     def int_w(self, w_obj, allow_conversion=True):
         """
         Unwrap an app-level int object into an interpret-level int.
@@ -1681,9 +1680,9 @@
         """
         return w_obj.float_w(self, allow_conversion)
 
-    def realstr_w(self, w_obj):
-        # YYY rename
-        # Like bytes_w, but only works if w_obj is really of type 'str'.
+    def realtext_w(self, w_obj):
+        # Like bytes_w(), but only works if w_obj is really of type 'str'.
+        # On Python 3 this is the same as text_w().
         if not self.isinstance_w(w_obj, self.w_bytes):
             raise oefmt(self.w_TypeError, "argument must be a string")
         return self.bytes_w(w_obj)
@@ -1702,8 +1701,8 @@
         return rstring.assert_str0(result)
 
     def realunicode_w(self, w_obj):
-        # Like unicode_w, but only works if w_obj is really of type
-        # 'unicode'.
+        # Like unicode_w(), but only works if w_obj is really of type
+        # 'unicode'.  On Python 3 this is the same as unicode_w().
         if not self.isinstance_w(w_obj, self.w_unicode):
             raise oefmt(self.w_TypeError, "argument must be a unicode")
         return self.unicode_w(w_obj)
diff --git a/pypy/interpreter/gateway.py b/pypy/interpreter/gateway.py
--- a/pypy/interpreter/gateway.py
+++ b/pypy/interpreter/gateway.py
@@ -145,10 +145,7 @@
     def visit_bufferstr(self, el, app_sig):
         self.checked_space_method(el, app_sig)
 
-    def visit_str_or_None(self, el, app_sig):
-        self.checked_space_method(el, app_sig)
-
-    def visit_str0(self, el, app_sig):
+    def visit_text_or_none(self, el, app_sig):
         self.checked_space_method(el, app_sig)
 
     def visit_bytes(self, el, app_sig):
@@ -166,6 +163,9 @@
     def visit_fsencode(self, el, app_sig):
         self.checked_space_method(el, app_sig)
 
+    def visit_fsencode_or_none(self, el, app_sig):
+        self.checked_space_method(el, app_sig)
+
     def visit_nonnegint(self, el, app_sig):
         self.checked_space_method(el, app_sig)
 
@@ -289,11 +289,8 @@
     def visit_bufferstr(self, typ):
         self.run_args.append("space.bufferstr_w(%s)" % (self.scopenext(),))
 
-    def visit_str_or_None(self, typ):
-        self.run_args.append("space.str_or_None_w(%s)" % (self.scopenext(),))
-
-    def visit_str0(self, typ):
-        self.run_args.append("space.str0_w(%s)" % (self.scopenext(),))
+    def visit_text_or_none(self, typ):
+        self.run_args.append("space.text_or_none_w(%s)" % (self.scopenext(),))
 
     def visit_bytes(self, typ):
         self.run_args.append("space.bytes_w(%s)" % (self.scopenext(),))
@@ -310,6 +307,9 @@
     def visit_fsencode(self, typ):
         self.run_args.append("space.fsencode_w(%s)" % (self.scopenext(),))
 
+    def visit_fsencode_or_none(self, typ):
+        self.run_args.append("space.fsencode_or_none_w(%s)" % (self.scopenext(),))
+
     def visit_nonnegint(self, typ):
         self.run_args.append("space.gateway_nonnegint_w(%s)" % (
             self.scopenext(),))
@@ -454,11 +454,8 @@
     def visit_bufferstr(self, typ):
         self.unwrap.append("space.bufferstr_w(%s)" % (self.nextarg(),))
 
-    def visit_str_or_None(self, typ):
-        self.unwrap.append("space.str_or_None_w(%s)" % (self.nextarg(),))
-
-    def visit_str0(self, typ):
-        self.unwrap.append("space.str0_w(%s)" % (self.nextarg(),))
+    def visit_text_or_none(self, typ):
+        self.unwrap.append("space.text_or_none_w(%s)" % (self.nextarg(),))
 
     def visit_bytes(self, typ):
         self.unwrap.append("space.bytes_w(%s)" % (self.nextarg(),))
@@ -475,6 +472,9 @@
     def visit_fsencode(self, typ):
         self.unwrap.append("space.fsencode_w(%s)" % (self.nextarg(),))
 
+    def visit_fsencode_or_none(self, typ):
+        self.unwrap.append("space.fsencode_or_none_w(%s)" % (self.nextarg(),))
+
     def visit_nonnegint(self, typ):
         self.unwrap.append("space.gateway_nonnegint_w(%s)" % (self.nextarg(),))
 
@@ -606,6 +606,8 @@
                              "the name of an argument of the following "
                              "function" % (name,))
 
+    assert str not in unwrap_spec   # use 'text' or 'bytes' instead of str
+
     return unwrap_spec
 
 
diff --git a/pypy/interpreter/main.py b/pypy/interpreter/main.py
--- a/pypy/interpreter/main.py
+++ b/pypy/interpreter/main.py
@@ -18,7 +18,8 @@
 
 def compilecode(space, source, filename, cmd='exec'):
     w_code = space.builtin.call(
-        'compile', space.wrap(source), space.wrap(filename), space.wrap(cmd), space.newint(0), space.newint(0))
+        'compile', space.newtext(source), space.newtext(filename),
+        space.newtext(cmd), space.newint(0), space.newint(0))
     pycode = space.interp_w(eval.Code, w_code)
     return pycode
 
@@ -85,10 +86,11 @@
         argv.extend(args)
     space.setitem(space.sys.w_dict, space.newtext('argv'), space.wrap(argv))
     w_import = space.builtin.get('__import__')
-    runpy = space.call_function(w_import, space.wrap('runpy'))
-    w_run_module = space.getitem(runpy.w_dict, space.wrap('run_module'))
-    return space.call_function(w_run_module, space.wrap(module_name), space.w_None,
-                               space.wrap('__main__'), space.w_True)
+    runpy = space.call_function(w_import, space.newtext('runpy'))
+    w_run_module = space.getitem(runpy.w_dict, space.newtext('run_module'))
+    return space.call_function(w_run_module, space.newtext(module_name),
+                               space.w_None, space.newtext('__main__'),
+                               space.w_True)
 
 
 def run_toplevel(space, f, verbose=False):
diff --git a/pypy/interpreter/mixedmodule.py b/pypy/interpreter/mixedmodule.py
--- a/pypy/interpreter/mixedmodule.py
+++ b/pypy/interpreter/mixedmodule.py
@@ -48,7 +48,7 @@
             space.call_method(self.w_dict, 'update', self.w_initialdict)
 
         for w_submodule in self.submodules_w:
-            name = space.str0_w(w_submodule.w_name)
+            name = space.text0_w(w_submodule.w_name)
             space.setitem(self.w_dict, space.newtext(name.split(".")[-1]), w_submodule)
             space.getbuiltinmodule(name)
 
diff --git a/pypy/interpreter/module.py b/pypy/interpreter/module.py
--- a/pypy/interpreter/module.py
+++ b/pypy/interpreter/module.py
@@ -42,7 +42,7 @@
 
     def install(self):
         """NOT_RPYTHON: installs this module into space.builtin_modules"""
-        modulename = self.space.str0_w(self.w_name)
+        modulename = self.space.text0_w(self.w_name)
         self.space.builtin_modules[modulename] = self
 
     def setup_after_space_initialization(self):
diff --git a/pypy/interpreter/pycode.py b/pypy/interpreter/pycode.py
--- a/pypy/interpreter/pycode.py
+++ b/pypy/interpreter/pycode.py
@@ -25,7 +25,7 @@
 
 # helper
 
-def unpack_text_tuple(space,w_str_tuple):
+def unpack_text_tuple(space, w_str_tuple):
     return [space.text_w(w_el) for w_el in space.unpackiterable(w_str_tuple)]
 
 
diff --git a/pypy/interpreter/test/test_gateway.py b/pypy/interpreter/test/test_gateway.py
--- a/pypy/interpreter/test/test_gateway.py
+++ b/pypy/interpreter/test/test_gateway.py
@@ -377,7 +377,7 @@
             return space.wrap(s0+s1)
         app_g3_ss = gateway.interp2app_temp(g3_ss,
                                          unwrap_spec=[gateway.ObjSpace,
-                                                      str, 'str_or_None'])
+                                                      'text', 'text_or_none'])
         w_app_g3_ss = space.wrap(app_g3_ss)
         assert self.space.eq_w(
             space.call(w_app_g3_ss,
@@ -512,7 +512,7 @@
 
         app_g3_s = gateway.interp2app_temp(g3_id,
                                          unwrap_spec=[gateway.ObjSpace,
-                                                      str])
+                                                      'text'])
         w_app_g3_s = space.wrap(app_g3_s)
         assert space.eq_w(space.call_function(w_app_g3_s,w("foo")),w("foo"))
         raises(gateway.OperationError,space.call_function,w_app_g3_s,w(None))
diff --git a/pypy/interpreter/test/test_objspace.py b/pypy/interpreter/test/test_objspace.py
--- a/pypy/interpreter/test/test_objspace.py
+++ b/pypy/interpreter/test/test_objspace.py
@@ -212,11 +212,11 @@
         res = self.space.interp_w(Function, w(None), can_be_None=True)
         assert res is None
 
-    def test_str0_w(self):
+    def test_text0_w(self):
         space = self.space
         w = space.wrap
-        assert space.str0_w(w("123")) == "123"
-        exc = space.raises_w(space.w_TypeError, space.str0_w, w("123\x004"))
+        assert space.text0_w(w("123")) == "123"
+        exc = space.raises_w(space.w_TypeError, space.text0_w, w("123\x004"))
         assert space.unicode0_w(w(u"123")) == u"123"
         exc = space.raises_w(space.w_TypeError, space.unicode0_w, w(u"123\x004"))
 
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -16,17 +16,18 @@
                                              space.newtext(msg)]))
     return raise_unicode_exception_decode
 
-class RUnicodeEncodeError(Exception):
-    def __init__(self, encoding, object, start, end, reason):
-        self.encoding = encoding
-        self.object = object
-        self.start = start
-        self.end = end
-        self.reason = reason
-
-def raise_unicode_exception_encode(errors, encoding, msg, u,
-                                   startingpos, endingpos):
-    raise RUnicodeEncodeError(encoding, u, startingpos, endingpos, msg)
+@specialize.memo()
+def encode_error_handler(space):
+    # Fast version of the "strict" errors handler.
+    def raise_unicode_exception_encode(errors, encoding, msg, u,
+                                       startingpos, endingpos):
+        raise OperationError(space.w_UnicodeEncodeError,
+                             space.newtuple([space.newtext(encoding),
+                                             space.newunicode(u),
+                                             space.newint(startingpos),
+                                             space.newint(endingpos),
+                                             space.newtext(msg)]))
+    return raise_unicode_exception_encode
 
 # ____________________________________________________________
 
@@ -68,5 +69,5 @@
     # it stands for.  These are the Python2 rules; Python3 differs.
     return runicode.unicode_encode_utf_8(
         uni, len(uni), "strict",
-        errorhandler=raise_unicode_exception_encode,
+        errorhandler=None,
         allow_surrogates=True)
diff --git a/pypy/module/__pypy__/interp_os.py b/pypy/module/__pypy__/interp_os.py
--- a/pypy/module/__pypy__/interp_os.py
+++ b/pypy/module/__pypy__/interp_os.py
@@ -3,7 +3,7 @@
 from pypy.interpreter.gateway import unwrap_spec
 
 
-@unwrap_spec(name='str0')
+@unwrap_spec(name='text0')
 def real_getenv(space, name):
     """Get an OS environment value skipping Python cache"""
     return space.newtext_or_none(os.environ.get(name))
diff --git a/pypy/module/_cffi_backend/ffi_obj.py b/pypy/module/_cffi_backend/ffi_obj.py
--- a/pypy/module/_cffi_backend/ffi_obj.py
+++ b/pypy/module/_cffi_backend/ffi_obj.py
@@ -572,7 +572,7 @@
         return self.ffi_type(w_arg, ACCEPT_STRING | ACCEPT_CDATA)
 
 
-    @unwrap_spec(filename="str_or_None", flags=int)
+    @unwrap_spec(filename="fsencode_or_none", flags=int)
     def descr_dlopen(self, filename, flags=0):
         """\
 Load and return a dynamic library identified by 'name'.  The standard
diff --git a/pypy/module/_cffi_backend/libraryobj.py b/pypy/module/_cffi_backend/libraryobj.py
--- a/pypy/module/_cffi_backend/libraryobj.py
+++ b/pypy/module/_cffi_backend/libraryobj.py
@@ -91,7 +91,7 @@
 W_Library.typedef.acceptable_as_base_class = False
 
 
-@unwrap_spec(filename="str_or_None", flags=int)
+@unwrap_spec(filename="fsencode_or_none", flags=int)
 def load_library(space, filename, flags=0):
     lib = W_Library(space, filename, flags)
     return lib
diff --git a/pypy/module/_cffi_backend/test/test_recompiler.py b/pypy/module/_cffi_backend/test/test_recompiler.py
--- a/pypy/module/_cffi_backend/test/test_recompiler.py
+++ b/pypy/module/_cffi_backend/test/test_recompiler.py
@@ -6,7 +6,7 @@
 import pypy.module.cpyext.api     # side-effect of pre-importing it
 
 
-@unwrap_spec(cdef=str, module_name=str, source=str, packed=int)
+@unwrap_spec(cdef='text', module_name='text', source='text', packed=int)
 def prepare(space, cdef, module_name, source, w_includes=None,
             w_extra_source=None, w_min_version=None, packed=False):
     try:
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -314,12 +314,12 @@
     w_res = space.call_function(w_encoder, w_obj, space.newtext(errors))
     return space.getitem(w_res, space.newint(0))
 
-@unwrap_spec(errors='str_or_None')
+@unwrap_spec(errors='text_or_none')
 def readbuffer_encode(space, w_data, errors='strict'):
     s = space.getarg_w('s#', w_data)
     return space.newtuple([space.newbytes(s), space.newint(len(s))])
 
-@unwrap_spec(errors='str_or_None')
+@unwrap_spec(errors='text_or_none')
 def charbuffer_encode(space, w_data, errors='strict'):
     s = space.getarg_w('t#', w_data)
     return space.newtuple([space.newbytes(s), space.newint(len(s))])
@@ -373,7 +373,7 @@
 def make_encoder_wrapper(name):
     rname = "unicode_encode_%s" % (name.replace("_encode", ""), )
     assert hasattr(runicode, rname)
-    @unwrap_spec(uni=unicode, errors='str_or_None')
+    @unwrap_spec(uni=unicode, errors='text_or_none')
     def wrap_encoder(space, uni, errors="strict"):
         if errors is None:
             errors = 'strict'
@@ -387,7 +387,7 @@
 def make_decoder_wrapper(name):
     rname = "str_decode_%s" % (name.replace("_decode", ""), )
     assert hasattr(runicode, rname)
-    @unwrap_spec(string='bufferstr', errors='str_or_None',
+    @unwrap_spec(string='bufferstr', errors='text_or_none',
                  w_final=WrappedDefault(False))
     def wrap_decoder(space, string, errors="strict", w_final=None):
         if errors is None:
@@ -437,7 +437,7 @@
 
 # utf-8 functions are not regular, because we have to pass
 # "allow_surrogates=True"
-@unwrap_spec(uni=unicode, errors='str_or_None')
+@unwrap_spec(uni=unicode, errors='text_or_none')
 def utf_8_encode(space, uni, errors="strict"):
     if errors is None:
         errors = 'strict'
@@ -450,7 +450,7 @@
         allow_surrogates=True)
     return space.newtuple([space.newbytes(result), space.newint(len(uni))])
 
-@unwrap_spec(string='bufferstr', errors='str_or_None',
+@unwrap_spec(string='bufferstr', errors='text_or_none',
              w_final = WrappedDefault(False))
 def utf_8_decode(space, string, errors="strict", w_final=None):
     if errors is None:
@@ -466,7 +466,7 @@
         allow_surrogates=True)
     return space.newtuple([space.newunicode(result), space.newint(consumed)])
 
-@unwrap_spec(data='bufferstr', errors='str_or_None', byteorder=int,
+@unwrap_spec(data='bufferstr', errors='text_or_none', byteorder=int,
              w_final=WrappedDefault(False))
 def utf_16_ex_decode(space, data, errors='strict', byteorder=0, w_final=None):
     if errors is None:
@@ -487,7 +487,7 @@
     return space.newtuple([space.newunicode(res), space.newint(consumed),
                            space.newint(byteorder)])
 
-@unwrap_spec(data='bufferstr', errors='str_or_None', byteorder=int,
+@unwrap_spec(data='bufferstr', errors='text_or_none', byteorder=int,
              w_final=WrappedDefault(False))
 def utf_32_ex_decode(space, data, errors='strict', byteorder=0, w_final=None):
     final = space.is_true(w_final)
@@ -585,7 +585,7 @@
             "character mapping must return integer, None or str")
 
 
-@unwrap_spec(string='bufferstr', errors='str_or_None')
+@unwrap_spec(string='bufferstr', errors='text_or_none')
 def charmap_decode(space, string, errors="strict", w_mapping=None):
     if errors is None:
         errors = 'strict'
@@ -604,7 +604,7 @@
         final, state.decode_error_handler, mapping)
     return space.newtuple([space.newunicode(result), space.newint(consumed)])
 
-@unwrap_spec(uni=unicode, errors='str_or_None')
+@unwrap_spec(uni=unicode, errors='text_or_none')
 def charmap_encode(space, uni, errors="strict", w_mapping=None):
     if errors is None:
         errors = 'strict'
@@ -647,7 +647,7 @@
             return -1
         return space.int_w(w_code)
 
-@unwrap_spec(string='bufferstr', errors='str_or_None',
+@unwrap_spec(string='bufferstr', errors='text_or_none',
              w_final=WrappedDefault(False))
 def unicode_escape_decode(space, string, errors="strict", w_final=None):
     if errors is None:
@@ -667,7 +667,7 @@
 # ____________________________________________________________
 # Unicode-internal
 
-@unwrap_spec(errors='str_or_None')
+@unwrap_spec(errors='text_or_none')
 def unicode_internal_decode(space, w_string, errors="strict"):
     if errors is None:
         errors = 'strict'
@@ -691,7 +691,7 @@
 # support for the "string escape" codec
 # This is a bytes-to bytes transformation
 
-@unwrap_spec(data='bytes', errors='str_or_None')
+@unwrap_spec(data='bytes', errors='text_or_none')
 def escape_encode(space, data, errors='strict'):
     from pypy.objspace.std.bytesobject import string_escape_encode
     result = string_escape_encode(data, quote="'")
@@ -701,7 +701,7 @@
     w_result = space.newbytes(result[start:end])
     return space.newtuple([w_result, space.newint(len(data))])
 
-@unwrap_spec(data='bytes', errors='str_or_None')
+@unwrap_spec(data='bytes', errors='text_or_none')
 def escape_decode(space, data, errors='strict'):
     from pypy.interpreter.pyparser.parsestring import PyString_DecodeEscape
     result = PyString_DecodeEscape(space, data, errors, None)
diff --git a/pypy/module/_file/interp_file.py b/pypy/module/_file/interp_file.py
--- a/pypy/module/_file/interp_file.py
+++ b/pypy/module/_file/interp_file.py
@@ -666,7 +666,7 @@
     return False
 
 
-@unwrap_spec(w_file=W_File, encoding="str_or_None", errors="str_or_None")
+@unwrap_spec(w_file=W_File, encoding="text_or_none", errors="text_or_none")
 def set_file_encoding(space, w_file, encoding=None, errors=None):
     w_file.encoding = encoding
     w_file.errors = errors
diff --git a/pypy/module/_io/interp_io.py b/pypy/module/_io/interp_io.py
--- a/pypy/module/_io/interp_io.py
+++ b/pypy/module/_io/interp_io.py
@@ -41,8 +41,8 @@
 DEFAULT_BUFFER_SIZE = 8 * 1024
 
 @unwrap_spec(mode='text', buffering=int,
-             encoding="str_or_None", errors="str_or_None",
-             newline="str_or_None", closefd=bool)
+             encoding="text_or_none", errors="text_or_none",
+             newline="text_or_none", closefd=bool)
 def open(space, w_file, mode="r", buffering=-1, encoding=None, errors=None,
     newline=None, closefd=True):
     from pypy.module._io.interp_bufferedio import (W_BufferedRandom,
diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py
--- a/pypy/module/_io/interp_textio.py
+++ b/pypy/module/_io/interp_textio.py
@@ -359,7 +359,7 @@
                                               # of the stream
         self.snapshot = None
 
-    @unwrap_spec(encoding="str_or_None", line_buffering=int)
+    @unwrap_spec(encoding="text_or_none", line_buffering=int)
     def descr_init(self, space, w_buffer, encoding=None,
                    w_errors=None, w_newline=None, line_buffering=0):
         self.state = STATE_ZERO
diff --git a/pypy/module/_multibytecodec/interp_incremental.py b/pypy/module/_multibytecodec/interp_incremental.py
--- a/pypy/module/_multibytecodec/interp_incremental.py
+++ b/pypy/module/_multibytecodec/interp_incremental.py
@@ -68,7 +68,7 @@
         return space.newunicode(output)
 
 
-@unwrap_spec(errors="str_or_None")
+@unwrap_spec(errors="text_or_none")
 def mbidecoder_new(space, w_subtype, errors=None):
     r = space.allocate_instance(MultibyteIncrementalDecoder, w_subtype)
     r.__init__(space, errors)
@@ -116,7 +116,7 @@
         return space.newbytes(output)
 
 
-@unwrap_spec(errors="str_or_None")
+@unwrap_spec(errors="text_or_none")
 def mbiencoder_new(space, w_subtype, errors=None):
     r = space.allocate_instance(MultibyteIncrementalEncoder, w_subtype)
     r.__init__(space, errors)
diff --git a/pypy/module/_multibytecodec/interp_multibytecodec.py b/pypy/module/_multibytecodec/interp_multibytecodec.py
--- a/pypy/module/_multibytecodec/interp_multibytecodec.py
+++ b/pypy/module/_multibytecodec/interp_multibytecodec.py
@@ -11,7 +11,7 @@
         self.name = name
         self.codec = codec
 
-    @unwrap_spec(input='bytes', errors="str_or_None")
+    @unwrap_spec(input='bytes', errors="text_or_none")
     def decode(self, space, input, errors=None):
         if errors is None:
             errors = 'strict'
@@ -27,7 +27,7 @@
         return space.newtuple([space.newunicode(output),
                                space.newint(len(input))])
 
-    @unwrap_spec(input=unicode, errors="str_or_None")
+    @unwrap_spec(input=unicode, errors="text_or_none")
     def encode(self, space, input, errors=None):
         if errors is None:
             errors = 'strict'
diff --git a/pypy/module/_multiprocessing/interp_win32.py b/pypy/module/_multiprocessing/interp_win32.py
--- a/pypy/module/_multiprocessing/interp_win32.py
+++ b/pypy/module/_multiprocessing/interp_win32.py
@@ -114,7 +114,7 @@
 # __________________________________________________________
 # functions for the "win32" namespace
 
-@unwrap_spec(name=str, openmode=r_uint, pipemode=r_uint, maxinstances=r_uint,
+@unwrap_spec(name='text', openmode=r_uint, pipemode=r_uint, maxinstances=r_uint,
              outputsize=r_uint, inputsize=r_uint, timeout=r_uint)
 def CreateNamedPipe(space, name, openmode, pipemode, maxinstances,
                     outputsize, inputsize, timeout, w_security):
@@ -161,13 +161,13 @@
         lltype.free(state, flavor='raw')
         lltype.free(statep, flavor='raw')
 
-@unwrap_spec(name=str, timeout=r_uint)
+@unwrap_spec(name='text', timeout=r_uint)
 def WaitNamedPipe(space, name, timeout):
     # Careful: zero means "default value specified by CreateNamedPipe()"
     if not _WaitNamedPipe(name, timeout):
         raise wrap_windowserror(space, rwin32.lastSavedWindowsError())
 
-@unwrap_spec(filename=str, access=r_uint, share=r_uint,
+@unwrap_spec(filename='fsencode', access=r_uint, share=r_uint,
              disposition=r_uint, flags=r_uint)
 def CreateFile(space, filename, access, share, w_security,
                disposition, flags, w_templatefile):
diff --git a/pypy/module/_rawffi/alt/interp_funcptr.py b/pypy/module/_rawffi/alt/interp_funcptr.py
--- a/pypy/module/_rawffi/alt/interp_funcptr.py
+++ b/pypy/module/_rawffi/alt/interp_funcptr.py
@@ -344,7 +344,7 @@
     def getidentifier(self, space):
         return space.newint(self.cdll.getidentifier())
 
-@unwrap_spec(name='str_or_None', mode=int)
+@unwrap_spec(name='fsencode_or_none', mode=int)
 def descr_new_cdll(space, w_type, name, mode=-1):
     return W_CDLL(space, name, mode)
 
@@ -363,7 +363,7 @@
         W_CDLL.__init__(self, space, name, mode)
         self.flags = libffi.FUNCFLAG_STDCALL
 
-@unwrap_spec(name='str_or_None', mode=int)
+@unwrap_spec(name='fsencode_or_none', mode=int)
 def descr_new_windll(space, w_type, name, mode=-1):
     return W_WinDLL(space, name, mode)
 
diff --git a/pypy/module/_rawffi/alt/test/test_struct.py b/pypy/module/_rawffi/alt/test/test_struct.py
--- a/pypy/module/_rawffi/alt/test/test_struct.py
+++ b/pypy/module/_rawffi/alt/test/test_struct.py
@@ -43,7 +43,7 @@
     def setup_class(cls):
         BaseAppTestFFI.setup_class.im_func(cls)
 
-        @unwrap_spec(addr=int, typename=str, length=int)
+        @unwrap_spec(addr=int, typename='text', length=int)
         def read_raw_mem(space, addr, typename, length):
             import ctypes
             addr = ctypes.cast(addr, ctypes.c_void_p)
diff --git a/pypy/module/_rawffi/interp_rawffi.py b/pypy/module/_rawffi/interp_rawffi.py
--- a/pypy/module/_rawffi/interp_rawffi.py
+++ b/pypy/module/_rawffi/interp_rawffi.py
@@ -235,7 +235,7 @@
     except OSError as e:
         raise wrap_oserror(space, e)
 
-@unwrap_spec(name='str_or_None')
+@unwrap_spec(name='fsencode_or_none')
 def descr_new_cdll(space, w_type, name):
     cdll = open_cdll(space, name)
     return W_CDLL(space, name, cdll)
diff --git a/pypy/module/_ssl/interp_win32.py b/pypy/module/_ssl/interp_win32.py
--- a/pypy/module/_ssl/interp_win32.py
+++ b/pypy/module/_ssl/interp_win32.py
@@ -99,7 +99,7 @@
                     usage.c_rgpszUsageIdentifier[i]))
             return space.newset(result_w)
 
-@unwrap_spec(store_name=str)
+@unwrap_spec(store_name='text')
 def enum_certificates_w(space, store_name):
     """enum_certificates(store_name) -> []
 
@@ -142,7 +142,7 @@
 
     return space.newlist(result_w)
 
-@unwrap_spec(store_name=str)
+@unwrap_spec(store_name='text')
 def enum_crls_w(space, store_name):
     """enum_crls(store_name) -> []
 
diff --git a/pypy/module/_winreg/interp_winreg.py b/pypy/module/_winreg/interp_winreg.py
--- a/pypy/module/_winreg/interp_winreg.py
+++ b/pypy/module/_winreg/interp_winreg.py
@@ -689,7 +689,7 @@
 
 The return value is the handle of the opened key.
 If the function fails, an EnvironmentError exception is raised."""
-    machine = space.str_or_None_w(w_machine)
+    machine = space.text_or_none_w(w_machine)
     hkey = hkey_w(w_hkey, space)
     with lltype.scoped_alloc(rwinreg.PHKEY.TO, 1) as rethkey:
         ret = rwinreg.RegConnectRegistry(machine, hkey, rethkey)
diff --git a/pypy/module/bz2/interp_bz2.py b/pypy/module/bz2/interp_bz2.py
--- a/pypy/module/bz2/interp_bz2.py
+++ b/pypy/module/bz2/interp_bz2.py
@@ -328,7 +328,7 @@
         raise oefmt(space.w_ValueError, "cannot open in read-write mode")
     if basemode == "a":
         raise oefmt(space.w_ValueError, "cannot append to bz2 file")
-    stream = open_path_helper(space.str0_w(w_path), os_flags, False)
+    stream = open_path_helper(space.fsencode_w(w_path), os_flags, False)
     if reading:
         bz2stream = ReadBZ2Filter(space, stream, buffering)
         buffering = 0     # by construction, the ReadBZ2Filter acts like
diff --git a/pypy/module/bz2/test/test_bz2_file.py b/pypy/module/bz2/test/test_bz2_file.py
--- a/pypy/module/bz2/test/test_bz2_file.py
+++ b/pypy/module/bz2/test/test_bz2_file.py
@@ -28,7 +28,7 @@
         data = DATA[:100]
         f.write(data, 'wb')
 
-    @unwrap_spec(data=str)
+    @unwrap_spec(data='bytes')
     def decompress(space, data):
         import popen2
         import bz2
diff --git a/pypy/module/cppyy/test/test_crossing.py b/pypy/module/cppyy/test/test_crossing.py
--- a/pypy/module/cppyy/test/test_crossing.py
+++ b/pypy/module/cppyy/test/test_crossing.py
@@ -78,7 +78,7 @@
             import ctypes, cppyy""")    # prevents leak-checking complaints on ctypes' statics
 
     def setup_method(self, func):
-        @unwrap_spec(name=str, init=str, body=str)
+        @unwrap_spec(name='text', init='text', body='text')
         def create_cdll(space, name, init, body):
             # the following is loosely from test_cpyext.py import_module; it
             # is copied here to be able to tweak the call to
diff --git a/pypy/module/cpyext/memoryobject.py b/pypy/module/cpyext/memoryobject.py
--- a/pypy/module/cpyext/memoryobject.py
+++ b/pypy/module/cpyext/memoryobject.py
@@ -119,7 +119,7 @@
     try:
         view.c_buf = rffi.cast(rffi.VOIDP, buf.get_raw_address())
     except ValueError:
-        if not space.isinstance_w(w_obj, space.w_str):
+        if not space.isinstance_w(w_obj, space.w_bytes):
             # XXX Python 3?
             raise BufferError("could not create buffer from object")
         view.c_buf = rffi.cast(rffi.VOIDP, rffi.str2charp(space.bytes_w(w_obj), track_allocation=False))
diff --git a/pypy/module/cpyext/slotdefs.py b/pypy/module/cpyext/slotdefs.py
--- a/pypy/module/cpyext/slotdefs.py
+++ b/pypy/module/cpyext/slotdefs.py
@@ -237,7 +237,10 @@
 def wrap_lenfunc(space, w_self, w_args, func):
     func_len = rffi.cast(lenfunc, func)
     check_num_args(space, w_args, 0)
-    return space.newint(generic_cpy_call(space, func_len, w_self))
+    res = generic_cpy_call(space, func_len, w_self)
+    if widen(res) == -1:
+        space.fromcache(State).check_and_raise_exception(always=True)
+    return space.newint(res)
 
 def wrap_sq_item(space, w_self, w_args, func):
     func_target = rffi.cast(ssizeargfunc, func)
diff --git a/pypy/module/cpyext/test/test_cpyext.py b/pypy/module/cpyext/test/test_cpyext.py
--- a/pypy/module/cpyext/test/test_cpyext.py
+++ b/pypy/module/cpyext/test/test_cpyext.py
@@ -287,7 +287,7 @@
         if self.runappdirect:
             return
 
-        @unwrap_spec(name=str)
+        @unwrap_spec(name='text')
         def compile_module(space, name,
                            w_source_files=None,
                            w_source_strings=None):
@@ -313,8 +313,8 @@
 
             return space.wrap(pydname)
 
-        @unwrap_spec(name=str, init='str_or_None', body=str,
-                     filename='str_or_None', PY_SSIZE_T_CLEAN=bool)
+        @unwrap_spec(name='text', init='text_or_none', body='text',
+                     filename='fsencode_or_none', PY_SSIZE_T_CLEAN=bool)
         def import_module(space, name, init=None, body='',
                           filename=None, w_include_dirs=None,
                           PY_SSIZE_T_CLEAN=False):
@@ -325,12 +325,12 @@
             return w_result
 
 
-        @unwrap_spec(mod=str, name=str)
+        @unwrap_spec(mod='text', name='text')
         def load_module(space, mod, name):
             return self.sys_info.load_module(mod, name)
 
-        @unwrap_spec(modname=str, prologue=str,
-                             more_init=str, PY_SSIZE_T_CLEAN=bool)
+        @unwrap_spec(modname='text', prologue='text',
+                             more_init='text', PY_SSIZE_T_CLEAN=bool)
         def import_extension(space, modname, w_functions, prologue="",
                              w_include_dirs=None, more_init="", PY_SSIZE_T_CLEAN=False):
             functions = space.unwrap(w_functions)
diff --git a/pypy/module/cpyext/test/test_pystate.py b/pypy/module/cpyext/test/test_pystate.py
--- a/pypy/module/cpyext/test/test_pystate.py
+++ b/pypy/module/cpyext/test/test_pystate.py
@@ -178,7 +178,6 @@
             ("bounce", "METH_NOARGS",
             """
             PyGILState_STATE gilstate;
-            PyThreadState *tstate;
             PyObject *dict;
 
             if (PyEval_ThreadsInitialized() == 0)
diff --git a/pypy/module/exceptions/interp_exceptions.py b/pypy/module/exceptions/interp_exceptions.py
--- a/pypy/module/exceptions/interp_exceptions.py
+++ b/pypy/module/exceptions/interp_exceptions.py
@@ -288,7 +288,7 @@
         space.realunicode_w(w_object)
         space.int_w(w_start)
         space.int_w(w_end)
-        space.realstr_w(w_reason)
+        space.realtext_w(w_reason)
         # assign attributes
         self.w_object = w_object
         self.w_start = w_start
@@ -628,11 +628,11 @@
 
     def descr_init(self, space, w_encoding, w_object, w_start, w_end, w_reason):
         # typechecking
-        space.realstr_w(w_encoding)
-        space.realstr_w(w_object)
+        space.realtext_w(w_encoding)
+        space.realtext_w(w_object)
         space.int_w(w_start)
         space.int_w(w_end)
-        space.realstr_w(w_reason)
+        space.realtext_w(w_reason)
         # assign attributes
         self.w_encoding = w_encoding
         self.w_object = w_object
@@ -718,11 +718,11 @@
 
     def descr_init(self, space, w_encoding, w_object, w_start, w_end, w_reason):
         # typechecking
-        space.realstr_w(w_encoding)
+        space.realtext_w(w_encoding)
         space.realunicode_w(w_object)
         space.int_w(w_start)
         space.int_w(w_end)
-        space.realstr_w(w_reason)
+        space.realtext_w(w_reason)
         # assign attributes
         self.w_encoding = w_encoding
         self.w_object = w_object
diff --git a/pypy/module/gc/interp_gc.py b/pypy/module/gc/interp_gc.py
--- a/pypy/module/gc/interp_gc.py
+++ b/pypy/module/gc/interp_gc.py
@@ -79,7 +79,7 @@
 
 # ____________________________________________________________
 
-@unwrap_spec(filename='str0')
+@unwrap_spec(filename='fsencode')
 def dump_heap_stats(space, filename):
     tb = rgc._heap_stats()
     if not tb:
diff --git a/pypy/module/imp/importing.py b/pypy/module/imp/importing.py
--- a/pypy/module/imp/importing.py
+++ b/pypy/module/imp/importing.py
@@ -364,7 +364,7 @@
         length = space.len_w(w_fromlist)
         for i in range(length):
             w_name = space.getitem(w_fromlist, space.newint(i))
-            if not space.isinstance_w(w_name, space.w_str):
+            if not space.isinstance_w(w_name, space.w_text):
                 raise oefmt(space.w_TypeError,
                     "'fromlist' items must be str, not %T", w_name)
         if w_path is not None:
@@ -491,7 +491,7 @@
     def __init__(self, space):
         pass
 
-    @unwrap_spec(path='str0')
+    @unwrap_spec(path='fsencode')
     def descr_init(self, space, path):
         if not path:
             raise oefmt(space.w_ImportError, "empty pathname")
@@ -570,7 +570,7 @@
                 if w_loader:
                     return FindInfo.fromLoader(w_loader)
 
-            path = space.str0_w(w_pathitem)
+            path = space.fsencode_w(w_pathitem)
             filepart = os.path.join(path, partname)
             log_pyverbose(space, 2, "# trying %s\n" % (filepart,))
             if os.path.isdir(filepart) and case_ok(filepart):
diff --git a/pypy/module/imp/interp_imp.py b/pypy/module/imp/interp_imp.py
--- a/pypy/module/imp/interp_imp.py
+++ b/pypy/module/imp/interp_imp.py
@@ -78,7 +78,7 @@
 def load_module(space, w_name, w_file, w_filename, w_info):
     w_suffix, w_filemode, w_modtype = space.unpackiterable(w_info, 3)
 
-    filename = space.str0_w(w_filename)
+    filename = space.fsencode_w(w_filename)
     filemode = space.text_w(w_filemode)
     if space.is_w(w_file, space.w_None):
         stream = None
@@ -95,7 +95,7 @@
         space, w_name, find_info, reuse=True)
 
 def load_source(space, w_modulename, w_filename, w_file=None):
-    filename = space.str0_w(w_filename)
+    filename = space.fsencode_w(w_filename)
 
     stream = get_file(space, w_file, filename, 'U')
 
@@ -109,7 +109,7 @@
         stream.close()
     return w_mod
 
-@unwrap_spec(filename='str0', check_afterwards=int)
+@unwrap_spec(filename='fsencode', check_afterwards=int)
 def _run_compiled_module(space, w_modulename, filename, w_file, w_module,
                          check_afterwards=False):
     # the function 'imp._run_compiled_module' is a pypy-only extension
@@ -125,14 +125,14 @@
         stream.close()
     return w_mod
 
-@unwrap_spec(filename='str0')
+@unwrap_spec(filename='fsencode')
 def load_compiled(space, w_modulename, filename, w_file=None):
     w_mod = Module(space, w_modulename)
     importing._prepare_module(space, w_mod, filename, None)
     return _run_compiled_module(space, w_modulename, filename, w_file, w_mod,
                                 check_afterwards=True)
 
-@unwrap_spec(filename='text')
+@unwrap_spec(filename='fsencode')
 def load_dynamic(space, w_modulename, filename, w_file=None):
     if not importing.has_so_extension(space):
         raise oefmt(space.w_ImportError, "Not implemented")
diff --git a/pypy/module/mmap/interp_mmap.py b/pypy/module/mmap/interp_mmap.py
--- a/pypy/module/mmap/interp_mmap.py
+++ b/pypy/module/mmap/interp_mmap.py
@@ -183,7 +183,7 @@
 
     def descr_setitem(self, w_index, w_value):
         space = self.space
-        value = space.realstr_w(w_value)
+        value = space.realtext_w(w_value)
         self.check_valid()
 
         self.check_writeable()
@@ -238,7 +238,7 @@
         if not space.isinstance_w(w_item, space.w_bytes):
             raise oefmt(space.w_IndexError,
                         "mmap slice assignment must be a string")
-        value = space.realstr_w(w_item)
+        value = space.realtext_w(w_item)
         if len(value) != (j - i):
             raise oefmt(space.w_IndexError,
                         "mmap slice assignment is wrong size")
diff --git a/pypy/module/sys/initpath.py b/pypy/module/sys/initpath.py
--- a/pypy/module/sys/initpath.py
+++ b/pypy/module/sys/initpath.py
@@ -147,17 +147,17 @@
         return None
 
 
-@unwrap_spec(executable='str0')
+@unwrap_spec(executable='fsencode')
 def pypy_find_executable(space, executable):
     return space.newtext(find_executable(executable))
 
 
-@unwrap_spec(filename='str0')
+@unwrap_spec(filename='fsencode')
 def pypy_resolvedirof(space, filename):
     return space.newtext(resolvedirof(filename))
 
 
-@unwrap_spec(executable='str0')
+@unwrap_spec(executable='fsencode')
 def pypy_find_stdlib(space, executable):
     path, prefix = None, None
     if executable != '*':
diff --git a/pypy/module/test_lib_pypy/test_md5_extra.py b/pypy/module/test_lib_pypy/test_md5_extra.py
--- a/pypy/module/test_lib_pypy/test_md5_extra.py
+++ b/pypy/module/test_lib_pypy/test_md5_extra.py
@@ -94,7 +94,7 @@
             # interp2app doesn't work in appdirect mode
             cls.w_compare_host = staticmethod(compare_host)
         else:
-            compare_host.unwrap_spec = [str, str, str]
+            compare_host.unwrap_spec = ['bytes', 'bytes', 'text']
             cls.w_compare_host = space.wrap(gateway.interp2app(compare_host))
 
     def w_compare(self, message):
diff --git a/pypy/module/zipimport/interp_zipimport.py b/pypy/module/zipimport/interp_zipimport.py
--- a/pypy/module/zipimport/interp_zipimport.py
+++ b/pypy/module/zipimport/interp_zipimport.py
@@ -350,7 +350,7 @@
         space = self.space
         return space.newtext(self.filename)
 
-@unwrap_spec(name='str0')
+@unwrap_spec(name='text0')
 def descr_new_zipimporter(space, w_type, name):
     ok = False
     parts_ends = [i for i in range(0, len(name))
diff --git a/pypy/objspace/fake/objspace.py b/pypy/objspace/fake/objspace.py
--- a/pypy/objspace/fake/objspace.py
+++ b/pypy/objspace/fake/objspace.py
@@ -45,7 +45,6 @@
 
     def str_w(self, space):
         return NonConstant("foobar")
-    identifier_w = bytes_w = str_w
 
     def unicode_w(self, space):
         return NonConstant(u"foobar")
diff --git a/pypy/objspace/std/bytearrayobject.py b/pypy/objspace/std/bytearrayobject.py
--- a/pypy/objspace/std/bytearrayobject.py
+++ b/pypy/objspace/std/bytearrayobject.py
@@ -181,7 +181,7 @@
         # we ignore w_type and always return a bytearray
         return new_bytearray(space, space.w_bytearray, data)
 
-    @unwrap_spec(encoding='str_or_None', errors='str_or_None')
+    @unwrap_spec(encoding='text_or_none', errors='text_or_none')
     def descr_init(self, space, w_source=None, encoding=None, errors=None):
         if w_source is None:
             w_source = space.newbytes('')
diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py
--- a/pypy/objspace/std/objspace.py
+++ b/pypy/objspace/std/objspace.py
@@ -89,12 +89,15 @@
         for typedef, cls in builtin_type_classes.items():
             w_type = self.gettypeobject(typedef)
             self.builtin_types[typedef.name] = w_type
-            if 1: # typedef.name != "str":      BACKCOMPAT
-                setattr(self, 'w_' + typedef.name, w_type)
-            if typedef.name == "str":
-                self.w_bytes = w_type
+            name = typedef.name
+            # we don't expose 'space.w_str' at all, to avoid confusion
+            # with Python 3.  Instead, in Python 2, it becomes
+            # space.w_bytes (or space.w_text).
+            if name == 'str':
+                name = 'bytes'
+            setattr(self, 'w_' + name, w_type)
             self._interplevel_classes[w_type] = cls
-        self.w_text = self.w_bytes # this is w_unicode on Py3
+        self.w_text = self.w_bytes   # 'space.w_text' is w_unicode on Py3
         self.w_dict.flag_map_or_seq = 'M'
         self.builtin_types["NotImplemented"] = self.w_NotImplemented
         self.builtin_types["Ellipsis"] = self.w_Ellipsis
diff --git a/pypy/objspace/std/test/test_mapdict.py b/pypy/objspace/std/test/test_mapdict.py
--- a/pypy/objspace/std/test/test_mapdict.py
+++ b/pypy/objspace/std/test/test_mapdict.py
@@ -899,7 +899,7 @@
                 successes = entry.success_counter
             globalfailures = INVALID_CACHE_ENTRY.failure_counter
             return space.wrap((failures, successes, globalfailures))
-        check.unwrap_spec = [gateway.ObjSpace, gateway.W_Root, str]
+        check.unwrap_spec = [gateway.ObjSpace, gateway.W_Root, 'text']
         cls.w_check = cls.space.wrap(gateway.interp2app(check))
 
     def test_simple(self):
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -462,26 +462,17 @@
         w_encoder = space.sys.get_w_default_encoder()
     else:
         if errors is None or errors == 'strict':
-            try:
-                if encoding == 'ascii':
-                    u = space.unicode_w(w_object)
-                    eh = unicodehelper.raise_unicode_exception_encode
-                    return space.newbytes(unicode_encode_ascii(
-                            u, len(u), None, errorhandler=eh))
-                if encoding == 'utf-8':
-                    u = space.unicode_w(w_object)
-                    eh = unicodehelper.raise_unicode_exception_encode
-                    return space.newbytes(unicode_encode_utf_8(
-                            u, len(u), None, errorhandler=eh,
-                            allow_surrogates=True))
-            except unicodehelper.RUnicodeEncodeError as ue:
-                raise OperationError(space.w_UnicodeEncodeError,
-                                     space.newtuple([
-                    space.newtext(ue.encoding),
-                    space.newunicode(ue.object),
-                    space.newint(ue.start),
-                    space.newint(ue.end),
-                    space.newtext(ue.reason)]))
+            if encoding == 'ascii':
+                u = space.unicode_w(w_object)
+                eh = unicodehelper.encode_error_handler(space)
+                return space.newbytes(unicode_encode_ascii(
+                        u, len(u), None, errorhandler=eh))
+            if encoding == 'utf-8':
+                u = space.unicode_w(w_object)
+                eh = unicodehelper.encode_error_handler(space)
+                return space.newbytes(unicode_encode_utf_8(
+                        u, len(u), None, errorhandler=eh,
+                        allow_surrogates=True))
         from pypy.module._codecs.interp_codecs import lookup_codec
         w_encoder = space.getitem(lookup_codec(space, encoding), space.newint(0))
     if errors is None:
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -5,7 +5,7 @@
 from rpython.rlib.unicodedata import unicodedb
 from rpython.tool.sourcetools import func_with_new_name
 from rpython.rtyper.lltypesystem import lltype, rffi
-from rpython.rlib import jit
+from rpython.rlib import jit, nonconst
 
 
 if rffi.sizeof(lltype.UniChar) == 4:
@@ -133,6 +133,26 @@
 def _invalid_cont_byte(ordch):
     return ordch>>6 != 0x2    # 0b10
 
+_invalid_byte_2_of_2 = _invalid_cont_byte
+_invalid_byte_3_of_3 = _invalid_cont_byte
+_invalid_byte_3_of_4 = _invalid_cont_byte
+_invalid_byte_4_of_4 = _invalid_cont_byte
+
+@enforceargs(allow_surrogates=bool)
+def _invalid_byte_2_of_3(ordch1, ordch2, allow_surrogates):
+    return (ordch2>>6 != 0x2 or    # 0b10
+            (ordch1 == 0xe0 and ordch2 < 0xa0)
+            # surrogates shouldn't be valid UTF-8!
+            or (ordch1 == 0xed and ordch2 > 0x9f and not allow_surrogates))
+
+def _invalid_byte_2_of_4(ordch1, ordch2):
+    return (ordch2>>6 != 0x2 or    # 0b10
+            (ordch1 == 0xf0 and ordch2 < 0x90) or
+            (ordch1 == 0xf4 and ordch2 > 0x8f))
+
+# NOTE: this is a slightly fixed algorithm when compared with
+# CPython2's.  It is closer to CPython3's.  See comments in
+# test_invalid_cb_for_3bytes_seq().
 def str_decode_utf_8_impl(s, size, errors, final, errorhandler,
                           allow_surrogates):
     if size == 0:
@@ -153,20 +173,60 @@
         if pos + n > size:
             if not final:
                 break
+            # argh, this obscure block of code is mostly a copy of
+            # what follows :-(
             charsleft = size - pos - 1 # either 0, 1, 2
             # note: when we get the 'unexpected end of data' we need
             # to care about the pos returned; it can be lower than size,
             # in case we need to continue running this loop
-            endpos = pos + 1
-            if charsleft >= 1 and not _invalid_cont_byte(ord(s[pos+1])):
-                endpos = pos + 2
-                if charsleft >= 2 and not _invalid_cont_byte(ord(s[pos+2])):
-                    endpos = pos + 3
-            r, pos = errorhandler(errors, 'utf8',
-                                  'unexpected end of data',
-                                  s, pos, endpos)
-            result.append(r)
-            continue
+            if not charsleft:
+                # there's only the start byte and nothing else
+                r, pos = errorhandler(errors, 'utf8',
+                                      'unexpected end of data',
+                                      s, pos, pos+1)
+                result.append(r)
+                continue
+            ordch2 = ord(s[pos+1])
+            if n == 3:
+                # 3-bytes seq with only a continuation byte
+                if _invalid_byte_2_of_3(ordch1, ordch2, allow_surrogates):
+                    # second byte invalid, take the first and continue
+                    r, pos = errorhandler(errors, 'utf8',
+                                          'invalid continuation byte',
+                                          s, pos, pos+1)
+                    result.append(r)
+                    continue
+                else:
+                    # second byte valid, but third byte missing
+                    r, pos = errorhandler(errors, 'utf8',
+                                      'unexpected end of data',
+                                      s, pos, pos+2)
+                    result.append(r)
+                    continue
+            elif n == 4:
+                # 4-bytes seq with 1 or 2 continuation bytes
+                if _invalid_byte_2_of_4(ordch1, ordch2):
+                    # second byte invalid, take the first and continue
+                    r, pos = errorhandler(errors, 'utf8',
+                                          'invalid continuation byte',
+                                          s, pos, pos+1)
+                    result.append(r)
+                    continue
+                elif charsleft == 2 and _invalid_byte_3_of_4(ord(s[pos+2])):
+                    # third byte invalid, take the first two and continue
+                    r, pos = errorhandler(errors, 'utf8',
+                                          'invalid continuation byte',
+                                          s, pos, pos+2)
+                    result.append(r)
+                    continue
+                else:
+                    # there's only 1 or 2 valid cb, but the others are missing
+                    r, pos = errorhandler(errors, 'utf8',
+                                      'unexpected end of data',
+                                      s, pos, pos+charsleft+1)
+                    result.append(r)
+                    continue
+            raise AssertionError("unreachable")
 
         if n == 0:
             r, pos = errorhandler(errors, 'utf8',
@@ -179,7 +239,7 @@
 
         elif n == 2:
             ordch2 = ord(s[pos+1])
-            if _invalid_cont_byte(ordch2):
+            if _invalid_byte_2_of_2(ordch2):
                 r, pos = errorhandler(errors, 'utf8',
                                       'invalid continuation byte',
                                       s, pos, pos+1)
@@ -193,48 +253,41 @@
         elif n == 3:
             ordch2 = ord(s[pos+1])
             ordch3 = ord(s[pos+2])
-            if _invalid_cont_byte(ordch2):
+            if _invalid_byte_2_of_3(ordch1, ordch2, allow_surrogates):
                 r, pos = errorhandler(errors, 'utf8',
                                       'invalid continuation byte',
                                       s, pos, pos+1)
                 result.append(r)
                 continue
-            elif _invalid_cont_byte(ordch3):
+            elif _invalid_byte_3_of_3(ordch3):
                 r, pos = errorhandler(errors, 'utf8',
                                       'invalid continuation byte',
                                       s, pos, pos+2)
                 result.append(r)
                 continue
             # 1110xxxx 10yyyyyy 10zzzzzz -> 00000000 xxxxyyyy yyzzzzzz
-            c = (((ordch1 & 0x0F) << 12) +     # 0b00001111
-                 ((ordch2 & 0x3F) << 6) +      # 0b00111111
-                 (ordch3 & 0x3F))              # 0b00111111
-            if c < 2048 or (0xd800 <= c <= 0xdfff and not allow_surrogates):
-                r, pos = errorhandler(errors, 'utf8',
-                                      'invalid continuation byte',
-                                      s, pos, pos+2)
-                result.append(r)
-                continue
-            result.append(unichr(c))
+            result.append(unichr(((ordch1 & 0x0F) << 12) +     # 0b00001111
+                                 ((ordch2 & 0x3F) << 6) +      # 0b00111111
+                                 (ordch3 & 0x3F)))             # 0b00111111
             pos += 3
 
         elif n == 4:
             ordch2 = ord(s[pos+1])
             ordch3 = ord(s[pos+2])
             ordch4 = ord(s[pos+3])
-            if _invalid_cont_byte(ordch2):
+            if _invalid_byte_2_of_4(ordch1, ordch2):
                 r, pos = errorhandler(errors, 'utf8',
                                       'invalid continuation byte',
                                       s, pos, pos+1)
                 result.append(r)
                 continue
-            elif _invalid_cont_byte(ordch3):
+            elif _invalid_byte_3_of_4(ordch3):
                 r, pos = errorhandler(errors, 'utf8',
                                       'invalid continuation byte',
                                       s, pos, pos+2)
                 result.append(r)
                 continue
-            elif _invalid_cont_byte(ordch4):
+            elif _invalid_byte_4_of_4(ordch4):
                 r, pos = errorhandler(errors, 'utf8',
                                       'invalid continuation byte',
                                       s, pos, pos+3)
@@ -245,12 +298,6 @@
                  ((ordch2 & 0x3F) << 12) +      # 0b00111111
                  ((ordch3 & 0x3F) << 6) +       # 0b00111111
                  (ordch4 & 0x3F))               # 0b00111111
-            if c <= 65535 or c > 0x10ffff:
-                r, pos = errorhandler(errors, 'utf8',
-                                      'invalid continuation byte',
-                                      s, pos, pos+3)
-                result.append(r)
-                continue
             if c <= MAXUNICODE:
                 result.append(UNICHR(c))
             else:
@@ -326,7 +373,12 @@
                             pos += 1
                             _encodeUCS4(result, ch3)
                             continue
-                    if not allow_surrogates:
+                    # note: if the program only ever calls this with
+                    # allow_surrogates=True, then we'll never annotate
+                    # the following block of code, and errorhandler()
+                    # will never be called.  This causes RPython
+                    # problems.  Avoid it with the nonconst hack.
+                    if not allow_surrogates or nonconst.NonConstant(False):
                         ru, rs, pos = errorhandler(errors, 'utf8',
                                                    'surrogates not allowed',
                                                    s, pos-1, pos)
diff --git a/rpython/rlib/test/test_runicode.py 
b/rpython/rlib/test/test_runicode.py
--- a/rpython/rlib/test/test_runicode.py
+++ b/rpython/rlib/test/test_runicode.py
@@ -700,27 +700,6 @@
             assert decoder(seq, len(seq), 'ignore', final=True
                            ) == (res, len(seq))
 
-    @settings(max_examples=10000)
-    @given(strategies.binary())
-    def test_str_check_utf8(self, s):
-        try:
-            u = s.decode("utf8")
-            valid = True
-        except UnicodeDecodeError as e:
-            valid = False
-        try:
-            result, length = runicode.str_decode_utf_8(s, len(s), None,
-                errorhandler=None, final=True, allow_surrogates=True)
-        except UnicodeDecodeError as a:
-            assert not valid
-            assert a.start == e.start
-            assert a.end == e.end
-            assert str(a) == str(e)
-        else:
-            assert valid
-            assert result == u
-            assert length == len(s)
-
 
 class TestEncoding(UnicodeTests):
     def test_all_ascii(self):
diff --git a/rpython/translator/c/test/test_newgc.py b/rpython/translator/c/test/test_newgc.py
--- a/rpython/translator/c/test/test_newgc.py
+++ b/rpython/translator/c/test/test_newgc.py
@@ -1733,7 +1733,11 @@
                      (ulimitv, ' '.join(args),)]
             popen = subprocess.Popen(args1, stderr=subprocess.PIPE)
             _, child_stderr = popen.communicate()
-            assert popen.wait() == 134     # aborted
+            assert popen.wait() in (-6, 134)     # aborted
+            # note: it seems that on some systems we get 134 and on
+            # others we get -6.  Bash is supposed to translate the
+            # SIGABRT (signal 6) from the subprocess into the exit 
+            # code 128+6, but I guess it may not always do so.
             assert 'out of memory:' in child_stderr
             return '42'
         #
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to