Author: Matti Picus <matti.pi...@gmail.com>
Branch: py3.6
Changeset: r96059:395bf13f5d16
Date: 2019-02-17 18:09 +0200
http://bitbucket.org/pypy/pypy/changeset/395bf13f5d16/

Log:    merge default into branch

diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -21,7 +21,7 @@
                                              space.newtext(msg)]))
     return raise_unicode_exception_decode
 
-def decode_never_raise(errors, encoding, msg, s, startingpos, endingpos):
+def _decode_never_raise(errors, encoding, msg, s, startingpos, endingpos):
     assert startingpos >= 0
     ux = ['\ux' + hex(ord(x))[2:].upper() for x in s[startingpos:endingpos]]
     return ''.join(ux), endingpos, 'b'
@@ -1013,57 +1013,15 @@
 
     return result.build()
 
-@specialize.memo()
-def _encode_unicode_error_handler(space):
-    # Fast version of the "strict" errors handler.
     # used only in (unused) encode_utf8
-    from rpython.rlib import runicode
-    def raise_unicode_exception_encode(errors, encoding, msg, uni,
-                                       startingpos, endingpos):
-        assert isinstance(uni, unicode)
-        u_len = len(uni)
-        utf8 = runicode.unicode_encode_utf8sp(uni, u_len)
-        raise OperationError(space.w_UnicodeEncodeError,
-                             space.newtuple([space.newtext(encoding),
-                                             space.newtext(utf8, u_len),
-                                             space.newint(startingpos),
-                                             space.newint(endingpos),
-                                             space.newtext(msg)]))
-        return u'', None, 0
-    return raise_unicode_exception_encode
-
-
-def encode_utf8(space, uni, allow_surrogates=False):
-    # Note that Python3 tends to forbid *all* surrogates in utf-8.
-    # If allow_surrogates=True, then revert to the Python 2 behavior
-    # which never raises UnicodeEncodeError.  Surrogate pairs are then
-    # allowed, either paired or lone.  A paired surrogate is considered
-    # like the non-BMP character it stands for.  See also *_utf8sp().
     xxx
-    from rpython.rlib import runicode
-    assert isinstance(uni, unicode)
-    return runicode.unicode_encode_utf_8(
-        uni, len(uni), "strict",
-        errorhandler=_encode_unicode_error_handler(space),
-        allow_surrogates=allow_surrogates)
-
-def encode_utf8sp(space, uni, allow_surrogates=True):
-    xxx
-    # Surrogate-preserving utf-8 encoding.  Any surrogate character
-    # turns into its 3-bytes encoding, whether it is paired or not.
-    # This should always be reversible, and the reverse is
-    # decode_utf8sp().
-    from rpython.rlib import runicode
-    return runicode.unicode_encode_utf8sp(uni, len(uni))
-
 def decode_utf8sp(space, string):
     # Surrogate-preserving utf-8 decoding.  Assuming there is no
     # encoding error, it should always be reversible, and the reverse is
     # unused encode_utf8sp().
-    return str_decode_utf8(string, "string", True, decode_never_raise,
+    return str_decode_utf8(string, "string", True, _decode_never_raise,
                            allow_surrogates=True)
 
-
 # ____________________________________________________________
 # utf-16
 
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to