Author: Amaury Forgeot d'Arc <[email protected]>
Branch: py3.5
Changeset: r88134:647ced05d718
Date: 2016-11-03 22:31 +0100
http://bitbucket.org/pypy/pypy/changeset/647ced05d718/

Log:    Implement bytes.decode(errors='backslashreplace')

diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -1,8 +1,9 @@
 from rpython.rlib import jit
 from rpython.rlib.objectmodel import we_are_translated
-from rpython.rlib.rstring import StringBuilder
+from rpython.rlib.rstring import StringBuilder, UnicodeBuilder
 from rpython.rlib.runicode import (
-    code_to_unichr, MAXUNICODE, raw_unicode_escape_helper)
+    code_to_unichr, MAXUNICODE,
+    raw_unicode_escape_helper, raw_unicode_escape_helper)
 
 from pypy.interpreter.error import OperationError, oefmt
 from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault
@@ -260,14 +261,15 @@
             builder.append(str(code))
             builder.append(";")
             pos += 1
-        return space.newtuple([space.wrap(builder.build()), w_end])
+        return space.newtuple([space.newbytes(builder.build()), w_end])
     else:
         raise oefmt(space.w_TypeError,
                     "don't know how to handle %T in error callback", w_exc)
 
 def backslashreplace_errors(space, w_exc):
     check_exception(space, w_exc)
-    if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
+    if (space.isinstance_w(w_exc, space.w_UnicodeEncodeError) or
+        space.isinstance_w(w_exc, space.w_UnicodeTranslateError)):
         obj = space.realunicode_w(space.getattr(w_exc, space.wrap('object')))
         start = space.int_w(space.getattr(w_exc, space.wrap('start')))
         w_end = space.getattr(w_exc, space.wrap('end'))
@@ -278,6 +280,18 @@
             oc = ord(obj[pos])
             raw_unicode_escape_helper(builder, oc)
             pos += 1
+        return space.newtuple([space.newbytes(builder.build()), w_end])
+    elif space.isinstance_w(w_exc, space.w_UnicodeDecodeError):
+        obj = space.bytes_w(space.getattr(w_exc, space.wrap('object')))
+        start = space.int_w(space.getattr(w_exc, space.wrap('start')))
+        w_end = space.getattr(w_exc, space.wrap('end'))
+        end = space.int_w(w_end)
+        builder = UnicodeBuilder()
+        pos = start
+        while pos < end:
+            oc = ord(obj[pos])
+            runicode.raw_unicode_escape_helper_unicode(builder, oc)
+            pos += 1
         return space.newtuple([space.wrap(builder.build()), w_end])
     else:
         raise oefmt(space.w_TypeError,
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -562,7 +562,12 @@
         assert b'\x00'.decode('unicode-internal', 'ignore') == ''
 
     def test_backslashreplace(self):
+        import codecs
         assert 'a\xac\u1234\u20ac\u8000'.encode('ascii', 'backslashreplace') == b'a\\xac\u1234\u20ac\u8000'
+        assert b'\x00\x60\x80'.decode(
+            'ascii', 'backslashreplace') == u'\x00\x60\\x80'
+        assert codecs.charmap_decode(
+            b"\x00\x01\x02", "backslashreplace", "ab") == ("ab\\x02", 3)
 
     def test_namereplace(self):
         assert 'a\xac\u1234\u20ac\u8000'.encode('ascii', 'namereplace') == (
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -1432,6 +1432,8 @@
 # This function is also used by _codecs/interp_codecs.py
 (unicode_encode_unicode_escape, raw_unicode_escape_helper
  ) = make_unicode_escape_function()
+(_, raw_unicode_escape_helper_unicode
+) = make_unicode_escape_function(unicode_output=True)
 
 # ____________________________________________________________
 # Raw unicode escape
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to