Author: Matti Picus <[email protected]>
Branch: unicode-utf8-py3
Changeset: r95009:30294b8f3847
Date: 2018-08-17 02:57 +0300
http://bitbucket.org/pypy/pypy/changeset/30294b8f3847/

Log:    fill out missing code paths hit by cpyext testing

diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -221,7 +221,6 @@
                 end += 1
             res.append_slice(s, start, end)
             i = end
-    # cannot be ASCII, cannot have surrogates, I believe
     return res.build(), len(s), len(s)
 
 def utf8_encode_utf_8(s, errors, errorhandler, allow_surrogates=False):
@@ -267,8 +266,13 @@
             msg = "ordinal not in range(256)"
             res_8, newindex = errorhandler(
                 errors, 'latin1', msg, s, startindex, index)
-            result.append(res_8)
-            pos = rutf8._pos_at_index(s, newindex)
+            for cp in rutf8.Utf8StringIterator(res_8):
+                if cp > 0xFF:
+                    errorhandler("strict", 'latin1', msg, s, startindex, index)
+                result.append(chr(cp))
+            if index != newindex:  # Should be uncommon
+                index = newindex
+                pos = rutf8._pos_at_index(s, newindex)
     return result.build()
 
 def utf8_encode_ascii(s, errors, errorhandler):
@@ -649,8 +653,12 @@
             pos += 1
             continue
 
-        digits = 4 if s[pos] == 'u' else 8
-        message = "truncated \\uXXXX escape"
+        if s[pos] == 'u':
+            digits = 4
+            message = "truncated \\uXXXX escape"
+        else:
+            digits = 8
+            message = "truncated \\UXXXXXXXX escape"
         pos += 1
         pos = hexescape(builder, s, pos, digits,
                            "rawunicodeescape", errorhandler, message, errors)
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -820,10 +820,10 @@
     else:
         errors = None
 
-    result, _,  length, byteorder = str_decode_utf_16_helper(
+    result, length, pos = str_decode_utf_16_helper(
         string, errors, final=True, errorhandler=None, byteorder=byteorder)
     if pbyteorder is not None:
-        pbyteorder[0] = rffi.cast(rffi.INT_real, byteorder)
+        pbyteorder[0] = rffi.cast(rffi.INT_real, pos > 0)
     return space.newutf8(result, length)
 
 @cpython_api([CONST_STRING, Py_ssize_t, CONST_STRING, INTP_real], PyObject)
@@ -872,10 +872,10 @@
     else:
         errors = None
 
-    result, _,  length, byteorder = unicodehelper.str_decode_utf_32_helper(
-        string, errors, final=True, errorhandler=None, byteorder=byteorder)
+    result, length, pos = unicodehelper.str_decode_utf_32_helper(
+        string, errors, final=True, errorhandler='strict', byteorder=byteorder)
     if pbyteorder is not None:
-        pbyteorder[0] = rffi.cast(rffi.INT_real, byteorder)
+        pbyteorder[0] = rffi.cast(rffi.INT_real, pos>0)
     return space.newutf8(result, length)
 
 @cpython_api([rffi.CWCHARP, Py_ssize_t, rffi.CCHARP, CONST_STRING],
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to