Author: Matti Picus <[email protected]>
Branch: unicode-utf8-py3
Changeset: r95009:30294b8f3847
Date: 2018-08-17 02:57 +0300
http://bitbucket.org/pypy/pypy/changeset/30294b8f3847/
Log: fill out missing code paths hit by cpyext testing
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -221,7 +221,6 @@
end += 1
res.append_slice(s, start, end)
i = end
- # cannot be ASCII, cannot have surrogates, I believe
return res.build(), len(s), len(s)
def utf8_encode_utf_8(s, errors, errorhandler, allow_surrogates=False):
@@ -267,8 +266,13 @@
msg = "ordinal not in range(256)"
res_8, newindex = errorhandler(
errors, 'latin1', msg, s, startindex, index)
- result.append(res_8)
- pos = rutf8._pos_at_index(s, newindex)
+ for cp in rutf8.Utf8StringIterator(res_8):
+ if cp > 0xFF:
+ errorhandler("strict", 'latin1', msg, s, startindex, index)
+ result.append(chr(cp))
+ if index != newindex: # Should be uncommon
+ index = newindex
+ pos = rutf8._pos_at_index(s, newindex)
return result.build()
def utf8_encode_ascii(s, errors, errorhandler):
@@ -649,8 +653,12 @@
pos += 1
continue
- digits = 4 if s[pos] == 'u' else 8
- message = "truncated \\uXXXX escape"
+ if s[pos] == 'u':
+ digits = 4
+ message = "truncated \\uXXXX escape"
+ else:
+ digits = 8
+ message = "truncated \\UXXXXXXXX escape"
pos += 1
pos = hexescape(builder, s, pos, digits,
"rawunicodeescape", errorhandler, message, errors)
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -820,10 +820,10 @@
else:
errors = None
- result, _, length, byteorder = str_decode_utf_16_helper(
+ result, length, pos = str_decode_utf_16_helper(
string, errors, final=True, errorhandler=None, byteorder=byteorder)
if pbyteorder is not None:
- pbyteorder[0] = rffi.cast(rffi.INT_real, byteorder)
+ pbyteorder[0] = rffi.cast(rffi.INT_real, pos > 0)
return space.newutf8(result, length)
@cpython_api([CONST_STRING, Py_ssize_t, CONST_STRING, INTP_real], PyObject)
@@ -872,10 +872,10 @@
else:
errors = None
- result, _, length, byteorder = unicodehelper.str_decode_utf_32_helper(
- string, errors, final=True, errorhandler=None, byteorder=byteorder)
+ result, length, pos = unicodehelper.str_decode_utf_32_helper(
+ string, errors, final=True, errorhandler='strict', byteorder=byteorder)
if pbyteorder is not None:
- pbyteorder[0] = rffi.cast(rffi.INT_real, byteorder)
+ pbyteorder[0] = rffi.cast(rffi.INT_real, pos>0)
return space.newutf8(result, length)
@cpython_api([rffi.CWCHARP, Py_ssize_t, rffi.CCHARP, CONST_STRING],
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit