Author: Ronan Lamy <[email protected]>
Branch: py3.5
Changeset: r93418:badb71ed332d
Date: 2017-12-14 02:22 +0000
http://bitbucket.org/pypy/pypy/changeset/badb71ed332d/
Log: Port b0267eee69d8 to unicodehelper and fix it
diff --git a/pypy/interpreter/test/test_unicodehelper.py b/pypy/interpreter/test/test_unicodehelper.py
--- a/pypy/interpreter/test/test_unicodehelper.py
+++ b/pypy/interpreter/test/test_unicodehelper.py
@@ -2,7 +2,7 @@
import pytest
import struct
from pypy.interpreter.unicodehelper import (
- encode_utf8, decode_utf8, unicode_encode_utf_32_be)
+ encode_utf8, decode_utf8, unicode_encode_utf_32_be, str_decode_utf_32_be)
from pypy.interpreter.unicodehelper import encode_utf8sp, decode_utf8sp
@@ -90,3 +90,6 @@
assert replace_with(u'rep', None) == u'<rep>'.encode('utf-32-be')
assert (replace_with(None, '\xca\xfe\xca\xfe') ==
'\x00\x00\x00<\xca\xfe\xca\xfe\x00\x00\x00>')
+
+ with pytest.raises(UnicodeDecodeError):
+ str_decode_utf_32_be(b"\x00\x00\xdc\x80", 4, None)
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -172,19 +172,22 @@
def str_decode_utf_32(s, size, errors, final=True,
errorhandler=None):
result, length, byteorder = str_decode_utf_32_helper(
- s, size, errors, final, errorhandler, "native", 'utf-32-' + BYTEORDER2)
+ s, size, errors, final, errorhandler, "native", 'utf-32-' + BYTEORDER2,
+ allow_surrogates=False)
return result, length
def str_decode_utf_32_be(s, size, errors, final=True,
errorhandler=None):
result, length, byteorder = str_decode_utf_32_helper(
- s, size, errors, final, errorhandler, "big", 'utf-32-be')
+ s, size, errors, final, errorhandler, "big", 'utf-32-be',
+ allow_surrogates=False)
return result, length
def str_decode_utf_32_le(s, size, errors, final=True,
errorhandler=None):
result, length, byteorder = str_decode_utf_32_helper(
- s, size, errors, final, errorhandler, "little", 'utf-32-le')
+ s, size, errors, final, errorhandler, "little", 'utf-32-le',
+ allow_surrogates=False)
return result, length
BOM32_DIRECT = intmask(0x0000FEFF)
@@ -193,7 +196,8 @@
def str_decode_utf_32_helper(s, size, errors, final=True,
errorhandler=None,
byteorder="native",
- public_encoding_name='utf32'):
+ public_encoding_name='utf32',
+ allow_surrogates=True):
if errorhandler is None:
errorhandler = default_unicode_error_decode
bo = 0
@@ -256,10 +260,17 @@
continue
ch = ((ord(s[pos + iorder[3]]) << 24) | (ord(s[pos + iorder[2]]) << 16) |
(ord(s[pos + iorder[1]]) << 8) | ord(s[pos + iorder[0]]))
- if ch >= 0x110000:
+ if not allow_surrogates and 0xD800 <= ch <= 0xDFFF:
+ r, pos = errorhandler(errors, public_encoding_name,
+ "code point in surrogate code point "
+ "range(0xd800, 0xe000)",
+ s, pos, pos + 4)
+ result.append(r)
+ continue
+ elif ch >= 0x110000:
r, pos = errorhandler(errors, public_encoding_name,
"codepoint not in range(0x110000)",
- s, pos, len(s))
+ s, pos, pos + 4)
result.append(r)
continue
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit