Author: Armin Rigo <[email protected]>
Branch:
Changeset: r86862:74b4b27aaa7b
Date: 2016-09-04 14:49 +0200
http://bitbucket.org/pypy/pypy/changeset/74b4b27aaa7b/
Log: Another attempt at fixing the original problem
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -1,5 +1,5 @@
import sys
-from rpython.rlib.objectmodel import specialize, we_are_translated
+from rpython.rlib.objectmodel import specialize, we_are_translated, enforceargs
from rpython.rlib.rstring import StringBuilder, UnicodeBuilder
from rpython.rlib.rarithmetic import r_uint, intmask, widen
from rpython.rlib.unicodedata import unicodedb
@@ -145,17 +145,21 @@
_invalid_byte_3_of_4 = _invalid_cont_byte
_invalid_byte_4_of_4 = _invalid_cont_byte
+@enforceargs(allow_surrogates=bool)
def _invalid_byte_2_of_3(ordch1, ordch2, allow_surrogates):
return (ordch2>>6 != 0x2 or # 0b10
(ordch1 == 0xe0 and ordch2 < 0xa0)
# surrogates shouldn't be valid UTF-8!
- or (not allow_surrogates and ordch1 == 0xed and ordch2 > 0x9f))
+ or (ordch1 == 0xed and ordch2 > 0x9f and not allow_surrogates))
def _invalid_byte_2_of_4(ordch1, ordch2):
return (ordch2>>6 != 0x2 or # 0b10
(ordch1 == 0xf0 and ordch2 < 0x90) or
(ordch1 == 0xf4 and ordch2 > 0x8f))
+# note: this specialize() is here for rtyper/rstr.py, which calls this
+# function too but with its own fixed errorhandler
+@specialize.arg_or_var(4)
def str_decode_utf_8_impl(s, size, errors, final, errorhandler,
allow_surrogates, result):
if size == 0:
@@ -328,6 +332,9 @@
return unicode_encode_utf_8_impl(s, size, errors, errorhandler,
allow_surrogates=allow_surrogates)
+# note: this specialize() is here for rtyper/rstr.py, which calls this
+# function too but with its own fixed errorhandler
+@specialize.arg_or_var(3)
def unicode_encode_utf_8_impl(s, size, errors, errorhandler,
allow_surrogates=False):
assert(size >= 0)
diff --git a/rpython/rlib/test/test_runicode.py b/rpython/rlib/test/test_runicode.py
--- a/rpython/rlib/test/test_runicode.py
+++ b/rpython/rlib/test/test_runicode.py
@@ -55,7 +55,7 @@
s = s.encode(encoding)
except LookupError as e:
py.test.skip(e)
- result, consumed = decoder(s, len(s), True)
+ result, consumed = decoder(s, len(s), 'strict', final=True)
assert consumed == len(s)
self.typeequals(trueresult, result)
@@ -69,7 +69,7 @@
s = s.decode(encoding)
except LookupError as e:
py.test.skip(e)
- result = encoder(s, len(s), True)
+ result = encoder(s, len(s), 'strict')
self.typeequals(trueresult, result)
def checkencodeerror(self, s, encoding, start, stop):
diff --git a/rpython/rtyper/rstr.py b/rpython/rtyper/rstr.py
--- a/rpython/rtyper/rstr.py
+++ b/rpython/rtyper/rstr.py
@@ -35,7 +35,8 @@
allow_surrogates=False, result=result)
return self.ll.llunicode(result.build())
- def ll_raise_unicode_exception_decode(self, errors, encoding, msg, s,
+ @staticmethod
+ def ll_raise_unicode_exception_decode(errors, encoding, msg, s,
startingpos, endingpos):
raise UnicodeDecodeError(encoding, s, startingpos, endingpos, msg)
@@ -411,7 +412,8 @@
allow_surrogates=False)
return self.ll.llstr(bytes)
- def ll_raise_unicode_exception_encode(self, errors, encoding, msg, u,
+ @staticmethod
+ def ll_raise_unicode_exception_encode(errors, encoding, msg, u,
startingpos, endingpos):
raise UnicodeEncodeError(encoding, u, startingpos, endingpos, msg)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit