Author: Richard Plangger <planri...@gmail.com>
Branch: py3.5
Changeset: r90414:ee44cdf8b435
Date: 2017-02-28 11:36 +0100
http://bitbucket.org/pypy/pypy/changeset/ee44cdf8b435/
Log: merge py3.5-text-utf8 diff --git a/pypy/module/imp/importing.py b/pypy/module/imp/importing.py --- a/pypy/module/imp/importing.py +++ b/pypy/module/imp/importing.py @@ -67,7 +67,7 @@ lib_pypy = os.path.join(os.path.dirname(__file__), '..', '..', '..', 'lib_pypy') -@unwrap_spec(modulename='text0', level=int) +@unwrap_spec(modulename='fsencode', level=int) def importhook(space, modulename, w_globals=None, w_locals=None, w_fromlist=None, level=0): # A minimal version, that can only import builtin and lib_pypy modules! assert w_locals is w_globals diff --git a/pypy/module/imp/interp_imp.py b/pypy/module/imp/interp_imp.py --- a/pypy/module/imp/interp_imp.py +++ b/pypy/module/imp/interp_imp.py @@ -5,6 +5,7 @@ from pypy.interpreter.pycode import PyCode from pypy.module._io.interp_iobase import W_IOBase from pypy.interpreter.streamutil import wrap_streamerror +from pypy.interpreter.error import OperationError def extension_suffixes(space): @@ -72,7 +73,11 @@ return None def is_builtin(space, w_name): - name = space.text0_w(w_name) + try: + name = space.text0_w(w_name) + except OperationError: + return space.newint(0) + if name not in space.builtin_modules: return space.newint(0) if space.finditem(space.sys.get('modules'), w_name) is not None: diff --git a/pypy/module/imp/test/test_import.py b/pypy/module/imp/test/test_import.py --- a/pypy/module/imp/test/test_import.py +++ b/pypy/module/imp/test/test_import.py @@ -255,6 +255,10 @@ def test_import_keywords(self): __import__(name='sys', level=0) + def test_import_nonutf8_encodable(self): + exc = raises(ImportError, __import__, '\ud800') + assert exc.value.args[0].startswith("No module named ") + def test_import_by_filename(self): import pkg.a filename = pkg.a.__file__ diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -35,8 +35,7 @@ w_uni = space.wrap(u'abcd') 
assert space.text_w(w_uni) == 'abcd' w_uni = space.wrap(unichr(0xd921) + unichr(0xdddd)) - assert space.text_w(w_uni) == '\xed\xa4\xa1\xed\xb7\x9d' - # ^^^ and not the 4-bytes combined character + raises(UnicodeEncodeError, space.text_w, w_uni) class AppTestUnicodeStringStdOnly: diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -8,7 +8,7 @@ from rpython.rlib.runicode import ( make_unicode_escape_function, str_decode_ascii, str_decode_utf_8, unicode_encode_ascii, unicode_encode_utf_8, fast_str_decode_ascii, - unicode_encode_utf8sp) + unicode_encode_utf8_forbid_surrogates, SurrogateError) from rpython.rlib import jit from pypy.interpreter import unicodehelper @@ -81,8 +81,16 @@ return self._value def text_w(self, space): - identifier = jit.conditional_call_elidable( - self._utf8, g_encode_utf8, self._value) + try: + identifier = jit.conditional_call_elidable( + self._utf8, g_encode_utf8, self._value) + except SurrogateError as e: + raise OperationError(space.w_UnicodeEncodeError, + space.newtuple([space.newtext('utf-8'), + self, + space.newint(e.index-1), + space.newint(e.index), + space.newtext("surrogates not allowed")])) if not jit.isconstant(self): self._utf8 = identifier return identifier @@ -1257,7 +1265,7 @@ @jit.elidable def g_encode_utf8(value): """This is a global function because of jit.conditional_call_value""" - return unicode_encode_utf8sp(value, len(value)) + return unicode_encode_utf8_forbid_surrogates(value, len(value)) _repr_function, _ = make_unicode_escape_function( pass_printable=True, unicode_output=True, quotes=True, prefix='') diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py --- a/rpython/rlib/runicode.py +++ b/rpython/rlib/runicode.py @@ -433,7 +433,9 @@ return result.build() class SurrogateError(Exception): - pass + def __init__(self, char, index): + self.char = char + self.index = index def 
unicode_encode_utf8_forbid_surrogates(s, size): # Strict surrogate-forbidding utf-8 encoding. Any surrogate character @@ -454,7 +456,7 @@ result.append(chr((0x80 | (ch & 0x3f)))) elif ch < 0x10000: if 0xD800 <= ch <= 0xDFFF: - raise SurrogateError + raise SurrogateError(ch, pos) # Encode UCS2 Unicode ordinals result.append((chr((0xe0 | (ch >> 12))))) result.append((chr((0x80 | ((ch >> 6) & 0x3f))))) _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit