Author: fijal
Branch: unicode-utf8
Changeset: r92935:47de95da2bbb
Date: 2017-11-04 15:26 +0100
http://bitbucket.org/pypy/pypy/changeset/47de95da2bbb/
Log: finish whacking until the objspace tests pass diff --git a/TODO b/TODO --- a/TODO +++ b/TODO @@ -8,3 +8,4 @@ * better flag handling in split/splitlines maybe? * find all the fast-paths that we want to do with utf8 (we only do utf-8 now, not UTF8 or utf8) for decode/encode +* encode_error_handler has XXX diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -20,11 +20,13 @@ @specialize.memo() def encode_error_handler(space): # Fast version of the "strict" errors handler. - def raise_unicode_exception_encode(errors, encoding, msg, w_u, + def raise_unicode_exception_encode(errors, encoding, msg, u, u_len, startingpos, endingpos): + # XXX fix once we stop using runicode.py + flag = _get_flag(u.decode('utf8')) raise OperationError(space.w_UnicodeEncodeError, space.newtuple([space.newtext(encoding), - w_u, + space.newutf8(u, u_len, flag), space.newint(startingpos), space.newint(endingpos), space.newtext(msg)])) diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -164,7 +164,7 @@ if isinstance(x, str): return self.newtext(x) if isinstance(x, unicode): - return self.newutf8(x.encode('utf8'), len(x)) + return self.newutf8(x.encode('utf8'), len(x), rutf8.FLAG_REGULAR) if isinstance(x, float): return W_FloatObject(x) if isinstance(x, W_Root): diff --git a/pypy/objspace/std/test/test_index.py b/pypy/objspace/std/test/test_index.py --- a/pypy/objspace/std/test/test_index.py +++ b/pypy/objspace/std/test/test_index.py @@ -1,5 +1,7 @@ from py.test import raises +from rpython.rlib import rutf8 + class AppTest_IndexProtocol: def setup_class(self): w_oldstyle = self.space.appexec([], """(): @@ -263,7 +265,8 @@ class AppTest_UnicodeTestCase(SeqTestCase, StringTestCase): def setup_method(self, method): SeqTestCase.setup_method(self, method) - self.w_seq = 
self.space.wrap(u"this is a test") + self.w_seq = self.space.newutf8("this is a test", len("this is a test"), + rutf8.FLAG_ASCII) self.w_const = self.space.appexec([], """(): return unicode""") diff --git a/pypy/objspace/std/test/test_lengthhint.py b/pypy/objspace/std/test/test_lengthhint.py --- a/pypy/objspace/std/test/test_lengthhint.py +++ b/pypy/objspace/std/test/test_lengthhint.py @@ -1,3 +1,6 @@ + +from rpython.rlib import rutf8 + from pypy.module._collections.interp_deque import W_Deque from pypy.module.itertools.interp_itertools import W_Repeat @@ -71,7 +74,8 @@ self._test_length_hint(self.space.wrap('P' * self.SIZE)) def test_unicode(self): - self._test_length_hint(self.space.wrap(u'Y' * self.SIZE)) + self._test_length_hint(self.space.newutf8('Y' * self.SIZE, self.SIZE, + rutf8.FLAG_ASCII)) def test_tuple(self): self._test_length_hint(self.space.wrap(tuple(self.ITEMS))) diff --git a/pypy/objspace/std/test/test_liststrategies.py b/pypy/objspace/std/test/test_liststrategies.py --- a/pypy/objspace/std/test/test_liststrategies.py +++ b/pypy/objspace/std/test/test_liststrategies.py @@ -22,7 +22,7 @@ BytesListStrategy) #assert isinstance(W_ListObject(space, [w(u'a'), w(u'b')]).strategy, # UnicodeListStrategy) - assert isinstance(W_ListObject(space, [w(u'a'), wb('b')]).strategy, + assert isinstance(W_ListObject(space, [space.newutf8('a', 1, 0), wb('b')]).strategy, ObjectListStrategy) # mixed unicode and bytes def test_empty_to_any(self): diff --git a/pypy/objspace/std/test/test_obj.py b/pypy/objspace/std/test/test_obj.py --- a/pypy/objspace/std/test/test_obj.py +++ b/pypy/objspace/std/test/test_obj.py @@ -17,7 +17,7 @@ cls.w_cpython_apptest = space.wrap(option.runappdirect and not hasattr(sys, 'pypy_translation_info')) def w_unwrap_wrap_unicode(space, w_obj): - return space.newutf8(space.utf8_w(w_obj), w_obj._length) + return space.newutf8(space.utf8_w(w_obj), w_obj._length, w_obj._get_flag()) cls.w_unwrap_wrap_unicode = 
space.wrap(gateway.interp2app(w_unwrap_wrap_unicode)) def w_unwrap_wrap_str(space, w_obj): return space.wrap(space.str_w(w_obj)) diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -1834,7 +1834,7 @@ if not isinstance(w_unistr, W_UnicodeObject): raise oefmt(space.w_TypeError, "expected unicode, got '%T'", w_unistr) unistr = w_unistr._utf8 - result = ['\0'] * len(unistr) + result = ['\0'] * w_unistr._length digits = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] i = 0 @@ -1843,6 +1843,8 @@ uchr = rutf8.codepoint_at_pos(unistr, i) if rutf8.isspace(unistr, i): result[res_pos] = ' ' + res_pos += 1 + i = rutf8.next_codepoint_pos(unistr, i) continue try: result[res_pos] = digits[unicodedb.decimal(uchr)] _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit