Author: Tyler Wade <way...@gmail.com> Branch: utf8-unicode2 Changeset: r72414:e6b1c681e8ec Date: 2014-07-09 03:30 -0500 http://bitbucket.org/pypy/pypy/changeset/e6b1c681e8ec/
Log: Fix cpyext diff --git a/pypy/interpreter/test/test_utf8.py b/pypy/interpreter/test/test_utf8.py --- a/pypy/interpreter/test/test_utf8.py +++ b/pypy/interpreter/test/test_utf8.py @@ -4,6 +4,7 @@ import sys from pypy.interpreter.utf8 import ( Utf8Str, Utf8Builder, utf8chr, utf8ord) +from rpython.rtyper.lltypesystem import rffi def build_utf8str(): builder = Utf8Builder() @@ -193,3 +194,15 @@ assert s.rsplit(maxsplit=2) == u.rsplit(None, 2) assert s.rsplit(' ', 2) == u.rsplit(' ', 2) assert s.rsplit('\n') == [s] + +def test_copy_to_wcharp(): + s = build_utf8str() + if sys.maxunicode < 0x10000: + # The last character requires a surrogate pair on narrow builds and + # so won't be converted correctly by rffi.wcharp2unicode + s = s[:-1] + + wcharp = s.copy_to_wcharp() + u = rffi.wcharp2unicode(wcharp) + rffi.free_wcharp(wcharp) + assert s == u diff --git a/pypy/interpreter/utf8.py b/pypy/interpreter/utf8.py --- a/pypy/interpreter/utf8.py +++ b/pypy/interpreter/utf8.py @@ -3,6 +3,7 @@ from rpython.rlib.runicode import utf8_code_length from rpython.rlib.unicodedata import unicodedb_5_2_0 as unicodedb from rpython.rlib.rarithmetic import r_uint +from rpython.rtyper.lltypesystem import rffi def utf8chr(value): # Like unichr, but returns a Utf8Str object @@ -73,6 +74,8 @@ self._len = length def index_of_char(self, char): + if char >= len(self): + return len(self.bytes) byte = 0 pos = 0 while pos < char: @@ -412,6 +415,14 @@ byte_pos -= 1 return byte_pos + def copy_to_wcharp(self): + # XXX Temporary solution. This won't work correctly on systems + # where sizeof(wchar_t) == 2. Also, it copies twice. 
+ from pypy.interpreter.utf8_codecs import unicode_encode_unicode_internal + from rpython.rlib.runicode import MAXUNICODE + bytes = unicode_encode_unicode_internal(self, len(self), 'strict') + return rffi.cast(rffi.CWCHARP, rffi.str2charp(bytes)) + class Utf8Builder(object): diff --git a/pypy/interpreter/utf8_codecs.py b/pypy/interpreter/utf8_codecs.py --- a/pypy/interpreter/utf8_codecs.py +++ b/pypy/interpreter/utf8_codecs.py @@ -1538,8 +1538,8 @@ if rs is not None: # py3k only errorhandler('strict', 'decimal', msg, s, collstart, collend) - for char in ru: - ch = ord(char) + for i in range(len(ru)): + ch = ORD(ru, i) if unicodedb.isspace(ch): result.append(' ') continue diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -229,7 +229,7 @@ builder = UnicodeBuilder() pos = start while pos < end: - code = ord(obj[pos]) + code = utf8ord(obj, pos) if (MAXUNICODE == 0xffff and 0xD800 <= code <= 0xDBFF and pos + 1 < end and 0xDC00 <= ord(obj[pos+1]) <= 0xDFFF): code = (code & 0x03FF) << 10 diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py --- a/pypy/module/cpyext/test/test_unicodeobject.py +++ b/pypy/module/cpyext/test/test_unicodeobject.py @@ -188,7 +188,7 @@ w_u = api.PyUnicode_DecodeUTF8(u, 2, None) assert space.type(w_u) is space.w_unicode - assert space.unwrap(w_u) == 'sp' + assert space.unwrap(w_u) == u'sp' rffi.free_charp(u) def test_encode_utf8(self, space, api): @@ -296,7 +296,7 @@ w_u = space.wrap(u'a') assert api.PyUnicode_FromObject(w_u) is w_u assert space.unwrap( - api.PyUnicode_FromObject(space.wrap('test'))) == 'test' + api.PyUnicode_FromObject(space.wrap('test'))) == u'test' def test_decode(self, space, api): b_text = rffi.str2charp('caf\x82xx') @@ -306,7 +306,7 @@ w_text = api.PyUnicode_FromEncodedObject(space.wrap("test"), b_encoding, None) assert space.isinstance_w(w_text, 
space.w_unicode) - assert space.unwrap(w_text) == "test" + assert space.unwrap(w_text) == u"test" assert api.PyUnicode_FromEncodedObject(space.wrap(u"test"), b_encoding, None) is None assert api.PyErr_Occurred() is space.w_TypeError diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py --- a/pypy/module/cpyext/unicodeobject.py +++ b/pypy/module/cpyext/unicodeobject.py @@ -1,4 +1,5 @@ from pypy.interpreter.error import OperationError +from pypy.interpreter import utf8_codecs from rpython.rtyper.lltypesystem import rffi, lltype from pypy.module.unicodedata import unicodedb from pypy.module.cpyext.api import ( @@ -208,7 +209,7 @@ # Copy unicode buffer w_unicode = from_ref(space, ref) u = space.unicode_w(w_unicode) - ref_unicode.c_buffer = rffi.unicode2wcharp(u) + ref_unicode.c_buffer = u.copy_to_wcharp() return ref_unicode.c_buffer @cpython_api([PyObject], rffi.CWCHARP) @@ -552,7 +553,7 @@ else: errors = None - result, length, byteorder = runicode.str_decode_utf_16_helper( + result, length, byteorder = utf8_codecs.str_decode_utf_16_helper( string, size, errors, True, # final ? false for multiple passes? None, # errorhandler @@ -608,7 +609,7 @@ else: errors = None - result, length, byteorder = runicode.str_decode_utf_32_helper( + result, length, byteorder = utf8_codecs.str_decode_utf_32_helper( string, size, errors, True, # final ? false for multiple passes? None, # errorhandler @@ -640,7 +641,7 @@ else: errors = None state = space.fromcache(CodecState) - result = runicode.unicode_encode_decimal(u, length, errors, + result = utf8_codecs.unicode_encode_decimal(u, length, errors, state.encode_error_handler) i = len(result) output[i] = '\0' @@ -691,10 +692,12 @@ suffix match), 0 otherwise. 
Return -1 if an error occurred.""" str = space.unicode_w(w_str) substr = space.unicode_w(w_substr) + start = str.index_of_char(start) + end = str.index_of_char(end) if rffi.cast(lltype.Signed, direction) <= 0: - return rstring.startswith(str, substr, start, end) + return rstring.startswith(str.bytes, substr.bytes, start, end) else: - return rstring.endswith(str, substr, start, end) + return rstring.endswith(str.bytes, substr.bytes, start, end) @cpython_api([PyObject, PyObject, Py_ssize_t, Py_ssize_t], Py_ssize_t, error=-1) def PyUnicode_Count(space, w_str, w_substr, start, end): _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit