Author: fijal
Branch: unicode-utf8
Changeset: r93358:fadafada40af
Date: 2017-12-11 08:37 +0200
http://bitbucket.org/pypy/pypy/changeset/fadafada40af/
Log: fix micronumpy diff --git a/pypy/module/micronumpy/boxes.py b/pypy/module/micronumpy/boxes.py --- a/pypy/module/micronumpy/boxes.py +++ b/pypy/module/micronumpy/boxes.py @@ -11,6 +11,7 @@ from rpython.rlib.rstring import StringBuilder from rpython.rlib.objectmodel import specialize from rpython.rlib import jit +from rpython.rlib.rutf8 import get_utf8_length from rpython.rtyper.lltypesystem import lltype, rffi from rpython.tool.sourcetools import func_with_new_name from pypy.module.micronumpy import constants as NPY @@ -636,7 +637,8 @@ if dtype.is_unicode(): return self elif dtype.is_object(): - return W_ObjectBox(space.newunicode(self._value)) + return W_ObjectBox(space.newutf8(self._value, + get_utf8_length(self._value))) else: raise oefmt(space.w_NotImplementedError, "Conversion from unicode not implemented yet") @@ -646,7 +648,7 @@ return new_unicode_dtype(space, len(self._value)) def descr__new__unicode_box(space, w_subtype, w_arg): - value = space.unicode_w(space.unicode_from_object(w_arg)) + value = space.utf8_w(space.unicode_from_object(w_arg)) return W_UnicodeBox(value) class W_ObjectBox(W_GenericBox): diff --git a/pypy/module/micronumpy/compile.py b/pypy/module/micronumpy/compile.py --- a/pypy/module/micronumpy/compile.py +++ b/pypy/module/micronumpy/compile.py @@ -197,7 +197,7 @@ return StringObject(obj) newbytes = newtext - def newunicode(self, obj): + def newutf8(self, obj, l): raise NotImplementedError def newlist(self, items): @@ -305,10 +305,10 @@ raise NotImplementedError text_w = bytes_w - def unicode_w(self, w_obj): + def utf8_w(self, w_obj): # XXX if isinstance(w_obj, StringObject): - return unicode(w_obj.v) + return w_obj.v raise NotImplementedError def int(self, w_obj): diff --git a/pypy/module/micronumpy/types.py b/pypy/module/micronumpy/types.py --- a/pypy/module/micronumpy/types.py +++ b/pypy/module/micronumpy/types.py @@ -1,6 +1,7 @@ import functools import math from rpython.rlib.unroll import unrolling_iterable +from 
rpython.rlib.rutf8 import Utf8StringIterator, get_utf8_length, Utf8StringBuilder from pypy.interpreter.error import OperationError, oefmt from pypy.objspace.std.floatobject import float2string from pypy.objspace.std.complexobject import str_format @@ -2271,23 +2272,29 @@ if isinstance(w_item, boxes.W_UnicodeBox): return w_item if isinstance(w_item, boxes.W_ObjectBox): - value = space.unicode_w(space.unicode_from_object(w_item.w_obj)) + value = space.utf8_w(space.unicode_from_object(w_item.w_obj)) else: - value = space.unicode_w(space.unicode_from_object(w_item)) + value = space.utf8_w(space.unicode_from_object(w_item)) return boxes.W_UnicodeBox(value) + def convert_utf8_to_unichar_list(self, utf8): + l = [] + for ch in Utf8StringIterator(utf8): + l.append(unichr(ch)) + return l + def store(self, arr, i, offset, box, native): assert isinstance(box, boxes.W_UnicodeBox) - value = box._value with arr as storage: self._store(storage, i, offset, box, arr.dtype.elsize) @jit.unroll_safe def _store(self, storage, i, offset, box, width): - size = min(width // 4, len(box._value)) + v = self.convert_utf8_to_unichar_list(box._value) + size = min(width // 4, len(v)) for k in range(size): index = i + offset + 4*k - data = rffi.cast(Int32.T, ord(box._value[k])) + data = rffi.cast(Int32.T, ord(v[k])) raw_storage_setitem_unaligned(storage, index, data) # zero out the remaining memory for index in range(size * 4 + i + offset, width): @@ -2298,16 +2305,16 @@ if dtype is None: dtype = arr.dtype size = dtype.elsize // 4 - builder = UnicodeBuilder(size) + builder = Utf8StringBuilder(size) with arr as storage: for k in range(size): index = i + offset + 4*k - codepoint = raw_storage_getitem_unaligned( - Int32.T, arr.storage, index) - char = unichr(codepoint) - if char == u'\0': + codepoint = rffi.cast(lltype.Signed, + raw_storage_getitem_unaligned( + Int32.T, arr.storage, index)) + if codepoint == 0: break - builder.append(char) + builder.append_code(codepoint) return 
boxes.W_UnicodeBox(builder.build()) def str_format(self, item, add_quotes=True): @@ -2323,7 +2330,7 @@ def to_builtin_type(self, space, box): assert isinstance(box, boxes.W_UnicodeBox) - return space.newunicode(box._value) + return space.newutf8(box._value, get_utf8_length(box._value)) def eq(self, v1, v2): assert isinstance(v1, boxes.W_UnicodeBox) _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit