Author: Ronan Lamy <ronan.l...@gmail.com> Branch: Changeset: r78474:d62f1b272ea9 Date: 2015-07-06 18:44 +0100 http://bitbucket.org/pypy/pypy/changeset/d62f1b272ea9/
Log: merge branch 'unicode-dtype' diff --git a/pypy/module/micronumpy/boxes.py b/pypy/module/micronumpy/boxes.py --- a/pypy/module/micronumpy/boxes.py +++ b/pypy/module/micronumpy/boxes.py @@ -196,7 +196,12 @@ "'%T' object is not iterable", self) def descr_str(self, space): - return space.wrap(self.get_dtype(space).itemtype.str_format(self, add_quotes=False)) + tp = self.get_dtype(space).itemtype + return space.wrap(tp.str_format(self, add_quotes=False)) + + def descr_repr(self, space): + tp = self.get_dtype(space).itemtype + return space.wrap(tp.str_format(self, add_quotes=True)) def descr_format(self, space, w_spec): return space.format(self.item(space), w_spec) @@ -618,16 +623,25 @@ return W_StringBox(arr, 0, arr.dtype) class W_UnicodeBox(W_CharacterBox): + def __init__(self, value): + self._value = value + + def convert_to(self, space, dtype): + if dtype.is_unicode(): + return self + elif dtype.is_object(): + return W_ObjectBox(space.wrap(self._value)) + else: + raise oefmt(space.w_NotImplementedError, + "Conversion from unicode not implemented yet") + + def get_dtype(self, space): + from pypy.module.micronumpy.descriptor import new_unicode_dtype + return new_unicode_dtype(space, len(self._value)) + def descr__new__unicode_box(space, w_subtype, w_arg): - raise oefmt(space.w_NotImplementedError, "Unicode is not supported yet") - from pypy.module.micronumpy.descriptor import new_unicode_dtype - arg = space.unicode_w(space.unicode_from_object(w_arg)) - # XXX size computations, we need tests anyway - arr = VoidBoxStorage(len(arg), new_unicode_dtype(space, len(arg))) - # XXX not this way, we need store - #for i in range(len(arg)): - # arr.storage[i] = arg[i] - return W_UnicodeBox(arr, 0, arr.dtype) + value = space.unicode_w(space.unicode_from_object(w_arg)) + return W_UnicodeBox(value) class W_ObjectBox(W_GenericBox): descr__new__, _get_dtype, descr_reduce = new_dtype_getter(NPY.OBJECT) @@ -649,7 +663,7 @@ __getitem__ = interp2app(W_GenericBox.descr_getitem), __iter__ = interp2app(W_GenericBox.descr_iter), __str__ = interp2app(W_GenericBox.descr_str), - __repr__ = interp2app(W_GenericBox.descr_str), + __repr__ = interp2app(W_GenericBox.descr_repr), __format__ = interp2app(W_GenericBox.descr_format), __int__ = interp2app(W_GenericBox.descr_int), __long__ = interp2app(W_GenericBox.descr_long), diff --git a/pypy/module/micronumpy/casting.py b/pypy/module/micronumpy/casting.py --- a/pypy/module/micronumpy/casting.py +++ b/pypy/module/micronumpy/casting.py @@ -325,6 +325,8 @@ return complex_dtype elif space.isinstance_w(w_obj, space.w_str): return variable_dtype(space, 'S%d' % space.len_w(w_obj)) + elif space.isinstance_w(w_obj, space.w_unicode): + return new_unicode_dtype(space, space.len_w(w_obj)) return object_dtype @signature(ann.instance(W_Dtype), ann.instance(W_Dtype), returns=ann.bool()) diff --git a/pypy/module/micronumpy/test/test_dtypes.py b/pypy/module/micronumpy/test/test_dtypes.py --- a/pypy/module/micronumpy/test/test_dtypes.py +++ b/pypy/module/micronumpy/test/test_dtypes.py @@ -1052,20 +1052,6 @@ assert d.name == "unicode256" assert d.num == 19 - def test_string_boxes(self): - from numpy import str_ - assert isinstance(str_(3), str_) - - def test_unicode_boxes(self): - from numpy import unicode_ - import sys - if '__pypy__' in sys.builtin_module_names: - exc = raises(NotImplementedError, unicode_, 3) - assert exc.value.message.find('not supported yet') >= 0 - else: - u = unicode_(3) - assert isinstance(u, unicode) - def test_character_dtype(self): import numpy as np from numpy import array, character @@ -1133,7 +1119,7 @@ def test_array_from_record(self): import numpy as np - a = np.array(('???', -999, -12345678.9), + a = np.array(('???', -999, -12345678.9), dtype=[('c', '|S3'), ('a', '<i8'), ('b', '<f8')]) # Change the order of the keys b = np.array(a, dtype=[('a', '<i8'), ('b', '<f8'), ('c', '|S3')]) @@ -1141,7 +1127,7 @@ assert b.dtype.fields['a'][1] == 0 assert b['a'] == -999 a = np.array(('N/A', 1e+20, 1e+20, 999999), - dtype=[('name', '|S4'), ('x', '<f8'), + dtype=[('name', '|S4'), ('x', '<f8'), ('y', '<f8'), ('block', '<i8', (2, 3))]) assert (a['block'] == 999999).all() diff --git a/pypy/module/micronumpy/test/test_ndarray.py b/pypy/module/micronumpy/test/test_ndarray.py --- a/pypy/module/micronumpy/test/test_ndarray.py +++ b/pypy/module/micronumpy/test/test_ndarray.py @@ -1,3 +1,4 @@ +# -*- encoding: utf-8 -*- import py import sys @@ -322,6 +323,14 @@ assert b.flags['C'] assert (b == a).all() + def test_unicode(self): + import numpy as np + a = np.array([3, u'Aÿ', ''], dtype='U3') + assert a.shape == (3,) + assert a.dtype == np.dtype('U3') + assert a[0] == u'3' + assert a[1] == u'Aÿ' + def test_dtype_attribute(self): import numpy as np a = np.array(40000, dtype='uint16') @@ -380,6 +389,9 @@ assert zeros((), dtype='S') == '' assert zeros((), dtype='S').shape == () assert zeros((), dtype='S').dtype == '|S1' + assert zeros(5, dtype='U')[4] == u'' + assert zeros(5, dtype='U').shape == (5,) + assert zeros(5, dtype='U').dtype == '<U1' def test_check_shape(self): import numpy as np @@ -2423,6 +2435,12 @@ a.fill(12) assert (a == '1').all() + def test_unicode_filling(self): + import numpy as np + a = np.empty((10,10), dtype='U1') + a.fill(12) + assert (a == u'1').all() + def test_boolean_indexing(self): import numpy as np a = np.zeros((1, 3)) diff --git a/pypy/module/micronumpy/test/test_object_arrays.py b/pypy/module/micronumpy/test/test_object_arrays.py --- a/pypy/module/micronumpy/test/test_object_arrays.py +++ b/pypy/module/micronumpy/test/test_object_arrays.py @@ -171,4 +171,8 @@ assert 'a' * 100 in str(a) b = a.astype('S') assert 'a' * 100 in str(b) - + a = np.array([123], dtype='U') + assert a[0] == u'123' + b = a.astype('O') + assert b[0] == u'123' + assert type(b[0]) is unicode diff --git a/pypy/module/micronumpy/test/test_scalar.py b/pypy/module/micronumpy/test/test_scalar.py --- a/pypy/module/micronumpy/test/test_scalar.py +++ b/pypy/module/micronumpy/test/test_scalar.py @@ -1,3 +1,4 @@ +# -*- encoding:utf-8 -*- from pypy.module.micronumpy.test.test_base import BaseNumpyAppTest class AppTestScalar(BaseNumpyAppTest): @@ -457,3 +458,25 @@ for t in complex64, complex128: _do_test(t, 17j, -17j) + + def test_string_boxes(self): + from numpy import str_ + assert isinstance(str_(3), str_) + assert str_(3) == '3' + assert str(str_(3)) == '3' + assert repr(str_(3)) == "'3'" + + def test_unicode_boxes(self): + from numpy import unicode_ + u = unicode_(3) + assert isinstance(u, unicode) + assert u == u'3' + + def test_unicode_repr(self): + from numpy import unicode_ + u = unicode_(3) + assert str(u) == '3' + assert repr(u) == "u'3'" + u = unicode_(u'Aÿ') + # raises(UnicodeEncodeError, "str(u)") # XXX + assert repr(u) == repr(u'Aÿ') diff --git a/pypy/module/micronumpy/test/test_selection.py b/pypy/module/micronumpy/test/test_selection.py --- a/pypy/module/micronumpy/test/test_selection.py +++ b/pypy/module/micronumpy/test/test_selection.py @@ -210,22 +210,28 @@ assert (c == a).all(), msg def test_sort_unicode(self): + import sys from numpy import array # test unicode sorts. s = 'aaaaaaaa' - try: - a = array([s + chr(i) for i in range(101)], dtype=unicode) - b = a[::-1].copy() - except: - skip('unicode type not supported yet') - for kind in ['q', 'm', 'h'] : + a = array([s + chr(i) for i in range(101)], dtype=unicode) + b = a[::-1].copy() + for kind in ['q', 'm', 'h']: msg = "unicode sort, kind=%s" % kind - c = a.copy(); - c.sort(kind=kind) - assert (c == a).all(), msg - c = b.copy(); - c.sort(kind=kind) - assert (c == a).all(), msg + c = a.copy() + if '__pypy__' in sys.builtin_module_names: + exc = raises(NotImplementedError, "c.sort(kind=kind)") + assert 'non-numeric types' in exc.value.message + else: + c.sort(kind=kind) + assert (c == a).all(), msg + c = b.copy() + if '__pypy__' in sys.builtin_module_names: + exc = raises(NotImplementedError, "c.sort(kind=kind)") + assert 'non-numeric types' in exc.value.message + else: + c.sort(kind=kind) + assert (c == a).all(), msg def test_sort_objects(self): # test object array sorts. diff --git a/pypy/module/micronumpy/types.py b/pypy/module/micronumpy/types.py --- a/pypy/module/micronumpy/types.py +++ b/pypy/module/micronumpy/types.py @@ -11,7 +11,7 @@ most_neg_value_of, LONG_BIT from rpython.rlib.rawstorage import (alloc_raw_storage, raw_storage_getitem_unaligned, raw_storage_setitem_unaligned) -from rpython.rlib.rstring import StringBuilder +from rpython.rlib.rstring import StringBuilder, UnicodeBuilder from rpython.rlib.rstruct.ieee import (float_pack, float_unpack, unpack_float, pack_float80, unpack_float80) from rpython.rlib.rstruct.nativefmttable import native_is_bigendian @@ -50,6 +50,7 @@ pass return _raw_storage_getitem_unaligned(T, storage, offset) ''' + def simple_unary_op(func): specialize.argtype(1)(func) @functools.wraps(func) @@ -2177,7 +2178,7 @@ self._store(storage, i, offset, box, width) class UnicodeType(FlexibleType): - T = lltype.Char + T = lltype.UniChar num = NPY.UNICODE kind = NPY.UNICODELTR char = NPY.UNICODELTR @@ -2189,58 +2190,121 @@ def coerce(self, space, dtype, w_item): if isinstance(w_item, boxes.W_UnicodeBox): return w_item - raise OperationError(space.w_NotImplementedError, space.wrap( - "coerce (probably from set_item) not implemented for unicode type")) + value = space.unicode_w(space.unicode_from_object(w_item)) + return boxes.W_UnicodeBox(value) def store(self, arr, i, offset, box, native): assert isinstance(box, boxes.W_UnicodeBox) - raise oefmt(self.space.w_NotImplementedError, "unicode type not completed") + value = box._value + with arr as storage: + self._store(storage, i, offset, box, arr.dtype.elsize) + + @jit.unroll_safe + def _store(self, storage, i, offset, box, width): + size = min(width // 4, len(box._value)) + for k in range(size): + index = i + offset + 4*k + data = rffi.cast(Int32.T, ord(box._value[k])) + raw_storage_setitem_unaligned(storage, index, data) + for k in range(size, width // 4): + index = i + offset + 4*k + data = rffi.cast(Int32.T, 0) + raw_storage_setitem_unaligned(storage, index, data) def read(self, arr, i, offset, dtype): - raise oefmt(self.space.w_NotImplementedError, "unicode type not completed") + if dtype is None: + dtype = arr.dtype + size = dtype.elsize // 4 + builder = UnicodeBuilder(size) + with arr as storage: + for k in range(size): + index = i + offset + 4*k + codepoint = raw_storage_getitem_unaligned( + Int32.T, arr.storage, index) + char = unichr(codepoint) + if char == u'\0': + break + builder.append(char) + return boxes.W_UnicodeBox(builder.build()) def str_format(self, item, add_quotes=True): - raise oefmt(self.space.w_NotImplementedError, "unicode type not completed") + assert isinstance(item, boxes.W_UnicodeBox) + if add_quotes: + w_unicode = self.to_builtin_type(self.space, item) + return self.space.str_w(self.space.repr(w_unicode)) + else: + # Same as W_UnicodeBox.descr_repr() but without quotes and prefix + from rpython.rlib.runicode import unicode_encode_unicode_escape + return unicode_encode_unicode_escape(item._value, + len(item._value), 'strict') def to_builtin_type(self, space, box): - raise oefmt(self.space.w_NotImplementedError, "unicode type not completed") + assert isinstance(box, boxes.W_UnicodeBox) + return space.wrap(box._value) def eq(self, v1, v2): - raise oefmt(self.space.w_NotImplementedError, "unicode type not completed") + assert isinstance(v1, boxes.W_UnicodeBox) + assert isinstance(v2, boxes.W_UnicodeBox) + return v1._value == v2._value def ne(self, v1, v2): - raise oefmt(self.space.w_NotImplementedError, "unicode type not completed") + assert isinstance(v1, boxes.W_UnicodeBox) + assert isinstance(v2, boxes.W_UnicodeBox) + return v1._value != v2._value def lt(self, v1, v2): - raise oefmt(self.space.w_NotImplementedError, "unicode type not completed") + assert isinstance(v1, boxes.W_UnicodeBox) + assert isinstance(v2, boxes.W_UnicodeBox) + return v1._value < v2._value def le(self, v1, v2): - raise oefmt(self.space.w_NotImplementedError, "unicode type not completed") + assert isinstance(v1, boxes.W_UnicodeBox) + assert isinstance(v2, boxes.W_UnicodeBox) + return v1._value <= v2._value def gt(self, v1, v2): - raise oefmt(self.space.w_NotImplementedError, "unicode type not completed") + assert isinstance(v1, boxes.W_UnicodeBox) + assert isinstance(v2, boxes.W_UnicodeBox) + return v1._value > v2._value def ge(self, v1, v2): - raise oefmt(self.space.w_NotImplementedError, "unicode type not completed") + assert isinstance(v1, boxes.W_UnicodeBox) + assert isinstance(v2, boxes.W_UnicodeBox) + return v1._value >= v2._value def logical_and(self, v1, v2): - raise oefmt(self.space.w_NotImplementedError, "unicode type not completed") + assert isinstance(v1, boxes.W_UnicodeBox) + assert isinstance(v2, boxes.W_UnicodeBox) + if bool(v1) and bool(v2): + return Bool._True + return Bool._False def logical_or(self, v1, v2): - raise oefmt(self.space.w_NotImplementedError, "unicode type not completed") + assert isinstance(v1, boxes.W_UnicodeBox) + assert isinstance(v2, boxes.W_UnicodeBox) + if bool(v1) or bool(v2): + return Bool._True + return Bool._False def logical_not(self, v): - raise oefmt(self.space.w_NotImplementedError, "unicode type not completed") - - @str_binary_op + assert isinstance(v, boxes.W_UnicodeBox) + return not bool(v) + def logical_xor(self, v1, v2): - raise oefmt(self.space.w_NotImplementedError, "unicode type not completed") + assert isinstance(v1, boxes.W_UnicodeBox) + assert isinstance(v2, boxes.W_UnicodeBox) + a = bool(v1) + b = bool(v2) + return (not b and a) or (not a and b) def bool(self, v): - raise oefmt(self.space.w_NotImplementedError, "unicode type not completed") + assert isinstance(v, boxes.W_UnicodeBox) + return bool(v._value) def fill(self, storage, width, native, box, start, stop, offset, gcstruct): - raise oefmt(self.space.w_NotImplementedError, "unicode type not completed") + assert isinstance(box, boxes.W_UnicodeBox) + for i in xrange(start, stop, width): + self._store(storage, i, offset, box, width) class VoidType(FlexibleType): _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit