Author: Armin Rigo <ar...@tunes.org> Branch: fix-strbuf Changeset: r78709:735443d28cec Date: 2015-07-26 19:47 +0200 http://bitbucket.org/pypy/pypy/changeset/735443d28cec/
Log: (fijal, arigo) Adding the W_UnicodeBufferObject diff --git a/pypy/objspace/std/bytearrayobject.py b/pypy/objspace/std/bytearrayobject.py --- a/pypy/objspace/std/bytearrayobject.py +++ b/pypy/objspace/std/bytearrayobject.py @@ -47,6 +47,9 @@ value = value[:] return W_BytearrayObject(value) + def _new_concat(self, space, value1, value2): + return self._new(value1 + value2) + def _new_from_buffer(self, buffer): return W_BytearrayObject([buffer[i] for i in range(len(buffer))]) diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py --- a/pypy/objspace/std/bytesobject.py +++ b/pypy/objspace/std/bytesobject.py @@ -482,6 +482,13 @@ """ raise NotImplementedError + def buffer_w(self, space, flags): + space.check_buf_flags(flags, True) + return StringBuffer(self.str_w(space)) + + def readbuf_w(self, space): + return StringBuffer(self.str_w(space)) + def writebuf_w(self, space): raise OperationError(space.w_TypeError, space.wrap( "Cannot use string as modifiable buffer")) @@ -499,12 +506,12 @@ def descr_formatter_parser(self, space): from pypy.objspace.std.newformat import str_template_formatter - tformat = str_template_formatter(space, space.str_w(self)) + tformat = str_template_formatter(space, self.str_w(space)) return tformat.formatter_parser() def descr_formatter_field_name_split(self, space): from pypy.objspace.std.newformat import str_template_formatter - tformat = str_template_formatter(space, space.str_w(self)) + tformat = str_template_formatter(space, self.str_w(space)) return tformat.formatter_field_name_split() @@ -526,19 +533,21 @@ def str_w(self, space): return self._value - def buffer_w(self, space, flags): - space.check_buf_flags(flags, True) - return StringBuffer(self._value) - - def readbuf_w(self, space): - return StringBuffer(self._value) - def listview_bytes(self): return _create_list_from_bytes(self._value) def _new(self, value): return W_BytesObject(value) + def _new_concat(self, space, value1, value2): + if space.config.objspace.std.withstrbuf: + from pypy.objspace.std.strbufobject import W_StringBufferObject + builder = StringBuilder(len(value1) + len(value2)) + builder.append(value1) + builder.append(value2) + return W_StringBufferObject(builder) + return self._new(value1 + value2) + def _new_from_list(self, value): return W_BytesObject(''.join(value)) @@ -726,18 +735,6 @@ from .bytearrayobject import W_BytearrayObject, _make_data self_as_bytearray = W_BytearrayObject(_make_data(self._value)) return space.add(self_as_bytearray, w_other) - if space.config.objspace.std.withstrbuf: - from pypy.objspace.std.strbufobject import W_StringBufferObject - try: - other = self._op_val(space, w_other) - except OperationError as e: - if e.match(space, space.w_TypeError): - return space.w_NotImplemented - raise - builder = StringBuilder() - builder.append(self._value) - builder.append(other) - return W_StringBufferObject(builder) return self._StringMethods_descr_add(space, w_other) _StringMethods__startswith = _startswith diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -31,7 +31,7 @@ from pypy.objspace.std.sliceobject import W_SliceObject from pypy.objspace.std.tupleobject import W_AbstractTupleObject, W_TupleObject from pypy.objspace.std.typeobject import W_TypeObject, TypeCache -from pypy.objspace.std.unicodeobject import W_UnicodeObject, wrapunicode +from pypy.objspace.std.unicodeobject import W_AbstractUnicodeObject, W_UnicodeObject, wrapunicode class StdObjSpace(ObjSpace): @@ -82,6 +82,8 @@ } if self.config.objspace.std.withstrbuf: builtin_type_classes[W_BytesObject.typedef] = W_AbstractBytesObject + builtin_type_classes[W_UnicodeObject.typedef] = ( + W_AbstractUnicodeObject) self.builtin_types = {} self._interplevel_classes = {} diff --git a/pypy/objspace/std/strbufobject.py b/pypy/objspace/std/strbufobject.py --- a/pypy/objspace/std/strbufobject.py +++ b/pypy/objspace/std/strbufobject.py @@ -1,10 +1,5 @@ -import inspect - -import py - -from pypy.objspace.std.bytesobject import (W_AbstractBytesObject, - W_BytesObject, StringBuffer) -from pypy.interpreter.gateway import interp2app, unwrap_spec +from pypy.objspace.std.bytesobject import W_AbstractBytesObject +from pypy.objspace.std.bytesobject import W_BytesObject from pypy.interpreter.error import OperationError from rpython.rlib.rstring import StringBuilder @@ -37,29 +32,22 @@ def str_w(self, space): return self.force() - def buffer_w(self, space, flags): - return StringBuffer(self.force()) - - def readbuf_w(self, space): - return StringBuffer(self.force()) - def descr_len(self, space): return space.wrap(self.length) def descr_add(self, space, w_other): - try: - other = W_BytesObject._op_val(space, w_other) - except OperationError as e: - if e.match(space, space.w_TypeError): - return space.w_NotImplemented - raise - if self.builder.getlength() != self.length: - builder = StringBuilder() - builder.append(self.force()) + if isinstance(w_other, W_AbstractBytesObject): + other = w_other.str_w(space) + if self.builder.getlength() != self.length: + builder = StringBuilder() + builder.append(self.force()) + else: + builder = self.builder + builder.append(other) + return W_StringBufferObject(builder) else: - builder = self.builder - builder.append(other) - return W_StringBufferObject(builder) + self.force() + return self.w_str.descr_add(space, w_other) def descr_str(self, space): # you cannot get subclasses of W_StringBufferObject here @@ -67,32 +55,42 @@ return self -for key, value in W_BytesObject.typedef.rawdict.iteritems(): - if not isinstance(value, interp2app): - continue - if key in ('__len__', '__add__', '__str__'): - continue +def copy_from_base_class(baseclass, bufclass, attr_name): + import inspect + import py + from pypy.interpreter.gateway import interp2app, unwrap_spec - func = value._code._bltin - args = inspect.getargs(func.func_code) - if args.varargs or args.keywords: - raise TypeError("Varargs and keywords not supported in unwrap_spec") - argspec = ', '.join([arg for arg in args.args[1:]]) - func_code = py.code.Source(""" - def f(self, %(args)s): - self.force() - return self.w_str.%(func_name)s(%(args)s) - """ % {'args': argspec, 'func_name': func.func_name}) - d = {} - exec func_code.compile() in d - f = d['f'] - f.func_defaults = func.func_defaults - f.__module__ = func.__module__ - # necessary for unique identifiers for pickling - f.func_name = func.func_name - unwrap_spec_ = getattr(func, 'unwrap_spec', None) - if unwrap_spec_ is not None: - f = unwrap_spec(**unwrap_spec_)(f) - setattr(W_StringBufferObject, func.func_name, f) + for key, value in baseclass.typedef.rawdict.iteritems(): + if not isinstance(value, interp2app): + continue -W_StringBufferObject.typedef = W_BytesObject.typedef + func = value._code._bltin + if func.func_name in bufclass.__dict__: + assert key in ('__len__', '__add__', '__str__', '__unicode__') + continue + + args = inspect.getargs(func.func_code) + if args.varargs or args.keywords: + raise TypeError("Varargs and keywords not supported in unwrap_spec") + argspec = ', '.join([arg for arg in args.args[1:]]) + func_code = py.code.Source(""" + def f(self, %(args)s): + self.force() + return self.%(attr_name)s.%(func_name)s(%(args)s) + """ % {'args': argspec, 'func_name': func.func_name, + 'attr_name': attr_name}) + d = {} + exec func_code.compile() in d + f = d['f'] + f.func_defaults = func.func_defaults + f.__module__ = func.__module__ + # necessary for unique identifiers for pickling + f.func_name = func.func_name + unwrap_spec_ = getattr(func, 'unwrap_spec', None) + if unwrap_spec_ is not None: + f = unwrap_spec(**unwrap_spec_)(f) + setattr(bufclass, func.func_name, f) + + bufclass.typedef = baseclass.typedef + +copy_from_base_class(W_BytesObject, W_StringBufferObject, 'w_str') diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py --- a/pypy/objspace/std/stringmethods.py +++ b/pypy/objspace/std/stringmethods.py @@ -62,7 +62,7 @@ if e.match(space, space.w_TypeError): return space.w_NotImplemented raise - return self._new(self._val(space) + other) + return self._new_concat(space, self._val(space), other) # Bytearray overrides this method, CPython doesn't support contacting # buffers and strs, and unicodes are always handled above diff --git a/pypy/objspace/std/test/test_strbufobject.py b/pypy/objspace/std/test/test_strbufobject.py --- a/pypy/objspace/std/test/test_strbufobject.py +++ b/pypy/objspace/std/test/test_strbufobject.py @@ -84,6 +84,13 @@ a += 'b' raises(TypeError, "a += 5") + def test_add_unicode(self): + a = 'a' + a += 'b' + a += u'\u1234' + assert a == u'ab\u1234' + assert isinstance(a, unicode) + def test_mix_strings_format(self): a = 'a' a += 'b' @@ -99,3 +106,9 @@ a = 'abc' a += 'bc' assert list(a._formatter_parser()) == [('abcbc', None, None, None)] + + def test_startswith_u(self): + a = 'abc' + a += 'bc' + assert a.startswith(u'abcb') + assert not a.startswith(u'\u1234') diff --git a/pypy/objspace/std/test/test_unibufobject.py b/pypy/objspace/std/test/test_unibufobject.py new file mode 100644 --- /dev/null +++ b/pypy/objspace/std/test/test_unibufobject.py @@ -0,0 +1,110 @@ +import py + +from pypy.objspace.std.test import test_unicodeobject + +class AppTestUnicodeObject(test_unicodeobject.AppTestUnicodeString): + spaceconfig = test_unicodeobject.AppTestUnicodeString.spaceconfig.copy() + spaceconfig.update({"objspace.std.withstrbuf": True}) + + def test_basic(self): + import __pypy__ + # cannot do "Hello, " + "World!" because cpy2.5 optimises this + # away on AST level + s = u"Hello, ".__add__(u"World!") + assert type(s) is unicode + assert 'W_UnicodeBufferObject' in __pypy__.internal_repr(s) + + def test_add_twice(self): + x = u"a".__add__(u"b") + y = x + u"c" + c = x + u"d" + assert y == u"abc" + assert c == u"abd" + + def test_add(self): + import __pypy__ + all = "" + for i in range(20): + all += unicode(i) + assert 'W_UnicodeBufferObject' in __pypy__.internal_repr(all) + assert all == u"012345678910111213141516171819" + + def test_hash(self): + import __pypy__ + def join(s): return s[:len(s) // 2] + s[len(s) // 2:] + t = u'a' * 101 + s = join(t) + assert 'W_UnicodeBufferObject' in __pypy__.internal_repr(s) + assert hash(s) == hash(t) + + def test_len(self): + s = u"a".__add__(u"b") + r = u"c".__add__(u"d") + t = s + r + assert len(s) == 2 + assert len(r) == 2 + assert len(t) == 4 + + def test_add_strbuf(self): + # make three strbuf objects + s = u'a'.__add__(u'b') + t = u'x'.__add__(u'c') + u = u'y'.__add__(u'd') + + # add two different strbufs to the same string + v = s + t + w = s + u + + # check that insanity hasn't resulted. + assert v == u"abxc" + assert w == u"abyd" + + def test_more_adding_fun(self): + s = u'a'.__add__(u'b') # s is a strbuf now + t = s + u'c' + u = s + u'd' + v = s + u'e' + assert v == u'abe' + assert u == u'abd' + assert t == u'abc' + + def test_buh_even_more(self): + a = u'a'.__add__(u'b') + b = a + u'c' + c = u'0'.__add__(u'1') + x = c + a + assert x == u'01ab' + + def test_add_non_string(self): + a = u'a' + a += u'b' + raises(TypeError, "a += 5") + + def test_add_plain_string(self): + a = u'a' + a += u'\u1234' + a += 'b' + assert a == u'a\u1234b' + assert isinstance(a, unicode) + + def test_mix_strings_format(self): + a = u'a' + a += u'b' + assert u'foo%s' % a == u'fooab' + assert (a + u'%s') % (u'foo',) == u'abfoo' + + def test_print(self): + a = u'abc' + a += u'bc' + print a + + def test_formatter_parser(self): + a = u'abc' + a += u'bc' + assert list(a._formatter_parser()) == [(u'abcbc', None, None, None)] + + def test_startswith_s(self): + a = u'abc' + a += u'bc' + assert a.startswith('abcb') + assert not a.startswith('1234') diff --git a/pypy/objspace/std/unibufobject.py b/pypy/objspace/std/unibufobject.py new file mode 100644 --- /dev/null +++ b/pypy/objspace/std/unibufobject.py @@ -0,0 +1,67 @@ +from pypy.objspace.std.unicodeobject import W_AbstractUnicodeObject +from pypy.objspace.std.unicodeobject import W_UnicodeObject, unicode_from_string +from pypy.objspace.std.strbufobject import copy_from_base_class +from pypy.interpreter.error import OperationError +from rpython.rlib.rstring import UnicodeBuilder + + +class W_UnicodeBufferObject(W_AbstractUnicodeObject): + w_unicode = None + + def __init__(self, builder): + self.builder = builder # UnicodeBuilder + self.length = builder.getlength() + + def force(self): + if self.w_unicode is None: + s = self.builder.build() + if self.length < len(s): + s = s[:self.length] + self.w_unicode = W_UnicodeObject(s) + return s + else: + return self.w_unicode._value + + def __repr__(w_self): + """ representation for debugging purposes """ + return "%s(%r[:%d])" % ( + w_self.__class__.__name__, w_self.builder, w_self.length) + + def unwrap(self, space): + return self.force() + + def unicode_w(self, space): + return self.force() + + def descr_len(self, space): + return space.wrap(self.length) + + def _new_concat_buffer(self, other): + if self.builder.getlength() != self.length: + builder = UnicodeBuilder() + builder.append(self.force()) + else: + builder = self.builder + builder.append(other) + return W_UnicodeBufferObject(builder) + + def descr_add(self, space, w_other): + from pypy.objspace.std.bytesobject import W_AbstractBytesObject + + if isinstance(w_other, W_AbstractUnicodeObject): + other = w_other.unicode_w(space) + return self._new_concat_buffer(other) + elif isinstance(w_other, W_AbstractBytesObject): + other = unicode_from_string(space, w_other)._value + return self._new_concat_buffer(other) + else: + self.force() + return self.w_unicode.descr_add(space, w_other) + + def descr_unicode(self, space): + # you cannot get subclasses of W_UnicodeBufferObject here + assert type(self) is W_UnicodeBufferObject + return self + + +copy_from_base_class(W_UnicodeObject, W_UnicodeBufferObject, 'w_unicode') diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -495,14 +495,22 @@ of the specified width. The string S is never truncated. """ + def readbuf_w(self, space): + from rpython.rlib.rstruct.unichar import pack_unichar, UNICODE_SIZE + value = self.unicode_w(space) + builder = StringBuilder(len(value) * UNICODE_SIZE) + for unich in value: + pack_unichar(unich, builder) + return StringBuffer(builder.build()) + def descr_formatter_parser(self, space): from pypy.objspace.std.newformat import unicode_template_formatter - tformat = unicode_template_formatter(space, space.unicode_w(self)) + tformat = unicode_template_formatter(space, self.unicode_w(space)) return tformat.formatter_parser() def descr_formatter_field_name_split(self, space): from pypy.objspace.std.newformat import unicode_template_formatter - tformat = unicode_template_formatter(space, space.unicode_w(self)) + tformat = unicode_template_formatter(space, self.unicode_w(space)) return tformat.formatter_field_name_split() @@ -530,13 +538,6 @@ def unicode_w(self, space): return self._value - def readbuf_w(self, space): - from rpython.rlib.rstruct.unichar import pack_unichar, UNICODE_SIZE - builder = StringBuilder(len(self._value) * UNICODE_SIZE) - for unich in self._value: - pack_unichar(unich, builder) - return StringBuffer(builder.build()) - def writebuf_w(self, space): raise OperationError(space.w_TypeError, space.wrap( "cannot use unicode as modifiable buffer")) @@ -554,6 +555,15 @@ def _new(self, value): return W_UnicodeObject(value) + def _new_concat(self, space, value1, value2): + if space.config.objspace.std.withstrbuf: + from pypy.objspace.std.unibufobject import W_UnicodeBufferObject + builder = UnicodeBuilder(len(value1) + len(value2)) + builder.append(value1) + builder.append(value2) + return W_UnicodeBufferObject(builder) + return self._new(value1 + value2) + def _new_from_list(self, value): return W_UnicodeObject(u''.join(value)) @@ -573,9 +583,11 @@ @staticmethod def _op_val(space, w_other): - if isinstance(w_other, W_UnicodeObject): - return w_other._value - if space.isinstance_w(w_other, space.w_str): + from pypy.objspace.std.bytesobject import W_AbstractBytesObject + + if isinstance(w_other, W_AbstractUnicodeObject): + return w_other.unicode_w(space) + if isinstance(w_other, W_AbstractBytesObject): return unicode_from_string(space, w_other)._value return unicode_from_encoded_object( space, w_other, None, "strict")._value @@ -664,9 +676,9 @@ if space.is_w(w_unicodetype, space.w_unicode): return w_value - assert isinstance(w_value, W_UnicodeObject) + value = w_value.unicode_w(space) w_newobj = space.allocate_instance(W_UnicodeObject, w_unicodetype) - W_UnicodeObject.__init__(w_newobj, w_value._value) + W_UnicodeObject.__init__(w_newobj, value) return w_newobj def descr_repr(self, space): @@ -1035,7 +1047,7 @@ __add__ = interpindirect2app(W_AbstractUnicodeObject.descr_add), __mul__ = interpindirect2app(W_AbstractUnicodeObject.descr_mul), - __rmul__ = interpindirect2app(W_AbstractUnicodeObject.descr_mul), + __rmul__ = interpindirect2app(W_AbstractUnicodeObject.descr_rmul), __getitem__ = interpindirect2app(W_AbstractUnicodeObject.descr_getitem), __getslice__ = interpindirect2app(W_AbstractUnicodeObject.descr_getslice), _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit