Author: Carl Friedrich Bolz <cfb...@gmx.de> Branch: Changeset: r64917:53b7cc76daf1 Date: 2013-06-17 14:33 +0200 http://bitbucket.org/pypy/pypy/changeset/53b7cc76daf1/
Log: merge diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -904,6 +904,9 @@ def newlist_str(self, list_s): return self.newlist([self.wrap(s) for s in list_s]) + def newlist_unicode(self, list_u): + return self.newlist([self.wrap(u) for u in list_u]) + def newlist_hint(self, sizehint): from pypy.objspace.std.listobject import make_empty_list_with_size return make_empty_list_with_size(self, sizehint) diff --git a/pypy/objspace/std/listobject.py b/pypy/objspace/std/listobject.py --- a/pypy/objspace/std/listobject.py +++ b/pypy/objspace/std/listobject.py @@ -166,6 +166,12 @@ storage = strategy.erase(list_s) return W_ListObject.from_storage_and_strategy(space, storage, strategy) + @staticmethod + def newlist_unicode(space, list_u): + strategy = space.fromcache(UnicodeListStrategy) + storage = strategy.erase(list_u) + return W_ListObject.from_storage_and_strategy(space, storage, strategy) + def __repr__(self): """ representation for debugging purposes """ return "%s(%s, %s)" % (self.__class__.__name__, self.strategy, diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -294,6 +294,9 @@ def newlist_str(self, list_s): return W_ListObject.newlist_str(self, list_s) + def newlist_unicode(self, list_u): + return W_ListObject.newlist_unicode(self, list_u) + def newdict(self, module=False, instance=False, kwargs=False, strdict=False): return W_DictMultiObject.allocate_and_init_instance( diff --git a/pypy/objspace/std/stringobject.py b/pypy/objspace/std/stringobject.py --- a/pypy/objspace/std/stringobject.py +++ b/pypy/objspace/std/stringobject.py @@ -11,12 +11,13 @@ from pypy.objspace.std.register_all import register_all from pypy.objspace.std.sliceobject import W_SliceObject, normalize_simple_slice from pypy.objspace.std.stringtype import ( - joined2, sliced, stringendswith, stringstartswith, wrapchar, wrapstr) + joined2, sliced, wrapchar, wrapstr) from rpython.rlib import jit from rpython.rlib.objectmodel import ( compute_hash, compute_unique_id, specialize) from rpython.rlib.rarithmetic import ovfcheck -from rpython.rlib.rstring import StringBuilder, split +from rpython.rlib.rstring import (StringBuilder, split, rsplit, replace, + endswith, startswith) class W_AbstractStringObject(W_Object): @@ -287,31 +288,12 @@ bylen = len(by) if bylen == 0: raise OperationError(space.w_ValueError, space.wrap("empty separator")) - - if bylen == 1 and maxsplit < 0: - res = [] - start = 0 - # fast path: uses str.rfind(character) and str.count(character) - by = by[0] # annotator hack: string -> char - count = value.count(by) - res = [None] * (count + 1) - end = len(value) - while count >= 0: - assert end >= 0 - prev = value.rfind(by, 0, end) - start = prev + 1 - assert start >= 0 - res[count] = value[start:end] - count -= 1 - end = prev - else: - res = split(value, by, maxsplit) - + res = split(value, by, maxsplit) return space.newlist_str(res) def str_rsplit__String_None_ANY(space, w_self, w_none, w_maxsplit=-1): maxsplit = space.int_w(w_maxsplit) - res_w = [] + res = [] value = w_self._value i = len(value)-1 while True: @@ -336,43 +318,21 @@ # the word is value[j+1:i+1] j1 = j + 1 assert j1 >= 0 - res_w.append(sliced(space, value, j1, i+1, w_self)) + res.append(value[j1:i+1]) # continue to look from the character before the space before the word i = j - 1 - res_w.reverse() - return space.newlist(res_w) + res.reverse() + return space.newlist_str(res) -def make_rsplit_with_delim(funcname, sliced): - from rpython.tool.sourcetools import func_with_new_name - - def fn(space, w_self, w_by, w_maxsplit=-1): - maxsplit = space.int_w(w_maxsplit) - res_w = [] - value = w_self._value - end = len(value) - by = w_by._value - bylen = len(by) - if bylen == 0: - raise OperationError(space.w_ValueError, space.wrap("empty separator")) - - while maxsplit != 0: - next = value.rfind(by, 0, end) - if next < 0: - break - res_w.append(sliced(space, value, next+bylen, end, w_self)) - end = next - maxsplit -= 1 # NB. if it's already < 0, it stays < 0 - - res_w.append(sliced(space, value, 0, end, w_self)) - res_w.reverse() - return space.newlist(res_w) - - return func_with_new_name(fn, funcname) - -str_rsplit__String_String_ANY = make_rsplit_with_delim('str_rsplit__String_String_ANY', - sliced) +def str_rsplit__String_String_ANY(space, w_self, w_by, w_maxsplit=-1): + maxsplit = space.int_w(w_maxsplit) + value = w_self._value + by = w_by._value + if not by: + raise OperationError(space.w_ValueError, space.wrap("empty separator")) + return space.newlist_str(rsplit(value, by, maxsplit)) def str_join__String_ANY(space, w_self, w_list): l = space.listview_str(w_list) @@ -524,75 +484,30 @@ return space.wrap(res) -def _string_replace(space, input, sub, by, maxsplit): - if maxsplit == 0: - return space.wrap(input) - - if not sub: - upper = len(input) - if maxsplit > 0 and maxsplit < upper + 2: - upper = maxsplit - 1 - assert upper >= 0 - - try: - result_size = ovfcheck(upper * len(by)) - result_size = ovfcheck(result_size + upper) - result_size = ovfcheck(result_size + len(by)) - remaining_size = len(input) - upper - result_size = ovfcheck(result_size + remaining_size) - except OverflowError: - raise OperationError(space.w_OverflowError, - space.wrap("replace string is too long") - ) - builder = StringBuilder(result_size) - for i in range(upper): - builder.append(by) - builder.append(input[i]) - builder.append(by) - builder.append_slice(input, upper, len(input)) - else: - # First compute the exact result size - count = input.count(sub) - if count > maxsplit and maxsplit > 0: - count = maxsplit - diff_len = len(by) - len(sub) - try: - result_size = ovfcheck(diff_len * count) - result_size = ovfcheck(result_size + len(input)) - except OverflowError: - raise OperationError(space.w_OverflowError, - space.wrap("replace string is too long") - ) - - builder = StringBuilder(result_size) - start = 0 - sublen = len(sub) - - while maxsplit != 0: - next = input.find(sub, start) - if next < 0: - break - builder.append_slice(input, start, next) - builder.append(by) - start = next + sublen - maxsplit -= 1 # NB. if it's already < 0, it stays < 0 - - builder.append_slice(input, start, len(input)) - - return space.wrap(builder.build()) - def str_replace__String_ANY_ANY_ANY(space, w_self, w_sub, w_by, w_maxsplit): - return _string_replace(space, w_self._value, space.buffer_w(w_sub).as_str(), - space.buffer_w(w_by).as_str(), - space.int_w(w_maxsplit)) + sub = space.buffer_w(w_sub).as_str() + by = space.buffer_w(w_by).as_str() + maxsplit = space.int_w(w_maxsplit) + try: + res = replace(w_self._value, sub, by, maxsplit) + except OverflowError: + raise OperationError(space.w_OverflowError, + space.wrap("replace string is too long") + ) + return space.wrap(res) def str_replace__String_String_String_ANY(space, w_self, w_sub, w_by, w_maxsplit=-1): - input = w_self._value sub = w_sub._value by = w_by._value maxsplit = space.int_w(w_maxsplit) - return _string_replace(space, input, sub, by, maxsplit) + try: + res = replace(w_self._value, sub, by, maxsplit) + except OverflowError: + raise OperationError(space.w_OverflowError, + space.wrap("replace string is too long") + ) + return space.wrap(res) def _strip(space, w_self, w_chars, left, right): "internal function called by str_xstrip methods" @@ -679,7 +594,7 @@ def str_endswith__String_String_ANY_ANY(space, w_self, w_suffix, w_start, w_end): (u_self, start, end) = _convert_idx_params(space, w_self, w_start, w_end, True) - return space.newbool(stringendswith(u_self, w_suffix._value, start, end)) + return space.newbool(endswith(u_self, w_suffix._value, start, end)) def str_endswith__String_ANY_ANY_ANY(space, w_self, w_suffixes, w_start, w_end): if not space.isinstance_w(w_suffixes, space.w_tuple): @@ -692,14 +607,14 @@ return space.call_method(w_u, "endswith", w_suffixes, w_start, w_end) suffix = space.str_w(w_suffix) - if stringendswith(u_self, suffix, start, end): + if endswith(u_self, suffix, start, end): return space.w_True return space.w_False def str_startswith__String_String_ANY_ANY(space, w_self, w_prefix, w_start, w_end): (u_self, start, end) = _convert_idx_params(space, w_self, w_start, w_end, True) - return space.newbool(stringstartswith(u_self, w_prefix._value, start, end)) + return space.newbool(startswith(u_self, w_prefix._value, start, end)) def str_startswith__String_ANY_ANY_ANY(space, w_self, w_prefixes, w_start, w_end): if not space.isinstance_w(w_prefixes, space.w_tuple): @@ -712,7 +627,7 @@ return space.call_method(w_u, "startswith", w_prefixes, w_start, w_end) prefix = space.str_w(w_prefix) - if stringstartswith(u_self, prefix, start, end): + if startswith(u_self, prefix, start, end): return space.w_True return space.w_False @@ -768,26 +683,7 @@ def str_splitlines__String_ANY(space, w_self, w_keepends): u_keepends = space.int_w(w_keepends) # truth value, but type checked data = w_self._value - selflen = len(data) - strs_w = [] - i = j = 0 - while i < selflen: - # Find a line and append it - while i < selflen and data[i] != '\n' and data[i] != '\r': - i += 1 - # Skip the line break reading CRLF as one line break - eol = i - i += 1 - if i < selflen and data[i-1] == '\r' and data[i] == '\n': - i += 1 - if u_keepends: - eol = i - strs_w.append(sliced(space, data, j, eol, w_self)) - j = i - - if j < selflen: - strs_w.append(sliced(space, data, j, len(data), w_self)) - return space.newlist(strs_w) + return space.newlist_str(data.splitlines(u_keepends)) def str_zfill__String_ANY(space, w_self, w_width): input = w_self._value diff --git a/pypy/objspace/std/stringtype.py b/pypy/objspace/std/stringtype.py --- a/pypy/objspace/std/stringtype.py +++ b/pypy/objspace/std/stringtype.py @@ -295,28 +295,3 @@ str_typedef.registermethods(globals()) -# ____________________________________________________________ - -# Helpers for several string implementations - -@specialize.argtype(0) -@jit.elidable -def stringendswith(u_self, suffix, start, end): - begin = end - len(suffix) - if begin < start: - return False - for i in range(len(suffix)): - if u_self[begin+i] != suffix[i]: - return False - return True - -@specialize.argtype(0) -@jit.elidable -def stringstartswith(u_self, prefix, start, end): - stop = start + len(prefix) - if stop > end: - return False - for i in range(len(prefix)): - if u_self[start+i] != prefix[i]: - return False - return True diff --git a/pypy/objspace/std/test/test_liststrategies.py b/pypy/objspace/std/test/test_liststrategies.py --- a/pypy/objspace/std/test/test_liststrategies.py +++ b/pypy/objspace/std/test/test_liststrategies.py @@ -555,10 +555,30 @@ try: w_l = space.call_method(w_s, "split") w_l2 = space.call_method(w_s, "split", space.wrap(" ")) + w_l3 = space.call_method(w_s, "rsplit") + w_l4 = space.call_method(w_s, "rsplit", space.wrap(" ")) finally: del space.newlist assert space.listview_str(w_l) == ["a", "b", "c"] assert space.listview_str(w_l2) == ["a", "b", "c"] + assert space.listview_str(w_l3) == ["a", "b", "c"] + assert space.listview_str(w_l4) == ["a", "b", "c"] + + def test_unicode_uses_newlist_unicode(self): + space = self.space + w_u = space.wrap(u"a b c") + space.newlist = None + try: + w_l = space.call_method(w_u, "split") + w_l2 = space.call_method(w_u, "split", space.wrap(" ")) + w_l3 = space.call_method(w_u, "rsplit") + w_l4 = space.call_method(w_u, "rsplit", space.wrap(" ")) + finally: + del space.newlist + assert space.listview_unicode(w_l) == [u"a", u"b", u"c"] + assert space.listview_unicode(w_l2) == [u"a", u"b", u"c"] + assert space.listview_unicode(w_l3) == [u"a", u"b", u"c"] + assert space.listview_unicode(w_l4) == [u"a", u"b", u"c"] def test_pop_without_argument_is_fast(self): space = self.space diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -8,15 +8,14 @@ from pypy.objspace.std.multimethod import FailedToImplement from pypy.objspace.std.noneobject import W_NoneObject from pypy.objspace.std.sliceobject import W_SliceObject, normalize_simple_slice -from pypy.objspace.std.stringobject import ( - W_StringObject, make_rsplit_with_delim) -from pypy.objspace.std.stringtype import stringendswith, stringstartswith +from pypy.objspace.std.stringobject import W_StringObject from pypy.objspace.std.register_all import register_all from rpython.rlib import jit from rpython.rlib.rarithmetic import ovfcheck from rpython.rlib.objectmodel import ( compute_hash, compute_unique_id, specialize) -from rpython.rlib.rstring import UnicodeBuilder +from rpython.rlib.rstring import (UnicodeBuilder, split, rsplit, replace, + startswith, endswith) from rpython.rlib.runicode import make_unicode_escape_function from rpython.tool.sourcetools import func_with_new_name @@ -490,14 +489,14 @@ def unicode_endswith__Unicode_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end): self, start, end = _convert_idx_params(space, w_self, w_start, w_end, True) - return space.newbool(stringendswith(self, w_substr._value, start, end)) + return space.newbool(endswith(self, w_substr._value, start, end)) def unicode_startswith__Unicode_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end): self, start, end = _convert_idx_params(space, w_self, w_start, w_end, True) # XXX this stuff can be waaay better for ootypebased backends if # we re-use more of our rpython machinery (ie implement startswith # with additional parameters as rpython) - return space.newbool(stringstartswith(self, w_substr._value, start, end)) + return space.newbool(startswith(self, w_substr._value, start, end)) def unicode_startswith__Unicode_ANY_ANY_ANY(space, w_unistr, w_prefixes, w_start, w_end): @@ -507,7 +506,7 @@ w_start, w_end, True) for w_prefix in space.fixedview(w_prefixes): prefix = space.unicode_w(w_prefix) - if stringstartswith(unistr, prefix, start, end): + if startswith(unistr, prefix, start, end): return space.w_True return space.w_False @@ -519,7 +518,7 @@ w_start, w_end, True) for w_suffix in space.fixedview(w_suffixes): suffix = space.unicode_w(w_suffix) - if stringendswith(unistr, suffix, start, end): + if endswith(unistr, suffix, start, end): return space.w_True return space.w_False @@ -608,17 +607,17 @@ if (self[pos] == u'\r' and pos + 1 < end and self[pos + 1] == u'\n'): # Count CRLF as one linebreak - lines.append(W_UnicodeObject(self[start:pos + keepends * 2])) + lines.append(self[start:pos + keepends * 2]) pos += 1 else: - lines.append(W_UnicodeObject(self[start:pos + keepends])) + lines.append(self[start:pos + keepends]) pos += 1 start = pos else: pos += 1 if not unicodedb.islinebreak(ord(self[end - 1])): - lines.append(W_UnicodeObject(self[start:])) - return space.newlist(lines) + lines.append(self[start:]) + return space.newlist_unicode(lines) def unicode_find__Unicode_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end): self, start, end = _convert_idx_params(space, w_self, w_start, w_end) @@ -650,7 +649,7 @@ def unicode_split__Unicode_None_ANY(space, w_self, w_none, w_maxsplit): maxsplit = space.int_w(w_maxsplit) - res_w = [] + res = [] value = w_self._value length = len(value) i = 0 @@ -673,12 +672,12 @@ maxsplit -= 1 # NB. if it's already < 0, it stays < 0 # the word is value[i:j] - res_w.append(W_UnicodeObject(value[i:j])) + res.append(value[i:j]) # continue to look from the character following the space after the word i = j + 1 - return space.newlist(res_w) + return space.newlist_unicode(res) def unicode_split__Unicode_Unicode_ANY(space, w_self, w_delim, w_maxsplit): self = w_self._value @@ -688,13 +687,13 @@ if delim_len == 0: raise OperationError(space.w_ValueError, space.wrap('empty separator')) - parts = _split_with(self, delim, maxsplit) - return space.newlist([W_UnicodeObject(part) for part in parts]) + parts = split(self, delim, maxsplit) + return space.newlist_unicode(parts) def unicode_rsplit__Unicode_None_ANY(space, w_self, w_none, w_maxsplit): maxsplit = space.int_w(w_maxsplit) - res_w = [] + res = [] value = w_self._value i = len(value)-1 while True: @@ -719,59 +718,32 @@ # the word is value[j+1:i+1] j1 = j + 1 assert j1 >= 0 - res_w.append(W_UnicodeObject(value[j1:i+1])) + res.append(value[j1:i+1]) # continue to look from the character before the space before the word i = j - 1 - res_w.reverse() - return space.newlist(res_w) + res.reverse() + return space.newlist_unicode(res) -def sliced(space, s, start, stop, orig_obj): - assert start >= 0 - assert stop >= 0 - if start == 0 and stop == len(s) and space.is_w(space.type(orig_obj), space.w_unicode): - return orig_obj - return space.wrap( s[start:stop]) - -unicode_rsplit__Unicode_Unicode_ANY = make_rsplit_with_delim('unicode_rsplit__Unicode_Unicode_ANY', - sliced) - -def _split_into_chars(self, maxsplit): - if maxsplit == 0: - return [self] - index = 0 - end = len(self) - parts = [u''] - maxsplit -= 1 - while maxsplit != 0: - if index >= end: - break - parts.append(self[index]) - index += 1 - maxsplit -= 1 - parts.append(self[index:]) - return parts - -def _split_with(self, with_, maxsplit=-1): - parts = [] - start = 0 - end = len(self) - length = len(with_) - while maxsplit != 0: - index = self.find(with_, start, end) - if index < 0: - break - parts.append(self[start:index]) - start = index + length - maxsplit -= 1 - parts.append(self[start:]) - return parts +def unicode_rsplit__Unicode_Unicode_ANY(space, w_self, w_by, w_maxsplit=-1): + maxsplit = space.int_w(w_maxsplit) + value = w_self._value + by = w_by._value + if not by: + raise OperationError(space.w_ValueError, space.wrap("empty separator")) + return space.newlist_unicode(rsplit(value, by, maxsplit)) def unicode_replace__Unicode_Unicode_Unicode_ANY(space, w_self, w_old, w_new, w_maxsplit): - return _unicode_replace(space, w_self, w_old._value, w_new._value, - w_maxsplit) + maxsplit = space.int_w(w_maxsplit) + try: + return W_UnicodeObject( + replace(w_self._value, w_old._value, w_new._value, maxsplit)) + except OverflowError: + raise OperationError( + space.w_OverflowError, + space.wrap("replace string is too long")) def unicode_replace__Unicode_ANY_ANY_ANY(space, w_self, w_old, w_new, w_maxsplit): @@ -783,27 +755,14 @@ new = unicode(space.bufferstr_w(w_new)) else: new = space.unicode_w(w_new) - return _unicode_replace(space, w_self, old, new, w_maxsplit) - -def _unicode_replace(space, w_self, old, new, w_maxsplit): - if len(old): - parts = _split_with(w_self._value, old, space.int_w(w_maxsplit)) - else: - self = w_self._value - maxsplit = space.int_w(w_maxsplit) - parts = _split_into_chars(self, maxsplit) - + maxsplit = space.int_w(w_maxsplit) try: - one = ovfcheck(len(parts) * len(new)) - ovfcheck(one + len(w_self._value)) + return W_UnicodeObject(replace(w_self._value, old, new, maxsplit)) except OverflowError: raise OperationError( space.w_OverflowError, space.wrap("replace string is too long")) - return W_UnicodeObject(new.join(parts)) - - def unicode_encode__Unicode_ANY_ANY(space, w_unistr, w_encoding=None, w_errors=None): @@ -848,7 +807,7 @@ def unicode_expandtabs__Unicode_ANY(space, w_self, w_tabsize): self = w_self._value tabsize = space.int_w(w_tabsize) - parts = _split_with(self, u'\t') + parts = self.split(u'\t') result = [parts[0]] prevsize = 0 for ch in parts[0]: diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py --- a/rpython/rlib/rstring.py +++ b/rpython/rlib/rstring.py @@ -1,25 +1,50 @@ """ String builder interface and string functions """ +import sys from rpython.annotator.model import (SomeObject, SomeString, s_None, SomeChar, SomeInteger, SomeUnicodeCodePoint, SomeUnicodeString, SomePtr, SomePBC) -from rpython.rlib.objectmodel import newlist_hint +from rpython.rlib.objectmodel import newlist_hint, specialize from rpython.rlib.rarithmetic import ovfcheck from rpython.rtyper.extregistry import ExtRegistryEntry from rpython.tool.pairtype import pairtype +from rpython.rlib import jit # -------------- public API for string functions ----------------------- + +@specialize.argtype(0) def split(value, by, maxsplit=-1): + if isinstance(value, str): + assert isinstance(by, str) + else: + assert isinstance(by, unicode) bylen = len(by) if bylen == 0: raise ValueError("empty separator") + start = 0 + if bylen == 1: + # fast path: uses str.rfind(character) and str.count(character) + by = by[0] # annotator hack: string -> char + count = value.count(by) + if 0 <= maxsplit < count: + count = maxsplit + res = newlist_hint(count + 1) + while count > 0: + next = value.find(by, start) + assert next >= 0 # cannot fail due to the value.count above + res.append(value[start:next]) + start = next + bylen + count -= 1 + res.append(value[start:len(value)]) + return res + if maxsplit > 0: res = newlist_hint(min(maxsplit + 1, len(value))) else: res = [] - start = 0 + while maxsplit != 0: next = value.find(by, start) if next < 0: @@ -32,7 +57,12 @@ return res +@specialize.argtype(0) def rsplit(value, by, maxsplit=-1): + if isinstance(value, str): + assert isinstance(by, str) + else: + assert isinstance(by, unicode) if maxsplit > 0: res = newlist_hint(min(maxsplit + 1, len(value))) else: @@ -54,6 +84,109 @@ res.reverse() return res + +@specialize.argtype(0) +def replace(input, sub, by, maxsplit=-1): + if isinstance(input, str): + assert isinstance(sub, str) + assert isinstance(by, str) + Builder = StringBuilder + else: + assert isinstance(sub, unicode) + assert isinstance(by, unicode) + Builder = UnicodeBuilder + if maxsplit == 0: + return input + + if not sub: + upper = len(input) + if maxsplit > 0 and maxsplit < upper + 2: + upper = maxsplit - 1 + assert upper >= 0 + + try: + result_size = ovfcheck(upper * len(by)) + result_size = ovfcheck(result_size + upper) + result_size = ovfcheck(result_size + len(by)) + remaining_size = len(input) - upper + result_size = ovfcheck(result_size + remaining_size) + except OverflowError: + raise + builder = Builder(result_size) + for i in range(upper): + builder.append(by) + builder.append(input[i]) + builder.append(by) + builder.append_slice(input, upper, len(input)) + else: + # First compute the exact result size + count = input.count(sub) + if count > maxsplit and maxsplit > 0: + count = maxsplit + diff_len = len(by) - len(sub) + try: + result_size = ovfcheck(diff_len * count) + result_size = ovfcheck(result_size + len(input)) + except OverflowError: + raise + + builder = Builder(result_size) + start = 0 + sublen = len(sub) + + while maxsplit != 0: + next = input.find(sub, start) + if next < 0: + break + builder.append_slice(input, start, next) + builder.append(by) + start = next + sublen + maxsplit -= 1 # NB. if it's already < 0, it stays < 0 + + builder.append_slice(input, start, len(input)) + + return builder.build() + +def _normalize_start_end(length, start, end): + if start < 0: + start += length + if start < 0: + start = 0 + if end < 0: + end += length + if end < 0: + end = 0 + elif end > length: + end = length + return start, end + +@specialize.argtype(0) +@jit.elidable +def startswith(u_self, prefix, start=0, end=sys.maxint): + length = len(u_self) + start, end = _normalize_start_end(length, start, end) + stop = start + len(prefix) + if stop > end: + return False + for i in range(len(prefix)): + if u_self[start+i] != prefix[i]: + return False + return True + +@specialize.argtype(0) +@jit.elidable +def endswith(u_self, suffix, start=0, end=sys.maxint): + length = len(u_self) + start, end = _normalize_start_end(length, start, end) + begin = end - len(suffix) + if begin < start: + return False + for i in range(len(suffix)): + if u_self[begin+i] != suffix[i]: + return False + return True + + # -------------- public API --------------------------------- INIT_SIZE = 100 # XXX tweak @@ -271,3 +404,5 @@ def specialize_call(self, hop): hop.exception_cannot_occur() + + diff --git a/rpython/rlib/test/test_rstring.py b/rpython/rlib/test/test_rstring.py --- a/rpython/rlib/test/test_rstring.py +++ b/rpython/rlib/test/test_rstring.py @@ -1,6 +1,8 @@ import sys, py from rpython.rlib.rstring import StringBuilder, UnicodeBuilder, split, rsplit +from rpython.rlib.rstring import replace, startswith, endswith +from rpython.rtyper.test.tool import BaseRtypingTest, LLRtypeMixin def test_split(): assert split("", 'x') == [''] @@ -10,9 +12,21 @@ assert split('a|b|c|d', '|') == ['a', 'b', 'c', 'd'] assert split('a|b|c|d', '|', 2) == ['a', 'b', 'c|d'] assert split('a//b//c//d', '//') == ['a', 'b', 'c', 'd'] + assert split('a//b//c//d', '//', 2) == ['a', 'b', 'c//d'] assert split('endcase test', 'test') == ['endcase ', ''] py.test.raises(ValueError, split, 'abc', '') +def test_split_unicode(): + assert split(u"", u'x') == [u''] + assert split(u"a", u"a", 1) == [u'', u''] + assert split(u" ", u" ", 1) == [u'', u''] + assert split(u"aa", u"a", 2) == [u'', u'', u''] + assert split(u'a|b|c|d', u'|') == [u'a', u'b', u'c', u'd'] + assert split(u'a|b|c|d', u'|', 2) == [u'a', u'b', u'c|d'] + assert split(u'a//b//c//d', u'//') == [u'a', u'b', u'c', u'd'] + assert split(u'endcase test', u'test') == [u'endcase ', u''] + py.test.raises(ValueError, split, u'abc', u'') + def test_rsplit(): assert rsplit("a", "a", 1) == ['', ''] assert rsplit(" ", " ", 1) == ['', ''] @@ -23,6 +37,111 @@ assert rsplit('endcase test', 'test') == ['endcase ', ''] py.test.raises(ValueError, rsplit, "abc", '') +def test_rsplit_unicode(): + assert rsplit(u"a", u"a", 1) == [u'', u''] + assert rsplit(u" ", u" ", 1) == [u'', u''] + assert rsplit(u"aa", u"a", 2) == [u'', u'', u''] + assert rsplit(u'a|b|c|d', u'|') == [u'a', u'b', u'c', u'd'] + assert rsplit(u'a|b|c|d', u'|', 2) == [u'a|b', u'c', u'd'] + assert rsplit(u'a//b//c//d', u'//') == [u'a', u'b', u'c', u'd'] + assert rsplit(u'endcase test', u'test') == [u'endcase ', u''] + py.test.raises(ValueError, rsplit, u"abc", u'') + +def test_string_replace(): + assert replace('one!two!three!', '!', '@', 1) == 'one@two!three!' + assert replace('one!two!three!', '!', '') == 'onetwothree' + assert replace('one!two!three!', '!', '@', 2) == 'one@two@three!' + assert replace('one!two!three!', '!', '@', 3) == 'one@two@three@' + assert replace('one!two!three!', '!', '@', 4) == 'one@two@three@' + assert replace('one!two!three!', '!', '@', 0) == 'one!two!three!' + assert replace('one!two!three!', '!', '@') == 'one@two@three@' + assert replace('one!two!three!', 'x', '@') == 'one!two!three!' + assert replace('one!two!three!', 'x', '@', 2) == 'one!two!three!' + assert replace('abc', '', '-') == '-a-b-c-' + assert replace('abc', '', '-', 3) == '-a-b-c' + assert replace('abc', '', '-', 0) == 'abc' + assert replace('', '', '') == '' + assert replace('', '', 'a') == 'a' + assert replace('abc', 'ab', '--', 0) == 'abc' + assert replace('abc', 'xy', '--') == 'abc' + assert replace('123', '123', '') == '' + assert replace('123123', '123', '') == '' + assert replace('123x123', '123', '') == 'x' + +def test_string_replace_overflow(): + if sys.maxint > 2**31-1: + py.test.skip("Wrong platform") + s = "a" * (2**16) + with py.test.raises(OverflowError): + replace(s, "", s) + with py.test.raises(OverflowError): + replace(s, "a", s) + with py.test.raises(OverflowError): + replace(s, "a", s, len(s) - 10) + +def test_unicode_replace(): + assert replace(u'one!two!three!', u'!', u'@', 1) == u'one@two!three!' + assert replace(u'one!two!three!', u'!', u'') == u'onetwothree' + assert replace(u'one!two!three!', u'!', u'@', 2) == u'one@two@three!' + assert replace(u'one!two!three!', u'!', u'@', 3) == u'one@two@three@' + assert replace(u'one!two!three!', u'!', u'@', 4) == u'one@two@three@' + assert replace(u'one!two!three!', u'!', u'@', 0) == u'one!two!three!' + assert replace(u'one!two!three!', u'!', u'@') == u'one@two@three@' + assert replace(u'one!two!three!', u'x', u'@') == u'one!two!three!' + assert replace(u'one!two!three!', u'x', u'@', 2) == u'one!two!three!' + assert replace(u'abc', u'', u'-') == u'-a-b-c-' + assert replace(u'abc', u'', u'-', 3) == u'-a-b-c' + assert replace(u'abc', u'', u'-', 0) == u'abc' + assert replace(u'', u'', u'') == u'' + assert replace(u'', u'', u'a') == u'a' + assert replace(u'abc', u'ab', u'--', 0) == u'abc' + assert replace(u'abc', u'xy', u'--') == u'abc' + assert replace(u'123', u'123', u'') == u'' + assert replace(u'123123', u'123', u'') == u'' + assert replace(u'123x123', u'123', u'') == u'x' + +def test_unicode_replace_overflow(): + if sys.maxint > 2**31-1: + py.test.skip("Wrong platform") + s = u"a" * (2**16) + with py.test.raises(OverflowError): + replace(s, u"", s) + with py.test.raises(OverflowError): + replace(s, u"a", s) + with py.test.raises(OverflowError): + replace(s, u"a", s, len(s) - 10) + +def test_startswith(): + assert startswith('ab', 'ab') is True + assert startswith('ab', 'a') is True + assert startswith('ab', '') is True + assert startswith('x', 'a') is False + assert startswith('x', 'x') is True + assert startswith('', '') is True + assert startswith('', 'a') is False + assert startswith('x', 'xx') is False + assert startswith('y', 'xx') is False + assert startswith('ab', 'a', 0) is True + assert startswith('ab', 'a', 1) is False + assert startswith('ab', 'b', 1) is True + assert startswith('abc', 'bc', 1, 2) is False + assert startswith('abc', 'c', -1, 4) is True + +def test_endswith(): + assert endswith('ab', 'ab') is True + assert endswith('ab', 'b') is True + assert endswith('ab', '') is True + assert endswith('x', 'a') is False + assert endswith('x', 'x') is True + assert endswith('', '') is True + assert endswith('', 'a') is False + assert endswith('x', 'xx') is False + assert endswith('y', 'xx') is False + assert endswith('abc', 'ab', 0, 2) is True + assert endswith('abc', 'bc', 1) is True + assert endswith('abc', 'bc', 2) is False + assert endswith('abc', 'b', -3, -1) is True + def test_string_builder(): s = StringBuilder() s.append("a") @@ -42,4 +161,32 @@ s.append_multiple_char(u'd', 4) assert s.build() == 'aabcbdddd' assert isinstance(s.build(), unicode) - + + +class TestTranslates(LLRtypeMixin, BaseRtypingTest): + def test_split_rsplit(self): + def fn(): + res = True + res = res and split('a//b//c//d', '//') == ['a', 'b', 'c', 'd'] + res = res and split('a//b//c//d', '//', 2) == ['a', 'b', 'c//d'] + res = res and split(u'a//b//c//d', u'//') == [u'a', u'b', u'c', u'd'] + res = res and split(u'endcase test', u'test') == [u'endcase ', u''] + res = res and rsplit('a|b|c|d', '|', 2) == ['a|b', 'c', 'd'] + res = res and rsplit('a//b//c//d', '//') == ['a', 'b', 'c', 'd'] + res = res and rsplit(u'a|b|c|d', u'|') == [u'a', u'b', u'c', u'd'] + res = res and rsplit(u'a|b|c|d', u'|', 2) == [u'a|b', u'c', u'd'] + res = res and rsplit(u'a//b//c//d', u'//') == [u'a', u'b', u'c', u'd'] + return res + res = self.interpret(fn, []) + assert res + + def test_replace(self): + def fn(): + res = True + res = res and replace('abc', 'ab', '--', 0) == 'abc' + res = res and replace('abc', 'xy', '--') == 'abc' + res = res and replace('abc', 'ab', '--', 0) == 'abc' + res = res and replace('abc', 'xy', '--') == 'abc' + return res + res = self.interpret(fn, []) + assert res _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit