Author: Manuel Jacob Branch: refactor-str-types Changeset: r64925:b38d402d8513 Date: 2013-06-17 18:09 +0200 http://bitbucket.org/pypy/pypy/changeset/b38d402d8513/
Log: Cleanup. diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py --- a/pypy/objspace/std/bytesobject.py +++ b/pypy/objspace/std/bytesobject.py @@ -1,29 +1,19 @@ """The builtin str implementation""" -from sys import maxint from pypy.interpreter.buffer import StringBuffer -from pypy.interpreter.error import OperationError, operationerrfmt +from pypy.interpreter.error import operationerrfmt from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault -from pypy.objspace.std import newformat, slicetype +from pypy.objspace.std import newformat from pypy.objspace.std.basestringtype import basestring_typedef from pypy.objspace.std.formatting import mod_format -from pypy.objspace.std.inttype import wrapint from pypy.objspace.std.model import W_Object, registerimplementation -from pypy.objspace.std.multimethod import FailedToImplement -from pypy.objspace.std.noneobject import W_NoneObject -from pypy.objspace.std.register_all import register_all -from pypy.objspace.std.sliceobject import W_SliceObject, normalize_simple_slice -from pypy.objspace.std.stdtypedef import StdTypeDef, SMM +from pypy.objspace.std.stdtypedef import StdTypeDef from pypy.objspace.std.stringmethods import StringMethods from pypy.objspace.std.unicodeobject import (unicode_from_string, decode_object, _get_encoding_and_errors) -from rpython.rlib import jit from rpython.rlib.jit import we_are_jitted -from rpython.rlib.objectmodel import (compute_hash, compute_unique_id, - specialize) -from rpython.rlib.rarithmetic import ovfcheck -from rpython.rlib.rstring import (StringBuilder, split, rsplit, replace, - endswith, startswith) +from rpython.rlib.objectmodel import compute_hash, compute_unique_id +from rpython.rlib.rstring import StringBuilder class W_AbstractBytesObject(W_Object): @@ -331,8 +321,6 @@ __getnewargs__ = interp2app(W_BytesObject.descr_getnewargs), ) -str_typedef.registermethods(globals()) - def string_escape_encode(s, quote): diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py --- a/pypy/objspace/std/stringmethods.py +++ b/pypy/objspace/std/stringmethods.py @@ -524,7 +524,6 @@ def descr_rsplit(self, space, w_sep=None, maxsplit=-1): res = [] value = self._val() - length = len(value) if space.is_none(w_sep): i = len(value)-1 while True: diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -11,11 +11,9 @@ from pypy.objspace.std.stdtypedef import StdTypeDef from pypy.objspace.std.stringmethods import StringMethods from rpython.rlib.objectmodel import compute_hash, compute_unique_id -from rpython.rlib.rarithmetic import ovfcheck from rpython.rlib.rstring import UnicodeBuilder from rpython.rlib.runicode import (str_decode_utf_8, str_decode_ascii, unicode_encode_utf_8, unicode_encode_ascii, make_unicode_escape_function) -from rpython.tool.sourcetools import func_with_new_name __all__ = ['W_UnicodeObject', 'wrapunicode', 'plain_str2unicode', 'encode_object', 'decode_object', 'unicode_from_object', @@ -350,11 +348,6 @@ # raising UnicodeDecodeError is messy, "please crash for me" return unicode_from_encoded_object(space, w_str, "ascii", "strict") -def unicode_decode__unitypedef_ANY_ANY(space, w_unicode, w_encoding=None, - w_errors=None): - return space.call_method(space.str(w_unicode), 'decode', - w_encoding, w_errors) - @unwrap_spec(w_string = WrappedDefault("")) def descr_new_(space, w_unicodetype, w_string, w_encoding=None, w_errors=None): @@ -503,444 +496,6 @@ raise OperationError(space.w_UnicodeEncodeError, space.newtuple([w_encoding, w_unistr, w_start, w_end, w_reason])) return ''.join(result) -# checks if should trigger an unicode warning -def _unicode_string_comparison(space, w_uni, w_str, inverse, uni_from_str): - try: - w_uni2 = uni_from_str(space, w_str) - except OperationError, e: - if e.match(space, space.w_UnicodeDecodeError): - msg = ("Unicode %s comparison failed to convert both arguments to " - "Unicode - interpreting them as being unequal" % - "unequal" if inverse else "equal") - space.warn(space.wrap(msg), space.w_UnicodeWarning) - return space.newbool(inverse) - raise - result = space.eq(w_uni, w_uni2) - if inverse: - return space.not_(result) - return result - -def _isspace(uchar): - return unicodedb.isspace(ord(uchar)) - -def make_generic(funcname): - def func(space, w_self): - v = w_self._value - if len(v) == 0: - return space.w_False - for idx in range(len(v)): - if not getattr(unicodedb, funcname)(ord(v[idx])): - return space.w_False - return space.w_True - return func_with_new_name(func, "unicode_%s__Unicode" % (funcname, )) - -unicode_isspace__Unicode = make_generic("isspace") -unicode_isalpha__Unicode = make_generic("isalpha") -unicode_isalnum__Unicode = make_generic("isalnum") -unicode_isdecimal__Unicode = make_generic("isdecimal") -unicode_isdigit__Unicode = make_generic("isdigit") -unicode_isnumeric__Unicode = make_generic("isnumeric") - -def unicode_islower__Unicode(space, w_unicode): - cased = False - for uchar in w_unicode._value: - if (unicodedb.isupper(ord(uchar)) or - unicodedb.istitle(ord(uchar))): - return space.w_False - if not cased and unicodedb.islower(ord(uchar)): - cased = True - return space.newbool(cased) - -def unicode_isupper__Unicode(space, w_unicode): - cased = False - for uchar in w_unicode._value: - if (unicodedb.islower(ord(uchar)) or - unicodedb.istitle(ord(uchar))): - return space.w_False - if not cased and unicodedb.isupper(ord(uchar)): - cased = True - return space.newbool(cased) - -def unicode_istitle__Unicode(space, w_unicode): - cased = False - previous_is_cased = False - for uchar in w_unicode._value: - if (unicodedb.isupper(ord(uchar)) or - unicodedb.istitle(ord(uchar))): - if previous_is_cased: - return space.w_False - previous_is_cased = cased = True - elif unicodedb.islower(ord(uchar)): - if not previous_is_cased: - return space.w_False - previous_is_cased = cased = True - else: - previous_is_cased = False - return space.newbool(cased) - -def _strip(space, w_self, w_chars, left, right): - "internal function called by str_xstrip methods" - u_self = w_self._value - u_chars = w_chars._value - - lpos = 0 - rpos = len(u_self) - - if left: - while lpos < rpos and u_self[lpos] in u_chars: - lpos += 1 - - if right: - while rpos > lpos and u_self[rpos - 1] in u_chars: - rpos -= 1 - - assert rpos >= 0 - result = u_self[lpos: rpos] - return W_UnicodeObject(result) - -def _strip_none(space, w_self, left, right): - "internal function called by str_xstrip methods" - u_self = w_self._value - - lpos = 0 - rpos = len(u_self) - - if left: - while lpos < rpos and _isspace(u_self[lpos]): - lpos += 1 - - if right: - while rpos > lpos and _isspace(u_self[rpos - 1]): - rpos -= 1 - - assert rpos >= 0 - result = u_self[lpos: rpos] - return W_UnicodeObject(result) - - -def unicode_capitalize__Unicode(space, w_self): - input = w_self._value - if len(input) == 0: - return W_UnicodeObject.EMPTY - builder = UnicodeBuilder(len(input)) - builder.append(unichr(unicodedb.toupper(ord(input[0])))) - for i in range(1, len(input)): - builder.append(unichr(unicodedb.tolower(ord(input[i])))) - return W_UnicodeObject(builder.build()) - -def unicode_title__Unicode(space, w_self): - input = w_self._value - if len(input) == 0: - return w_self - - builder = UnicodeBuilder(len(input)) - previous_is_cased = False - for i in range(len(input)): - unichar = ord(input[i]) - if previous_is_cased: - builder.append(unichr(unicodedb.tolower(unichar))) - else: - builder.append(unichr(unicodedb.totitle(unichar))) - previous_is_cased = unicodedb.iscased(unichar) - return W_UnicodeObject(builder.build()) - -def unicode_lower__Unicode(space, w_self): - input = w_self._value - builder = UnicodeBuilder(len(input)) - for i in range(len(input)): - builder.append(unichr(unicodedb.tolower(ord(input[i])))) - return W_UnicodeObject(builder.build()) - -def unicode_upper__Unicode(space, w_self): - input = w_self._value - builder = UnicodeBuilder(len(input)) - for i in range(len(input)): - builder.append(unichr(unicodedb.toupper(ord(input[i])))) - return W_UnicodeObject(builder.build()) - -def unicode_swapcase__Unicode(space, w_self): - input = w_self._value - builder = UnicodeBuilder(len(input)) - for i in range(len(input)): - unichar = ord(input[i]) - if unicodedb.islower(unichar): - builder.append(unichr(unicodedb.toupper(unichar))) - elif unicodedb.isupper(unichar): - builder.append(unichr(unicodedb.tolower(unichar))) - else: - builder.append(input[i]) - return W_UnicodeObject(builder.build()) - -def _normalize_index(length, index): - if index < 0: - index += length - if index < 0: - index = 0 - elif index > length: - index = length - return index - -def _to_unichar_w(space, w_char): - try: - unistr = space.unicode_w(w_char) - except OperationError, e: - if e.match(space, space.w_TypeError): - msg = 'The fill character cannot be converted to Unicode' - raise OperationError(space.w_TypeError, space.wrap(msg)) - else: - raise - - if len(unistr) != 1: - raise OperationError(space.w_TypeError, space.wrap('The fill character must be exactly one character long')) - return unistr[0] - -def unicode_center__Unicode_ANY_ANY(space, w_self, w_width, w_fillchar): - self = w_self._value - width = space.int_w(w_width) - fillchar = _to_unichar_w(space, w_fillchar) - padding = width - len(self) - if padding < 0: - return w_self.create_if_subclassed() - leftpad = padding // 2 + (padding & width & 1) - result = [fillchar] * width - for i in range(len(self)): - result[leftpad + i] = self[i] - return W_UnicodeObject(u''.join(result)) - -def unicode_ljust__Unicode_ANY_ANY(space, w_self, w_width, w_fillchar): - self = w_self._value - width = space.int_w(w_width) - fillchar = _to_unichar_w(space, w_fillchar) - padding = width - len(self) - if padding < 0: - return w_self.create_if_subclassed() - result = [fillchar] * width - for i in range(len(self)): - result[i] = self[i] - return W_UnicodeObject(u''.join(result)) - -def unicode_rjust__Unicode_ANY_ANY(space, w_self, w_width, w_fillchar): - self = w_self._value - width = space.int_w(w_width) - fillchar = _to_unichar_w(space, w_fillchar) - padding = width - len(self) - if padding < 0: - return w_self.create_if_subclassed() - result = [fillchar] * width - for i in range(len(self)): - result[padding + i] = self[i] - return W_UnicodeObject(u''.join(result)) - -def unicode_splitlines__Unicode_ANY(space, w_self, w_keepends): - self = w_self._value - keepends = 0 - if space.int_w(w_keepends): - keepends = 1 - if len(self) == 0: - return space.newlist([]) - - start = 0 - end = len(self) - pos = 0 - lines = [] - while pos < end: - if unicodedb.islinebreak(ord(self[pos])): - if (self[pos] == u'\r' and pos + 1 < end and - self[pos + 1] == u'\n'): - # Count CRLF as one linebreak - lines.append(self[start:pos + keepends * 2]) - pos += 1 - else: - lines.append(self[start:pos + keepends]) - pos += 1 - start = pos - else: - pos += 1 - if not unicodedb.islinebreak(ord(self[end - 1])): - lines.append(self[start:]) - return space.newlist_unicode(lines) - -def unicode_split__Unicode_None_ANY(space, w_self, w_none, w_maxsplit): - maxsplit = space.int_w(w_maxsplit) - res = [] - value = w_self._value - length = len(value) - i = 0 - while True: - # find the beginning of the next word - while i < length: - if not _isspace(value[i]): - break # found - i += 1 - else: - break # end of string, finished - - # find the end of the word - if maxsplit == 0: - j = length # take all the rest of the string - else: - j = i + 1 - while j < length and not _isspace(value[j]): - j += 1 - maxsplit -= 1 # NB. if it's already < 0, it stays < 0 - - # the word is value[i:j] - res.append(value[i:j]) - - # continue to look from the character following the space after the word - i = j + 1 - - return space.newlist_unicode(res) - -def unicode_split__Unicode_Unicode_ANY(space, w_self, w_delim, w_maxsplit): - self = w_self._value - delim = w_delim._value - maxsplit = space.int_w(w_maxsplit) - delim_len = len(delim) - if delim_len == 0: - raise OperationError(space.w_ValueError, - space.wrap('empty separator')) - parts = split(self, delim, maxsplit) - return space.newlist_unicode(parts) - - -def unicode_rsplit__Unicode_None_ANY(space, w_self, w_none, w_maxsplit): - maxsplit = space.int_w(w_maxsplit) - res = [] - value = w_self._value - i = len(value)-1 - while True: - # starting from the end, find the end of the next word - while i >= 0: - if not _isspace(value[i]): - break # found - i -= 1 - else: - break # end of string, finished - - # find the start of the word - # (more precisely, 'j' will be the space character before the word) - if maxsplit == 0: - j = -1 # take all the rest of the string - else: - j = i - 1 - while j >= 0 and not _isspace(value[j]): - j -= 1 - maxsplit -= 1 # NB. if it's already < 0, it stays < 0 - - # the word is value[j+1:i+1] - j1 = j + 1 - assert j1 >= 0 - res.append(value[j1:i+1]) - - # continue to look from the character before the space before the word - i = j - 1 - - res.reverse() - return space.newlist_unicode(res) - -def sliced(space, s, start, stop, orig_obj): - assert start >= 0 - assert stop >= 0 - if start == 0 and stop == len(s) and space.is_w(space.type(orig_obj), space.w_unicode): - return orig_obj - return space.wrap( s[start:stop]) - -#unicode_rsplit__Unicode_Unicode_ANY = make_rsplit_with_delim('unicode_rsplit__Unicode_Unicode_ANY', -# sliced) - -def _split_into_chars(self, maxsplit): - if maxsplit == 0: - return [self] - index = 0 - end = len(self) - parts = [u''] - maxsplit -= 1 - while maxsplit != 0: - if index >= end: - break - parts.append(self[index]) - index += 1 - maxsplit -= 1 - parts.append(self[index:]) - return parts - -def _split_with(self, with_, maxsplit=-1): - xxx # remove - parts = [] - start = 0 - end = len(self) - length = len(with_) - while maxsplit != 0: - index = self.find(with_, start, end) - if index < 0: - break - parts.append(self[start:index]) - start = index + length - maxsplit -= 1 - parts.append(self[start:]) - return parts - -def unicode_replace__Unicode_Unicode_Unicode_ANY(space, w_self, w_old, - w_new, w_maxsplit): - maxsplit = space.int_w(w_maxsplit) - try: - return W_UnicodeObject( - replace(w_self._value, w_old._value, w_new._value, maxsplit)) - except OverflowError: - raise OperationError( - space.w_OverflowError, - space.wrap("replace string is too long")) - -def unicode_replace__Unicode_ANY_ANY_ANY(space, w_self, w_old, w_new, - w_maxsplit): - if not space.isinstance_w(w_old, space.w_unicode): - old = unicode(space.bufferstr_w(w_old)) - else: - old = space.unicode_w(w_old) - if not space.isinstance_w(w_new, space.w_unicode): - new = unicode(space.bufferstr_w(w_new)) - else: - new = space.unicode_w(w_new) - maxsplit = space.int_w(w_maxsplit) - try: - return W_UnicodeObject(replace(w_self._value, old, new, maxsplit)) - except OverflowError: - raise OperationError( - space.w_OverflowError, - space.wrap("replace string is too long")) - - return W_UnicodeObject(new.join(parts)) - - -def unicode_expandtabs__Unicode_ANY(space, w_self, tabsize): - xxx # remove - self = w_self._value - parts = _split_with(self, u'\t') - result = [parts[0]] - prevsize = 0 - for ch in parts[0]: - prevsize += 1 - if ch == u"\n" or ch == u"\r": - prevsize = 0 - totalsize = prevsize - - for i in range(1, len(parts)): - pad = tabsize - prevsize % tabsize - nextpart = parts[i] - try: - totalsize = ovfcheck(totalsize + pad) - totalsize = ovfcheck(totalsize + len(nextpart)) - result.append(u' ' * pad) - except OverflowError: - raise OperationError(space.w_OverflowError, space.wrap('new string is too long')) - result.append(nextpart) - prevsize = 0 - for ch in nextpart: - prevsize += 1 - if ch in (u"\n", u"\r"): - prevsize = 0 - return space.wrap(u''.join(result)) _repr_function, _ = make_unicode_escape_function( pass_printable=False, unicode_output=False, quotes=True, prefix='u') _______________________________________________ pypy-commit mailing list pypy-commit@python.org http://mail.python.org/mailman/listinfo/pypy-commit