Author: Manuel Jacob
Branch: refactor-str-types
Changeset: r64925:b38d402d8513
Date: 2013-06-17 18:09 +0200
http://bitbucket.org/pypy/pypy/changeset/b38d402d8513/
Log: Cleanup.
diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py
--- a/pypy/objspace/std/bytesobject.py
+++ b/pypy/objspace/std/bytesobject.py
@@ -1,29 +1,19 @@
"""The builtin str implementation"""
-from sys import maxint
from pypy.interpreter.buffer import StringBuffer
-from pypy.interpreter.error import OperationError, operationerrfmt
+from pypy.interpreter.error import operationerrfmt
from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault
-from pypy.objspace.std import newformat, slicetype
+from pypy.objspace.std import newformat
from pypy.objspace.std.basestringtype import basestring_typedef
from pypy.objspace.std.formatting import mod_format
-from pypy.objspace.std.inttype import wrapint
from pypy.objspace.std.model import W_Object, registerimplementation
-from pypy.objspace.std.multimethod import FailedToImplement
-from pypy.objspace.std.noneobject import W_NoneObject
-from pypy.objspace.std.register_all import register_all
-from pypy.objspace.std.sliceobject import W_SliceObject, normalize_simple_slice
-from pypy.objspace.std.stdtypedef import StdTypeDef, SMM
+from pypy.objspace.std.stdtypedef import StdTypeDef
from pypy.objspace.std.stringmethods import StringMethods
from pypy.objspace.std.unicodeobject import (unicode_from_string,
decode_object, _get_encoding_and_errors)
-from rpython.rlib import jit
from rpython.rlib.jit import we_are_jitted
-from rpython.rlib.objectmodel import (compute_hash, compute_unique_id,
- specialize)
-from rpython.rlib.rarithmetic import ovfcheck
-from rpython.rlib.rstring import (StringBuilder, split, rsplit, replace,
- endswith, startswith)
+from rpython.rlib.objectmodel import compute_hash, compute_unique_id
+from rpython.rlib.rstring import StringBuilder
class W_AbstractBytesObject(W_Object):
@@ -331,8 +321,6 @@
__getnewargs__ = interp2app(W_BytesObject.descr_getnewargs),
)
-str_typedef.registermethods(globals())
-
def string_escape_encode(s, quote):
diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py
--- a/pypy/objspace/std/stringmethods.py
+++ b/pypy/objspace/std/stringmethods.py
@@ -524,7 +524,6 @@
def descr_rsplit(self, space, w_sep=None, maxsplit=-1):
res = []
value = self._val()
- length = len(value)
if space.is_none(w_sep):
i = len(value)-1
while True:
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -11,11 +11,9 @@
from pypy.objspace.std.stdtypedef import StdTypeDef
from pypy.objspace.std.stringmethods import StringMethods
from rpython.rlib.objectmodel import compute_hash, compute_unique_id
-from rpython.rlib.rarithmetic import ovfcheck
from rpython.rlib.rstring import UnicodeBuilder
from rpython.rlib.runicode import (str_decode_utf_8, str_decode_ascii,
unicode_encode_utf_8, unicode_encode_ascii, make_unicode_escape_function)
-from rpython.tool.sourcetools import func_with_new_name
__all__ = ['W_UnicodeObject', 'wrapunicode', 'plain_str2unicode',
'encode_object', 'decode_object', 'unicode_from_object',
@@ -350,11 +348,6 @@
# raising UnicodeDecodeError is messy, "please crash for me"
return unicode_from_encoded_object(space, w_str, "ascii", "strict")
-def unicode_decode__unitypedef_ANY_ANY(space, w_unicode, w_encoding=None,
- w_errors=None):
- return space.call_method(space.str(w_unicode), 'decode',
- w_encoding, w_errors)
-
@unwrap_spec(w_string = WrappedDefault(""))
def descr_new_(space, w_unicodetype, w_string, w_encoding=None, w_errors=None):
@@ -503,444 +496,6 @@
raise OperationError(space.w_UnicodeEncodeError,
space.newtuple([w_encoding, w_unistr, w_start, w_end, w_reason]))
return ''.join(result)
-# checks if should trigger an unicode warning
-def _unicode_string_comparison(space, w_uni, w_str, inverse, uni_from_str):
- try:
- w_uni2 = uni_from_str(space, w_str)
- except OperationError, e:
- if e.match(space, space.w_UnicodeDecodeError):
- msg = ("Unicode %s comparison failed to convert both arguments to "
- "Unicode - interpreting them as being unequal" %
- "unequal" if inverse else "equal")
- space.warn(space.wrap(msg), space.w_UnicodeWarning)
- return space.newbool(inverse)
- raise
- result = space.eq(w_uni, w_uni2)
- if inverse:
- return space.not_(result)
- return result
-
-def _isspace(uchar):
- return unicodedb.isspace(ord(uchar))
-
-def make_generic(funcname):
- def func(space, w_self):
- v = w_self._value
- if len(v) == 0:
- return space.w_False
- for idx in range(len(v)):
- if not getattr(unicodedb, funcname)(ord(v[idx])):
- return space.w_False
- return space.w_True
- return func_with_new_name(func, "unicode_%s__Unicode" % (funcname, ))
-
-unicode_isspace__Unicode = make_generic("isspace")
-unicode_isalpha__Unicode = make_generic("isalpha")
-unicode_isalnum__Unicode = make_generic("isalnum")
-unicode_isdecimal__Unicode = make_generic("isdecimal")
-unicode_isdigit__Unicode = make_generic("isdigit")
-unicode_isnumeric__Unicode = make_generic("isnumeric")
-
-def unicode_islower__Unicode(space, w_unicode):
- cased = False
- for uchar in w_unicode._value:
- if (unicodedb.isupper(ord(uchar)) or
- unicodedb.istitle(ord(uchar))):
- return space.w_False
- if not cased and unicodedb.islower(ord(uchar)):
- cased = True
- return space.newbool(cased)
-
-def unicode_isupper__Unicode(space, w_unicode):
- cased = False
- for uchar in w_unicode._value:
- if (unicodedb.islower(ord(uchar)) or
- unicodedb.istitle(ord(uchar))):
- return space.w_False
- if not cased and unicodedb.isupper(ord(uchar)):
- cased = True
- return space.newbool(cased)
-
-def unicode_istitle__Unicode(space, w_unicode):
- cased = False
- previous_is_cased = False
- for uchar in w_unicode._value:
- if (unicodedb.isupper(ord(uchar)) or
- unicodedb.istitle(ord(uchar))):
- if previous_is_cased:
- return space.w_False
- previous_is_cased = cased = True
- elif unicodedb.islower(ord(uchar)):
- if not previous_is_cased:
- return space.w_False
- previous_is_cased = cased = True
- else:
- previous_is_cased = False
- return space.newbool(cased)
-
-def _strip(space, w_self, w_chars, left, right):
- "internal function called by str_xstrip methods"
- u_self = w_self._value
- u_chars = w_chars._value
-
- lpos = 0
- rpos = len(u_self)
-
- if left:
- while lpos < rpos and u_self[lpos] in u_chars:
- lpos += 1
-
- if right:
- while rpos > lpos and u_self[rpos - 1] in u_chars:
- rpos -= 1
-
- assert rpos >= 0
- result = u_self[lpos: rpos]
- return W_UnicodeObject(result)
-
-def _strip_none(space, w_self, left, right):
- "internal function called by str_xstrip methods"
- u_self = w_self._value
-
- lpos = 0
- rpos = len(u_self)
-
- if left:
- while lpos < rpos and _isspace(u_self[lpos]):
- lpos += 1
-
- if right:
- while rpos > lpos and _isspace(u_self[rpos - 1]):
- rpos -= 1
-
- assert rpos >= 0
- result = u_self[lpos: rpos]
- return W_UnicodeObject(result)
-
-
-def unicode_capitalize__Unicode(space, w_self):
- input = w_self._value
- if len(input) == 0:
- return W_UnicodeObject.EMPTY
- builder = UnicodeBuilder(len(input))
- builder.append(unichr(unicodedb.toupper(ord(input[0]))))
- for i in range(1, len(input)):
- builder.append(unichr(unicodedb.tolower(ord(input[i]))))
- return W_UnicodeObject(builder.build())
-
-def unicode_title__Unicode(space, w_self):
- input = w_self._value
- if len(input) == 0:
- return w_self
-
- builder = UnicodeBuilder(len(input))
- previous_is_cased = False
- for i in range(len(input)):
- unichar = ord(input[i])
- if previous_is_cased:
- builder.append(unichr(unicodedb.tolower(unichar)))
- else:
- builder.append(unichr(unicodedb.totitle(unichar)))
- previous_is_cased = unicodedb.iscased(unichar)
- return W_UnicodeObject(builder.build())
-
-def unicode_lower__Unicode(space, w_self):
- input = w_self._value
- builder = UnicodeBuilder(len(input))
- for i in range(len(input)):
- builder.append(unichr(unicodedb.tolower(ord(input[i]))))
- return W_UnicodeObject(builder.build())
-
-def unicode_upper__Unicode(space, w_self):
- input = w_self._value
- builder = UnicodeBuilder(len(input))
- for i in range(len(input)):
- builder.append(unichr(unicodedb.toupper(ord(input[i]))))
- return W_UnicodeObject(builder.build())
-
-def unicode_swapcase__Unicode(space, w_self):
- input = w_self._value
- builder = UnicodeBuilder(len(input))
- for i in range(len(input)):
- unichar = ord(input[i])
- if unicodedb.islower(unichar):
- builder.append(unichr(unicodedb.toupper(unichar)))
- elif unicodedb.isupper(unichar):
- builder.append(unichr(unicodedb.tolower(unichar)))
- else:
- builder.append(input[i])
- return W_UnicodeObject(builder.build())
-
-def _normalize_index(length, index):
- if index < 0:
- index += length
- if index < 0:
- index = 0
- elif index > length:
- index = length
- return index
-
-def _to_unichar_w(space, w_char):
- try:
- unistr = space.unicode_w(w_char)
- except OperationError, e:
- if e.match(space, space.w_TypeError):
- msg = 'The fill character cannot be converted to Unicode'
- raise OperationError(space.w_TypeError, space.wrap(msg))
- else:
- raise
-
- if len(unistr) != 1:
- raise OperationError(space.w_TypeError, space.wrap('The fill character must be exactly one character long'))
- return unistr[0]
-
-def unicode_center__Unicode_ANY_ANY(space, w_self, w_width, w_fillchar):
- self = w_self._value
- width = space.int_w(w_width)
- fillchar = _to_unichar_w(space, w_fillchar)
- padding = width - len(self)
- if padding < 0:
- return w_self.create_if_subclassed()
- leftpad = padding // 2 + (padding & width & 1)
- result = [fillchar] * width
- for i in range(len(self)):
- result[leftpad + i] = self[i]
- return W_UnicodeObject(u''.join(result))
-
-def unicode_ljust__Unicode_ANY_ANY(space, w_self, w_width, w_fillchar):
- self = w_self._value
- width = space.int_w(w_width)
- fillchar = _to_unichar_w(space, w_fillchar)
- padding = width - len(self)
- if padding < 0:
- return w_self.create_if_subclassed()
- result = [fillchar] * width
- for i in range(len(self)):
- result[i] = self[i]
- return W_UnicodeObject(u''.join(result))
-
-def unicode_rjust__Unicode_ANY_ANY(space, w_self, w_width, w_fillchar):
- self = w_self._value
- width = space.int_w(w_width)
- fillchar = _to_unichar_w(space, w_fillchar)
- padding = width - len(self)
- if padding < 0:
- return w_self.create_if_subclassed()
- result = [fillchar] * width
- for i in range(len(self)):
- result[padding + i] = self[i]
- return W_UnicodeObject(u''.join(result))
-
-def unicode_splitlines__Unicode_ANY(space, w_self, w_keepends):
- self = w_self._value
- keepends = 0
- if space.int_w(w_keepends):
- keepends = 1
- if len(self) == 0:
- return space.newlist([])
-
- start = 0
- end = len(self)
- pos = 0
- lines = []
- while pos < end:
- if unicodedb.islinebreak(ord(self[pos])):
- if (self[pos] == u'\r' and pos + 1 < end and
- self[pos + 1] == u'\n'):
- # Count CRLF as one linebreak
- lines.append(self[start:pos + keepends * 2])
- pos += 1
- else:
- lines.append(self[start:pos + keepends])
- pos += 1
- start = pos
- else:
- pos += 1
- if not unicodedb.islinebreak(ord(self[end - 1])):
- lines.append(self[start:])
- return space.newlist_unicode(lines)
-
-def unicode_split__Unicode_None_ANY(space, w_self, w_none, w_maxsplit):
- maxsplit = space.int_w(w_maxsplit)
- res = []
- value = w_self._value
- length = len(value)
- i = 0
- while True:
- # find the beginning of the next word
- while i < length:
- if not _isspace(value[i]):
- break # found
- i += 1
- else:
- break # end of string, finished
-
- # find the end of the word
- if maxsplit == 0:
- j = length # take all the rest of the string
- else:
- j = i + 1
- while j < length and not _isspace(value[j]):
- j += 1
- maxsplit -= 1 # NB. if it's already < 0, it stays < 0
-
- # the word is value[i:j]
- res.append(value[i:j])
-
- # continue to look from the character following the space after the word
- i = j + 1
-
- return space.newlist_unicode(res)
-
-def unicode_split__Unicode_Unicode_ANY(space, w_self, w_delim, w_maxsplit):
- self = w_self._value
- delim = w_delim._value
- maxsplit = space.int_w(w_maxsplit)
- delim_len = len(delim)
- if delim_len == 0:
- raise OperationError(space.w_ValueError,
- space.wrap('empty separator'))
- parts = split(self, delim, maxsplit)
- return space.newlist_unicode(parts)
-
-
-def unicode_rsplit__Unicode_None_ANY(space, w_self, w_none, w_maxsplit):
- maxsplit = space.int_w(w_maxsplit)
- res = []
- value = w_self._value
- i = len(value)-1
- while True:
- # starting from the end, find the end of the next word
- while i >= 0:
- if not _isspace(value[i]):
- break # found
- i -= 1
- else:
- break # end of string, finished
-
- # find the start of the word
- # (more precisely, 'j' will be the space character before the word)
- if maxsplit == 0:
- j = -1 # take all the rest of the string
- else:
- j = i - 1
- while j >= 0 and not _isspace(value[j]):
- j -= 1
- maxsplit -= 1 # NB. if it's already < 0, it stays < 0
-
- # the word is value[j+1:i+1]
- j1 = j + 1
- assert j1 >= 0
- res.append(value[j1:i+1])
-
- # continue to look from the character before the space before the word
- i = j - 1
-
- res.reverse()
- return space.newlist_unicode(res)
-
-def sliced(space, s, start, stop, orig_obj):
- assert start >= 0
- assert stop >= 0
- if start == 0 and stop == len(s) and space.is_w(space.type(orig_obj), space.w_unicode):
- return orig_obj
- return space.wrap( s[start:stop])
-
-#unicode_rsplit__Unicode_Unicode_ANY = make_rsplit_with_delim('unicode_rsplit__Unicode_Unicode_ANY',
-# sliced)
-
-def _split_into_chars(self, maxsplit):
- if maxsplit == 0:
- return [self]
- index = 0
- end = len(self)
- parts = [u'']
- maxsplit -= 1
- while maxsplit != 0:
- if index >= end:
- break
- parts.append(self[index])
- index += 1
- maxsplit -= 1
- parts.append(self[index:])
- return parts
-
-def _split_with(self, with_, maxsplit=-1):
- xxx # remove
- parts = []
- start = 0
- end = len(self)
- length = len(with_)
- while maxsplit != 0:
- index = self.find(with_, start, end)
- if index < 0:
- break
- parts.append(self[start:index])
- start = index + length
- maxsplit -= 1
- parts.append(self[start:])
- return parts
-
-def unicode_replace__Unicode_Unicode_Unicode_ANY(space, w_self, w_old,
- w_new, w_maxsplit):
- maxsplit = space.int_w(w_maxsplit)
- try:
- return W_UnicodeObject(
- replace(w_self._value, w_old._value, w_new._value, maxsplit))
- except OverflowError:
- raise OperationError(
- space.w_OverflowError,
- space.wrap("replace string is too long"))
-
-def unicode_replace__Unicode_ANY_ANY_ANY(space, w_self, w_old, w_new,
- w_maxsplit):
- if not space.isinstance_w(w_old, space.w_unicode):
- old = unicode(space.bufferstr_w(w_old))
- else:
- old = space.unicode_w(w_old)
- if not space.isinstance_w(w_new, space.w_unicode):
- new = unicode(space.bufferstr_w(w_new))
- else:
- new = space.unicode_w(w_new)
- maxsplit = space.int_w(w_maxsplit)
- try:
- return W_UnicodeObject(replace(w_self._value, old, new, maxsplit))
- except OverflowError:
- raise OperationError(
- space.w_OverflowError,
- space.wrap("replace string is too long"))
-
- return W_UnicodeObject(new.join(parts))
-
-
-def unicode_expandtabs__Unicode_ANY(space, w_self, tabsize):
- xxx # remove
- self = w_self._value
- parts = _split_with(self, u'\t')
- result = [parts[0]]
- prevsize = 0
- for ch in parts[0]:
- prevsize += 1
- if ch == u"\n" or ch == u"\r":
- prevsize = 0
- totalsize = prevsize
-
- for i in range(1, len(parts)):
- pad = tabsize - prevsize % tabsize
- nextpart = parts[i]
- try:
- totalsize = ovfcheck(totalsize + pad)
- totalsize = ovfcheck(totalsize + len(nextpart))
- result.append(u' ' * pad)
- except OverflowError:
- raise OperationError(space.w_OverflowError, space.wrap('new string is too long'))
- result.append(nextpart)
- prevsize = 0
- for ch in nextpart:
- prevsize += 1
- if ch in (u"\n", u"\r"):
- prevsize = 0
- return space.wrap(u''.join(result))
_repr_function, _ = make_unicode_escape_function(
pass_printable=False, unicode_output=False, quotes=True, prefix='u')
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
http://mail.python.org/mailman/listinfo/pypy-commit