[pypy-commit] pypy refactor-str-types: Cleanup.

Manuel Jacob Mon, 17 Jun 2013 09:12:33 -0700

Author: Manuel Jacob
Branch: refactor-str-types
Changeset: r64925:b38d402d8513
Date: 2013-06-17 18:09 +0200
http://bitbucket.org/pypy/pypy/changeset/b38d402d8513/


Log:    Cleanup.

diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py
--- a/pypy/objspace/std/bytesobject.py
+++ b/pypy/objspace/std/bytesobject.py
@@ -1,29 +1,19 @@
 """The builtin str implementation"""
 
-from sys import maxint
 from pypy.interpreter.buffer import StringBuffer
-from pypy.interpreter.error import OperationError, operationerrfmt
+from pypy.interpreter.error import operationerrfmt
 from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault
-from pypy.objspace.std import newformat, slicetype
+from pypy.objspace.std import newformat
 from pypy.objspace.std.basestringtype import basestring_typedef
 from pypy.objspace.std.formatting import mod_format
-from pypy.objspace.std.inttype import wrapint
 from pypy.objspace.std.model import W_Object, registerimplementation
-from pypy.objspace.std.multimethod import FailedToImplement
-from pypy.objspace.std.noneobject import W_NoneObject
-from pypy.objspace.std.register_all import register_all
-from pypy.objspace.std.sliceobject import W_SliceObject, normalize_simple_slice
-from pypy.objspace.std.stdtypedef import StdTypeDef, SMM
+from pypy.objspace.std.stdtypedef import StdTypeDef
 from pypy.objspace.std.stringmethods import StringMethods
 from pypy.objspace.std.unicodeobject import (unicode_from_string,
     decode_object, _get_encoding_and_errors)
-from rpython.rlib import jit
 from rpython.rlib.jit import we_are_jitted
-from rpython.rlib.objectmodel import (compute_hash, compute_unique_id,
-        specialize)
-from rpython.rlib.rarithmetic import ovfcheck
-from rpython.rlib.rstring import (StringBuilder, split, rsplit, replace,
-    endswith, startswith)
+from rpython.rlib.objectmodel import compute_hash, compute_unique_id
+from rpython.rlib.rstring import StringBuilder
 
 
 class W_AbstractBytesObject(W_Object):
@@ -331,8 +321,6 @@
     __getnewargs__ = interp2app(W_BytesObject.descr_getnewargs),
 )
 
-str_typedef.registermethods(globals())
-
 
 def string_escape_encode(s, quote):
 
diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py
--- a/pypy/objspace/std/stringmethods.py
+++ b/pypy/objspace/std/stringmethods.py
@@ -524,7 +524,6 @@
     def descr_rsplit(self, space, w_sep=None, maxsplit=-1):
         res = []
         value = self._val()
-        length = len(value)
         if space.is_none(w_sep):
             i = len(value)-1
             while True:
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -11,11 +11,9 @@
 from pypy.objspace.std.stdtypedef import StdTypeDef
 from pypy.objspace.std.stringmethods import StringMethods
 from rpython.rlib.objectmodel import compute_hash, compute_unique_id
-from rpython.rlib.rarithmetic import ovfcheck
 from rpython.rlib.rstring import UnicodeBuilder
 from rpython.rlib.runicode import (str_decode_utf_8, str_decode_ascii,
     unicode_encode_utf_8, unicode_encode_ascii, make_unicode_escape_function)
-from rpython.tool.sourcetools import func_with_new_name
 
 __all__ = ['W_UnicodeObject', 'wrapunicode', 'plain_str2unicode',
            'encode_object', 'decode_object', 'unicode_from_object',
@@ -350,11 +348,6 @@
         # raising UnicodeDecodeError is messy, "please crash for me"
         return unicode_from_encoded_object(space, w_str, "ascii", "strict")
 
-def unicode_decode__unitypedef_ANY_ANY(space, w_unicode, w_encoding=None,
-                                       w_errors=None):
-    return space.call_method(space.str(w_unicode), 'decode',
-                             w_encoding, w_errors)
-
 
 @unwrap_spec(w_string = WrappedDefault(""))
 def descr_new_(space, w_unicodetype, w_string, w_encoding=None, w_errors=None):
@@ -503,444 +496,6 @@
                 raise OperationError(space.w_UnicodeEncodeError, space.newtuple([w_encoding, w_unistr, w_start, w_end, w_reason]))
     return ''.join(result)
 
-# checks if should trigger an unicode warning
-def _unicode_string_comparison(space, w_uni, w_str, inverse, uni_from_str):
-    try:
-        w_uni2 = uni_from_str(space, w_str)
-    except OperationError, e:
-        if e.match(space, space.w_UnicodeDecodeError):
-            msg = ("Unicode %s comparison failed to convert both arguments to "
-                   "Unicode - interpreting them as being unequal" %
-                   "unequal" if inverse else "equal")
-            space.warn(space.wrap(msg), space.w_UnicodeWarning)
-            return space.newbool(inverse)
-        raise
-    result = space.eq(w_uni, w_uni2)
-    if inverse:
-        return space.not_(result)
-    return result
-
-def _isspace(uchar):
-    return unicodedb.isspace(ord(uchar))
-
-def make_generic(funcname):
-    def func(space, w_self):
-        v = w_self._value
-        if len(v) == 0:
-            return space.w_False
-        for idx in range(len(v)):
-            if not getattr(unicodedb, funcname)(ord(v[idx])):
-                return space.w_False
-        return space.w_True
-    return func_with_new_name(func, "unicode_%s__Unicode" % (funcname, ))
-
-unicode_isspace__Unicode = make_generic("isspace")
-unicode_isalpha__Unicode = make_generic("isalpha")
-unicode_isalnum__Unicode = make_generic("isalnum")
-unicode_isdecimal__Unicode = make_generic("isdecimal")
-unicode_isdigit__Unicode = make_generic("isdigit")
-unicode_isnumeric__Unicode = make_generic("isnumeric")
-
-def unicode_islower__Unicode(space, w_unicode):
-    cased = False
-    for uchar in w_unicode._value:
-        if (unicodedb.isupper(ord(uchar)) or
-            unicodedb.istitle(ord(uchar))):
-            return space.w_False
-        if not cased and unicodedb.islower(ord(uchar)):
-            cased = True
-    return space.newbool(cased)
-
-def unicode_isupper__Unicode(space, w_unicode):
-    cased = False
-    for uchar in w_unicode._value:
-        if (unicodedb.islower(ord(uchar)) or
-            unicodedb.istitle(ord(uchar))):
-            return space.w_False
-        if not cased and unicodedb.isupper(ord(uchar)):
-            cased = True
-    return space.newbool(cased)
-
-def unicode_istitle__Unicode(space, w_unicode):
-    cased = False
-    previous_is_cased = False
-    for uchar in w_unicode._value:
-        if (unicodedb.isupper(ord(uchar)) or
-            unicodedb.istitle(ord(uchar))):
-            if previous_is_cased:
-                return space.w_False
-            previous_is_cased = cased = True
-        elif unicodedb.islower(ord(uchar)):
-            if not previous_is_cased:
-                return space.w_False
-            previous_is_cased = cased = True
-        else:
-            previous_is_cased = False
-    return space.newbool(cased)
-
-def _strip(space, w_self, w_chars, left, right):
-    "internal function called by str_xstrip methods"
-    u_self = w_self._value
-    u_chars = w_chars._value
-
-    lpos = 0
-    rpos = len(u_self)
-
-    if left:
-        while lpos < rpos and u_self[lpos] in u_chars:
-           lpos += 1
-
-    if right:
-        while rpos > lpos and u_self[rpos - 1] in u_chars:
-           rpos -= 1
-
-    assert rpos >= 0
-    result = u_self[lpos: rpos]
-    return W_UnicodeObject(result)
-
-def _strip_none(space, w_self, left, right):
-    "internal function called by str_xstrip methods"
-    u_self = w_self._value
-
-    lpos = 0
-    rpos = len(u_self)
-
-    if left:
-        while lpos < rpos and _isspace(u_self[lpos]):
-           lpos += 1
-
-    if right:
-        while rpos > lpos and _isspace(u_self[rpos - 1]):
-           rpos -= 1
-
-    assert rpos >= 0
-    result = u_self[lpos: rpos]
-    return W_UnicodeObject(result)
-
-
-def unicode_capitalize__Unicode(space, w_self):
-    input = w_self._value
-    if len(input) == 0:
-        return W_UnicodeObject.EMPTY
-    builder = UnicodeBuilder(len(input))
-    builder.append(unichr(unicodedb.toupper(ord(input[0]))))
-    for i in range(1, len(input)):
-        builder.append(unichr(unicodedb.tolower(ord(input[i]))))
-    return W_UnicodeObject(builder.build())
-
-def unicode_title__Unicode(space, w_self):
-    input = w_self._value
-    if len(input) == 0:
-        return w_self
-
-    builder = UnicodeBuilder(len(input))
-    previous_is_cased = False
-    for i in range(len(input)):
-        unichar = ord(input[i])
-        if previous_is_cased:
-            builder.append(unichr(unicodedb.tolower(unichar)))
-        else:
-            builder.append(unichr(unicodedb.totitle(unichar)))
-        previous_is_cased = unicodedb.iscased(unichar)
-    return W_UnicodeObject(builder.build())
-
-def unicode_lower__Unicode(space, w_self):
-    input = w_self._value
-    builder = UnicodeBuilder(len(input))
-    for i in range(len(input)):
-        builder.append(unichr(unicodedb.tolower(ord(input[i]))))
-    return W_UnicodeObject(builder.build())
-
-def unicode_upper__Unicode(space, w_self):
-    input = w_self._value
-    builder = UnicodeBuilder(len(input))
-    for i in range(len(input)):
-        builder.append(unichr(unicodedb.toupper(ord(input[i]))))
-    return W_UnicodeObject(builder.build())
-
-def unicode_swapcase__Unicode(space, w_self):
-    input = w_self._value
-    builder = UnicodeBuilder(len(input))
-    for i in range(len(input)):
-        unichar = ord(input[i])
-        if unicodedb.islower(unichar):
-            builder.append(unichr(unicodedb.toupper(unichar)))
-        elif unicodedb.isupper(unichar):
-            builder.append(unichr(unicodedb.tolower(unichar)))
-        else:
-            builder.append(input[i])
-    return W_UnicodeObject(builder.build())
-
-def _normalize_index(length, index):
-    if index < 0:
-        index += length
-        if index < 0:
-            index = 0
-    elif index > length:
-        index = length
-    return index
-
-def _to_unichar_w(space, w_char):
-    try:
-        unistr = space.unicode_w(w_char)
-    except OperationError, e:
-        if e.match(space, space.w_TypeError):
-            msg = 'The fill character cannot be converted to Unicode'
-            raise OperationError(space.w_TypeError, space.wrap(msg))
-        else:
-            raise
-
-    if len(unistr) != 1:
-        raise OperationError(space.w_TypeError, space.wrap('The fill character must be exactly one character long'))
-    return unistr[0]
-
-def unicode_center__Unicode_ANY_ANY(space, w_self, w_width, w_fillchar):
-    self = w_self._value
-    width = space.int_w(w_width)
-    fillchar = _to_unichar_w(space, w_fillchar)
-    padding = width - len(self)
-    if padding < 0:
-        return w_self.create_if_subclassed()
-    leftpad = padding // 2 + (padding & width & 1)
-    result = [fillchar] * width
-    for i in range(len(self)):
-        result[leftpad + i] = self[i]
-    return W_UnicodeObject(u''.join(result))
-
-def unicode_ljust__Unicode_ANY_ANY(space, w_self, w_width, w_fillchar):
-    self = w_self._value
-    width = space.int_w(w_width)
-    fillchar = _to_unichar_w(space, w_fillchar)
-    padding = width - len(self)
-    if padding < 0:
-        return w_self.create_if_subclassed()
-    result = [fillchar] * width
-    for i in range(len(self)):
-        result[i] = self[i]
-    return W_UnicodeObject(u''.join(result))
-
-def unicode_rjust__Unicode_ANY_ANY(space, w_self, w_width, w_fillchar):
-    self = w_self._value
-    width = space.int_w(w_width)
-    fillchar = _to_unichar_w(space, w_fillchar)
-    padding = width - len(self)
-    if padding < 0:
-        return w_self.create_if_subclassed()
-    result = [fillchar] * width
-    for i in range(len(self)):
-        result[padding + i] = self[i]
-    return W_UnicodeObject(u''.join(result))
-
-def unicode_splitlines__Unicode_ANY(space, w_self, w_keepends):
-    self = w_self._value
-    keepends = 0
-    if space.int_w(w_keepends):
-        keepends = 1
-    if len(self) == 0:
-        return space.newlist([])
-
-    start = 0
-    end = len(self)
-    pos = 0
-    lines = []
-    while pos < end:
-        if unicodedb.islinebreak(ord(self[pos])):
-            if (self[pos] == u'\r' and pos + 1 < end and
-                self[pos + 1] == u'\n'):
-                # Count CRLF as one linebreak
-                lines.append(self[start:pos + keepends * 2])
-                pos += 1
-            else:
-                lines.append(self[start:pos + keepends])
-            pos += 1
-            start = pos
-        else:
-            pos += 1
-    if not unicodedb.islinebreak(ord(self[end - 1])):
-        lines.append(self[start:])
-    return space.newlist_unicode(lines)
-
-def unicode_split__Unicode_None_ANY(space, w_self, w_none, w_maxsplit):
-    maxsplit = space.int_w(w_maxsplit)
-    res = []
-    value = w_self._value
-    length = len(value)
-    i = 0
-    while True:
-        # find the beginning of the next word
-        while i < length:
-            if not _isspace(value[i]):
-                break   # found
-            i += 1
-        else:
-            break  # end of string, finished
-
-        # find the end of the word
-        if maxsplit == 0:
-            j = length   # take all the rest of the string
-        else:
-            j = i + 1
-            while j < length and not _isspace(value[j]):
-                j += 1
-            maxsplit -= 1   # NB. if it's already < 0, it stays < 0
-
-        # the word is value[i:j]
-        res.append(value[i:j])
-
-        # continue to look from the character following the space after the word
-        i = j + 1
-
-    return space.newlist_unicode(res)
-
-def unicode_split__Unicode_Unicode_ANY(space, w_self, w_delim, w_maxsplit):
-    self = w_self._value
-    delim = w_delim._value
-    maxsplit = space.int_w(w_maxsplit)
-    delim_len = len(delim)
-    if delim_len == 0:
-        raise OperationError(space.w_ValueError,
-                             space.wrap('empty separator'))
-    parts = split(self, delim, maxsplit)
-    return space.newlist_unicode(parts)
-
-
-def unicode_rsplit__Unicode_None_ANY(space, w_self, w_none, w_maxsplit):
-    maxsplit = space.int_w(w_maxsplit)
-    res = []
-    value = w_self._value
-    i = len(value)-1
-    while True:
-        # starting from the end, find the end of the next word
-        while i >= 0:
-            if not _isspace(value[i]):
-                break   # found
-            i -= 1
-        else:
-            break  # end of string, finished
-
-        # find the start of the word
-        # (more precisely, 'j' will be the space character before the word)
-        if maxsplit == 0:
-            j = -1   # take all the rest of the string
-        else:
-            j = i - 1
-            while j >= 0 and not _isspace(value[j]):
-                j -= 1
-            maxsplit -= 1   # NB. if it's already < 0, it stays < 0
-
-        # the word is value[j+1:i+1]
-        j1 = j + 1
-        assert j1 >= 0
-        res.append(value[j1:i+1])
-
-        # continue to look from the character before the space before the word
-        i = j - 1
-
-    res.reverse()
-    return space.newlist_unicode(res)
-
-def sliced(space, s, start, stop, orig_obj):
-    assert start >= 0
-    assert stop >= 0
-    if start == 0 and stop == len(s) and space.is_w(space.type(orig_obj), space.w_unicode):
-        return orig_obj
-    return space.wrap( s[start:stop])
-
-#unicode_rsplit__Unicode_Unicode_ANY = make_rsplit_with_delim('unicode_rsplit__Unicode_Unicode_ANY',
-#                                                             sliced)
-
-def _split_into_chars(self, maxsplit):
-    if maxsplit == 0:
-        return [self]
-    index = 0
-    end = len(self)
-    parts = [u'']
-    maxsplit -= 1
-    while maxsplit != 0:
-        if index >= end:
-            break
-        parts.append(self[index])
-        index += 1
-        maxsplit -= 1
-    parts.append(self[index:])
-    return parts
-
-def _split_with(self, with_, maxsplit=-1):
-    xxx # remove
-    parts = []
-    start = 0
-    end = len(self)
-    length = len(with_)
-    while maxsplit != 0:
-        index = self.find(with_, start, end)
-        if index < 0:
-            break
-        parts.append(self[start:index])
-        start = index + length
-        maxsplit -= 1
-    parts.append(self[start:])
-    return parts
-
-def unicode_replace__Unicode_Unicode_Unicode_ANY(space, w_self, w_old,
-                                                 w_new, w_maxsplit):
-    maxsplit = space.int_w(w_maxsplit)
-    try:
-        return W_UnicodeObject(
-                replace(w_self._value, w_old._value, w_new._value, maxsplit))
-    except OverflowError:
-        raise OperationError(
-            space.w_OverflowError,
-            space.wrap("replace string is too long"))
-
-def unicode_replace__Unicode_ANY_ANY_ANY(space, w_self, w_old, w_new,
-                                         w_maxsplit):
-    if not space.isinstance_w(w_old, space.w_unicode):
-        old = unicode(space.bufferstr_w(w_old))
-    else:
-        old = space.unicode_w(w_old)
-    if not space.isinstance_w(w_new, space.w_unicode):
-        new = unicode(space.bufferstr_w(w_new))
-    else:
-        new = space.unicode_w(w_new)
-    maxsplit = space.int_w(w_maxsplit)
-    try:
-        return W_UnicodeObject(replace(w_self._value, old, new, maxsplit))
-    except OverflowError:
-        raise OperationError(
-            space.w_OverflowError,
-            space.wrap("replace string is too long"))
-
-    return W_UnicodeObject(new.join(parts))
-
-
-def unicode_expandtabs__Unicode_ANY(space, w_self, tabsize):
-    xxx # remove
-    self = w_self._value
-    parts = _split_with(self, u'\t')
-    result = [parts[0]]
-    prevsize = 0
-    for ch in parts[0]:
-        prevsize += 1
-        if ch == u"\n" or ch ==  u"\r":
-            prevsize = 0
-    totalsize = prevsize
-
-    for i in range(1, len(parts)):
-        pad = tabsize - prevsize % tabsize
-        nextpart = parts[i]
-        try:
-            totalsize = ovfcheck(totalsize + pad)
-            totalsize = ovfcheck(totalsize + len(nextpart))
-            result.append(u' ' * pad)
-        except OverflowError:
-            raise OperationError(space.w_OverflowError, space.wrap('new string is too long'))
-        result.append(nextpart)
-        prevsize = 0
-        for ch in nextpart:
-            prevsize += 1
-            if ch in (u"\n", u"\r"):
-                prevsize = 0
-    return space.wrap(u''.join(result))
 
 _repr_function, _ = make_unicode_escape_function(
     pass_printable=False, unicode_output=False, quotes=True, prefix='u')
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy refactor-str-types: Cleanup.

Reply via email to