[pypy-commit] pypy unicode-utf8: expandtabs and [is]title.

jerith Sat, 07 Oct 2017 06:12:12 -0700

Author: Jeremy Thurgood <fir...@gmail.com>
Branch: unicode-utf8
Changeset: r92631:842f2cbd6d78
Date: 2017-10-07 14:54 +0200
http://bitbucket.org/pypy/pypy/changeset/842f2cbd6d78/


Log:    expandtabs and [is]title.

diff --git a/pypy/objspace/std/test/test_unicodeobject.py 
b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -230,6 +230,7 @@
         assert u"bROWN fOX".title() == u"Brown Fox"
         assert u"Brown Fox".title() == u"Brown Fox"
         assert u"bro!wn fox".title() == u"Bro!Wn Fox"
+        assert u"brow\u4321n fox".title() == u"Brow\u4321N Fox"
 
     def test_istitle(self):
         assert u"".istitle() == False
diff --git a/pypy/objspace/std/unicodeobject.py 
b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -5,6 +5,7 @@
     enforceargs, newlist_hint, specialize, we_are_translated)
 from rpython.rlib.buffer import StringBuffer
 from rpython.rlib.mutbuffer import MutableStringBuffer
+from rpython.rlib.rarithmetic import ovfcheck
 from rpython.rlib.rstring import StringBuilder, split, rsplit, UnicodeBuilder,\
      replace_count
 from rpython.rlib.runicode import make_unicode_escape_function
@@ -349,6 +350,28 @@
     def descr_rmod(self, space, w_values):
         return mod_format(space, w_values, self, do_unicode=True)
 
+    def descr_title(self, space):
+        if len(self._utf8) == 0:
+            return self
+        return W_UnicodeObject(self.title(self._utf8), self._len())
+
+    @jit.elidable
+    def title(self, value):
+        input = self._utf8
+        builder = StringBuilder(len(input))
+        i = 0
+        previous_is_cased = False
+        while i < len(input):
+            ch = rutf8.codepoint_at_pos(input, i)
+            i = rutf8.next_codepoint_pos(input, i)
+            if not previous_is_cased:
+                ch = unicodedb.totitle(ch)
+            else:
+                ch = unicodedb.tolower(ch)
+            rutf8.unichr_as_utf8_append(builder, ch)
+            previous_is_cased = unicodedb.iscased(ch)
+        return builder.build()
+
     def descr_translate(self, space, w_table):
         input = self._utf8
         result = StringBuilder(len(input))
@@ -389,6 +412,30 @@
                                                     w_errors)
         return encode_object(space, self, encoding, errors)
 
+    @unwrap_spec(tabsize=int)
+    def descr_expandtabs(self, space, tabsize=8):
+        value = self._utf8
+        if not value:
+            return self._empty()
+
+        splitted = value.split('\t')
+
+        try:
+            if tabsize > 0:
+                ovfcheck(len(splitted) * tabsize)
+        except OverflowError:
+            raise oefmt(space.w_OverflowError, "new string is too long")
+        expanded = oldtoken = splitted.pop(0)
+        newlen = self._len() - len(splitted)
+
+        for token in splitted:
+            dist = self._tabindent(oldtoken, tabsize)
+            expanded += ' ' * dist + token
+            newlen += dist
+            oldtoken = token
+
+        return W_UnicodeObject(expanded, newlen)
+
     _StringMethods_descr_join = descr_join
     def descr_join(self, space, w_list):
         l = space.listview_unicode(w_list)
@@ -438,6 +485,27 @@
             i = rutf8.next_codepoint_pos(val, i)
         return space.newbool(cased)
 
+    def descr_istitle(self, space):
+        cased = False
+        previous_is_cased = False
+        val = self._utf8
+        i = 0
+        while i < len(val):
+            uchar = rutf8.codepoint_at_pos(val, i)
+            i = rutf8.next_codepoint_pos(val, i)
+            if unicodedb.isupper(uchar) or unicodedb.istitle(uchar):
+                if previous_is_cased:
+                    return space.w_False
+                previous_is_cased = True
+                cased = True
+            elif unicodedb.islower(uchar):
+                if not previous_is_cased:
+                    return space.w_False
+                cased = True
+            else:
+                previous_is_cased = False
+        return space.newbool(cased)
+
     def descr_isupper(self, space):
         cased = False
         i = 0
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy unicode-utf8: expandtabs and [is]title.

Reply via email to