Author: fijal
Branch: unicode-utf8
Changeset: r92605:82feb3298ec4
Date: 2017-10-05 13:46 +0200
http://bitbucket.org/pypy/pypy/changeset/82feb3298ec4/

Log:    try to use utf8 index storage for getitem

diff --git a/pypy/objspace/std/unicodeobject.py 
b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -32,14 +32,12 @@
     _immutable_fields_ = ['_utf8']
 
     @enforceargs(utf8str=str)
-    def __init__(self, utf8str, length, ucs4str=None):
+    def __init__(self, utf8str, length):
         assert isinstance(utf8str, str)
         assert length >= 0
-        if ucs4str is not None:
-            assert isinstance(ucs4str, unicode)
         self._utf8 = utf8str
         self._length = length
-        self._ucs4 = ucs4str
+        self._index_storage = None
         if not we_are_translated():
             assert rutf8.check_utf8(utf8str, allow_surrogates=True) == length
 
@@ -636,12 +634,15 @@
     descr_rmul = descr_mul
 
     def _getitem_result(self, space, index):
-        if self._ucs4 is None:
-            self._ucs4 = self._utf8.decode('utf-8')
-        try:
-            return W_UnicodeObject(self._ucs4[index].encode('utf-8'), 1)
-        except IndexError:
+        if index >= self._length:
             raise oefmt(space.w_IndexError, "string index out of range")
+        if self._index_storage is None:
+            self._index_storage = rutf8.create_utf8_index_storage(self._utf8,
+                self._length)
+        start = rutf8.codepoint_position_at_index(self._utf8,
+            self._index_storage, index)
+        end = rutf8.next_codepoint_pos(self._utf8, start)
+        return W_UnicodeObject(self._utf8[start:end], 1)
 
     @unwrap_spec(width=int, w_fillchar=WrappedDefault(' '))
     def descr_rjust(self, space, width, w_fillchar):
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to