Author: fijal
Branch: unicode-utf8
Changeset: r90542:e30fd70a9177
Date: 2017-03-04 21:23 +0100
http://bitbucket.org/pypy/pypy/changeset/e30fd70a9177/
Log: fix rsplit
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -123,6 +123,8 @@
return rutf8.compute_length_utf8(self._utf8)
def _val(self, space):
+ import pdb
+ pdb.set_trace()
return self._utf8.decode('utf8')
@staticmethod
diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py
--- a/rpython/rlib/rstring.py
+++ b/rpython/rlib/rstring.py
@@ -35,6 +35,8 @@
from rpython.rlib.rutf8 import next_codepoint_pos
if isutf8:
+ if pos == -1:
+ return 0
return next_codepoint_pos(s, pos)
else:
return pos + 1
@@ -44,6 +46,8 @@
from rpython.rlib.rutf8 import prev_codepoint_pos
if isutf8:
+ if pos == 0:
+ return -1
return prev_codepoint_pos(s, pos)
else:
return pos - 1
@@ -139,7 +143,7 @@
while True:
# starting from the end, find the end of the next word
while i >= 0:
- if not _isspace(value, i):
+ if not _isspace(value, i, isutf8):
break # found
i = _decr(value, i, isutf8)
else:
@@ -151,17 +155,17 @@
j = -1 # take all the rest of the string
else:
j = _decr(value, i, isutf8)
- while j >= 0 and not _isspace(value, j):
+ while j >= 0 and not _isspace(value, j, isutf8):
j = _decr(value, j, isutf8)
maxsplit -= 1 # NB. if it's already < 0, it stays < 0
# the word is value[j+1:i+1]
+ j1 = _incr(value, j, isutf8)
+ assert j1 >= 0
+ i1 = _incr(value, i, isutf8)
+ res.append(value[j1:i1])
if j < 0:
- j1 = 0
- else:
- j1 = _incr(value, j, isutf8)
- assert j1 >= 0
- res.append(value[j1:i+1])
+ break
# continue to look from the character before the space before the word
i = _decr(value, j, isutf8)
diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py
--- a/rpython/rlib/rutf8.py
+++ b/rpython/rlib/rutf8.py
@@ -77,10 +77,11 @@
def prev_codepoint_pos(code, pos):
""" Gives the position of the previous codepoint
"""
+ pos -= 1
chr1 = ord(code[pos])
if chr1 < 0x80:
- return pos - 1
- while ord(code[pos]) & 0xC0 == 0xC0:
+ return pos
+ while ord(code[pos]) & 0xC0 == 0x80:
pos -= 1
return pos
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit