Author: fijal
Branch: unicode-utf8
Changeset: r90542:e30fd70a9177
Date: 2017-03-04 21:23 +0100
http://bitbucket.org/pypy/pypy/changeset/e30fd70a9177/

Log: fix rsplit

diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -123,6 +123,8 @@
         return rutf8.compute_length_utf8(self._utf8)
 
     def _val(self, space):
+        import pdb
+        pdb.set_trace()
         return self._utf8.decode('utf8')
 
     @staticmethod
diff --git a/rpython/rlib/rstring.py b/rpython/rlib/rstring.py
--- a/rpython/rlib/rstring.py
+++ b/rpython/rlib/rstring.py
@@ -35,6 +35,8 @@
     from rpython.rlib.rutf8 import next_codepoint_pos
 
     if isutf8:
+        if pos == -1:
+            return 0
         return next_codepoint_pos(s, pos)
     else:
         return pos + 1
@@ -44,6 +46,8 @@
     from rpython.rlib.rutf8 import prev_codepoint_pos
 
     if isutf8:
+        if pos == 0:
+            return -1
         return prev_codepoint_pos(s, pos)
     else:
         return pos - 1
@@ -139,7 +143,7 @@
         while True:
             # starting from the end, find the end of the next word
             while i >= 0:
-                if not _isspace(value, i):
+                if not _isspace(value, i, isutf8):
                     break # found
                 i = _decr(value, i, isutf8)
             else:
@@ -151,17 +155,17 @@
                 j = -1 # take all the rest of the string
             else:
                 j = _decr(value, i, isutf8)
-                while j >= 0 and not _isspace(value, j):
+                while j >= 0 and not _isspace(value, j, isutf8):
                     j = _decr(value, j, isutf8)
                 maxsplit -= 1 # NB. if it's already < 0, it stays < 0
 
             # the word is value[j+1:i+1]
+            j1 = _incr(value, j, isutf8)
+            assert j1 >= 0
+            i1 = _incr(value, i, isutf8)
+            res.append(value[j1:i1])
             if j < 0:
-                j1 = 0
-            else:
-                j1 = _incr(value, j, isutf8)
-            assert j1 >= 0
-            res.append(value[j1:i+1])
+                break
 
             # continue to look from the character before the space before the word
             i = _decr(value, j, isutf8)
diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py
--- a/rpython/rlib/rutf8.py
+++ b/rpython/rlib/rutf8.py
@@ -77,10 +77,11 @@
 def prev_codepoint_pos(code, pos):
     """ Gives the position of the previous codepoint
     """
+    pos -= 1
     chr1 = ord(code[pos])
     if chr1 < 0x80:
-        return pos - 1
-    while ord(code[pos]) & 0xC0 == 0xC0:
+        return pos
+    while ord(code[pos]) & 0xC0 == 0x80:
         pos -= 1
     return pos
 
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit