Author: Jeremy Thurgood <fir...@gmail.com> Branch: unicode-utf8 Changeset: r92634:2d6fe4fc14a3 Date: 2017-10-07 16:28 +0200 http://bitbucket.org/pypy/pypy/changeset/2d6fe4fc14a3/
Log: capitalize and {starts,ends}with. diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py --- a/pypy/objspace/std/stringmethods.py +++ b/pypy/objspace/std/stringmethods.py @@ -632,16 +632,13 @@ return space.w_True return space.w_False + # This is overridden in unicodeobject, but the two above are not. def _startswith(self, space, value, w_prefix, start, end): prefix = self._op_val(space, w_prefix) if start > len(value): - return self._starts_ends_overflow(prefix) + return False return startswith(value, prefix, start, end) - def _starts_ends_overflow(self, prefix): - return False # bug-to-bug compat: this is for strings and - # bytearrays, but overridden for unicodes - def descr_endswith(self, space, w_suffix, w_start=None, w_end=None): value, start, end, _ = self._convert_idx_params(space, w_start, w_end) if space.isinstance_w(w_suffix, space.w_tuple): @@ -655,10 +652,11 @@ return space.w_True return space.w_False + # This is overridden in unicodeobject, but the two above are not. def _endswith(self, space, value, w_prefix, start, end): prefix = self._op_val(space, w_prefix) if start > len(value): - return self._starts_ends_overflow(prefix) + return False return endswith(value, prefix, start, end) def _strip(self, space, w_chars, left, right, name='strip'): diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -6,8 +6,9 @@ from rpython.rlib.buffer import StringBuffer from rpython.rlib.mutbuffer import MutableStringBuffer from rpython.rlib.rarithmetic import ovfcheck -from rpython.rlib.rstring import StringBuilder, split, rsplit, UnicodeBuilder,\ - replace_count +from rpython.rlib.rstring import ( + StringBuilder, split, rsplit, UnicodeBuilder, replace_count, startswith, + endswith) from rpython.rlib.runicode import make_unicode_escape_function from rpython.rlib import rutf8, jit @@ -139,6 +140,10 @@ return True @staticmethod + def _op_utf8(space, w_other, strict=None): + return W_UnicodeObject.convert_arg_to_w_unicode(space, w_other, strict)._utf8 + + @staticmethod def _op_val(space, w_other, strict=None): return W_UnicodeObject.convert_arg_to_w_unicode(space, w_other, strict)._utf8.decode('utf8') @@ -520,8 +525,17 @@ i = rutf8.next_codepoint_pos(val, i) return space.newbool(cased) - def _starts_ends_overflow(self, prefix): - return len(prefix) == 0 + def _startswith(self, space, value, w_prefix, start, end): + prefix = self._op_utf8(space, w_prefix) + if start > len(value): + return len(prefix) == 0 + return startswith(value, prefix, start, end) + + def _endswith(self, space, value, w_prefix, start, end): + prefix = self._op_utf8(space, w_prefix) + if start > len(value): + return len(prefix) == 0 + return endswith(value, prefix, start, end) def descr_add(self, space, w_other): try: @@ -644,6 +658,21 @@ return space.newlist_utf8(res) + def descr_capitalize(self, space): + value = self._utf8 + if len(value) == 0: + return self._empty() + + builder = StringBuilder(len(value)) + uchar = rutf8.codepoint_at_pos(value, 0) + i = rutf8.next_codepoint_pos(value, 0) + rutf8.unichr_as_utf8_append(builder, unicodedb.toupper(uchar)) + while i < len(value): + uchar = rutf8.codepoint_at_pos(value, i) + i = rutf8.next_codepoint_pos(value, i) + rutf8.unichr_as_utf8_append(builder, unicodedb.tolower(uchar)) + return W_UnicodeObject(builder.build(), self._len()) + @unwrap_spec(width=int, w_fillchar=WrappedDefault(' ')) def descr_center(self, space, width, w_fillchar): value = self._utf8 _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit