Author: Armin Rigo <ar...@tunes.org>
Branch:
Changeset: r73198:c71b5f944ccf
Date: 2014-08-30 08:29 +0200
http://bitbucket.org/pypy/pypy/changeset/c71b5f944ccf/
Log: Performance tweaks: may return the unicode object passed in as argument if it needs no encoding at all. diff --git a/pypy/module/_pypyjson/interp_encoder.py b/pypy/module/_pypyjson/interp_encoder.py --- a/pypy/module/_pypyjson/interp_encoder.py +++ b/pypy/module/_pypyjson/interp_encoder.py @@ -1,4 +1,6 @@ from rpython.rlib.rstring import StringBuilder +from rpython.rlib.runicode import str_decode_utf_8 +from pypy.interpreter import unicodehelper HEX = '0123456789abcdef' @@ -17,20 +19,39 @@ def raw_encode_basestring_ascii(space, w_string): if space.isinstance_w(w_string, space.w_str): s = space.str_w(w_string) - for c in s: + for i in range(len(s)): + c = s[i] if c >= ' ' and c <= '~' and c != '"' and c != '\\': pass else: + first = i break else: # the input is a string with only non-special ascii chars return w_string - w_string = space.call_method(w_string, 'decode', space.wrap('utf-8')) + eh = unicodehelper.decode_error_handler(space) + u = str_decode_utf_8( + s, len(s), None, final=True, errorhandler=eh, + allow_surrogates=True)[0] + sb = StringBuilder(len(u)) + sb.append_slice(s, 0, first) + else: + u = space.unicode_w(w_string) + for i in range(len(u)): + c = u[i] + if c >= u' ' and c <= u'~' and c != u'"' and c != u'\\': + pass + else: + break + else: + # the input is a unicode with only non-special ascii chars + return w_string + sb = StringBuilder(len(u)) + first = 0 - u = space.unicode_w(w_string) - sb = StringBuilder(len(u)) - for c in u: + for i in range(first, len(u)): + c = u[i] if c <= u'~': if c == u'"' or c == u'\\': sb.append('\\') diff --git a/pypy/module/_pypyjson/test/test__pypyjson.py b/pypy/module/_pypyjson/test/test__pypyjson.py --- a/pypy/module/_pypyjson/test/test__pypyjson.py +++ b/pypy/module/_pypyjson/test/test__pypyjson.py @@ -192,14 +192,14 @@ def test_raw_encode_basestring_ascii(self): import _pypyjson - def check(s): + def check(s, expected_type=str): s = _pypyjson.raw_encode_basestring_ascii(s) - assert type(s) is str + assert 
type(s) is expected_type return s assert check("") == "" - assert check(u"") == "" + assert check(u"", expected_type=unicode) == u"" assert check("abc ") == "abc " - assert check(u"abc ") == "abc " + assert check(u"abc ", expected_type=unicode) == u"abc " raises(UnicodeDecodeError, check, "\xc0") assert check("\xc2\x84") == "\\u0084" assert check("\xf0\x92\x8d\x85") == "\\ud808\\udf45" _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit