Author: Armin Rigo <[email protected]>
Branch:
Changeset: r78495:8dd96fd0cca6
Date: 2015-07-08 13:29 +0200
http://bitbucket.org/pypy/pypy/changeset/8dd96fd0cca6/
Log: Elidable-ize the conversion from ascii string to unicode
diff --git a/pypy/objspace/std/unicodeobject.py
b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -6,7 +6,7 @@
from rpython.rlib.rstring import StringBuilder, UnicodeBuilder
from rpython.rlib.runicode import (
make_unicode_escape_function, str_decode_ascii, str_decode_utf_8,
- unicode_encode_ascii, unicode_encode_utf_8)
+ unicode_encode_ascii, unicode_encode_utf_8, fast_str_decode_ascii)
from pypy.interpreter import unicodehelper
from pypy.interpreter.baseobjspace import W_Root
@@ -481,9 +481,13 @@
if encoding == 'ascii':
# XXX error handling
s = space.charbuf_w(w_obj)
- eh = unicodehelper.decode_error_handler(space)
- return space.wrap(str_decode_ascii(
- s, len(s), None, final=True, errorhandler=eh)[0])
+ try:
+ u = fast_str_decode_ascii(s)
+ except ValueError:
+ eh = unicodehelper.decode_error_handler(space)
+ u = str_decode_ascii( # try again, to get the error right
+ s, len(s), None, final=True, errorhandler=eh)[0]
+ return space.wrap(u)
if encoding == 'utf-8':
s = space.charbuf_w(w_obj)
eh = unicodehelper.decode_error_handler(space)
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -1009,6 +1009,16 @@
result.append(r)
return result.build(), pos
+# An elidable version, for a subset of the cases
+@jit.elidable
+def fast_str_decode_ascii(s):
+ result = UnicodeBuilder(len(s))
+ for c in s:
+ if ord(c) >= 128:
+ raise ValueError
+ result.append(unichr(ord(c)))
+ return result.build()
+
# Specialize on the errorhandler when it's a constant
@specialize.arg_or_var(3)
diff --git a/rpython/rlib/test/test_runicode.py
b/rpython/rlib/test/test_runicode.py
--- a/rpython/rlib/test/test_runicode.py
+++ b/rpython/rlib/test/test_runicode.py
@@ -139,6 +139,12 @@
for encoding in "utf-8 latin-1 ascii".split():
self.checkdecode(chr(i), encoding)
+ def test_fast_str_decode_ascii(self):
+ u = runicode.fast_str_decode_ascii("abc\x00\x7F")
+ assert type(u) is unicode
+ assert u == u"abc\x00\x7F"
+ py.test.raises(ValueError, runicode.fast_str_decode_ascii, "ab\x80")
+
def test_all_first_256(self):
for i in range(256):
for encoding in ("utf-7 utf-8 latin-1 utf-16 utf-16-be utf-16-le "
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit