[pypy-commit] pypy default: Elidable-ize the conversion from ascii string to unicode

arigo Wed, 08 Jul 2015 04:31:00 -0700

Author: Armin Rigo <[email protected]>
Branch: 
Changeset: r78495:8dd96fd0cca6
Date: 2015-07-08 13:29 +0200
http://bitbucket.org/pypy/pypy/changeset/8dd96fd0cca6/


Log:    Elidable-ize the conversion from ascii string to unicode

diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -6,7 +6,7 @@
 from rpython.rlib.rstring import StringBuilder, UnicodeBuilder
 from rpython.rlib.runicode import (
     make_unicode_escape_function, str_decode_ascii, str_decode_utf_8,
-    unicode_encode_ascii, unicode_encode_utf_8)
+    unicode_encode_ascii, unicode_encode_utf_8, fast_str_decode_ascii)
 
 from pypy.interpreter import unicodehelper
 from pypy.interpreter.baseobjspace import W_Root
@@ -481,9 +481,13 @@
         if encoding == 'ascii':
             # XXX error handling
             s = space.charbuf_w(w_obj)
-            eh = unicodehelper.decode_error_handler(space)
-            return space.wrap(str_decode_ascii(
-                    s, len(s), None, final=True, errorhandler=eh)[0])
+            try:
+                u = fast_str_decode_ascii(s)
+            except ValueError:
+                eh = unicodehelper.decode_error_handler(space)
+                u = str_decode_ascii(     # try again, to get the error right
+                    s, len(s), None, final=True, errorhandler=eh)[0]
+            return space.wrap(u)
         if encoding == 'utf-8':
             s = space.charbuf_w(w_obj)
             eh = unicodehelper.decode_error_handler(space)
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -1009,6 +1009,16 @@
             result.append(r)
     return result.build(), pos
 
+# An elidable version, for a subset of the cases
+@jit.elidable
+def fast_str_decode_ascii(s):
+    result = UnicodeBuilder(len(s))
+    for c in s:
+        if ord(c) >= 128:
+            raise ValueError
+        result.append(unichr(ord(c)))
+    return result.build()
+
 
 # Specialize on the errorhandler when it's a constant
 @specialize.arg_or_var(3)
diff --git a/rpython/rlib/test/test_runicode.py b/rpython/rlib/test/test_runicode.py
--- a/rpython/rlib/test/test_runicode.py
+++ b/rpython/rlib/test/test_runicode.py
@@ -139,6 +139,12 @@
             for encoding in "utf-8 latin-1 ascii".split():
                 self.checkdecode(chr(i), encoding)
 
+    def test_fast_str_decode_ascii(self):
+        u = runicode.fast_str_decode_ascii("abc\x00\x7F")
+        assert type(u) is unicode
+        assert u == u"abc\x00\x7F"
+        py.test.raises(ValueError, runicode.fast_str_decode_ascii, "ab\x80")
+
     def test_all_first_256(self):
         for i in range(256):
             for encoding in ("utf-7 utf-8 latin-1 utf-16 utf-16-be utf-16-le "
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

[pypy-commit] pypy default: Elidable-ize the conversion from ascii string to unicode

Reply via email to