Author: Antonio Cuni <[email protected]>
Branch: py3k
Changeset: r56104:ad1b4b5cbb55
Date: 2012-07-17 14:18 +0200
http://bitbucket.org/pypy/pypy/changeset/ad1b4b5cbb55/

Log:    don't crash if we try to wrap a non-ascii byte string; this might
        still happen because e.g. exception messages are not unicode yet

diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py
--- a/pypy/objspace/std/objspace.py
+++ b/pypy/objspace/std/objspace.py
@@ -173,7 +173,24 @@
             else:
                 return self.newint(x)
         if isinstance(x, str):
-            return wrapunicode(self, x.decode('ascii'))
+            # this hack is temporary: look at the comment in
+            # test_stdobjspace.test_wrap_string
+            try:
+                unicode_x = x.decode('ascii')
+            except UnicodeDecodeError:
+                # poor man's x.decode('ascii', 'replace'), since it's not
+                # supported by RPython
+                if not we_are_translated():
+                    print 'WARNING: space.str() called on a non-ascii byte string: %r' % x
+                lst = []
+                for ch in x:
+                    ch = ord(ch)
+                    if ch > 127:
+                        lst.append(u'\ufffd')
+                    else:
+                        lst.append(unichr(ch))
+                unicode_x = u''.join(lst)
+            return wrapunicode(self, unicode_x)
         if isinstance(x, unicode):
             return wrapunicode(self, x)
         if isinstance(x, float):
diff --git a/pypy/objspace/std/test/test_stdobjspace.py b/pypy/objspace/std/test/test_stdobjspace.py
--- a/pypy/objspace/std/test/test_stdobjspace.py
+++ b/pypy/objspace/std/test/test_stdobjspace.py
@@ -91,3 +91,18 @@
         value = 200
         x = rffi.cast(rffi.UCHAR, value)
         assert space.eq_w(space.wrap(value), space.wrap(x))
+
+    def test_wrap_string(self):
+        from pypy.objspace.std.unicodeobject import W_UnicodeObject
+        w_x = self.space.wrap('foo')
+        assert isinstance(w_x, W_UnicodeObject)
+        assert w_x._value == u'foo'
+        #
+        # calling space.wrap() on a byte string which is not ASCII should
+        # never happen. However it might happen while the py3k port is not
+        # 100% complete. In the meantime, try to return something more or less
+        # sensible instead of crashing with an RPython UnicodeError.
+        from pypy.objspace.std.unicodeobject import W_UnicodeObject
+        w_x = self.space.wrap('foo\xF0')
+        assert isinstance(w_x, W_UnicodeObject)
+        assert w_x._value == u'foo\ufffd'
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to