Author: David Ripton <drip...@ripton.net>
Branch:
Changeset: r53366:e41fae0d7da3
Date: 2012-03-12 13:50 -0700
http://bitbucket.org/pypy/pypy/changeset/e41fae0d7da3/

Log: Use the default encoding in stringobject.unicode_w

Fixes issue1079, a problem in str.join with unicode arguments, and the default encoding set to utf8.

diff --git a/pypy/objspace/std/stringobject.py b/pypy/objspace/std/stringobject.py
--- a/pypy/objspace/std/stringobject.py
+++ b/pypy/objspace/std/stringobject.py
@@ -56,9 +56,18 @@
         return w_self._value
 
     def unicode_w(w_self, space):
-        # XXX should this use the default encoding?
-        from pypy.objspace.std.unicodetype import plain_str2unicode
-        return plain_str2unicode(space, w_self._value)
+        # Use the default encoding.
+        from pypy.objspace.std.unicodetype import unicode_from_string, \
+                decode_object
+        w_defaultencoding = space.call_function(space.sys.get(
+                                                'getdefaultencoding'))
+        from pypy.objspace.std.unicodetype import _get_encoding_and_errors, \
+            unicode_from_string, decode_object
+        encoding, errors = _get_encoding_and_errors(space, w_defaultencoding,
+                                                    space.w_None)
+        if encoding is None and errors is None:
+            return space.unicode_w(unicode_from_string(space, w_self))
+        return space.unicode_w(decode_object(space, w_self, encoding, errors))
 
 registerimplementation(W_StringObject)
 
diff --git a/pypy/objspace/std/test/test_stringobject.py b/pypy/objspace/std/test/test_stringobject.py
--- a/pypy/objspace/std/test/test_stringobject.py
+++ b/pypy/objspace/std/test/test_stringobject.py
@@ -501,6 +501,35 @@
         raises(TypeError, ''.join, [1])
         raises(TypeError, ''.join, [[1]])
 
+    def test_unicode_join_str_arg_ascii(self):
+        raises(UnicodeDecodeError, u''.join, ['\xc3\xa1'])
+
+    def test_unicode_join_str_arg_utf8(self):
+        # Need default encoding utf-8, but sys.setdefaultencoding
+        # is removed after startup.
+        import sys
+        old_encoding = sys.getdefaultencoding()
+
+        # Duplicate unittest.test_support.CleanImport logic because it won't
+        # import.
+        self.original_modules = sys.modules.copy()
+        for module_name in ['sys']:
+            if module_name in sys.modules:
+                module = sys.modules[module_name]
+                # It is possible that module_name is just an alias for
+                # another module (e.g. stub for modules renamed in 3.x).
+                # In that case, we also need delete the real module to clear
+                # the import cache.
+                if module.__name__ != module_name:
+                    del sys.modules[module.__name__]
+                del sys.modules[module_name]
+
+        import sys as temp_sys
+        temp_sys.setdefaultencoding('utf-8')
+        assert u''.join(['\xc3\xa1']) == u'\xe1'
+        temp_sys.setdefaultencoding(old_encoding)
+        sys.modules.update(self.original_modules)
+
     def test_unicode_join_endcase(self):
         # This class inserts a Unicode object into its argument's natural
         # iteration, in the 3rd position.
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
http://mail.python.org/mailman/listinfo/pypy-commit