Author: Tyler Wade <way...@gmail.com> Branch: utf8-unicode2 Changeset: r73348:19502a3ecd04 Date: 2014-08-28 23:48 -0500 http://bitbucket.org/pypy/pypy/changeset/19502a3ecd04/
Log: Fix MBCS codecs on Windows diff --git a/pypy/interpreter/test/test_utf8_codecs.py b/pypy/interpreter/test/test_utf8_codecs.py --- a/pypy/interpreter/test/test_utf8_codecs.py +++ b/pypy/interpreter/test/test_utf8_codecs.py @@ -727,7 +727,7 @@ def test_mbcs_encode_force_replace(self): if sys.platform != 'win32': py.test.skip("mbcs encoding is win32-specific") - u = u'@test_2224_tmp-?L??\udc80' + u = Utf8Str.from_unicode(u'@test_2224_tmp-?L??\udc80') encoder = self.getencoder('mbcs') assert encoder(u, len(u), 'strict') == '@test_2224_tmp-?L???' py.test.raises(UnicodeEncodeError, encoder, u, len(u), 'strict', diff --git a/pypy/interpreter/utf8.py b/pypy/interpreter/utf8.py --- a/pypy/interpreter/utf8.py +++ b/pypy/interpreter/utf8.py @@ -9,12 +9,18 @@ wchar_rint = rffi.r_int +wchar_ruint = rffi.r_uint WCHAR_INTP = rffi.INTP +WCHAR_UINTP = rffi.UINTP WCHAR_INT = rffi.INT +WCHAR_UINT = rffi.UINT if rffi.sizeof(rffi.WCHAR_T) == 2: wchar_rint = rffi.r_short + wchar_ruint = rffi.r_ushort WCHAR_INTP = rffi.SHORTP + WCHAR_UINTP = rffi.USHORTP WCHAR_INT = rffi.SHORT + WCHAR_UINT = rffi.USHORT def utf8chr(value): @@ -541,11 +547,11 @@ if c > 0xFFFF: length += 1 - array = lltype.malloc(WCHAR_INTP.TO, length, flavor='raw', + array = lltype.malloc(WCHAR_UINTP.TO, length, flavor='raw', track_allocation=track_allocation) self.copy_to_wcharp(array, 0, length) - array[length - 1] = wchar_rint(0) + array[length - 1] = wchar_ruint(0) array = rffi.cast(rffi.CWCHARP, array) return array @@ -560,12 +566,12 @@ if rffi.sizeof(rffi.WCHAR_T) == 2: c1, c2 = create_surrogate_pair(c) - dst[i + dststart] = wchar_rint(c1) + dst[i + dststart] = wchar_ruint(c1) if c2: i += 1 - dst[i + dststart] = wchar_rint(c2) + dst[i + dststart] = wchar_ruint(c2) else: - dst[i + dststart] = wchar_rint(c) + dst[i + dststart] = wchar_ruint(c) i += 1 @@ -574,7 +580,7 @@ @staticmethod def from_wcharp(wcharp): - array = rffi.cast(WCHAR_INTP, wcharp) + array = rffi.cast(WCHAR_UINTP, wcharp) builder = Utf8Builder() i = 0; while True: @@ -602,7 +608,7 @@ @staticmethod def from_wcharpn(wcharp, size): - array = rffi.cast(WCHAR_INTP, wcharp) + array = rffi.cast(WCHAR_UINTP, wcharp) builder = Utf8Builder() i = 0; while i < size: @@ -630,7 +636,7 @@ @staticmethod def from_wcharpsize(wcharp, size): - array = rffi.cast(WCHAR_INTP, wcharp) + array = rffi.cast(WCHAR_UINTP, wcharp) builder = Utf8Builder() i = 0; while i < size: diff --git a/pypy/interpreter/utf8_codecs.py b/pypy/interpreter/utf8_codecs.py --- a/pypy/interpreter/utf8_codecs.py +++ b/pypy/interpreter/utf8_codecs.py @@ -1483,7 +1483,9 @@ if MultiByteToWideChar(CP_ACP, flags, dataptr, size, buf.raw, usize) == 0: _decode_mbcs_error(s, errorhandler) - return buf.str(usize), size + # TODO Is this cast necessary for rpython static-typing? + #return Utf8Str.from_wcharp(rffi.cast(rffi.CWCHARP, buf.raw)), size + return Utf8Str.from_wcharpsize(buf.raw, size), size def unicode_encode_mbcs(s, size, errors, errorhandler=None, force_replace=True): @@ -1507,7 +1509,7 @@ used_default_p[0] = rffi.cast(rwin32.BOOL, False) try: - with rffi.scoped_nonmoving_unicodebuffer(s) as dataptr: + with s.scoped_wcharp_copy() as dataptr: # first get the size of the result mbcssize = WideCharToMultiByte(CP_ACP, flags, dataptr, size, None, 0, _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit