Author: Armin Rigo <ar...@tunes.org>
Branch:
Changeset: r93692:84180176fef1
Date: 2018-01-21 09:58 +0100
http://bitbucket.org/pypy/pypy/changeset/84180176fef1/
Log: PyUnicode_AsUTF{16,32}String() diff --git a/pypy/module/cpyext/stubs.py b/pypy/module/cpyext/stubs.py --- a/pypy/module/cpyext/stubs.py +++ b/pypy/module/cpyext/stubs.py @@ -1552,14 +1552,6 @@ """ raise NotImplementedError -@cpython_api([PyObject], PyObject) -def PyUnicode_AsUTF32String(space, unicode): - """Return a Python string using the UTF-32 encoding in native byte order. The - string always starts with a BOM mark. Error handling is "strict". Return - NULL if an exception was raised by the codec. - """ - raise NotImplementedError - @cpython_api([rffi.CCHARP, Py_ssize_t, rffi.CCHARP, rffi.INTP, Py_ssize_t], PyObject) def PyUnicode_DecodeUTF16Stateful(space, s, size, errors, byteorder, consumed): """If consumed is NULL, behave like PyUnicode_DecodeUTF16(). If @@ -1595,13 +1587,6 @@ changes in your code for properly supporting 64-bit systems.""" raise NotImplementedError -@cpython_api([PyObject], PyObject) -def PyUnicode_AsUTF16String(space, unicode): - """Return a Python string using the UTF-16 encoding in native byte order. The - string always starts with a BOM mark. Error handling is "strict". 
Return - NULL if an exception was raised by the codec.""" - raise NotImplementedError - @cpython_api([rffi.CCHARP, Py_ssize_t, rffi.CCHARP], PyObject) def PyUnicode_DecodeUTF7(space, s, size, errors): """Create a Unicode object by decoding size bytes of the UTF-7 encoded string diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py --- a/pypy/module/cpyext/test/test_unicodeobject.py +++ b/pypy/module/cpyext/test/test_unicodeobject.py @@ -145,6 +145,20 @@ res = module.test_unicode_format(1, "xyz") assert res == u"bla 1 ble xyz\n" + def test_AsUTFNString(self): + module = self.import_extension('foo', [ + ("asutf8", "METH_O", "return PyUnicode_AsUTF8String(args);"), + ("asutf16", "METH_O", "return PyUnicode_AsUTF16String(args);"), + ("asutf32", "METH_O", "return PyUnicode_AsUTF32String(args);"), + ]) + u = u'sp\x09m\u1234\U00012345' + s = module.asutf8(u) + assert s == u.encode('utf-8') + s = module.asutf16(u) + assert s == u.encode('utf-16') + s = module.asutf32(u) + assert s == u.encode('utf-32') + class TestUnicode(BaseApiTest): def test_unicodeobject(self, space): @@ -247,10 +261,24 @@ lltype.free(ar, flavor='raw') def test_AsUTF8String(self, space): - w_u = space.wrap(u'sp\x09m') + w_u = space.wrap(u'sp\x09m\u1234') w_res = PyUnicode_AsUTF8String(space, w_u) assert space.type(w_res) is space.w_bytes - assert space.unwrap(w_res) == 'sp\tm' + assert space.unwrap(w_res) == 'sp\tm\xe1\x88\xb4' + + def test_AsUTF16String(self, space): + u = u'sp\x09m\u1234\U00012345' + w_u = space.wrap(u) + w_res = PyUnicode_AsUTF16String(space, w_u) + assert space.type(w_res) is space.w_bytes + assert space.unwrap(w_res) == u.encode('utf-16') + + def test_AsUTF32String(self, space): + u = u'sp\x09m\u1234\U00012345' + w_u = space.wrap(u) + w_res = PyUnicode_AsUTF32String(space, w_u) + assert space.type(w_res) is space.w_bytes + assert space.unwrap(w_res) == u.encode('utf-32') def test_decode_utf8(self, space): u = 
rffi.str2charp(u'sp\x134m'.encode("utf-8")) diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py --- a/pypy/module/cpyext/unicodeobject.py +++ b/pypy/module/cpyext/unicodeobject.py @@ -474,7 +474,7 @@ ref[0] = rffi.cast(PyObject, py_newuni) return 0 -def make_conversion_functions(suffix, encoding): +def make_conversion_functions(suffix, encoding, only_for_asstring=False): @cpython_api([PyObject], PyObject) @func_renamer('PyUnicode_As%sString' % suffix) def PyUnicode_AsXXXString(space, w_unicode): @@ -486,6 +486,9 @@ return unicodeobject.encode_object(space, w_unicode, encoding, "strict") globals()['PyUnicode_As%sString' % suffix] = PyUnicode_AsXXXString + if only_for_asstring: + return + @cpython_api([CONST_STRING, Py_ssize_t, CONST_STRING], PyObject) @func_renamer('PyUnicode_Decode%s' % suffix) def PyUnicode_DecodeXXX(space, s, size, errors): @@ -516,6 +519,8 @@ globals()['PyUnicode_Encode%s' % suffix] = PyUnicode_EncodeXXX make_conversion_functions('UTF8', 'utf-8') +make_conversion_functions('UTF16', 'utf-16', only_for_asstring=True) +make_conversion_functions('UTF32', 'utf-32', only_for_asstring=True) make_conversion_functions('ASCII', 'ascii') make_conversion_functions('Latin1', 'latin-1') if sys.platform == 'win32': _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit