Author: Armin Rigo
Branch:
Changeset: r93692:84180176fef1
Date: 2018-01-21 09:58 +0100
http://bitbucket.org/pypy/pypy/changeset/84180176fef1/
Log: PyUnicode_AsUTF{16,32}String()
diff --git a/pypy/module/cpyext/stubs.py b/pypy/module/cpyext/stubs.py
--- a/pypy/module/cpyext/stubs.py
+++ b/pypy/module/cpyext/stubs.py
@@ -1552,14 +1552,6 @@
"""
raise NotImplementedError
-@cpython_api([PyObject], PyObject)
-def PyUnicode_AsUTF32String(space, unicode):
-"""Return a Python string using the UTF-32 encoding in native byte order. The
-string always starts with a BOM mark. Error handling is "strict". Return
-NULL if an exception was raised by the codec.
-"""
-raise NotImplementedError
-
@cpython_api([rffi.CCHARP, Py_ssize_t, rffi.CCHARP, rffi.INTP, Py_ssize_t], PyObject)
def PyUnicode_DecodeUTF16Stateful(space, s, size, errors, byteorder, consumed):
"""If consumed is NULL, behave like PyUnicode_DecodeUTF16(). If
@@ -1595,13 +1587,6 @@
changes in your code for properly supporting 64-bit systems."""
raise NotImplementedError
-@cpython_api([PyObject], PyObject)
-def PyUnicode_AsUTF16String(space, unicode):
-"""Return a Python string using the UTF-16 encoding in native byte order. The
-string always starts with a BOM mark. Error handling is "strict". Return
-NULL if an exception was raised by the codec."""
-raise NotImplementedError
-
@cpython_api([rffi.CCHARP, Py_ssize_t, rffi.CCHARP], PyObject)
def PyUnicode_DecodeUTF7(space, s, size, errors):
"""Create a Unicode object by decoding size bytes of the UTF-7 encoded string
diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py
--- a/pypy/module/cpyext/test/test_unicodeobject.py
+++ b/pypy/module/cpyext/test/test_unicodeobject.py
@@ -145,6 +145,20 @@
res = module.test_unicode_format(1, "xyz")
assert res == u"bla 1 ble xyz\n"
+def test_AsUTFNString(self):
+module = self.import_extension('foo', [
+("asutf8", "METH_O", "return PyUnicode_AsUTF8String(args);"),
+("asutf16", "METH_O", "return PyUnicode_AsUTF16String(args);"),
+("asutf32", "METH_O", "return PyUnicode_AsUTF32String(args);"),
+])
+u = u'sp\x09m\u1234\U00012345'
+s = module.asutf8(u)
+assert s == u.encode('utf-8')
+s = module.asutf16(u)
+assert s == u.encode('utf-16')
+s = module.asutf32(u)
+assert s == u.encode('utf-32')
+
class TestUnicode(BaseApiTest):
def test_unicodeobject(self, space):
@@ -247,10 +261,24 @@
lltype.free(ar, flavor='raw')
def test_AsUTF8String(self, space):
-w_u = space.wrap(u'sp\x09m')
+w_u = space.wrap(u'sp\x09m\u1234')
w_res = PyUnicode_AsUTF8String(space, w_u)
assert space.type(w_res) is space.w_bytes
-assert space.unwrap(w_res) == 'sp\tm'
+assert space.unwrap(w_res) == 'sp\tm\xe1\x88\xb4'
+
+def test_AsUTF16String(self, space):
+u = u'sp\x09m\u1234\U00012345'
+w_u = space.wrap(u)
+w_res = PyUnicode_AsUTF16String(space, w_u)
+assert space.type(w_res) is space.w_bytes
+assert space.unwrap(w_res) == u.encode('utf-16')
+
+def test_AsUTF32String(self, space):
+u = u'sp\x09m\u1234\U00012345'
+w_u = space.wrap(u)
+w_res = PyUnicode_AsUTF32String(space, w_u)
+assert space.type(w_res) is space.w_bytes
+assert space.unwrap(w_res) == u.encode('utf-32')
def test_decode_utf8(self, space):
u = rffi.str2charp(u'sp\x134m'.encode("utf-8"))
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -474,7 +474,7 @@
ref[0] = rffi.cast(PyObject, py_newuni)
return 0
-def make_conversion_functions(suffix, encoding):
+def make_conversion_functions(suffix, encoding, only_for_asstring=False):
@cpython_api([PyObject], PyObject)
@func_renamer('PyUnicode_As%sString' % suffix)
def PyUnicode_AsXXXString(space, w_unicode):
@@ -486,6 +486,9 @@
return unicodeobject.encode_object(space, w_unicode, encoding, "strict")
globals()['PyUnicode_As%sString' % suffix] = PyUnicode_AsXXXString
+if only_for_asstring:
+return
+
@cpython_api([CONST_STRING, Py_ssize_t, CONST_STRING], PyObject)
@func_renamer('PyUnicode_Decode%s' % suffix)
def PyUnicode_DecodeXXX(space, s, size, errors):
@@ -516,6 +519,8 @@
globals()['PyUnicode_Encode%s' % suffix] = PyUnicode_EncodeXXX
make_conversion_functions('UTF8', 'utf-8')
+make_conversion_functions('UTF16', 'utf-16', only_for_asstring=True)
+make_conversion_functions('UTF32', 'utf-32', only_for_asstring=True)
make_conversion_functions('ASCII', 'ascii')
make_conversion_functions('Latin1', 'latin-1')
if sys.platform == 'win32':