Author: Ronan Lamy <ronan.l...@gmail.com>
Branch: py3.5
Changeset: r92406:e27c61e1a09a
Date: 2017-09-15 21:58 +0100
http://bitbucket.org/pypy/pypy/changeset/e27c61e1a09a/

Log: Add inefficient implementation of PyUnicode_FromKindAndData()

diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py
--- a/pypy/module/cpyext/test/test_unicodeobject.py
+++ b/pypy/module/cpyext/test/test_unicodeobject.py
@@ -154,6 +154,42 @@
         res = module.test_unicode_format(1, "xyz")
         assert res == "bla 1 ble xyz\n"
 
+    def test_fromkind(self):
+        module = self.import_extension('foo', [
+            ('from_ucs1', 'METH_O',
+             """
+             char* p;
+             Py_ssize_t size;
+             if (PyBytes_AsStringAndSize(args, &p, &size) < 0)
+                return NULL;
+             return PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, p, size);
+             """),
+            ('from_ucs2', 'METH_O',
+             """
+             char* p;
+             Py_ssize_t size;
+             if (PyBytes_AsStringAndSize(args, &p, &size) < 0)
+                return NULL;
+             return PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, p, size/2);
+             """),
+            ('from_ucs4', 'METH_O',
+             """
+             char* p;
+             Py_ssize_t size;
+             if (PyBytes_AsStringAndSize(args, &p, &size) < 0)
+                return NULL;
+             return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, p, size/4);
+             """)])
+        res = module.from_ucs1(b'spam')
+        assert res == 'spam'
+        s = "späm"
+        b = s.encode('utf-16')[2:]  # Skip the BOM
+        s2 = module.from_ucs2(b)
+        assert module.from_ucs2(b) == s
+        s = "x\N{PILE OF POO}x"
+        b = s.encode('utf-32')[4:]  # Skip the BOM
+        assert module.from_ucs4(b) == s
+
     def test_aswidecharstring(self):
         module = self.import_extension('foo', [
             ("aswidecharstring", "METH_O",
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -13,7 +13,8 @@
     PyObject, PyObjectP, Py_DecRef, make_ref, from_ref, track_reference,
     make_typedescr, get_typedescr, as_pyobj)
 from pypy.module.cpyext.bytesobject import PyBytes_Check, PyBytes_FromObject
-from pypy.module._codecs.interp_codecs import CodecState
+from pypy.module._codecs.interp_codecs import (
+    CodecState, latin_1_decode, utf_16_decode, utf_32_decode)
 from pypy.objspace.std import unicodeobject
 from rpython.rlib import rstring, runicode
 from rpython.tool.sourcetools import func_renamer
@@ -34,7 +35,7 @@
                    dealloc=unicode_dealloc,
                    realize=unicode_realize)
 
-# Buffer for the default encoding (used by PyUnicde_GetDefaultEncoding)
+# Buffer for the default encoding (used by PyUnicode_GetDefaultEncoding)
 DEFAULT_ENCODING_SIZE = 100
 default_encoding = lltype.malloc(rffi.CCHARP.TO, DEFAULT_ENCODING_SIZE,
                                  flavor='raw', zero=True)
@@ -307,6 +308,26 @@
     set_ready(py_obj, 1)
     return 0
 
+@cts.decl("""PyObject* PyUnicode_FromKindAndData(
+        int kind, const void *buffer, Py_ssize_t size)""")
+def PyUnicode_FromKindAndData(space, kind, data, size):
+    if size < 0:
+        raise oefmt(space.w_ValueError, "size must be positive")
+    if kind == _1BYTE_KIND:
+        value = rffi.charpsize2str(data, size)
+        w_res = latin_1_decode(space, value, w_final=space.w_False)
+    elif kind == _2BYTE_KIND:
+        value = rffi.charpsize2str(data, 2 * size)
+        w_res = utf_16_decode(space, value, w_final=space.w_False)
+    elif kind == _4BYTE_KIND:
+        value = rffi.charpsize2str(data, 4 * size)
+        w_res = utf_32_decode(space, value, w_final=space.w_False)
+    else:
+        raise oefmt(space.w_SystemError, "invalid kind")
+    w_ret = space.unpackiterable(w_res)[0]
+    _PyUnicode_Ready(space, w_ret)
+    return w_ret
+
 @cts.decl("Py_UNICODE * PyUnicode_AsUnicodeAndSize(PyObject *unicode, Py_ssize_t *size)")
 def PyUnicode_AsUnicodeAndSize(space, ref, psize):
     """Return a read-only pointer to the Unicode object's internal Py_UNICODE
_______________________________________________
pypy-commit mailing list
pypy-commit@python.org
https://mail.python.org/mailman/listinfo/pypy-commit