Stefan Behnel schrieb:
> Christian Heimes wrote:
>> In Python 3.x the PyUnicode object keeps a default encoded PyString in
>> its struct. You can use PyUnicode_AsStringAndSize() to get an UTF8
>> encoded char* from a PyUnicode object without worrying about memory
>> management.
>
> Interesting. What is a "default encoding"?
The default encoding of Python 3.0 is UTF-8.
From Objects/unicodeobject.c:
/*
 * Return the default-encoded (UTF-8) form of a unicode object, creating it
 * and caching it in the object's `defenc` slot on first use.
 *
 * unicode: the object to encode.  It is cast to PyUnicodeObject * without
 *          any type check here, so the caller must pass a unicode object.
 * errors:  must be NULL.  A non-NULL value triggers Py_FatalError(), but
 *          only on the path where no cached encoding exists yet; a cached
 *          value is returned before `errors` is inspected.
 *
 * Returns the object held in `defenc`, or NULL if PyUnicode_EncodeUTF8()
 * fails (in which case nothing is cached).  No Py_INCREF is performed on the
 * result in this function -- NOTE(review): presumably the caller receives a
 * borrowed reference kept alive by `unicode`; confirm against callers.
 */
PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode,
                                            const char *errors)
{
    PyObject *v = ((PyUnicodeObject *)unicode)->defenc;
    if (v) {
        return v;
    }
    if (errors != NULL) {
        /* Written as adjacent literals so the message stays one line at
           runtime; a string literal may not contain a raw newline. */
        Py_FatalError("non-NULL encoding in "
                      "_PyUnicode_AsDefaultEncodedString");
    }
    v = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
                             PyUnicode_GET_SIZE(unicode),
                             NULL);
    if (!v) {
        return NULL;
    }
    /* Remember the encoded object so later calls take the early return. */
    ((PyUnicodeObject *)unicode)->defenc = v;
    return v;
}
/*
 * Expose the default-encoded form of a unicode object as a char buffer.
 *
 * unicode: object to convert; if PyUnicode_Check() rejects it,
 *          PyErr_BadArgument() is called and NULL is returned.
 * psize:   optional out-parameter; when non-NULL it receives
 *          PyString_GET_SIZE() of the encoded object.  It is left untouched
 *          on failure.
 *
 * Returns PyString_AS_STRING() of the object obtained from
 * _PyUnicode_AsDefaultEncodedString(unicode, NULL), or NULL if that call
 * returns NULL.
 */
char *
PyUnicode_AsStringAndSize(PyObject *unicode, Py_ssize_t *psize)
{
    PyObject *encoded = NULL;

    if (PyUnicode_Check(unicode)) {
        encoded = _PyUnicode_AsDefaultEncodedString(unicode, NULL);
    }
    else {
        /* Wrong argument type: raise and fall through to the NULL return. */
        PyErr_BadArgument();
    }

    if (encoded == NULL) {
        return NULL;
    }
    if (psize != NULL) {
        *psize = PyString_GET_SIZE(encoded);
    }
    return PyString_AS_STRING(encoded);
}
_______________________________________________
Cython-dev mailing list
[email protected]
http://codespeak.net/mailman/listinfo/cython-dev