Stefan Behnel schrieb:
> Christian Heimes wrote:
>> In Python 3.x the PyUnicode object keeps a default encoded PyString in
>> its struct. You can use PyUnicode_AsStringAndSize() to get a UTF-8
>> encoded char* from a PyUnicode object without worrying about memory
>> management.
> 
> Interesting. What is a "default encoding"?

The default encoding of Python 3.0 is UTF-8.

From Objects/unicodeobject.c:

/*
 * Return the default-encoded (UTF-8) string object for `unicode`,
 * creating it on first use and caching it in the object's `defenc` slot.
 *
 * unicode: cast to PyUnicodeObject * without a type check in this
 *          function, so the caller is responsible for passing a unicode
 *          object.
 * errors:  must be NULL; a non-NULL value is reported through
 *          Py_FatalError().
 *
 * Returns the object held in `defenc`, or NULL when
 * PyUnicode_EncodeUTF8() fails (in which case nothing is cached).
 * No reference count is adjusted here: the returned pointer is the very
 * same one stored in `defenc`.
 * NOTE(review): callers presumably must treat the result as borrowed --
 * confirm against the owning object's deallocation code.
 */
PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode,
                                            const char *errors)
{
    PyObject *v = ((PyUnicodeObject *)unicode)->defenc;

    /* Fast path: the encoded form was already computed and cached. */
    if (v)
        return v;

    /* A string literal may not contain a raw newline, so the message is
       written as two adjacent literals that the compiler concatenates. */
    if (errors != NULL) {
        Py_FatalError("non-NULL encoding in "
                      "_PyUnicode_AsDefaultEncodedString");
    }

    v = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
                             PyUnicode_GET_SIZE(unicode),
                             NULL);
    if (!v)
        return NULL;

    /* Cache the result so later calls take the fast path above. */
    ((PyUnicodeObject *)unicode)->defenc = v;
    return v;
}

/*
 * Return a pointer to the character data of the default-encoded string
 * that _PyUnicode_AsDefaultEncodedString() yields for `unicode`.
 *
 * unicode: object to convert; if PyUnicode_Check() rejects it,
 *          PyErr_BadArgument() is called and NULL is returned.
 * psize:   optional out-parameter; when non-NULL it receives
 *          PyString_GET_SIZE() of the encoded string.
 *
 * Returns NULL on a bad argument or when the encoded string cannot be
 * obtained. The buffer is not copied: the pointer comes straight from
 * PyString_AS_STRING() on the encoded string object.
 */
char*
PyUnicode_AsStringAndSize(PyObject *unicode, Py_ssize_t *psize)
{
    PyObject *encoded = NULL;

    /* Only fetch the encoded form for genuine unicode objects; otherwise
       flag the bad argument and fall through with encoded == NULL. */
    if (PyUnicode_Check(unicode))
        encoded = _PyUnicode_AsDefaultEncodedString(unicode, NULL);
    else
        PyErr_BadArgument();

    if (!encoded)
        return NULL;

    if (psize)
        *psize = PyString_GET_SIZE(encoded);

    return PyString_AS_STRING(encoded);
}
_______________________________________________
Cython-dev mailing list
[email protected]
http://codespeak.net/mailman/listinfo/cython-dev

Reply via email to