Zaur Shibzukhov, 01.03.2013 08:37:
> unicode_slice.h
> ---------------------
>
> #include "unicodeobject.h"
>
> static inline PyObject* unicode_slice(
> PyObject* text, Py_ssize_t start, Py_ssize_t stop);
>
> /////////////// PyUnicode_Substring ///////////////
>
> /* CURRENT */
>
> static inline PyObject* unicode_slice(
>         PyObject* text, Py_ssize_t start, Py_ssize_t stop) {
>     /* Build a new unicode object holding text[start:stop].
>      * Negative indices are taken relative to the end of the text and the
>      * bounds are clamped before the copy; returns NULL if PyUnicode_READY
>      * reports a failure. */
>     Py_ssize_t text_len, slice_len;
> #if CYTHON_PEP393_ENABLED
>     if (PyUnicode_READY(text) == -1)
>         return NULL;
>     text_len = PyUnicode_GET_LENGTH(text);
> #else
>     text_len = PyUnicode_GET_SIZE(text);
> #endif
>     /* A negative start counts from the end and never goes below 0. */
>     if (start < 0) {
>         start += text_len;
>         start = (start < 0) ? 0 : start;
>     }
>     /* A negative stop counts from the end; a large one is capped. */
>     if (stop < 0) {
>         stop += text_len;
>     } else if (stop > text_len) {
>         stop = text_len;
>     }
>     slice_len = stop - start;
>     if (slice_len <= 0) {
>         /* Empty or inverted range: request a zero-length object. */
>         return PyUnicode_FromUnicode(NULL, 0);
>     }
> #if CYTHON_PEP393_ENABLED
>     /* The byte offset is the character index times the kind (unit size). */
>     return PyUnicode_FromKindAndData(
>         PyUnicode_KIND(text),
>         PyUnicode_1BYTE_DATA(text) + start * PyUnicode_KIND(text),
>         slice_len);
> #else
>     return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(text) + start,
>                                  slice_len);
> #endif
> }
>
> static inline PyObject* unicode_slice2(
> PyObject* text, Py_ssize_t start, Py_ssize_t stop, int flag);
>
> /////////////// PyUnicode_Substring ///////////////
>
> /* CHANGED */
>
> static inline PyObject* unicode_slice2(
>         PyObject* text, Py_ssize_t start, Py_ssize_t stop, int flag) {
>     /* Variant of unicode_slice with optional index normalisation.
>      * When flag is non-zero, start/stop are wrapped and clamped exactly as
>      * in unicode_slice; when flag is zero they are used as given, with no
>      * checks at all. Returns NULL if PyUnicode_READY reports a failure. */
>     Py_ssize_t text_len;
>
> #if CYTHON_PEP393_ENABLED
>     if (PyUnicode_READY(text) == -1)
>         return NULL;
> #endif
>
>     if (flag != 0) {
> #if CYTHON_PEP393_ENABLED
>         text_len = PyUnicode_GET_LENGTH(text);
> #else
>         text_len = PyUnicode_GET_SIZE(text);
> #endif
>         /* A negative start counts from the end and never goes below 0. */
>         if (start < 0) {
>             start += text_len;
>             start = (start < 0) ? 0 : start;
>         }
>         /* A negative stop counts from the end; a large one is capped. */
>         if (stop < 0) {
>             stop += text_len;
>         } else if (stop > text_len) {
>             stop = text_len;
>         }
>         /* Empty or inverted range: request a zero-length object. */
>         if (stop - start <= 0)
>             return PyUnicode_FromUnicode(NULL, 0);
>     }
>
> #if CYTHON_PEP393_ENABLED
>     /* The byte offset is the character index times the kind (unit size). */
>     return PyUnicode_FromKindAndData(
>         PyUnicode_KIND(text),
>         PyUnicode_1BYTE_DATA(text) + start * PyUnicode_KIND(text),
>         stop - start);
> #else
>     return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(text) + start,
>                                  stop - start);
> #endif
> }
>
> unicode_slice.pyx
> ------------------------
>
> cdef extern from 'unicode_slice.h':
>     # start/stop are declared as Py_ssize_t so that they match the C
>     # prototypes in unicode_slice.h and are not narrowed to int.
>     inline unicode unicode_slice(unicode ustring, Py_ssize_t start,
>                                  Py_ssize_t stop)
>     inline unicode unicode_slice2(unicode ustring, Py_ssize_t start,
>                                   Py_ssize_t stop, int flag)
>
> # Benchmark input: a 52-character unicode string sliced by f_1 and f_2.
> cdef unicode text = u"abcdefghigklmnopqrstuvwxyzabcdefghigklmnopqrstuvwxyz"
>
> cdef long f_1(unicode text):
>     # Benchmark loop for unicode_slice: slices text[0:end] for every end
>     # below len(text), 100000 times over, and folds the slice lengths
>     # into a sum that is returned.
>     cdef int end, rnd
>     cdef int text_len = len(text)
>     cdef int piece_len
>     cdef long total = 0
>
>     for rnd in range(100000):
>         for end in range(text_len):
>             piece_len = len(unicode_slice(text, 0, end))
>             total += piece_len * rnd
>
>     return total
>
> cdef long f_2(unicode text):
>     # Benchmark loop for unicode_slice2 with flag=0: slices text[0:end]
>     # for every end below len(text), 100000 times over, and folds the
>     # slice lengths into a sum that is returned.
>     cdef int end, rnd
>     cdef int text_len = len(text)
>     cdef int piece_len
>     cdef long total = 0
>
>     for rnd in range(100000):
>         for end in range(text_len):
>             piece_len = len(unicode_slice2(text, 0, end, 0))
>             total += piece_len * rnd
>
>     return total
>
>
> def test_1():
>     # Python-visible entry point for timeit: runs f_1 on the module-level
>     # text and discards the result.
>     f_1(text)
>
> def test_2():
>     # Python-visible entry point for timeit: runs f_2 on the module-level
>     # text and discards the result.
>     f_2(text)
>
> Here are timings:
>
> (py33) zbook:mytests $ python3.3 -m timeit -n 50 -r 5 -s "from
> mytests.unicode_slice import test_1" "test_1()"
> 50 loops, best of 5: 534 msec per loop
> (py33) zbook:mytests $ python3.3 -m timeit -n 50 -r 5 -s "from
> mytests.unicode_slice import test_2" "test_2()"
> 50 loops, best of 5: 523 msec per loop
>
> Only 2%
That's to be expected. Creating a Unicode string object is by far the
dominant operation here, including memory allocation, object type
selection and whatnot.
Stefan
_______________________________________________
cython-devel mailing list
[email protected]
http://mail.python.org/mailman/listinfo/cython-devel