Zaur Shibzukhov, 01.03.2013 08:37: > unicode_slice.h > --------------------- > > #include "unicodeobject.h" > > static inline PyObject* unicode_slice( > PyObject* text, Py_ssize_t start, Py_ssize_t stop); > > /////////////// PyUnicode_Substring /////////////// > > /* CURRENT */ > > static inline PyObject* unicode_slice( > PyObject* text, Py_ssize_t start, Py_ssize_t stop) { > Py_ssize_t length; > #if CYTHON_PEP393_ENABLED > if (PyUnicode_READY(text) == -1) return NULL; > length = PyUnicode_GET_LENGTH(text); > #else > length = PyUnicode_GET_SIZE(text); > #endif > if (start < 0) { > start += length; > if (start < 0) > start = 0; > } > if (stop < 0) > stop += length; > else if (stop > length) > stop = length; > length = stop - start; > if (length <= 0) > return PyUnicode_FromUnicode(NULL, 0); > #if CYTHON_PEP393_ENABLED > return PyUnicode_FromKindAndData(PyUnicode_KIND(text), > PyUnicode_1BYTE_DATA(text) + start*PyUnicode_KIND(text), stop-start); > #else > return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(text)+start, > stop-start); > #endif > } > > static inline PyObject* unicode_slice2( > PyObject* text, Py_ssize_t start, Py_ssize_t stop, int flag); > > /////////////// PyUnicode_Substring /////////////// > > /* CHANGED */ > > static inline PyObject* unicode_slice2( > PyObject* text, Py_ssize_t start, Py_ssize_t stop, int flag) { > Py_ssize_t length; > > #if CYTHON_PEP393_ENABLED > if (PyUnicode_READY(text) == -1) return NULL; > #endif > > if (flag) { > #if CYTHON_PEP393_ENABLED > length = PyUnicode_GET_LENGTH(text); > #else > length = PyUnicode_GET_SIZE(text); > #endif > if (start < 0) { > start += length; > if (start < 0) > start = 0; > } > if (stop < 0) > stop += length; > else if (stop > length) > stop = length; > length = stop - start; > if (length <= 0) > return PyUnicode_FromUnicode(NULL, 0); > } > > #if CYTHON_PEP393_ENABLED > return PyUnicode_FromKindAndData(PyUnicode_KIND(text), > PyUnicode_1BYTE_DATA(text) + start*PyUnicode_KIND(text), stop-start); > #else 
> return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(text)+start, > stop-start); > #endif > } > > unicode_slice.pyx > ------------------------ > > cdef extern from 'unicode_slice.h': > inline unicode unicode_slice(unicode ustring, int start, int stop) > inline unicode unicode_slice2(unicode ustring, int start, int > stop, int flag) > > cdef unicode text = u"abcdefghigklmnopqrstuvwxyzabcdefghigklmnopqrstuvwxyz" > > cdef long f_1(unicode text): > cdef int i, j > cdef int n = len(text) > cdef int val > cdef long S = 0 > > for j in range(100000): > for i in range(n): > val = len(unicode_slice(text, 0, i)) > S += val * j > > return S > > cdef long f_2(unicode text): > cdef int i, j > cdef int n = len(text) > cdef int val > cdef long S = 0 > > for j in range(100000): > for i in range(n): > val = len(unicode_slice2(text, 0, i, 0)) > S += val * j > > return S > > > def test_1(): > f_1(text) > > def test_2(): > f_2(text) > > Here are timings: > > (py33) zbook:mytests $ python3.3 -m timeit -n 50 -r 5 -s "from > mytests.unicode_slice import test_1" "test_1()" > 50 loops, best of 5: 534 msec per loop > (py33) zbook:mytests $ python3.3 -m timeit -n 50 -r 5 -s "from > mytests.unicode_slice import test_2" "test_2()" > 50 loops, best of 5: 523 msec per loop > > Only 2%
That's to be expected. Creating the Unicode string object is by far the dominant cost here: it involves memory allocation, object type selection and so on, so skipping the index normalisation can only save a small fraction of the total time. Stefan _______________________________________________ cython-devel mailing list cython-devel@python.org http://mail.python.org/mailman/listinfo/cython-devel