Re: [Cython] About IndexNode and unicode[index]
Zaur Shibzukhov, 01.03.2013 08:37: > unicode_slice.h > - > > #include "unicodeobject.h" > > static inline PyObject* unicode_slice( > PyObject* text, Py_ssize_t start, Py_ssize_t stop); > > /// PyUnicode_Substring /// > > /* CURRENT */ > > static inline PyObject* unicode_slice( > PyObject* text, Py_ssize_t start, Py_ssize_t stop) { > Py_ssize_t length; > #if CYTHON_PEP393_ENABLED > if (PyUnicode_READY(text) == -1) return NULL; > length = PyUnicode_GET_LENGTH(text); > #else > length = PyUnicode_GET_SIZE(text); > #endif > if (start < 0) { > start += length; > if (start < 0) > start = 0; > } > if (stop < 0) > stop += length; > else if (stop > length) > stop = length; > length = stop - start; > if (length <= 0) > return PyUnicode_FromUnicode(NULL, 0); > #if CYTHON_PEP393_ENABLED > return PyUnicode_FromKindAndData(PyUnicode_KIND(text), > PyUnicode_1BYTE_DATA(text) + start*PyUnicode_KIND(text), stop-start); > #else > return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(text)+start, > stop-start); > #endif > } > > static inline PyObject* unicode_slice2( > PyObject* text, Py_ssize_t start, Py_ssize_t stop, int flag); > > /// PyUnicode_Substring /// > > /* CHANGED */ > > static inline PyObject* unicode_slice2( > PyObject* text, Py_ssize_t start, Py_ssize_t stop, int flag) { > Py_ssize_t length; > > #if CYTHON_PEP393_ENABLED > if (PyUnicode_READY(text) == -1) return NULL; > #endif > > if (flag) { > #if CYTHON_PEP393_ENABLED > length = PyUnicode_GET_LENGTH(text); > #else > length = PyUnicode_GET_SIZE(text); > #endif > if (start < 0) { > start += length; > if (start < 0) > start = 0; > } > if (stop < 0) > stop += length; > else if (stop > length) > stop = length; > length = stop - start; > if (length <= 0) > return PyUnicode_FromUnicode(NULL, 0); > } > > #if CYTHON_PEP393_ENABLED > return PyUnicode_FromKindAndData(PyUnicode_KIND(text), > PyUnicode_1BYTE_DATA(text) + start*PyUnicode_KIND(text), stop-start); > #else > return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(text)+start, > 
stop-start); > #endif > } > > unicode_slice.pyx > > > cdef extern from 'unicode_slice.h': > inline unicode unicode_slice(unicode ustring, int start, int stop) > inline unicode unicode_slice2(unicode ustring, int start, int > stop, int flag) > > cdef unicode text = u"abcdefghigklmnopqrstuvwxyzabcdefghigklmnopqrstuvwxyz" > > cdef long f_1(unicode text): > cdef int i, j > cdef int n = len(text) > cdef int val > cdef long S = 0 > > for j in range(10): > for i in range(n): > val = len(unicode_slice(text, 0, i)) > S += val * j > > return S > > cdef long f_2(unicode text): > cdef int i, j > cdef int n = len(text) > cdef int val > cdef long S = 0 > > for j in range(10): > for i in range(n): > val = len(unicode_slice2(text, 0, i, 0)) > S += val * j > > return S > > > def test_1(): > f_1(text) > > def test_2(): > f_2(text) > > Here are timings: > > (py33) zbook:mytests $ python3.3 -m timeit -n 50 -r 5 -s "from > mytests.unicode_slice import test_1" "test_1()" > 50 loops, best of 5: 534 msec per loop > (py33) zbook:mytests $ python3.3 -m timeit -n 50 -r 5 -s "from > mytests.unicode_slice import test_2" "test_2()" > 50 loops, best of 5: 523 msec per loop > > Only 2% That's to be expected. Creating a Unicode string object is the highly dominating operation here, including memory allocation, object type selection and what not. Stefan ___ cython-devel mailing list cython-devel@python.org http://mail.python.org/mailman/listinfo/cython-devel
Re: [Cython] About IndexNode and unicode[index]
2013/3/1 ZS : > 2013/3/1 Stefan Behnel : >> ZS, 28.02.2013 21:07: >>> 2013/2/28 Stefan Behnel: > This allows to write unicode text parsing code almost at C speed > mostly in python (+ .pxd defintions). I suggest simply adding a constant flag argument to the existing function that states if checking should be done or not. Inlining will let the C compiler drop the corresponding code, which may or may nor make it a little faster. >>> >>> static inline Py_UCS4 unicode_char2(PyObject* ustring, Py_ssize_t i, int >>> flag) { >>> Py_ssize_t length; >>> #if CYTHON_PEP393_ENABLED >>> if (PyUnicode_READY(ustring) < 0) return (Py_UCS4)-1; >>> #endif >>> if (flag) { >>> length = __Pyx_PyUnicode_GET_LENGTH(ustring); >>> if ((0 <= i) & (i < length)) { >>> return __Pyx_PyUnicode_READ_CHAR(ustring, i); >>> } else if ((-length <= i) & (i < 0)) { >>> return __Pyx_PyUnicode_READ_CHAR(ustring, i + length); >>> } else { >>> PyErr_SetString(PyExc_IndexError, "string index out of range"); >>> return (Py_UCS4)-1; >>> } >>> } else { >>> return __Pyx_PyUnicode_READ_CHAR(ustring, i); >>> } >>> } >> >> I think you could even pass in two flags, one for wraparound and one for >> boundscheck, and then just evaluate them appropriately in the existing "if" >> tests above. That should allow both features to be supported independently >> in a fast way. >> >> >>> Here are timings: >>> >>> (py33) zbook:mytests $ python3.3 -m timeit -n 50 -r 5 -s "from >>> mytests.unicode_index import test_1" "test_1()" >>> 50 loops, best of 5: 152 msec per loop >>> (py33) zbook:mytests $ python3.3 -m timeit -n 50 -r 5 -s "from >>> mytests.unicode_index import test_2" "test_2()" >>> 50 loops, best of 5: 86.5 msec per loop >>> (py33) zbook:mytests $ python3.3 -m timeit -n 50 -r 5 -s "from >>> mytests.unicode_index import test_3" "test_3()" >>> 50 loops, best of 5: 86.5 msec per loop >>> >>> So your suggestion would be preferable. >> >> Nice. Yes, looks like it' worth it. 
>> > > Sure that same could be applied to unicode slicing too. > I had to verify myself first. So here is the test... unicode_slice.h - #include "unicodeobject.h" static inline PyObject* unicode_slice( PyObject* text, Py_ssize_t start, Py_ssize_t stop); /// PyUnicode_Substring /// /* CURRENT */ static inline PyObject* unicode_slice( PyObject* text, Py_ssize_t start, Py_ssize_t stop) { Py_ssize_t length; #if CYTHON_PEP393_ENABLED if (PyUnicode_READY(text) == -1) return NULL; length = PyUnicode_GET_LENGTH(text); #else length = PyUnicode_GET_SIZE(text); #endif if (start < 0) { start += length; if (start < 0) start = 0; } if (stop < 0) stop += length; else if (stop > length) stop = length; length = stop - start; if (length <= 0) return PyUnicode_FromUnicode(NULL, 0); #if CYTHON_PEP393_ENABLED return PyUnicode_FromKindAndData(PyUnicode_KIND(text), PyUnicode_1BYTE_DATA(text) + start*PyUnicode_KIND(text), stop-start); #else return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(text)+start, stop-start); #endif } static inline PyObject* unicode_slice2( PyObject* text, Py_ssize_t start, Py_ssize_t stop, int flag); /// PyUnicode_Substring /// /* CHANGED */ static inline PyObject* unicode_slice2( PyObject* text, Py_ssize_t start, Py_ssize_t stop, int flag) { Py_ssize_t length; #if CYTHON_PEP393_ENABLED if (PyUnicode_READY(text) == -1) return NULL; #endif if (flag) { #if CYTHON_PEP393_ENABLED length = PyUnicode_GET_LENGTH(text); #else length = PyUnicode_GET_SIZE(text); #endif if (start < 0) { start += length; if (start < 0) start = 0; } if (stop < 0) stop += length; else if (stop > length) stop = length; length = stop - start; if (length <= 0) return PyUnicode_FromUnicode(NULL, 0); } #if CYTHON_PEP393_ENABLED return PyUnicode_FromKindAndData(PyUnicode_KIND(text), PyUnicode_1BYTE_DATA(text) + start*PyUnicode_KIND(text), stop-start); #else return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(text)+start, stop-start); #endif } unicode_slice.pyx cdef extern from 'unicode_slice.h': inline 
unicode unicode_slice(unicode ustring, int start, int stop) inline unicode unicode_slice2(unicode ustring, int start, int stop, int flag) cdef unicode text = u"abcdefghigklmnopqrstuvwxyzabcdefghigklmnopqrstuvwxyz" cdef long f_1(unicode text): cdef int i, j cdef int n = len(text) cdef int val cdef long S = 0 for j in range(10): for i in range(n): val = len(unicode_slice(text, 0, i)) S += val
Re: [Cython] Be more forgiving about memoryview strides
On Thu, Feb 28, 2013 at 11:12 AM, Nathaniel Smith wrote: > On Thu, Feb 28, 2013 at 5:50 PM, Robert Bradshaw wrote: >> On Thu, Feb 28, 2013 at 7:13 AM, Sebastian Berg >> wrote: >>> Hey, >>> >>> Maybe someone here already saw it (I don't have a track account, or I >>> would just create a ticket), but it would be nice if Cython was more >>> forgiving about contiguous requirements on strides. In the future this >>> would make it easier for numpy to go forward with changing the >>> contiguous flags to be more reasonable for its purpose, and second also >>> to allow old (and maybe for the moment remaining) corner cases in numpy >>> to slip past (as well as possibly the same for other programs...). An >>> example is (see also https://github.com/numpy/numpy/issues/2956 and the >>> PR linked there for more details): >>> >>> def add_one(array): >>> cdef double[::1] a = array >>> a[0] += 1. >>> return array >>> >>> giving: >>> >> add_one(np.ascontiguousarray(np.arange(10.)[::100])) >>> ValueError: Buffer and memoryview are not contiguous in the same >>> dimension. >>> >>> This could easily be changed if MemoryViews check the strides as "can be >>> interpreted as contiguous". That means that if shape[i] == 1, then >>> strides[i] are arbitrary (you can just change them if you like). This is >>> also the case for 0-sized arrays, which are arguably always contiguous, >>> no matter their strides are! >> >> I was under the impression that the primary value for contiguous is >> that it a foo[::1] can be interpreted as a foo*. Letting strides be >> arbitrary completely breaks this, right? > > Nope. The natural definition of "C contiguous" is "the array entries > are arranged in memory in the same way they would be if they were a > multidimensional C array" (i.e., what you said.) But it turns out that > this is *not* the definition that numpy and cython use! 
> > The issue is that the above definition is a constraint on the actual > locations of items in memory, i.e., given a shape, it tells you that > for every index, > (a) sum(index * strides) == sum(index * cumprod(shape[::-1])[::-1] * > itemsize) > Obviously this equality holds if > (b) strides == cumprod(shape[::-1])[::-1] * itemsize > (Or for F-contiguity, we have > (b') strides == cumprod(shape) * itemsize > ) > > (a) is the natural definition of "C contiguous". (b) is the definition > of "C contiguous" used by numpy and cython. (b) implies (a). But (a) > does not imply (b), i.e., there are arrays that are C-contiguous which > numpy and cython think are discontiguous. (Also in numpy there are > some weird cases where numpy accidentally uses the correct definition, > I think, which is the point of Sebastian's example.) > > In particular, if shape[i] == 1, then the value of stride[i] really > should be irrelevant to judging contiguity, because the only thing you > can do with strides[i] is multiply it by index[i], and if shape[i] == > 1 then index[i] is always 0. So an array of int8's with shape = (10, > 1), strides = (1, 73) is contiguous according to (a), but not > according to (b). Also if shape[i] is 0 for any i, then the entire > contents of the strides array becomes irrelevant to judging > contiguity; all zero-sized arrays are contiguous according to (a), but > not (b). Thanks for clarifying. Yes, I think it makes a lot of sense to loosen our definition for Cython. Internally, I think the only way we use this assumption is in not requiring that the first/final index be multiplied by the stride, which should be totally fine. But this merits closer inspection as there may be something else. > (This is really annoying for numpy because given, say, a column vector > with shape (n, 1), it is impossible to be both C- and F-contiguous > according to the (b)-style definition. 
But people expect > various operations to preserve C versus F contiguity, so there are > heuristics in numpy that try to guess whether various result arrays > should pretend to be C- or F-contiguous, and we don't even have a > consistent idea of what it would mean for this code to be working > correctly, never mind test it and keep it working. OTOH if we just fix > numpy to use the (a) definition, then it turns out a bunch of > third-party code breaks, like, for example, cython.) Can you give some examples? - Robert ___ cython-devel mailing list cython-devel@python.org http://mail.python.org/mailman/listinfo/cython-devel
Re: [Cython] About IndexNode and unicode[index]
>>> >>> I think you could even pass in two flags, one for wraparound and one for >>> boundscheck, and then just evaluate them appropriately in the existing "if" >>> tests above. That should allow both features to be supported independently >>> in a fast way. >>> >> Intresting, could C compilers in optimization mode to eliminate unused >> evaluation path in nested if statements with constant conditional >> expressions? > > They'd be worthless if they didn't do that. (Even Cython does it, BTW.) > Then it can simplify writing utility code in order to support different optimization flags in other cases too. ___ cython-devel mailing list cython-devel@python.org http://mail.python.org/mailman/listinfo/cython-devel
Re: [Cython] About IndexNode and unicode[index]
ZS, 01.03.2013 07:43: > 2013/3/1 Stefan Behnel: >> ZS, 28.02.2013 21:07: >>> 2013/2/28 Stefan Behnel: > This allows to write unicode text parsing code almost at C speed > mostly in python (+ .pxd defintions). I suggest simply adding a constant flag argument to the existing function that states if checking should be done or not. Inlining will let the C compiler drop the corresponding code, which may or may nor make it a little faster. >>> >>> static inline Py_UCS4 unicode_char2(PyObject* ustring, Py_ssize_t i, int >>> flag) { >>> Py_ssize_t length; >>> #if CYTHON_PEP393_ENABLED >>> if (PyUnicode_READY(ustring) < 0) return (Py_UCS4)-1; >>> #endif >>> if (flag) { >>> length = __Pyx_PyUnicode_GET_LENGTH(ustring); >>> if ((0 <= i) & (i < length)) { >>> return __Pyx_PyUnicode_READ_CHAR(ustring, i); >>> } else if ((-length <= i) & (i < 0)) { >>> return __Pyx_PyUnicode_READ_CHAR(ustring, i + length); >>> } else { >>> PyErr_SetString(PyExc_IndexError, "string index out of range"); >>> return (Py_UCS4)-1; >>> } >>> } else { >>> return __Pyx_PyUnicode_READ_CHAR(ustring, i); >>> } >>> } >> >> I think you could even pass in two flags, one for wraparound and one for >> boundscheck, and then just evaluate them appropriately in the existing "if" >> tests above. That should allow both features to be supported independently >> in a fast way. >> > Intresting, could C compilers in optimization mode to eliminate unused > evaluation path in nested if statements with constant conditional > expressions? They'd be worthless if they didn't do that. (Even Cython does it, BTW.) Stefan ___ cython-devel mailing list cython-devel@python.org http://mail.python.org/mailman/listinfo/cython-devel
Re: [Cython] About IndexNode and unicode[index]
2013/3/1 Stefan Behnel : > ZS, 28.02.2013 21:07: >> 2013/2/28 Stefan Behnel: This allows to write unicode text parsing code almost at C speed mostly in python (+ .pxd defintions). >>> >>> I suggest simply adding a constant flag argument to the existing function >>> that states if checking should be done or not. Inlining will let the C >>> compiler drop the corresponding code, which may or may nor make it a little >>> faster. >> >> static inline Py_UCS4 unicode_char2(PyObject* ustring, Py_ssize_t i, int >> flag) { >> Py_ssize_t length; >> #if CYTHON_PEP393_ENABLED >> if (PyUnicode_READY(ustring) < 0) return (Py_UCS4)-1; >> #endif >> if (flag) { >> length = __Pyx_PyUnicode_GET_LENGTH(ustring); >> if ((0 <= i) & (i < length)) { >> return __Pyx_PyUnicode_READ_CHAR(ustring, i); >> } else if ((-length <= i) & (i < 0)) { >> return __Pyx_PyUnicode_READ_CHAR(ustring, i + length); >> } else { >> PyErr_SetString(PyExc_IndexError, "string index out of range"); >> return (Py_UCS4)-1; >> } >> } else { >> return __Pyx_PyUnicode_READ_CHAR(ustring, i); >> } >> } > > I think you could even pass in two flags, one for wraparound and one for > boundscheck, and then just evaluate them appropriately in the existing "if" > tests above. That should allow both features to be supported independently > in a fast way. > Intresting, could C compilers in optimization mode to eliminate unused evaluation path in nested if statements with constant conditional expressions? ___ cython-devel mailing list cython-devel@python.org http://mail.python.org/mailman/listinfo/cython-devel
Re: [Cython] About IndexNode and unicode[index]
2013/3/1 Stefan Behnel : > ZS, 28.02.2013 21:07: >> 2013/2/28 Stefan Behnel: This allows to write unicode text parsing code almost at C speed mostly in python (+ .pxd defintions). >>> >>> I suggest simply adding a constant flag argument to the existing function >>> that states if checking should be done or not. Inlining will let the C >>> compiler drop the corresponding code, which may or may nor make it a little >>> faster. >> >> static inline Py_UCS4 unicode_char2(PyObject* ustring, Py_ssize_t i, int >> flag) { >> Py_ssize_t length; >> #if CYTHON_PEP393_ENABLED >> if (PyUnicode_READY(ustring) < 0) return (Py_UCS4)-1; >> #endif >> if (flag) { >> length = __Pyx_PyUnicode_GET_LENGTH(ustring); >> if ((0 <= i) & (i < length)) { >> return __Pyx_PyUnicode_READ_CHAR(ustring, i); >> } else if ((-length <= i) & (i < 0)) { >> return __Pyx_PyUnicode_READ_CHAR(ustring, i + length); >> } else { >> PyErr_SetString(PyExc_IndexError, "string index out of range"); >> return (Py_UCS4)-1; >> } >> } else { >> return __Pyx_PyUnicode_READ_CHAR(ustring, i); >> } >> } > > I think you could even pass in two flags, one for wraparound and one for > boundscheck, and then just evaluate them appropriately in the existing "if" > tests above. That should allow both features to be supported independently > in a fast way. > > >> Here are timings: >> >> (py33) zbook:mytests $ python3.3 -m timeit -n 50 -r 5 -s "from >> mytests.unicode_index import test_1" "test_1()" >> 50 loops, best of 5: 152 msec per loop >> (py33) zbook:mytests $ python3.3 -m timeit -n 50 -r 5 -s "from >> mytests.unicode_index import test_2" "test_2()" >> 50 loops, best of 5: 86.5 msec per loop >> (py33) zbook:mytests $ python3.3 -m timeit -n 50 -r 5 -s "from >> mytests.unicode_index import test_3" "test_3()" >> 50 loops, best of 5: 86.5 msec per loop >> >> So your suggestion would be preferable. > > Nice. Yes, looks like it' worth it. > Sure that same could be applied to unicode slicing too. 
Zaur Shibzukhov ___ cython-devel mailing list cython-devel@python.org http://mail.python.org/mailman/listinfo/cython-devel
Re: [Cython] About IndexNode and unicode[index]
ZS, 28.02.2013 21:07: > 2013/2/28 Stefan Behnel: >>> This allows to write unicode text parsing code almost at C speed >>> mostly in python (+ .pxd defintions). >> >> I suggest simply adding a constant flag argument to the existing function >> that states if checking should be done or not. Inlining will let the C >> compiler drop the corresponding code, which may or may nor make it a little >> faster. > > static inline Py_UCS4 unicode_char2(PyObject* ustring, Py_ssize_t i, int > flag) { > Py_ssize_t length; > #if CYTHON_PEP393_ENABLED > if (PyUnicode_READY(ustring) < 0) return (Py_UCS4)-1; > #endif > if (flag) { > length = __Pyx_PyUnicode_GET_LENGTH(ustring); > if ((0 <= i) & (i < length)) { > return __Pyx_PyUnicode_READ_CHAR(ustring, i); > } else if ((-length <= i) & (i < 0)) { > return __Pyx_PyUnicode_READ_CHAR(ustring, i + length); > } else { > PyErr_SetString(PyExc_IndexError, "string index out of range"); > return (Py_UCS4)-1; > } > } else { > return __Pyx_PyUnicode_READ_CHAR(ustring, i); > } > } I think you could even pass in two flags, one for wraparound and one for boundscheck, and then just evaluate them appropriately in the existing "if" tests above. That should allow both features to be supported independently in a fast way. > Here are timings: > > (py33) zbook:mytests $ python3.3 -m timeit -n 50 -r 5 -s "from > mytests.unicode_index import test_1" "test_1()" > 50 loops, best of 5: 152 msec per loop > (py33) zbook:mytests $ python3.3 -m timeit -n 50 -r 5 -s "from > mytests.unicode_index import test_2" "test_2()" > 50 loops, best of 5: 86.5 msec per loop > (py33) zbook:mytests $ python3.3 -m timeit -n 50 -r 5 -s "from > mytests.unicode_index import test_3" "test_3()" > 50 loops, best of 5: 86.5 msec per loop > > So your suggestion would be preferable. Nice. Yes, looks like it' worth it. Stefan ___ cython-devel mailing list cython-devel@python.org http://mail.python.org/mailman/listinfo/cython-devel
Re: [Cython] About IndexNode and unicode[index]
2013/2/28 Stefan Behnel : >> This allows to write unicode text parsing code almost at C speed >> mostly in python (+ .pxd defintions). > > I suggest simply adding a constant flag argument to the existing function > that states if checking should be done or not. Inlining will let the C > compiler drop the corresponding code, which may or may nor make it a little > faster. It would be great. To be sure I change the tests: unicode_index.h --- #include "unicodeobject.h" static inline Py_UCS4 unicode_char(PyObject* ustring, Py_ssize_t i); static inline Py_UCS4 unicode_char(PyObject* ustring, Py_ssize_t i) { #if CYTHON_PEP393_ENABLED if (PyUnicode_READY(ustring) < 0) return (Py_UCS4)-1; #endif return __Pyx_PyUnicode_READ_CHAR(ustring, i); } static inline Py_UCS4 unicode_char2(PyObject* ustring, Py_ssize_t i, int flag); static inline Py_UCS4 unicode_char2(PyObject* ustring, Py_ssize_t i, int flag) { Py_ssize_t length; #if CYTHON_PEP393_ENABLED if (PyUnicode_READY(ustring) < 0) return (Py_UCS4)-1; #endif if (flag) { length = __Pyx_PyUnicode_GET_LENGTH(ustring); if ((0 <= i) & (i < length)) { return __Pyx_PyUnicode_READ_CHAR(ustring, i); } else if ((-length <= i) & (i < 0)) { return __Pyx_PyUnicode_READ_CHAR(ustring, i + length); } else { PyErr_SetString(PyExc_IndexError, "string index out of range"); return (Py_UCS4)-1; } } else { return __Pyx_PyUnicode_READ_CHAR(ustring, i); } } unicode_index.pyx -- cdef extern from 'unicode_index.h': inline Py_UCS4 unicode_char(unicode ustring, int i) inline Py_UCS4 unicode_char2(unicode ustring, int i, int flag) cdef unicode text = u"abcdefghigklmnopqrstuvwxyzabcdefghigklmnopqrstuvwxyz" cdef long f_1(unicode text): cdef int i, j cdef int n = len(text) cdef Py_UCS4 ch cdef long S = 0 for j in range(100): for i in range(n): ch = text[i] S += ch * j return S cdef long f_2(unicode text): cdef int i, j cdef int n = len(text) cdef Py_UCS4 ch cdef long S = 0 for j in range(100): for i in range(n): ch = unicode_char(text, i) S += ch * j return 
S cdef long f_3(unicode text): cdef int i, j cdef int n = len(text) cdef Py_UCS4 ch cdef long S = 0 for j in range(100): for i in range(n): ch = unicode_char2(text, i, 0) S += ch * j return S def test_1(): f_1(text) def test_2(): f_2(text) def test_3(): f_3(text) Here are timings: (py33) zbook:mytests $ python3.3 -m timeit -n 50 -r 5 -s "from mytests.unicode_index import test_1" "test_1()" 50 loops, best of 5: 152 msec per loop (py33) zbook:mytests $ python3.3 -m timeit -n 50 -r 5 -s "from mytests.unicode_index import test_2" "test_2()" 50 loops, best of 5: 86.5 msec per loop (py33) zbook:mytests $ python3.3 -m timeit -n 50 -r 5 -s "from mytests.unicode_index import test_3" "test_3()" 50 loops, best of 5: 86.5 msec per loop So your suggestion would be preferable. ___ cython-devel mailing list cython-devel@python.org http://mail.python.org/mailman/listinfo/cython-devel
Re: [Cython] About IndexNode and unicode[index]
ZS, 28.02.2013 19:31: > 2013/2/28 ZS: >> Looking into IndexNode class in ExprNode.py I have seen a possibility >> for addition of more fast code path for unicode[index] as it done in >> method `generate_setitem_code` in case of lists. >> >> This is files for evaluation of performance difference: >> >> unicode_index.h >> >> /* This is striped version of __Pyx_GetItemInt_Unicode_Fast */ >> #include "unicodeobject.h" >> >> static inline Py_UCS4 unicode_char(PyObject* ustring, Py_ssize_t i); >> >> static inline Py_UCS4 unicode_char(PyObject* ustring, Py_ssize_t i) { >> #if CYTHON_PEP393_ENABLED >> if (PyUnicode_READY(ustring) < 0) return (Py_UCS4)-1; >> #endif >> return __Pyx_PyUnicode_READ_CHAR(ustring, i); >> } Sure, looks ok. >> # unicode_index.pyx >> >> # coding: utf-8 >> >> cdef extern from 'unicode_index.h': >> inline Py_UCS4 unicode_char(unicode ustring, int i) >> >> cdef unicode text = u"abcdefghigklmnopqrstuvwxyzabcdefghigklmnopqrstuvwxyz" >> >> def f_1(unicode text): >> cdef int i, j >> cdef int n = len(text) >> cdef Py_UCS4 ch >> >> for j from 0<=j<=100: Personally, I find a range() loop much easier to read than this beast. >> for i from 0<=i<=n-1: >> ch = text[i] >> >> def f_2(unicode text): >> cdef int i, j >> cdef int n = len(text) >> cdef Py_UCS4 ch >> >> for j from 0<=j<=100: >> for i from 0<=i<=n-1: >> ch = unicode_char(text, i) >> >> def test_1(): >> f_1(text) >> >> def test_2(): >> f_2(text) >> >> Timing results: >> >> (py33) zbook:mytests $ python3.3 -m timeit -n 100 -r 10 -s "from >> mytests.unicode_index import test_1" "test_1()" >> 100 loops, best of 10: 89 msec per loop >> (py33) zbook:mytests $ python3.3 -m timeit -n 100 -r 10 -s "from >> mytests.unicode_index import test_2" "test_2()" >> 100 loops, best of 10: 46.1 msec per loop I seriously doubt that this translates to similar results in real-world code. In the second example above, the C compiler should be able to remove a lot of code, certainly including the useless character read. 
Maybe even the loops, if it can determine that PyUnicode_READY() will always return the same result. So you're almost certainly not benchmarking what you think you are. >> in setup.py globally: >> >>"boundscheck": False >>"wraparound": False >>"nonecheck": False >> > For the sake of clarity I would like to add the following... This > optimization is for the case when both `boundscheck(False)` and > `wraparound(False)` are applied. Otherwise the default path of evaluation > (__Pyx_GetItemInt_Unicode) is applied. > > This allows writing unicode text parsing code almost at C speed > mostly in python (+ .pxd definitions). I suggest simply adding a constant flag argument to the existing function that states if checking should be done or not. Inlining will let the C compiler drop the corresponding code, which may or may not make it a little faster. Stefan ___ cython-devel mailing list cython-devel@python.org http://mail.python.org/mailman/listinfo/cython-devel
Re: [Cython] Be more forgiving about memoryview strides
On Thu, Feb 28, 2013 at 5:50 PM, Robert Bradshaw wrote: > On Thu, Feb 28, 2013 at 7:13 AM, Sebastian Berg > wrote: >> Hey, >> >> Maybe someone here already saw it (I don't have a track account, or I >> would just create a ticket), but it would be nice if Cython was more >> forgiving about contiguous requirements on strides. In the future this >> would make it easier for numpy to go forward with changing the >> contiguous flags to be more reasonable for its purpose, and second also >> to allow old (and maybe for the moment remaining) corner cases in numpy >> to slip past (as well as possibly the same for other programs...). An >> example is (see also https://github.com/numpy/numpy/issues/2956 and the >> PR linked there for more details): >> >> def add_one(array): >> cdef double[::1] a = array >> a[0] += 1. >> return array >> >> giving: >> > add_one(np.ascontiguousarray(np.arange(10.)[::100])) >> ValueError: Buffer and memoryview are not contiguous in the same >> dimension. >> >> This could easily be changed if MemoryViews check the strides as "can be >> interpreted as contiguous". That means that if shape[i] == 1, then >> strides[i] are arbitrary (you can just change them if you like). This is >> also the case for 0-sized arrays, which are arguably always contiguous, >> no matter their strides are! > > I was under the impression that the primary value for contiguous is > that it a foo[::1] can be interpreted as a foo*. Letting strides be > arbitrary completely breaks this, right? Nope. The natural definition of "C contiguous" is "the array entries are arranged in memory in the same way they would be if they were a multidimensional C array" (i.e., what you said.) But it turns out that this is *not* the definition that numpy and cython use! 
The issue is that the above definition is a constraint on the actual locations of items in memory, i.e., given a shape, it tells you that for every index, (a) sum(index * strides) == sum(index * cumprod(shape[::-1])[::-1] * itemsize) Obviously this equality holds if (b) strides == cumprod(shape[::-1])[::-1] * itemsize (Or for F-contiguity, we have (b') strides == cumprod(shape) * itemsize ) (a) is the natural definition of "C contiguous". (b) is the definition of "C contiguous" used by numpy and cython. (b) implies (a). But (a) does not imply (b), i.e., there are arrays that are C-contiguous which numpy and cython think are discontiguous. (Also in numpy there are some weird cases where numpy accidentally uses the correct definition, I think, which is the point of Sebastian's example.) In particular, if shape[i] == 1, then the value of stride[i] really should be irrelevant to judging contiguity, because the only thing you can do with strides[i] is multiply it by index[i], and if shape[i] == 1 then index[i] is always 0. So an array of int8's with shape = (10, 1), strides = (1, 73) is contiguous according to (a), but not according to (b). Also if shape[i] is 0 for any i, then the entire contents of the strides array becomes irrelevant to judging contiguity; all zero-sized arrays are contiguous according to (a), but not (b). (This is really annoying for numpy because given, say, a column vector with shape (n, 1), it is impossible to be both C- and F-contiguous according to the (b)-style definition. But people expect expect various operations to preserve C versus F contiguity, so there are heuristics in numpy that try to guess whether various result arrays should pretend to be C- or F-contiguous, and we don't even have a consistent idea of what it would mean for this code to be working correctly, never mind test it and keep it working. OTOH if we just fix numpy to use the (a) definition, then it turns out a bunch of third-party code breaks, like, for example, cython.) 
-n ___ cython-devel mailing list cython-devel@python.org http://mail.python.org/mailman/listinfo/cython-devel
Re: [Cython] Class methods returning C++ class references are not dealt with correctly?
Hey Yury: Yes, you are right. I was thinking this was a function and not a method. As an even ickier workaround: #define TokenStack_top_p(token_stack) &token_stack->top() cdef extern from "": Token* TokenStack_top_p(TokenStack*) except + cdef Token* tok = TokenStack_top_p(self.pEngine.OStack) -Brad On Thu, Feb 28, 2013 at 10:38 AM, Yury V. Zaytsev wrote: > Hi Brad, > > On Thu, 2013-02-28 at 08:01 -0800, Bradley M. Froehle wrote: > > > > cdef extern from "test.h": > > int* foo2ptr "&foo" () > > > > cdef int *x = foo2ptr() > > Thank you for this interesting suggestion, but I must be missing > something, because when I do the following: > > cdef extern from "tokenstack.h": > cppclass TokenStack: > Token* top "Token&" () except + > > cdef Token* tok = self.pEngine.OStack.top() > > I end up with the following generated code, which, of course, doesn't > compile: > > Token *__pyx_t_5; > __pyx_t_5 = __pyx_v_self->pEngine->OStack.Token&(); > > whereas, I'd like to see generated this: > > Token *__pyx_t_5; > __pyx_t_5 = __pyx_v_self->pEngine->OStack->top(); > > Any ideas? > > -- > Sincerely yours, > Yury V. Zaytsev > > > ___ > cython-devel mailing list > cython-devel@python.org > http://mail.python.org/mailman/listinfo/cython-devel > ___ cython-devel mailing list cython-devel@python.org http://mail.python.org/mailman/listinfo/cython-devel
Re: [Cython] About IndexNode and unicode[index]
2013/2/28 ZS : > Looking into IndexNode class in ExprNode.py I have seen a possibility > for addition of more fast code path for unicode[index] as it done in > method `generate_setitem_code` in case of lists. > > This is files for evaluation of performance difference: > > unicode_index.h > > /* This is striped version of __Pyx_GetItemInt_Unicode_Fast */ > #include "unicodeobject.h" > > static inline Py_UCS4 unicode_char(PyObject* ustring, Py_ssize_t i); > > static inline Py_UCS4 unicode_char(PyObject* ustring, Py_ssize_t i) { > #if CYTHON_PEP393_ENABLED > if (PyUnicode_READY(ustring) < 0) return (Py_UCS4)-1; > #endif > return __Pyx_PyUnicode_READ_CHAR(ustring, i); > } > > # unicode_index.pyx > > # coding: utf-8 > > cdef extern from 'unicode_index.h': > inline Py_UCS4 unicode_char(unicode ustring, int i) > > cdef unicode text = u"abcdefghigklmnopqrstuvwxyzabcdefghigklmnopqrstuvwxyz" > > def f_1(unicode text): > cdef int i, j > cdef int n = len(text) > cdef Py_UCS4 ch > > for j from 0<=j<=100: > for i from 0<=i<=n-1: > ch = text[i] > > def f_2(unicode text): > cdef int i, j > cdef int n = len(text) > cdef Py_UCS4 ch > > for j from 0<=j<=100: > for i from 0<=i<=n-1: > ch = unicode_char(text, i) > > def test_1(): > f_1(text) > > def test_2(): > f_2(text) > > Timing results: > > (py33) zbook:mytests $ python3.3 -m timeit -n 100 -r 10 -s "from > mytests.unicode_index import test_1" "test_1()" > 100 loops, best of 10: 89 msec per loop > (py33) zbook:mytests $ python3.3 -m timeit -n 100 -r 10 -s "from > mytests.unicode_index import test_2" "test_2()" > 100 loops, best of 10: 46.1 msec per loop > > in setup.py globally: > >"boundscheck": False >"wraparound": False >"nonecheck": False > For the sake of clarity I would like to add the following... This optimization is for the case when both `boundscheck(False)` and `wraparound(False)` is applied. Otherwise default path of evaluation (__Pyx_GetItemInt_Unicode) is applied. 
This makes it possible to write Unicode text-parsing code that runs at almost C speed while staying mostly in Python (+ .pxd definitions). Zaur Shibzukhov ___ cython-devel mailing list cython-devel@python.org http://mail.python.org/mailman/listinfo/cython-devel
Re: [Cython] Class methods returning C++ class references are not dealt with correctly?
Hi Brad, On Thu, 2013-02-28 at 08:01 -0800, Bradley M. Froehle wrote: > > cdef extern from "test.h": > int* foo2ptr "&foo" () > > cdef int *x = foo2ptr() Thank you for this interesting suggestion, but I must be missing something, because when I do the following: cdef extern from "tokenstack.h": cppclass TokenStack: Token* top "Token&" () except + cdef Token* tok = self.pEngine.OStack.top() I end up with the following generated code, which, of course, doesn't compile: Token *__pyx_t_5; __pyx_t_5 = __pyx_v_self->pEngine->OStack.Token&(); whereas, I'd like to see generated this: Token *__pyx_t_5; __pyx_t_5 = __pyx_v_self->pEngine->OStack->top(); Any ideas? -- Sincerely yours, Yury V. Zaytsev ___ cython-devel mailing list cython-devel@python.org http://mail.python.org/mailman/listinfo/cython-devel
Re: [Cython] Be more forgiving about memoryview strides
On Thu, Feb 28, 2013 at 7:13 AM, Sebastian Berg wrote: > Hey, > > Maybe someone here already saw it (I don't have a track account, or I > would just create a ticket), but it would be nice if Cython was more > forgiving about contiguous requirements on strides. In the future this > would make it easier for numpy to go forward with changing the > contiguous flags to be more reasonable for its purpose, and second also > to allow old (and maybe for the moment remaining) corner cases in numpy > to slip past (as well as possibly the same for other programs...). An > example is (see also https://github.com/numpy/numpy/issues/2956 and the > PR linked there for more details): > > def add_one(array): > cdef double[::1] a = array > a[0] += 1. > return array > > giving: > add_one(np.ascontiguousarray(np.arange(10.)[::100])) > ValueError: Buffer and memoryview are not contiguous in the same > dimension. > > This could easily be changed if MemoryViews check the strides as "can be > interpreted as contiguous". That means that if shape[i] == 1, then > strides[i] are arbitrary (you can just change them if you like). This is > also the case for 0-sized arrays, which are arguably always contiguous, > no matter their strides are! I was under the impression that the primary value of contiguous is that a foo[::1] can be interpreted as a foo*. Letting strides be arbitrary completely breaks this, right? > PS: A similar thing exists with np.ndarray[...] interface if the user > accesses array.strides. They get the arrays strides not the buffers. > This is not quite related, but if it would be easy to use the buffer's > strides in that case, it may make it easier if we want to change the > flags in numpy in the long term, since one could clean up strides for > forced contiguous buffer requests. 
> > ___ > cython-devel mailing list > cython-devel@python.org > http://mail.python.org/mailman/listinfo/cython-devel ___ cython-devel mailing list cython-devel@python.org http://mail.python.org/mailman/listinfo/cython-devel
Re: [Cython] Class methods returning C++ class references are not dealt with correctly?
On Thu, Feb 28, 2013 at 4:58 AM, Yury V. Zaytsev wrote: > Hi, > > I'm sorry if my question would appear to be trivial, but what am I > supposed to do, if I want to wrap class methods, that return a reference > to another class? As a workaround you could use: cdef extern from "test.h": int* foo2ptr "&foo" () cdef int *x = foo2ptr() This could be extended to your other example as well. -Brad ___ cython-devel mailing list cython-devel@python.org http://mail.python.org/mailman/listinfo/cython-devel
[Cython] Be more forgiving about memoryview strides
Hey, Maybe someone here already saw it (I don't have a Trac account, or I would just create a ticket), but it would be nice if Cython was more forgiving about contiguous requirements on strides. In the future this would make it easier for numpy to go forward with changing the contiguous flags to be more reasonable for its purpose, and second also to allow old (and maybe for the moment remaining) corner cases in numpy to slip past (as well as possibly the same for other programs...). An example is (see also https://github.com/numpy/numpy/issues/2956 and the PR linked there for more details): def add_one(array): cdef double[::1] a = array a[0] += 1. return array giving: >>> add_one(np.ascontiguousarray(np.arange(10.)[::100])) ValueError: Buffer and memoryview are not contiguous in the same dimension. This could easily be changed if MemoryViews check the strides as "can be interpreted as contiguous". That means that if shape[i] == 1, then strides[i] are arbitrary (you can just change them if you like). This is also the case for 0-sized arrays, which are arguably always contiguous, no matter what their strides are! Regards, Sebastian PS: A similar thing exists with the np.ndarray[...] interface if the user accesses array.strides. They get the array's strides, not the buffer's. This is not quite related, but if it would be easy to use the buffer's strides in that case, it may make it easier if we want to change the flags in numpy in the long term, since one could clean up strides for forced contiguous buffer requests. ___ cython-devel mailing list cython-devel@python.org http://mail.python.org/mailman/listinfo/cython-devel
Re: [Cython] MemoryViews require writeable arrays?
On 28.02.2013 15:55, Dave Hirschfeld wrote: So the issue is that at present memoryviews can't be readonly? https://github.com/cython/cython/blob/master/Cython/Compiler/MemoryView.py#L33 Typed memoryviews are thus acquired with the PyBUF_WRITEABLE flag. If the assigned buffer is readonly, the request to acquire the PEP3118 buffer will fail. If you remove the PyBUF_WRITEABLE flag from lines 33 to 36, you can acquire a readonly buffer with typed memoryviews. But this is not recommended. In this case you would have to check for the readonly flag yourself and make sure you don't write to a readonly buffer. Sturla ___ cython-devel mailing list cython-devel@python.org http://mail.python.org/mailman/listinfo/cython-devel
Re: [Cython] Class methods returning C++ class references are not dealt with correctly?
On 28.02.2013 15:46, Yury V. Zaytsev wrote: My method call is actually wrapped in a try { ... } catch clause, because I declared it as being able to throw exceptions, so the reference can't be defined in this block, or it will not be accessible to the outside world. If Cython generates illegal C++ code (i.e. C++ that doesn't compile) it is a bug in Cython. There must be a general error in the handling of C++ references when they are declared without a target. Sturla ___ cython-devel mailing list cython-devel@python.org http://mail.python.org/mailman/listinfo/cython-devel
Re: [Cython] MemoryViews require writeable arrays?
Sturla Molden writes: > > On 27.02.2013 20:05, Dave Hirschfeld wrote: > > > Is this a required restriction? Is there any workaround? > > http://www.python.org/dev/peps/pep-3118/ > > What you should consider is the "readonly" field in "struct bufferinfo" > or the access flag "PyBUF_WRITEABLE". > > In short: > > A PEP3118 buffer can be readonly, and then you shouldn't write to it! > When you set the readonly flag, Cython cannot retrieve the buffer with > PyBUF_WRITEABLE. Thus, Cython helps you not to shoot yourself in the > foot. I don't think you can declare a read-only memoryview in Cython. > (Well, not by any means I know of.) > > Sturla > > So the issue is that at present memoryviews can't be readonly? Presumably because this works for numpy arrays it would be possible to also make readonly memoryviews? I think that would certainly be nice to have, but maybe it's a niche use case. Certainly, for IPython.parallel use it's easy enough to write a shim which sets the array to writeable with the understanding that changes don't get propagated back. Thanks, Dave ___ cython-devel mailing list cython-devel@python.org http://mail.python.org/mailman/listinfo/cython-devel
Re: [Cython] Class methods returning C++ class references are not dealt with correctly?
On Thu, 2013-02-28 at 15:34 +0100, Sturla Molden wrote: > > This is clearly a bug in Cython. One cannot let a C++ reference > dangle. Hi Sturla, Thanks for the confirmation! I had a closer look at it, and I think I know why this happens. My method call is actually wrapped in a try { ... } catch clause, because I declared it as being able to throw exceptions, so the reference can't be defined in this block, or it will not be accessible to the outside world. Apparently, Cython should rather do something like this instead: Token *__pyx_v_tok; Token *__pyx_t_5_p; try { Token &__pyx_t_5 = __pyx_v_self->pEngine->OStack.top(); __pyx_t_5_p = (&__pyx_t_5); } ... __pyx_v_tok = __pyx_t_5_p; I'm sorry, but I don't think that I can personally help fixing this, because even if I manage to come up with a patch to generate declarations inside try blocks with my non-existent knowledge of Cython internals, this is simply not going to work. I believe that some convention should be established regarding reference handling, i.e. stating that Cython will generate correct code to convert them to pointers if such and such syntax is used... Hopefully, in the meantime, there is some other solution to the problem that I have overlooked. Z. -- Sincerely yours, Yury V. Zaytsev ___ cython-devel mailing list cython-devel@python.org http://mail.python.org/mailman/listinfo/cython-devel
Re: [Cython] Class methods returning C++ class references are not dealt with correctly?
On 28.02.2013 13:58, Yury V. Zaytsev wrote: Hi, I'm sorry if my question would appear to be trivial, but what am I supposed to do, if I want to wrap class methods, that return a reference to another class? From reading the list, I've gathered that apparently the best strategy of dealing with references is just to not to use them (convert to pointers immediately), because of some scoping rules issues. It works for me for a simple case of POD types, like cdef extern from "test.h": int& foo() cdef int* x = &foo() but in a more complex case, Cython generates incorrect C++ code (first it declares a reference, then assigns to it, which, of course, doesn't even compile): cdef extern from "token.h": cppclass Token: Token(const Datum&) except + cdef extern from "tokenstack.h": cppclass TokenStack: Token& top() except + cdef Token* tok = &self.pEngine.OStack.top() <-> Token *__pyx_v_tok; Token &__pyx_t_5; __pyx_t_5 = __pyx_v_self->pEngine->OStack.top(); __pyx_v_tok = (&__pyx_t_5); This is clearly a bug in Cython. The generated code should be: Token *__pyx_v_tok; Token &__pyx_t_5 = __pyx_v_self->pEngine->OStack.top(); __pyx_v_tok = (&__pyx_t_5); One cannot let a C++ reference dangle: Token &__pyx_t_5; // illegal C++ Sturla ___ cython-devel mailing list cython-devel@python.org http://mail.python.org/mailman/listinfo/cython-devel
Re: [Cython] MemoryViews require writeable arrays?
On 27.02.2013 20:05, Dave Hirschfeld wrote: Is this a required restriction? Is there any workaround? http://www.python.org/dev/peps/pep-3118/ What you should consider is the "readonly" field in "struct bufferinfo" or the access flag "PyBUF_WRITEABLE". In short: A PEP3118 buffer can be readonly, and then you shouldn't write to it! When you set the readonly flag, Cython cannot retrieve the buffer with PyBUF_WRITEABLE. Thus, Cython helps you not to shoot yourself in the foot. I don't think you can declare a read-only memoryview in Cython. (Well, not by any means I know of.) Sturla ___ cython-devel mailing list cython-devel@python.org http://mail.python.org/mailman/listinfo/cython-devel
[Cython] Class methods returning C++ class references are not dealt with correctly?
Hi, I'm sorry if my question would appear to be trivial, but what am I supposed to do, if I want to wrap class methods, that return a reference to another class? From reading the list, I've gathered that apparently the best strategy of dealing with references is simply not to use them (convert to pointers immediately), because of some scoping rules issues. It works for me for a simple case of POD types, like cdef extern from "test.h": int& foo() cdef int* x = &foo() but in a more complex case, Cython generates incorrect C++ code (first it declares a reference, then assigns to it, which, of course, doesn't even compile): cdef extern from "token.h": cppclass Token: Token(const Datum&) except + cdef extern from "tokenstack.h": cppclass TokenStack: Token& top() except + cdef Token* tok = &self.pEngine.OStack.top() <-> Token *__pyx_v_tok; Token &__pyx_t_5; __pyx_t_5 = __pyx_v_self->pEngine->OStack.top(); __pyx_v_tok = (&__pyx_t_5); I would expect to see this instead: Token *__pyx_v_tok = &__pyx_v_self->pEngine->OStack.top(); Am I doing something wrong? Is there any other way to achieve what I want, other than writing custom C macros? Thanks, -- Sincerely yours, Yury V. Zaytsev ___ cython-devel mailing list cython-devel@python.org http://mail.python.org/mailman/listinfo/cython-devel
[Cython] MemoryView Casting slow compared to ndarray buffer syntax
%%cython cimport cython import numpy as np cimport numpy as np ctypedef np.float64_t float64_t @cython.boundscheck(False) @cython.wraparound(False) @cython.cdivision(True) def echo_numpy(np.ndarray[float64_t, ndim=1] x): return x @cython.boundscheck(False) @cython.wraparound(False) @cython.cdivision(True) def echo_memview(double[:] x): return np.asarray(x) @cython.boundscheck(False) @cython.wraparound(False) @cython.cdivision(True) def echo_memview_nocast(double[:] x): return x In [19]: %timeit echo_memview(x) ...: %timeit echo_memview_nocast(x) ...: %timeit echo_numpy(x) 1 loops, best of 3: 38.1 µs per loop 10 loops, best of 3: 5.58 µs per loop 100 loops, best of 3: 749 ns per loop In [20]: 38.1e-6/749e-9 Out[20]: 50.86782376502002 In [21]: 5.58e-6/749e-9 Out[21]: 7.449933244325767 So it seems that the MemoryView is 50x slower than using the ndarray buffer syntax and even 7.5x slower without casting to an array. Is there anything that can be done about this or is it just something to be aware of and use each of them in the situations where they perform best? Thanks, Dave ___ cython-devel mailing list cython-devel@python.org http://mail.python.org/mailman/listinfo/cython-devel
Re: [Cython] Non-deterministic behaviour?
Dave Hirschfeld writes: > > Dave Hirschfeld writes: > > > > > Using the following test code: > > > > > So, it seems either typing the array as a memview or printing res > > will screw up the calculation. > > > > The cython code is given below. Any ideas if this is a cython bug or something > > I'm doing wrong? > > > > Thanks, > > Dave > > > > To answer my own question, it can't be that a simple print statement will > change the program so I must be doing something wrong! It makes it hard > to track down when it gives the right answer most of the time and segfaults > randomly when nothing seems to have changed. I'm sure it's just incorrect > arguments to dgelsy so I'll look into that... > > -Dave > > And for those following, the obvious error was in using the double `worksize` instead of the array of size n, `work` in the 2nd call to DGELSY. DGELSY(&m, &n, &nrhs, &A[0,0], &lda, &res[0], &ldb, &jpvt[0], &rcond, &rank, &worksize, &lwork, &info) Sorry for the noise. -Dave ___ cython-devel mailing list cython-devel@python.org http://mail.python.org/mailman/listinfo/cython-devel
[Cython] About IndexNode and unicode[index]
Looking into the IndexNode class in ExprNode.py, I have seen a possibility for adding a faster code path for unicode[index], as is done in the method `generate_setitem_code` in the case of lists. These are the files for evaluating the performance difference: unicode_index.h /* This is striped version of __Pyx_GetItemInt_Unicode_Fast */ #include "unicodeobject.h" static inline Py_UCS4 unicode_char(PyObject* ustring, Py_ssize_t i); static inline Py_UCS4 unicode_char(PyObject* ustring, Py_ssize_t i) { #if CYTHON_PEP393_ENABLED if (PyUnicode_READY(ustring) < 0) return (Py_UCS4)-1; #endif return __Pyx_PyUnicode_READ_CHAR(ustring, i); } # unicode_index.pyx # coding: utf-8 cdef extern from 'unicode_index.h': inline Py_UCS4 unicode_char(unicode ustring, int i) cdef unicode text = u"abcdefghigklmnopqrstuvwxyzabcdefghigklmnopqrstuvwxyz" def f_1(unicode text): cdef int i, j cdef int n = len(text) cdef Py_UCS4 ch for j from 0<=j<=100: for i from 0<=i<=n-1: ch = text[i] def f_2(unicode text): cdef int i, j cdef int n = len(text) cdef Py_UCS4 ch for j from 0<=j<=100: for i from 0<=i<=n-1: ch = unicode_char(text, i) def test_1(): f_1(text) def test_2(): f_2(text) Timing results: (py33) zbook:mytests $ python3.3 -m timeit -n 100 -r 10 -s "from mytests.unicode_index import test_1" "test_1()" 100 loops, best of 10: 89 msec per loop (py33) zbook:mytests $ python3.3 -m timeit -n 100 -r 10 -s "from mytests.unicode_index import test_2" "test_2()" 100 loops, best of 10: 46.1 msec per loop in setup.py globally: "boundscheck": False "wraparound": False "nonecheck": False Zaur Shibzukhov ___ cython-devel mailing list cython-devel@python.org http://mail.python.org/mailman/listinfo/cython-devel
Re: [Cython] Non-deterministic behaviour?
Dave Hirschfeld writes: > > Using the following test code: > > So, it seems either typing the array as a memview or printing res > will screw up the calculation. > > The cython code is given below. Any ideas if this is a cython bug or > something > I'm doing wrong? > > Thanks, > Dave > To answer my own question, it can't be that a simple print statement will change the program so I must be doing something wrong! It makes it hard to track down when it gives the right answer most of the time and segfaults randomly when nothing seems to have changed. I'm sure it's just incorrect arguments to dgelsy so I'll look into that... -Dave ___ cython-devel mailing list cython-devel@python.org http://mail.python.org/mailman/listinfo/cython-devel