Re: [Cython] About IndexNode and unicode[index]

2013-02-28 Thread Stefan Behnel
Zaur Shibzukhov, 01.03.2013 08:37:
> unicode_slice.h
> -
> 
> #include "unicodeobject.h"
> 
> static inline PyObject* unicode_slice(
>     PyObject* text, Py_ssize_t start, Py_ssize_t stop);
> 
> /// PyUnicode_Substring ///
> 
> /* CURRENT */
> 
> static inline PyObject* unicode_slice(
>     PyObject* text, Py_ssize_t start, Py_ssize_t stop) {
>     Py_ssize_t length;
> #if CYTHON_PEP393_ENABLED
>     if (PyUnicode_READY(text) == -1) return NULL;
>     length = PyUnicode_GET_LENGTH(text);
> #else
>     length = PyUnicode_GET_SIZE(text);
> #endif
>     if (start < 0) {
>         start += length;
>         if (start < 0)
>             start = 0;
>     }
>     if (stop < 0)
>         stop += length;
>     else if (stop > length)
>         stop = length;
>     length = stop - start;
>     if (length <= 0)
>         return PyUnicode_FromUnicode(NULL, 0);
> #if CYTHON_PEP393_ENABLED
>     return PyUnicode_FromKindAndData(PyUnicode_KIND(text),
>         PyUnicode_1BYTE_DATA(text) + start*PyUnicode_KIND(text), stop-start);
> #else
>     return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(text)+start, stop-start);
> #endif
> }
> 
> static inline PyObject* unicode_slice2(
>     PyObject* text, Py_ssize_t start, Py_ssize_t stop, int flag);
> 
> /// PyUnicode_Substring ///
> 
> /* CHANGED */
> 
> static inline PyObject* unicode_slice2(
>     PyObject* text, Py_ssize_t start, Py_ssize_t stop, int flag) {
>     Py_ssize_t length;
> 
> #if CYTHON_PEP393_ENABLED
>     if (PyUnicode_READY(text) == -1) return NULL;
> #endif
> 
>     if (flag) {
> #if CYTHON_PEP393_ENABLED
>         length = PyUnicode_GET_LENGTH(text);
> #else
>         length = PyUnicode_GET_SIZE(text);
> #endif
>         if (start < 0) {
>             start += length;
>             if (start < 0)
>                 start = 0;
>         }
>         if (stop < 0)
>             stop += length;
>         else if (stop > length)
>             stop = length;
>         length = stop - start;
>         if (length <= 0)
>             return PyUnicode_FromUnicode(NULL, 0);
>     }
> 
> #if CYTHON_PEP393_ENABLED
>     return PyUnicode_FromKindAndData(PyUnicode_KIND(text),
>         PyUnicode_1BYTE_DATA(text) + start*PyUnicode_KIND(text), stop-start);
> #else
>     return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(text)+start, stop-start);
> #endif
> }
> 
> unicode_slice.pyx
> 
> 
> cdef extern from 'unicode_slice.h':
>     inline unicode unicode_slice(unicode ustring, int start, int stop)
>     inline unicode unicode_slice2(unicode ustring, int start, int stop, int flag)
> 
> cdef unicode text = u"abcdefghigklmnopqrstuvwxyzabcdefghigklmnopqrstuvwxyz"
> 
> cdef long f_1(unicode text):
>     cdef int i, j
>     cdef int n = len(text)
>     cdef int val
>     cdef long S = 0
> 
>     for j in range(10):
>         for i in range(n):
>             val = len(unicode_slice(text, 0, i))
>             S += val * j
> 
>     return S
> 
> cdef long f_2(unicode text):
>     cdef int i, j
>     cdef int n = len(text)
>     cdef int val
>     cdef long S = 0
> 
>     for j in range(10):
>         for i in range(n):
>             val = len(unicode_slice2(text, 0, i, 0))
>             S += val * j
> 
>     return S
> 
> 
> def test_1():
>     f_1(text)
> 
> def test_2():
>     f_2(text)
> 
> Here are timings:
> 
> (py33) zbook:mytests $ python3.3 -m timeit -n 50 -r 5 -s "from
> mytests.unicode_slice import test_1" "test_1()"
> 50 loops, best of 5: 534 msec per loop
> (py33) zbook:mytests $ python3.3 -m timeit -n 50 -r 5 -s "from
> mytests.unicode_slice import test_2" "test_2()"
> 50 loops, best of 5: 523 msec per loop
> 
> Only 2%

That's to be expected. Creating a Unicode string object is by far the
dominating operation here, including memory allocation, object type
selection and what not.

Stefan



Re: [Cython] About IndexNode and unicode[index]

2013-02-28 Thread Zaur Shibzukhov
2013/3/1 ZS :
> 2013/3/1 Stefan Behnel :
>> ZS, 28.02.2013 21:07:
>>> 2013/2/28 Stefan Behnel:
> This allows writing unicode text parsing code almost at C speed,
> mostly in Python (+ .pxd definitions).

 I suggest simply adding a constant flag argument to the existing function
 that states if checking should be done or not. Inlining will let the C
 compiler drop the corresponding code, which may or may not make it a little
 faster.
>>>
>>> static inline Py_UCS4 unicode_char2(PyObject* ustring, Py_ssize_t i, int 
>>> flag) {
>>> Py_ssize_t length;
>>> #if CYTHON_PEP393_ENABLED
>>> if (PyUnicode_READY(ustring) < 0) return (Py_UCS4)-1;
>>> #endif
>>> if (flag) {
>>> length = __Pyx_PyUnicode_GET_LENGTH(ustring);
>>> if ((0 <= i) & (i < length)) {
>>> return __Pyx_PyUnicode_READ_CHAR(ustring, i);
>>> } else if ((-length <= i) & (i < 0)) {
>>> return __Pyx_PyUnicode_READ_CHAR(ustring, i + length);
>>> } else {
>>> PyErr_SetString(PyExc_IndexError, "string index out of range");
>>> return (Py_UCS4)-1;
>>> }
>>> } else {
>>> return __Pyx_PyUnicode_READ_CHAR(ustring, i);
>>> }
>>> }
>>
>> I think you could even pass in two flags, one for wraparound and one for
>> boundscheck, and then just evaluate them appropriately in the existing "if"
>> tests above. That should allow both features to be supported independently
>> in a fast way.
>>
>>
>>> Here are timings:
>>>
>>> (py33) zbook:mytests $ python3.3 -m timeit -n 50 -r 5 -s "from
>>> mytests.unicode_index import test_1" "test_1()"
>>> 50 loops, best of 5: 152 msec per loop
>>> (py33) zbook:mytests $ python3.3 -m timeit -n 50 -r 5 -s "from
>>> mytests.unicode_index import test_2" "test_2()"
>>> 50 loops, best of 5: 86.5 msec per loop
>>> (py33) zbook:mytests $ python3.3 -m timeit -n 50 -r 5 -s "from
>>> mytests.unicode_index import test_3" "test_3()"
>>> 50 loops, best of 5: 86.5 msec per loop
>>>
>>> So your suggestion would be preferable.
>>
>> Nice. Yes, looks like it's worth it.
>>
>
> Sure, the same could be applied to unicode slicing too.
>
I had to verify it myself first. So here is the test...

unicode_slice.h
-

#include "unicodeobject.h"

static inline PyObject* unicode_slice(
    PyObject* text, Py_ssize_t start, Py_ssize_t stop);

/// PyUnicode_Substring ///

/* CURRENT */

static inline PyObject* unicode_slice(
    PyObject* text, Py_ssize_t start, Py_ssize_t stop) {
    Py_ssize_t length;
#if CYTHON_PEP393_ENABLED
    if (PyUnicode_READY(text) == -1) return NULL;
    length = PyUnicode_GET_LENGTH(text);
#else
    length = PyUnicode_GET_SIZE(text);
#endif
    if (start < 0) {
        start += length;
        if (start < 0)
            start = 0;
    }
    if (stop < 0)
        stop += length;
    else if (stop > length)
        stop = length;
    length = stop - start;
    if (length <= 0)
        return PyUnicode_FromUnicode(NULL, 0);
#if CYTHON_PEP393_ENABLED
    return PyUnicode_FromKindAndData(PyUnicode_KIND(text),
        PyUnicode_1BYTE_DATA(text) + start*PyUnicode_KIND(text), stop-start);
#else
    return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(text)+start, stop-start);
#endif
}

static inline PyObject* unicode_slice2(
    PyObject* text, Py_ssize_t start, Py_ssize_t stop, int flag);

/// PyUnicode_Substring ///

/* CHANGED */

static inline PyObject* unicode_slice2(
    PyObject* text, Py_ssize_t start, Py_ssize_t stop, int flag) {
    Py_ssize_t length;

#if CYTHON_PEP393_ENABLED
    if (PyUnicode_READY(text) == -1) return NULL;
#endif

    if (flag) {
#if CYTHON_PEP393_ENABLED
        length = PyUnicode_GET_LENGTH(text);
#else
        length = PyUnicode_GET_SIZE(text);
#endif
        if (start < 0) {
            start += length;
            if (start < 0)
                start = 0;
        }
        if (stop < 0)
            stop += length;
        else if (stop > length)
            stop = length;
        length = stop - start;
        if (length <= 0)
            return PyUnicode_FromUnicode(NULL, 0);
    }

#if CYTHON_PEP393_ENABLED
    return PyUnicode_FromKindAndData(PyUnicode_KIND(text),
        PyUnicode_1BYTE_DATA(text) + start*PyUnicode_KIND(text), stop-start);
#else
    return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(text)+start, stop-start);
#endif
}

unicode_slice.pyx


cdef extern from 'unicode_slice.h':
    inline unicode unicode_slice(unicode ustring, int start, int stop)
    inline unicode unicode_slice2(unicode ustring, int start, int stop, int flag)

cdef unicode text = u"abcdefghigklmnopqrstuvwxyzabcdefghigklmnopqrstuvwxyz"

cdef long f_1(unicode text):
    cdef int i, j
    cdef int n = len(text)
    cdef int val
    cdef long S = 0

    for j in range(10):
        for i in range(n):
            val = len(unicode_slice(text, 0, i))
            S += val

Re: [Cython] Be more forgiving about memoryview strides

2013-02-28 Thread Robert Bradshaw
On Thu, Feb 28, 2013 at 11:12 AM, Nathaniel Smith  wrote:
> On Thu, Feb 28, 2013 at 5:50 PM, Robert Bradshaw  wrote:
>> On Thu, Feb 28, 2013 at 7:13 AM, Sebastian Berg
>>  wrote:
>>> Hey,
>>>
>>> Maybe someone here already saw it (I don't have a track account, or I
>>> would just create a ticket), but it would be nice if Cython was more
>>> forgiving about contiguous requirements on strides. In the future this
>>> would make it easier for numpy to go forward with changing the
>>> contiguous flags to be more reasonable for its purpose, and second also
>>> to allow old (and maybe for the moment remaining) corner cases in numpy
>>> to slip past (as well as possibly the same for other programs...). An
>>> example is (see also https://github.com/numpy/numpy/issues/2956 and the
>>> PR linked there for more details):
>>>
>>> def add_one(array):
>>> cdef double[::1] a = array
>>> a[0] += 1.
>>> return array
>>>
>>> giving:
>>>
>> add_one(np.ascontiguousarray(np.arange(10.)[::100]))
>>> ValueError: Buffer and memoryview are not contiguous in the same
>>> dimension.
>>>
>>> This could easily be changed if MemoryViews check the strides as "can be
>>> interpreted as contiguous". That means that if shape[i] == 1, then
>>> strides[i] are arbitrary (you can just change them if you like). This is
>>> also the case for 0-sized arrays, which are arguably always contiguous,
>>> no matter what their strides are!
>>
>> I was under the impression that the primary value for contiguous is
>> that a foo[::1] can be interpreted as a foo*. Letting strides be
>> arbitrary completely breaks this, right?
>
> Nope. The natural definition of "C contiguous" is "the array entries
> are arranged in memory in the same way they would be if they were a
> multidimensional C array" (i.e., what you said.) But it turns out that
> this is *not* the definition that numpy and cython use!
>
> The issue is that the above definition is a constraint on the actual
> locations of items in memory, i.e., given a shape, it tells you that
> for every index,
>  (a)  sum(index * strides) == sum(index * cumprod(shape[::-1])[::-1] * 
> itemsize)
> Obviously this equality holds if
>  (b)  strides == cumprod(shape[::-1])[::-1] * itemsize
> (Or for F-contiguity, we have
>  (b')  strides == cumprod(shape) * itemsize
> )
>
> (a) is the natural definition of "C contiguous". (b) is the definition
> of "C contiguous" used by numpy and cython. (b) implies (a). But (a)
> does not imply (b), i.e., there are arrays that are C-contiguous which
> numpy and cython think are discontiguous. (Also in numpy there are
> some weird cases where numpy accidentally uses the correct definition,
> I think, which is the point of Sebastian's example.)
>
> In particular, if shape[i] == 1, then the value of stride[i] really
> should be irrelevant to judging contiguity, because the only thing you
> can do with strides[i] is multiply it by index[i], and if shape[i] ==
> 1 then index[i] is always 0. So an array of int8's with shape = (10,
> 1), strides = (1, 73) is contiguous according to (a), but not
> according to (b). Also if shape[i] is 0 for any i, then the entire
> contents of the strides array becomes irrelevant to judging
> contiguity; all zero-sized arrays are contiguous according to (a), but
> not (b).

Thanks for clarifying.

Yes, I think it makes a lot of sense to loosen our definition for
Cython. Internally, I think the only way we use this assumption is in
not requiring that the first/final index be multiplied by the stride,
which should be totally fine. But this merits closer inspection as
there may be something else.

> (This is really annoying for numpy because given, say, a column vector
> with shape (n, 1), it is impossible to be both C- and F-contiguous
> according to the (b)-style definition. But people expect
> various operations to preserve C versus F contiguity, so there are
> heuristics in numpy that try to guess whether various result arrays
> should pretend to be C- or F-contiguous, and we don't even have a
> consistent idea of what it would mean for this code to be working
> correctly, never mind test it and keep it working. OTOH if we just fix
> numpy to use the (a) definition, then it turns out a bunch of
> third-party code breaks, like, for example, cython.)

Can you give some examples?

- Robert


Re: [Cython] About IndexNode and unicode[index]

2013-02-28 Thread Zaur Shibzukhov
>>>
>>> I think you could even pass in two flags, one for wraparound and one for
>>> boundscheck, and then just evaluate them appropriately in the existing "if"
>>> tests above. That should allow both features to be supported independently
>>> in a fast way.
>>>
>> Interesting, can C compilers in optimization mode eliminate unused
>> evaluation paths in nested if statements with constant conditional
>> expressions?
>
> They'd be worthless if they didn't do that. (Even Cython does it, BTW.)
>
Then this can simplify writing utility code that supports different
optimization flags in other cases too.
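
For concreteness, a standalone sketch of that pattern outside of Cython's
utility code (the names here are made up for illustration): with `inline`
and a compile-time-constant flag, the optimizer simply drops the branch
that is not taken.

#include <stddef.h>

/* Hypothetical helper, not Cython utility code: 'check' is a compile-time
   constant at every call site, so the optimizer removes the unused branch. */
static inline long get_at(const long* data, size_t n, ptrdiff_t i, int check)
{
    if (check) {
        if (i < 0)                      /* wraparound */
            i += (ptrdiff_t)n;
        if (i < 0 || (size_t)i >= n)    /* bounds check */
            return -1;                  /* error marker, for the sketch only */
    }
    return data[i];                     /* with check == 0 this is a plain load */
}

/* get_at(buf, len, i, 0) and get_at(buf, len, i, 1) compile to different code,
   but only one implementation has to be written and maintained. */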


Re: [Cython] About IndexNode and unicode[index]

2013-02-28 Thread Stefan Behnel
ZS, 01.03.2013 07:43:
> 2013/3/1 Stefan Behnel:
>> ZS, 28.02.2013 21:07:
>>> 2013/2/28 Stefan Behnel:
> This allows writing unicode text parsing code almost at C speed,
> mostly in Python (+ .pxd definitions).

 I suggest simply adding a constant flag argument to the existing function
 that states if checking should be done or not. Inlining will let the C
 compiler drop the corresponding code, which may or may not make it a little
 faster.
>>>
>>> static inline Py_UCS4 unicode_char2(PyObject* ustring, Py_ssize_t i, int 
>>> flag) {
>>> Py_ssize_t length;
>>> #if CYTHON_PEP393_ENABLED
>>> if (PyUnicode_READY(ustring) < 0) return (Py_UCS4)-1;
>>> #endif
>>> if (flag) {
>>> length = __Pyx_PyUnicode_GET_LENGTH(ustring);
>>> if ((0 <= i) & (i < length)) {
>>> return __Pyx_PyUnicode_READ_CHAR(ustring, i);
>>> } else if ((-length <= i) & (i < 0)) {
>>> return __Pyx_PyUnicode_READ_CHAR(ustring, i + length);
>>> } else {
>>> PyErr_SetString(PyExc_IndexError, "string index out of range");
>>> return (Py_UCS4)-1;
>>> }
>>> } else {
>>> return __Pyx_PyUnicode_READ_CHAR(ustring, i);
>>> }
>>> }
>>
>> I think you could even pass in two flags, one for wraparound and one for
>> boundscheck, and then just evaluate them appropriately in the existing "if"
>> tests above. That should allow both features to be supported independently
>> in a fast way.
>>
> Interesting, can C compilers in optimization mode eliminate unused
> evaluation paths in nested if statements with constant conditional
> expressions?

They'd be worthless if they didn't do that. (Even Cython does it, BTW.)

Stefan



Re: [Cython] About IndexNode and unicode[index]

2013-02-28 Thread ZS
2013/3/1 Stefan Behnel :
> ZS, 28.02.2013 21:07:
>> 2013/2/28 Stefan Behnel:
 This allows writing unicode text parsing code almost at C speed,
 mostly in Python (+ .pxd definitions).
>>>
>>> I suggest simply adding a constant flag argument to the existing function
>>> that states if checking should be done or not. Inlining will let the C
>>> compiler drop the corresponding code, which may or may not make it a little
>>> faster.
>>
>> static inline Py_UCS4 unicode_char2(PyObject* ustring, Py_ssize_t i, int 
>> flag) {
>> Py_ssize_t length;
>> #if CYTHON_PEP393_ENABLED
>> if (PyUnicode_READY(ustring) < 0) return (Py_UCS4)-1;
>> #endif
>> if (flag) {
>> length = __Pyx_PyUnicode_GET_LENGTH(ustring);
>> if ((0 <= i) & (i < length)) {
>> return __Pyx_PyUnicode_READ_CHAR(ustring, i);
>> } else if ((-length <= i) & (i < 0)) {
>> return __Pyx_PyUnicode_READ_CHAR(ustring, i + length);
>> } else {
>> PyErr_SetString(PyExc_IndexError, "string index out of range");
>> return (Py_UCS4)-1;
>> }
>> } else {
>> return __Pyx_PyUnicode_READ_CHAR(ustring, i);
>> }
>> }
>
> I think you could even pass in two flags, one for wraparound and one for
> boundscheck, and then just evaluate them appropriately in the existing "if"
> tests above. That should allow both features to be supported independently
> in a fast way.
>
Interesting, can C compilers in optimization mode eliminate unused
evaluation paths in nested if statements with constant conditional
expressions?


Re: [Cython] About IndexNode and unicode[index]

2013-02-28 Thread ZS
2013/3/1 Stefan Behnel :
> ZS, 28.02.2013 21:07:
>> 2013/2/28 Stefan Behnel:
 This allows writing unicode text parsing code almost at C speed,
 mostly in Python (+ .pxd definitions).
>>>
>>> I suggest simply adding a constant flag argument to the existing function
>>> that states if checking should be done or not. Inlining will let the C
>>> compiler drop the corresponding code, which may or may not make it a little
>>> faster.
>>
>> static inline Py_UCS4 unicode_char2(PyObject* ustring, Py_ssize_t i, int 
>> flag) {
>> Py_ssize_t length;
>> #if CYTHON_PEP393_ENABLED
>> if (PyUnicode_READY(ustring) < 0) return (Py_UCS4)-1;
>> #endif
>> if (flag) {
>> length = __Pyx_PyUnicode_GET_LENGTH(ustring);
>> if ((0 <= i) & (i < length)) {
>> return __Pyx_PyUnicode_READ_CHAR(ustring, i);
>> } else if ((-length <= i) & (i < 0)) {
>> return __Pyx_PyUnicode_READ_CHAR(ustring, i + length);
>> } else {
>> PyErr_SetString(PyExc_IndexError, "string index out of range");
>> return (Py_UCS4)-1;
>> }
>> } else {
>> return __Pyx_PyUnicode_READ_CHAR(ustring, i);
>> }
>> }
>
> I think you could even pass in two flags, one for wraparound and one for
> boundscheck, and then just evaluate them appropriately in the existing "if"
> tests above. That should allow both features to be supported independently
> in a fast way.
>
>
>> Here are timings:
>>
>> (py33) zbook:mytests $ python3.3 -m timeit -n 50 -r 5 -s "from
>> mytests.unicode_index import test_1" "test_1()"
>> 50 loops, best of 5: 152 msec per loop
>> (py33) zbook:mytests $ python3.3 -m timeit -n 50 -r 5 -s "from
>> mytests.unicode_index import test_2" "test_2()"
>> 50 loops, best of 5: 86.5 msec per loop
>> (py33) zbook:mytests $ python3.3 -m timeit -n 50 -r 5 -s "from
>> mytests.unicode_index import test_3" "test_3()"
>> 50 loops, best of 5: 86.5 msec per loop
>>
>> So your suggestion would be preferable.
>
> Nice. Yes, looks like it's worth it.
>

Sure, the same could be applied to unicode slicing too.

Zaur Shibzukhov


Re: [Cython] About IndexNode and unicode[index]

2013-02-28 Thread Stefan Behnel
ZS, 28.02.2013 21:07:
> 2013/2/28 Stefan Behnel:
>>> This allows writing unicode text parsing code almost at C speed,
>>> mostly in Python (+ .pxd definitions).
>>
>> I suggest simply adding a constant flag argument to the existing function
>> that states if checking should be done or not. Inlining will let the C
>> compiler drop the corresponding code, which may or may not make it a little
>> faster.
> 
> static inline Py_UCS4 unicode_char2(PyObject* ustring, Py_ssize_t i, int 
> flag) {
> Py_ssize_t length;
> #if CYTHON_PEP393_ENABLED
> if (PyUnicode_READY(ustring) < 0) return (Py_UCS4)-1;
> #endif
> if (flag) {
> length = __Pyx_PyUnicode_GET_LENGTH(ustring);
> if ((0 <= i) & (i < length)) {
> return __Pyx_PyUnicode_READ_CHAR(ustring, i);
> } else if ((-length <= i) & (i < 0)) {
> return __Pyx_PyUnicode_READ_CHAR(ustring, i + length);
> } else {
> PyErr_SetString(PyExc_IndexError, "string index out of range");
> return (Py_UCS4)-1;
> }
> } else {
> return __Pyx_PyUnicode_READ_CHAR(ustring, i);
> }
> }

I think you could even pass in two flags, one for wraparound and one for
boundscheck, and then just evaluate them appropriately in the existing "if"
tests above. That should allow both features to be supported independently
in a fast way.
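
For concreteness, one possible shape of such a two-flag helper (just a
sketch; the name unicode_char3 is made up, and it assumes the same
CYTHON_PEP393_ENABLED / __Pyx_* helpers as the code quoted above). With
constant flags, each branch folds away at compile time:

static inline Py_UCS4 unicode_char3(PyObject* ustring, Py_ssize_t i,
                                    int wraparound, int boundscheck) {
    Py_ssize_t length;
#if CYTHON_PEP393_ENABLED
    if (PyUnicode_READY(ustring) < 0) return (Py_UCS4)-1;
#endif
    if (wraparound | boundscheck) {
        length = __Pyx_PyUnicode_GET_LENGTH(ustring);
        if (wraparound && (i < 0))           /* only kept if wraparound != 0 */
            i += length;
        if (boundscheck && ((i < 0) | (i >= length))) {
            PyErr_SetString(PyExc_IndexError, "string index out of range");
            return (Py_UCS4)-1;              /* only kept if boundscheck != 0 */
        }
    }
    return __Pyx_PyUnicode_READ_CHAR(ustring, i);
}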


> Here are timings:
> 
> (py33) zbook:mytests $ python3.3 -m timeit -n 50 -r 5 -s "from
> mytests.unicode_index import test_1" "test_1()"
> 50 loops, best of 5: 152 msec per loop
> (py33) zbook:mytests $ python3.3 -m timeit -n 50 -r 5 -s "from
> mytests.unicode_index import test_2" "test_2()"
> 50 loops, best of 5: 86.5 msec per loop
> (py33) zbook:mytests $ python3.3 -m timeit -n 50 -r 5 -s "from
> mytests.unicode_index import test_3" "test_3()"
> 50 loops, best of 5: 86.5 msec per loop
> 
> So your suggestion would be preferable.

Nice. Yes, looks like it's worth it.

Stefan



Re: [Cython] About IndexNode and unicode[index]

2013-02-28 Thread ZS
2013/2/28 Stefan Behnel :
>> This allows writing unicode text parsing code almost at C speed,
>> mostly in Python (+ .pxd definitions).
>
> I suggest simply adding a constant flag argument to the existing function
> that states if checking should be done or not. Inlining will let the C
> compiler drop the corresponding code, which may or may not make it a little
> faster.
It would be great.

To be sure, I changed the tests:

unicode_index.h
---

#include "unicodeobject.h"

static inline Py_UCS4 unicode_char(PyObject* ustring, Py_ssize_t i);

static inline Py_UCS4 unicode_char(PyObject* ustring, Py_ssize_t i) {
#if CYTHON_PEP393_ENABLED
    if (PyUnicode_READY(ustring) < 0) return (Py_UCS4)-1;
#endif
    return __Pyx_PyUnicode_READ_CHAR(ustring, i);
}

static inline Py_UCS4 unicode_char2(PyObject* ustring, Py_ssize_t i, int flag);

static inline Py_UCS4 unicode_char2(PyObject* ustring, Py_ssize_t i, int flag) {
    Py_ssize_t length;
#if CYTHON_PEP393_ENABLED
    if (PyUnicode_READY(ustring) < 0) return (Py_UCS4)-1;
#endif
    if (flag) {
        length = __Pyx_PyUnicode_GET_LENGTH(ustring);
        if ((0 <= i) & (i < length)) {
            return __Pyx_PyUnicode_READ_CHAR(ustring, i);
        } else if ((-length <= i) & (i < 0)) {
            return __Pyx_PyUnicode_READ_CHAR(ustring, i + length);
        } else {
            PyErr_SetString(PyExc_IndexError, "string index out of range");
            return (Py_UCS4)-1;
        }
    } else {
        return __Pyx_PyUnicode_READ_CHAR(ustring, i);
    }
}

unicode_index.pyx
--

cdef extern from 'unicode_index.h':
    inline Py_UCS4 unicode_char(unicode ustring, int i)
    inline Py_UCS4 unicode_char2(unicode ustring, int i, int flag)

cdef unicode text = u"abcdefghigklmnopqrstuvwxyzabcdefghigklmnopqrstuvwxyz"

cdef long f_1(unicode text):
    cdef int i, j
    cdef int n = len(text)
    cdef Py_UCS4 ch
    cdef long S = 0

    for j in range(100):
        for i in range(n):
            ch = text[i]
            S += ch * j

    return S

cdef long f_2(unicode text):
    cdef int i, j
    cdef int n = len(text)
    cdef Py_UCS4 ch
    cdef long S = 0

    for j in range(100):
        for i in range(n):
            ch = unicode_char(text, i)
            S += ch * j

    return S

cdef long f_3(unicode text):
    cdef int i, j
    cdef int n = len(text)
    cdef Py_UCS4 ch
    cdef long S = 0

    for j in range(100):
        for i in range(n):
            ch = unicode_char2(text, i, 0)
            S += ch * j

    return S

def test_1():
    f_1(text)

def test_2():
    f_2(text)

def test_3():
    f_3(text)

Here are timings:

(py33) zbook:mytests $ python3.3 -m timeit -n 50 -r 5 -s "from
mytests.unicode_index import test_1" "test_1()"
50 loops, best of 5: 152 msec per loop
(py33) zbook:mytests $ python3.3 -m timeit -n 50 -r 5 -s "from
mytests.unicode_index import test_2" "test_2()"
50 loops, best of 5: 86.5 msec per loop
(py33) zbook:mytests $ python3.3 -m timeit -n 50 -r 5 -s "from
mytests.unicode_index import test_3" "test_3()"
50 loops, best of 5: 86.5 msec per loop

So your suggestion would be preferable.


Re: [Cython] About IndexNode and unicode[index]

2013-02-28 Thread Stefan Behnel
ZS, 28.02.2013 19:31:
> 2013/2/28 ZS:
>> Looking into the IndexNode class in ExprNodes.py, I have seen a possibility
>> to add a faster code path for unicode[index], as is done in the
>> `generate_setitem_code` method in the case of lists.
>>
>> These are the files for evaluating the performance difference:
>>
>>  unicode_index.h
>>
>> /* This is a stripped-down version of __Pyx_GetItemInt_Unicode_Fast */
>> #include "unicodeobject.h"
>>
>> static inline Py_UCS4 unicode_char(PyObject* ustring, Py_ssize_t i);
>>
>> static inline Py_UCS4 unicode_char(PyObject* ustring, Py_ssize_t i) {
>> #if CYTHON_PEP393_ENABLED
>> if (PyUnicode_READY(ustring) < 0) return (Py_UCS4)-1;
>> #endif
>> return __Pyx_PyUnicode_READ_CHAR(ustring, i);
>> }

Sure, looks ok.


>> # unicode_index.pyx
>>
>> # coding: utf-8
>>
>> cdef extern from 'unicode_index.h':
>> inline Py_UCS4 unicode_char(unicode ustring, int i)
>>
>> cdef unicode text = u"abcdefghigklmnopqrstuvwxyzabcdefghigklmnopqrstuvwxyz"
>>
>> def f_1(unicode text):
>> cdef int i, j
>> cdef int n = len(text)
>> cdef Py_UCS4 ch
>>
>> for j from 0<=j<=100:

Personally, I find a range() loop much easier to read than this beast.


>> for i from 0<=i<=n-1:
>> ch = text[i]
>>
>> def f_2(unicode text):
>> cdef int i, j
>> cdef int n = len(text)
>> cdef Py_UCS4 ch
>>
>> for j from 0<=j<=100:
>> for i from 0<=i<=n-1:
>> ch = unicode_char(text, i)
>>
>> def test_1():
>> f_1(text)
>>
>> def test_2():
>> f_2(text)
>>
>> Timing results:
>>
>> (py33) zbook:mytests $ python3.3 -m timeit -n 100 -r 10 -s "from
>> mytests.unicode_index import test_1" "test_1()"
>> 100 loops, best of 10: 89 msec per loop
>> (py33) zbook:mytests $ python3.3 -m timeit -n 100 -r 10 -s "from
>> mytests.unicode_index import test_2" "test_2()"
>> 100 loops, best of 10: 46.1 msec per loop

I seriously doubt that this translates to similar results in real-world
code. In the second example above, the C compiler should be able to remove
a lot of code, certainly including the useless character read. Maybe even
the loops, if it can determine that PyUnicode_READY() will always return
the same result. So you're almost certainly not benchmarking what you think
you are.


>> in setup.py globally:
>>
>>"boundscheck": False
>>"wraparound": False
>>"nonecheck": False
>>
> For the sake of clarity I would like to add the following... This
> optimization is for the case when both `boundscheck(False)` and
> `wraparound(False)` are applied. Otherwise the default evaluation path
> (__Pyx_GetItemInt_Unicode) is used.
> 
> This allows writing unicode text parsing code almost at C speed,
> mostly in Python (+ .pxd definitions).

I suggest simply adding a constant flag argument to the existing function
that states if checking should be done or not. Inlining will let the C
compiler drop the corresponding code, which may or may not make it a little
faster.

Stefan



Re: [Cython] Be more forgiving about memoryview strides

2013-02-28 Thread Nathaniel Smith
On Thu, Feb 28, 2013 at 5:50 PM, Robert Bradshaw  wrote:
> On Thu, Feb 28, 2013 at 7:13 AM, Sebastian Berg
>  wrote:
>> Hey,
>>
>> Maybe someone here already saw it (I don't have a track account, or I
>> would just create a ticket), but it would be nice if Cython was more
>> forgiving about contiguous requirements on strides. In the future this
>> would make it easier for numpy to go forward with changing the
>> contiguous flags to be more reasonable for its purpose, and second also
>> to allow old (and maybe for the moment remaining) corner cases in numpy
>> to slip past (as well as possibly the same for other programs...). An
>> example is (see also https://github.com/numpy/numpy/issues/2956 and the
>> PR linked there for more details):
>>
>> def add_one(array):
>> cdef double[::1] a = array
>> a[0] += 1.
>> return array
>>
>> giving:
>>
> add_one(np.ascontiguousarray(np.arange(10.)[::100]))
>> ValueError: Buffer and memoryview are not contiguous in the same
>> dimension.
>>
>> This could easily be changed if MemoryViews check the strides as "can be
>> interpreted as contiguous". That means that if shape[i] == 1, then
>> strides[i] are arbitrary (you can just change them if you like). This is
>> also the case for 0-sized arrays, which are arguably always contiguous,
>> no matter what their strides are!
>
> I was under the impression that the primary value for contiguous is
> that a foo[::1] can be interpreted as a foo*. Letting strides be
> arbitrary completely breaks this, right?

Nope. The natural definition of "C contiguous" is "the array entries
are arranged in memory in the same way they would be if they were a
multidimensional C array" (i.e., what you said.) But it turns out that
this is *not* the definition that numpy and cython use!

The issue is that the above definition is a constraint on the actual
locations of items in memory, i.e., given a shape, it tells you that
for every index,
 (a)  sum(index * strides) == sum(index * cumprod(shape[::-1])[::-1] * itemsize)
Obviously this equality holds if
 (b)  strides == cumprod(shape[::-1])[::-1] * itemsize
(Or for F-contiguity, we have
 (b')  strides == cumprod(shape) * itemsize
)

(a) is the natural definition of "C contiguous". (b) is the definition
of "C contiguous" used by numpy and cython. (b) implies (a). But (a)
does not imply (b), i.e., there are arrays that are C-contiguous which
numpy and cython think are discontiguous. (Also in numpy there are
some weird cases where numpy accidentally uses the correct definition,
I think, which is the point of Sebastian's example.)

In particular, if shape[i] == 1, then the value of stride[i] really
should be irrelevant to judging contiguity, because the only thing you
can do with strides[i] is multiply it by index[i], and if shape[i] ==
1 then index[i] is always 0. So an array of int8's with shape = (10,
1), strides = (1, 73) is contiguous according to (a), but not
according to (b). Also if shape[i] is 0 for any i, then the entire
contents of the strides array becomes irrelevant to judging
contiguity; all zero-sized arrays are contiguous according to (a), but
not (b).
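
For concreteness, a small sketch of what an "(a)-style" contiguity check
could look like (a hypothetical helper, not numpy's or Cython's actual
code): strides are never consulted for axes of length 0 or 1, and a
zero-sized array passes unconditionally.

#include <stddef.h>

/* Returns 1 if an array with the given shape/strides (strides in bytes)
   can be treated as C-contiguous in the sense of definition (a) above. */
static int can_be_c_contiguous(int ndim, const ptrdiff_t* shape,
                               const ptrdiff_t* strides, ptrdiff_t itemsize)
{
    ptrdiff_t expected = itemsize;
    for (int i = ndim - 1; i >= 0; --i) {
        if (shape[i] == 0)
            return 1;                 /* empty array: trivially contiguous */
        if (shape[i] != 1) {          /* strides of length-1 axes are irrelevant */
            if (strides[i] != expected)
                return 0;
            expected *= shape[i];
        }
    }
    return 1;
}

With this check, the int8 example above — shape = (10, 1), strides = (1, 73),
itemsize 1 — is accepted, while the strict (b)-style comparison rejects it.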

(This is really annoying for numpy because given, say, a column vector
with shape (n, 1), it is impossible to be both C- and F-contiguous
according to the (b)-style definition. But people expect
various operations to preserve C versus F contiguity, so there are
heuristics in numpy that try to guess whether various result arrays
should pretend to be C- or F-contiguous, and we don't even have a
consistent idea of what it would mean for this code to be working
correctly, never mind test it and keep it working. OTOH if we just fix
numpy to use the (a) definition, then it turns out a bunch of
third-party code breaks, like, for example, cython.)

-n


Re: [Cython] Class methods returning C++ class references are not dealt with correctly?

2013-02-28 Thread Bradley M. Froehle
Hey Yury:

Yes, you are right.  I was thinking this was a function and not a method.
 As an even ickier workaround:

#define TokenStack_top_p(token_stack)  &token_stack->top()

cdef extern from "":
Token* TokenStack_top_p(TokenStack*) except +

cdef Token* tok = TokenStack_top_p(self.pEngine.OStack)

-Brad


On Thu, Feb 28, 2013 at 10:38 AM, Yury V. Zaytsev  wrote:

> Hi Brad,
>
> On Thu, 2013-02-28 at 08:01 -0800, Bradley M. Froehle wrote:
> >
> > cdef extern from "test.h":
> > int* foo2ptr "&foo" ()
> >
> > cdef int *x = foo2ptr()
>
> Thank you for this interesting suggestion, but I must be missing
> something, because when I do the following:
>
> cdef extern from "tokenstack.h":
> cppclass TokenStack:
> Token* top "Token&" () except +
>
> cdef Token* tok = self.pEngine.OStack.top()
>
> I end up with the following generated code, which, of course, doesn't
> compile:
>
> Token *__pyx_t_5;
> __pyx_t_5 = __pyx_v_self->pEngine->OStack.Token&();
>
> whereas, I'd like to see generated this:
>
> Token *__pyx_t_5;
> __pyx_t_5 = __pyx_v_self->pEngine->OStack->top();
>
> Any ideas?
>
> --
> Sincerely yours,
> Yury V. Zaytsev
>
>


Re: [Cython] About IndexNode and unicode[index]

2013-02-28 Thread ZS
2013/2/28 ZS :
> Looking into the IndexNode class in ExprNodes.py, I have seen a possibility
> to add a faster code path for unicode[index], as is done in the
> `generate_setitem_code` method in the case of lists.
>
> These are the files for evaluating the performance difference:
>
>  unicode_index.h
>
> /* This is a stripped-down version of __Pyx_GetItemInt_Unicode_Fast */
> #include "unicodeobject.h"
>
> static inline Py_UCS4 unicode_char(PyObject* ustring, Py_ssize_t i);
>
> static inline Py_UCS4 unicode_char(PyObject* ustring, Py_ssize_t i) {
> #if CYTHON_PEP393_ENABLED
> if (PyUnicode_READY(ustring) < 0) return (Py_UCS4)-1;
> #endif
> return __Pyx_PyUnicode_READ_CHAR(ustring, i);
> }
>
> # unicode_index.pyx
>
> # coding: utf-8
>
> cdef extern from 'unicode_index.h':
> inline Py_UCS4 unicode_char(unicode ustring, int i)
>
> cdef unicode text = u"abcdefghigklmnopqrstuvwxyzabcdefghigklmnopqrstuvwxyz"
>
> def f_1(unicode text):
> cdef int i, j
> cdef int n = len(text)
> cdef Py_UCS4 ch
>
> for j from 0<=j<=100:
> for i from 0<=i<=n-1:
> ch = text[i]
>
> def f_2(unicode text):
> cdef int i, j
> cdef int n = len(text)
> cdef Py_UCS4 ch
>
> for j from 0<=j<=100:
> for i from 0<=i<=n-1:
> ch = unicode_char(text, i)
>
> def test_1():
> f_1(text)
>
> def test_2():
> f_2(text)
>
> Timing results:
>
> (py33) zbook:mytests $ python3.3 -m timeit -n 100 -r 10 -s "from
> mytests.unicode_index import test_1" "test_1()"
> 100 loops, best of 10: 89 msec per loop
> (py33) zbook:mytests $ python3.3 -m timeit -n 100 -r 10 -s "from
> mytests.unicode_index import test_2" "test_2()"
> 100 loops, best of 10: 46.1 msec per loop
>
> in setup.py globally:
>
>"boundscheck": False
>"wraparound": False
>"nonecheck": False
>
For the sake of clarity I would like to add the following... This
optimization is for the case when both `boundscheck(False)` and
`wraparound(False)` are applied. Otherwise the default evaluation path
(__Pyx_GetItemInt_Unicode) is used.

This allows writing unicode text parsing code almost at C speed,
mostly in Python (+ .pxd definitions).

 Zaur Shibzukhov


Re: [Cython] Class methods returning C++ class references are not dealt with correctly?

2013-02-28 Thread Yury V. Zaytsev
Hi Brad,

On Thu, 2013-02-28 at 08:01 -0800, Bradley M. Froehle wrote:
> 
> cdef extern from "test.h":
> int* foo2ptr "&foo" ()
> 
> cdef int *x = foo2ptr()

Thank you for this interesting suggestion, but I must be missing
something, because when I do the following:

cdef extern from "tokenstack.h":
cppclass TokenStack:
Token* top "Token&" () except +

cdef Token* tok = self.pEngine.OStack.top()

I end up with the following generated code, which, of course, doesn't
compile:

Token *__pyx_t_5;
__pyx_t_5 = __pyx_v_self->pEngine->OStack.Token&();

whereas I'd like to see this generated instead:

Token *__pyx_t_5;
__pyx_t_5 = __pyx_v_self->pEngine->OStack->top();

Any ideas?

-- 
Sincerely yours,
Yury V. Zaytsev




Re: [Cython] Be more forgiving about memoryview strides

2013-02-28 Thread Robert Bradshaw
On Thu, Feb 28, 2013 at 7:13 AM, Sebastian Berg
 wrote:
> Hey,
>
> Maybe someone here already saw it (I don't have a track account, or I
> would just create a ticket), but it would be nice if Cython was more
> forgiving about contiguous requirements on strides. In the future this
> would make it easier for numpy to go forward with changing the
> contiguous flags to be more reasonable for its purpose, and second also
> to allow old (and maybe for the moment remaining) corner cases in numpy
> to slip past (as well as possibly the same for other programs...). An
> example is (see also https://github.com/numpy/numpy/issues/2956 and the
> PR linked there for more details):
>
> def add_one(array):
> cdef double[::1] a = array
> a[0] += 1.
> return array
>
> giving:
>
 add_one(np.ascontiguousarray(np.arange(10.)[::100]))
> ValueError: Buffer and memoryview are not contiguous in the same
> dimension.
>
> This could easily be changed if MemoryViews check the strides as "can be
> interpreted as contiguous". That means that if shape[i] == 1, then
> strides[i] are arbitrary (you can just change them if you like). This is
> also the case for 0-sized arrays, which are arguably always contiguous,
> no matter what their strides are!

I was under the impression that the primary value for contiguous is
that a foo[::1] can be interpreted as a foo*. Letting strides be
arbitrary completely breaks this, right?
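
For reference, that flat-pointer property is what a PyBUF_C_CONTIGUOUS
buffer request promises at the C level. A minimal sketch (not Cython's
generated code; a real implementation would also verify view.format):

#include <Python.h>

/* Sums a 1-D C-contiguous buffer of doubles by treating buf as a double*. */
static double sum_contiguous_doubles(PyObject* obj)
{
    Py_buffer view;
    double total = 0.0;
    if (PyObject_GetBuffer(obj, &view, PyBUF_C_CONTIGUOUS | PyBUF_FORMAT) < 0)
        return -1.0;                       /* not contiguous / not a buffer */
    if (view.itemsize == sizeof(double)) {
        const double* data = (const double*)view.buf;   /* flat walk is valid */
        Py_ssize_t n = view.len / (Py_ssize_t)sizeof(double);
        for (Py_ssize_t i = 0; i < n; i++)
            total += data[i];
    }
    PyBuffer_Release(&view);
    return total;
}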

> PS: A similar thing exists with np.ndarray[...] interface if the user
> accesses array.strides. They get the arrays strides not the buffers.
> This is not quite related, but if it would be easy to use the buffer's
> strides in that case, it may make it easier if we want to change the
> flags in numpy in the long term, since one could clean up strides for
> forced contiguous buffer requests.
>


Re: [Cython] Class methods returning C++ class references are not dealt with correctly?

2013-02-28 Thread Bradley M. Froehle
On Thu, Feb 28, 2013 at 4:58 AM, Yury V. Zaytsev  wrote:

> Hi,
>
> I'm sorry if my question would appear to be trivial, but what am I
> supposed to do, if I want to wrap class methods, that return a reference
> to another class?


As a workaround you could use:

cdef extern from "test.h":
int* foo2ptr "&foo" ()

cdef int *x = foo2ptr()

This could be extended to your other example as well.

-Brad


[Cython] Be more forgiving about memoryview strides

2013-02-28 Thread Sebastian Berg
Hey,

Maybe someone here already saw it (I don't have a Trac account, or I
would just create a ticket), but it would be nice if Cython were more
forgiving about contiguity requirements on strides. In the future this
would make it easier for numpy to go forward with changing the
contiguous flags to be more reasonable for its purpose, and second, to
allow old (and maybe for the moment remaining) corner cases in numpy
to slip past (as well as possibly the same for other programs...). An
example is (see also https://github.com/numpy/numpy/issues/2956 and the
PR linked there for more details):

def add_one(array):
    cdef double[::1] a = array
    a[0] += 1.
    return array

giving:

>>> add_one(np.ascontiguousarray(np.arange(10.)[::100]))
ValueError: Buffer and memoryview are not contiguous in the same
dimension.

This could easily be changed if MemoryViews checked the strides as "can be
interpreted as contiguous". That means that if shape[i] == 1, then
strides[i] is arbitrary (you can just change it if you like). This is
also the case for 0-sized arrays, which are arguably always contiguous,
no matter what their strides are!

Regards,

Sebastian

PS: A similar thing exists with np.ndarray[...] interface if the user
accesses array.strides. They get the array's strides, not the buffer's.
This is not quite related, but if it would be easy to use the buffer's
strides in that case, it may make it easier if we want to change the
flags in numpy in the long term, since one could clean up strides for
forced contiguous buffer requests.



Re: [Cython] MemoryViews require writeable arrays?

2013-02-28 Thread Sturla Molden

On 28.02.2013 15:55, Dave Hirschfeld wrote:


So the issue is that at present memoryviews can't be readonly?


https://github.com/cython/cython/blob/master/Cython/Compiler/MemoryView.py#L33

Typed memoryviews are thus acquired with the PyBUF_WRITEABLE flag. If 
the assigned buffer is readonly, the request to acquire the PEP3118
buffer will fail.


If you remove the PyBUF_WRITEABLE flag from lines 33 to 36, you can 
acquire a readonly buffer with typed memoryviews. But this is not 
recommended. In this case you would have to check for the readonly flag 
yourself and make sure you don't write to a readonly buffer.
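
For illustration, this is roughly what "acquire without write access and
check the flag yourself" looks like at the PEP 3118 C-API level (a sketch
only, not the code Cython emits):

#include <Python.h>

/* Acquire a possibly read-only 1-D buffer of doubles and write to it only
   if the exporter says it is writable. Returns 0 on success, -1 on error. */
static int fill_if_writable(PyObject* obj, double value)
{
    Py_buffer view;
    /* PyBUF_RECORDS_RO requests strides + format but no write access, so
       read-only exporters can still be acquired. */
    if (PyObject_GetBuffer(obj, &view, PyBUF_RECORDS_RO) < 0)
        return -1;
    if (!view.readonly && view.ndim == 1 && view.itemsize == sizeof(double)) {
        char* base = (char*)view.buf;
        for (Py_ssize_t i = 0; i < view.shape[0]; i++)
            *(double*)(base + i * view.strides[0]) = value;
    }
    PyBuffer_Release(&view);
    return 0;
}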



Sturla




Re: [Cython] Class methods returning C++ class references are not dealt with correctly?

2013-02-28 Thread Sturla Molden

On 28.02.2013 15:46, Yury V. Zaytsev wrote:


My method call is actually wrapped in a try { ... } catch clause,
because I declared it as being able to throw exceptions, so the
reference can't be defined in this block, or it will not be accessible
to the outside world.


If Cython generates illegal C++ code (i.e. C++ that doesn't compile), it is 
a bug in Cython.


There must be a general error in the handling of C++ references when 
they are declared without a target.



Sturla






Re: [Cython] MemoryViews require writeable arrays?

2013-02-28 Thread Dave Hirschfeld
Sturla Molden  writes:

> 
> On 27.02.2013 20:05, Dave Hirschfeld wrote:
> 
> > Is this a required restriction? Is there any workaround?
> 
> http://www.python.org/dev/peps/pep-3118/
> 
> What you should consider is the "readonly" field in "struct bufferinfo" 
> or the access flag "PyBUF_WRITEABLE".
> 
> In short:
> 
> A PEP3118 buffer can be readonly, and then you shouldn't write to it! 
> When you set the readonly flag, Cython cannot retrieve the buffer with 
> PyBUF_WRITEABLE. Thus, Cython helps you not to shoot yourself in the 
> foot. I don't think you can declare a read-only memoryview in Cython. 
> (Well, not by any means I know of.)
> 
> Sturla
> 
> 

So the issue is that at present memoryviews can't be readonly? Presumably 
because this works for numpy arrays it would be possible to also make readonly 
memoryviews? I think that would certainly be nice to have, but maybe it's a
niche use case. 

Certainly, for IPython.parallel use it's easy enough to write a shim which sets 
the array to writeable with the understanding that changes don't get propagated 
back.

Thanks,
Dave







Re: [Cython] Class methods returning C++ class references are not dealt with correctly?

2013-02-28 Thread Yury V. Zaytsev
On Thu, 2013-02-28 at 15:34 +0100, Sturla Molden wrote:
> 
> This is clearly a bug in Cython. One cannot let a C++ reference
> dangle.

Hi Sturla,

Thanks for the confirmation! I had a closer look at it, and I think I
know why this happens.

My method call is actually wrapped in a try { ... } catch clause,
because I declared it as being able to throw exceptions, so the
reference can't be defined in this block, or it will not be accessible
to the outside world.

Apparently, Cython should rather do something like this instead:

Token *__pyx_v_tok;
Token *__pyx_t_5_p;

try {
    Token &__pyx_t_5 = __pyx_v_self->pEngine->OStack.top();
    __pyx_t_5_p = (&__pyx_t_5);
}
...

__pyx_v_tok = __pyx_t_5_p;

I'm sorry, but I don't think that I can personally help fixing this:
even if I managed to come up with a patch to generate declarations
inside try blocks with my non-existent knowledge of Cython internals,
it is simply not going to work.

I believe that some convention should be established regarding
reference handling, i.e. stating that Cython will generate correct code
to convert them to pointers if such and such syntax is used...

Hopefully, in the meantime, there is some other solution to the problem
that I have overlooked.

Z.

-- 
Sincerely yours,
Yury V. Zaytsev




Re: [Cython] Class methods returning C++ class references are not dealt with correctly?

2013-02-28 Thread Sturla Molden

On 28.02.2013 13:58, Yury V. Zaytsev wrote:

Hi,

I'm sorry if my question would appear to be trivial, but what am I
supposed to do, if I want to wrap class methods, that return a reference
to another class?

 From reading the list, I've gathered that apparently the best strategy
of dealing with references is just to not to use them (convert to
pointers immediately), because of some scoping rules issues.

It works for me for a simple case of POD types, like

 cdef extern from "test.h":
 int& foo()

 cdef int* x = &foo()

but in a more complex case, Cython generates incorrect C++ code (first
it declares a reference, then assigns to it, which, of course, doesn't
even compile):

 cdef extern from "token.h":
 cppclass Token:
 Token(const Datum&) except +

 cdef extern from "tokenstack.h":
 cppclass TokenStack:
 Token& top() except +

 cdef Token* tok = &self.pEngine.OStack.top()

<->

 Token *__pyx_v_tok;
 Token &__pyx_t_5;
 __pyx_t_5 = __pyx_v_self->pEngine->OStack.top();
 __pyx_v_tok = (&__pyx_t_5);



This is clearly a bug in Cython. The generated code should be:

  Token *__pyx_v_tok;
  Token &__pyx_t_5 = __pyx_v_self->pEngine->OStack.top();
  __pyx_v_tok = (&__pyx_t_5);


One cannot let a C++ reference dangle:

  Token &__pyx_t_5;  // illegal C++



Sturla















Re: [Cython] MemoryViews require writeable arrays?

2013-02-28 Thread Sturla Molden

On 27.02.2013 20:05, Dave Hirschfeld wrote:


Is this a required restriction? Is there any workaround?




http://www.python.org/dev/peps/pep-3118/

What you should consider is the "readonly" field in "struct bufferinfo" 
or the access flag "PyBUF_WRITEABLE".


In short:

A PEP3118 buffer can be readonly, and then you shouldn't write to it! 
When you set the readonly flag, Cython cannot retrieve the buffer with 
PyBUF_WRITEABLE. Thus, Cython helps you not to shoot yourself in the 
foot. I don't think you can declare a read-only memoryview in Cython. 
(Well, not by any means I know of.)



Sturla





[Cython] Class methods returning C++ class references are not dealt with correctly?

2013-02-28 Thread Yury V. Zaytsev
Hi,

I'm sorry if my question appears to be trivial, but what am I
supposed to do if I want to wrap class methods that return a reference
to another class?

From reading the list, I've gathered that apparently the best strategy
for dealing with references is just not to use them (convert to
pointers immediately), because of some scoping rule issues.

It works for me for a simple case of POD types, like

cdef extern from "test.h":
int& foo()

cdef int* x = &foo()

but in a more complex case, Cython generates incorrect C++ code (first
it declares a reference, then assigns to it, which, of course, doesn't
even compile):

cdef extern from "token.h":
cppclass Token:
Token(const Datum&) except +

cdef extern from "tokenstack.h":
cppclass TokenStack:
Token& top() except +

cdef Token* tok = &self.pEngine.OStack.top()

<->

Token *__pyx_v_tok;
Token &__pyx_t_5;
__pyx_t_5 = __pyx_v_self->pEngine->OStack.top();
__pyx_v_tok = (&__pyx_t_5);

I would expect to see this instead:

Token *__pyx_v_tok = &__pyx_v_self->pEngine->OStack.top();

Am I doing something wrong? Is there any other way to achieve what I
want, other than writing custom C macros?

Thanks,

-- 
Sincerely yours,
Yury V. Zaytsev




[Cython] MemoryView Casting slow compared to ndarray buffer syntax

2013-02-28 Thread Dave Hirschfeld
%%cython
cimport cython

import numpy as np
cimport numpy as np

ctypedef np.float64_t float64_t

@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
def echo_numpy(np.ndarray[float64_t, ndim=1] x):
    return x

@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
def echo_memview(double[:] x):
    return np.asarray(x)

@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
def echo_memview_nocast(double[:] x):
    return x


In [19]: %timeit echo_memview(x)
...: %timeit echo_memview_nocast(x)
...: %timeit echo_numpy(x)
1 loops, best of 3: 38.1 µs per loop
10 loops, best of 3: 5.58 µs per loop
100 loops, best of 3: 749 ns per loop

In [20]: 38.1e-6/749e-9
Out[20]: 50.86782376502002

In [21]: 5.58e-6/749e-9
Out[21]: 7.449933244325767

So it seems that the MemoryView is 50x slower than using the ndarray buffer
syntax and even 7.5x slower without casting to an array.

Is there anything that can be done about this or is it just something to be
aware of and use each of them in the situations where they perform best?

Thanks,
Dave



Re: [Cython] Non-deterministic behaviour?

2013-02-28 Thread Dave Hirschfeld
Dave Hirschfeld  writes:

> 
> Dave Hirschfeld  writes:
> 
> > 
> > Using the following test code:
> 
> > 
> > So, it seems either typing the array as a memview or printing res
> > will screw up the calculation.
> > 
> > The cython code is given below. Any ideas if this is a cython bug or 
something 
> > I'm doing wrong?
> > 
> > Thanks,
> > Dave
> > 
> 
> To answer my own question, it can't be that a simple print statement will
> change the program so I must be doing something wrong! It makes it hard
> to track down when it gives the right answer most of the time and segfaults
> randomly when nothing seems to have changed. I'm sure it's just incorrect
> arguments to dgelsy so I'll look into that...
> 
> -Dave
> 
> 

And for those following, the obvious error was in using the double `worksize`
instead of the array `work` of size n in the 2nd call to DGELSY.

DGELSY(&m, &n, &nrhs, &A[0,0], &lda, &res[0], &ldb, &jpvt[0], &rcond, &rank, 
&worksize, &lwork, &info)
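
For anyone hitting the same thing, the usual LAPACK pattern is a workspace
query with lwork = -1, where the scalar only receives the optimal size and
the allocated work array is passed to the real call. A rough sketch of the
calling sequence in C (assuming the conventional Fortran symbol name
dgelsy_, plain int for LAPACK integers, and column-major data):

#include <stdlib.h>

extern void dgelsy_(int* m, int* n, int* nrhs, double* a, int* lda,
                    double* b, int* ldb, int* jpvt, double* rcond, int* rank,
                    double* work, int* lwork, int* info);

static int solve_lsq(int m, int n, int nrhs, double* a, int lda,
                     double* b, int ldb, int* jpvt, double rcond, int* rank)
{
    int info = 0, lwork = -1;
    double worksize = 0.0;

    /* 1st call: lwork == -1 means "workspace query"; the optimal size is
       returned in worksize, nothing else is computed. */
    dgelsy_(&m, &n, &nrhs, a, &lda, b, &ldb, jpvt, &rcond, rank,
            &worksize, &lwork, &info);
    if (info != 0) return info;

    lwork = (int)worksize;
    double* work = (double*)malloc((size_t)lwork * sizeof(double));
    if (!work) return -1;

    /* 2nd call: pass the allocated work array, not &worksize. */
    dgelsy_(&m, &n, &nrhs, a, &lda, b, &ldb, jpvt, &rcond, rank,
            work, &lwork, &info);
    free(work);
    return info;
}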

Sorry for the noise.

-Dave






[Cython] About IndexNode and unicode[index]

2013-02-28 Thread ZS
Looking into the IndexNode class in ExprNodes.py, I have seen a possibility
to add a faster code path for unicode[index], as is done in the
`generate_setitem_code` method in the case of lists.

These are the files for evaluating the performance difference:

 unicode_index.h

/* This is a stripped-down version of __Pyx_GetItemInt_Unicode_Fast */
#include "unicodeobject.h"

static inline Py_UCS4 unicode_char(PyObject* ustring, Py_ssize_t i);

static inline Py_UCS4 unicode_char(PyObject* ustring, Py_ssize_t i) {
#if CYTHON_PEP393_ENABLED
    if (PyUnicode_READY(ustring) < 0) return (Py_UCS4)-1;
#endif
    return __Pyx_PyUnicode_READ_CHAR(ustring, i);
}

# unicode_index.pyx

# coding: utf-8

cdef extern from 'unicode_index.h':
    inline Py_UCS4 unicode_char(unicode ustring, int i)

cdef unicode text = u"abcdefghigklmnopqrstuvwxyzabcdefghigklmnopqrstuvwxyz"

def f_1(unicode text):
    cdef int i, j
    cdef int n = len(text)
    cdef Py_UCS4 ch

    for j from 0<=j<=100:
        for i from 0<=i<=n-1:
            ch = text[i]

def f_2(unicode text):
    cdef int i, j
    cdef int n = len(text)
    cdef Py_UCS4 ch

    for j from 0<=j<=100:
        for i from 0<=i<=n-1:
            ch = unicode_char(text, i)

def test_1():
    f_1(text)

def test_2():
    f_2(text)

Timing results:

(py33) zbook:mytests $ python3.3 -m timeit -n 100 -r 10 -s "from
mytests.unicode_index import test_1" "test_1()"
100 loops, best of 10: 89 msec per loop
(py33) zbook:mytests $ python3.3 -m timeit -n 100 -r 10 -s "from
mytests.unicode_index import test_2" "test_2()"
100 loops, best of 10: 46.1 msec per loop

in setup.py globally:

   "boundscheck": False
   "wraparound": False
   "nonecheck": False

Zaur Shibzukhov


Re: [Cython] Non-deterministic behaviour?

2013-02-28 Thread Dave Hirschfeld
Dave Hirschfeld  writes:

> 
> Using the following test code:

> 
> So, it seems either typing the array as a memview or printing res
> will screw up the calculation.
> 
> The cython code is given below. Any ideas if this is a cython bug or 
> something 
> I'm doing wrong?
> 
> Thanks,
> Dave
> 

To answer my own question, it can't be that a simple print statement will
change the program so I must be doing something wrong! It makes it hard
to track down when it gives the right answer most of the time and segfaults
randomly when nothing seems to have changed. I'm sure it's just incorrect
arguments to dgelsy so I'll look into that...

-Dave


