On Fri, Mar 14, 2008 at 02:53:17PM -0500, Travis E. Oliphant wrote: > Zbyszek Szmek wrote: > > On Thu, Mar 13, 2008 at 05:44:54PM -0400, Alan G Isaac wrote: > >> In principle you should be able to use ``fromiter``, > >> I believe, but it does not work. BUG? (Crasher.) > >> > >>>>> import numpy as N > >>>>> x = [1,2,3] > >>>>> fmt="%03d" > >>>>> N.fromiter([xi for xi in x],dtype='S') > >>>>> > >> Python crashes. > > > > 2. what does dtype with dtype.elsize==0 mean? Should it be allowed at all? > > If it is sometimes valid, then PyArray_FromIter should be fixed. > > It is a bug that needs to be fixed in PyArray_FromIter, I think. >
Upon deeper review, this function has more problems. 1. The bug above: PyArray_NewFromDescr sometimes returns an array with dtype different then the one specified. E.g.: >>> dtype('S'); empty(300, dtype('S')).dtype dtype('|S0') dtype('|S1') so the element size should be taken from the created array, not from specified dtype. 2. The check for overflow is incorrect, when elsize==1 the function always returns a MemoryError. 3. From the docstring: 'If count is nonegative, the new array will have count elements, otherwise it's size is determined by the generator.' However, later we test for count == -1. I think it is simplifies things to split out the overflow tests and resizing into a helper function. multiarraymodule.c | 74 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 45 insertions(+), 29 deletions(-) ------------------------------------------------------------------------- --- numpy/core/src/multiarraymodule.c_unmodified 2008-03-28 13:46:38.000000000 +0100 +++ numpy/core/src/multiarraymodule.c 2008-03-28 18:26:51.000000000 +0100 @@ -6301,6 +6301,38 @@ return ret; } +/* + Grow ret->data: + this is similar for the strategy for PyListObject, but we use + 50% overallocation => 0, 4, 8, 16, ... + + Returns 1 on success, 0 on error. +*/ +static int increase_array_size(PyArrayObject* ar, intp* elcount) +{ + char *new_data; + intp elsize = ar->strides[0]; + + /* size_t is unsigned so the behavior on overflow is defined. */ + size_t bufsize; + size_t half = ar->dimensions[0] ? 2 * (size_t)ar->dimensions[0] : 2; + *elcount = half * 2; + bufsize = *elcount * elsize; + + if( *elcount/2 != half || bufsize/elsize != *elcount) + goto error; + + new_data = PyDataMem_RENEW(ar->data, bufsize); + if(!new_data) + goto error; + + ar->data = new_data; + return 1; + +error: + PyErr_SetString(PyExc_MemoryError, "cannot allocate array memory"); + return 0; +} /* steals a reference to dtype (which cannot be NULL) */ /*OBJECT_API */ @@ -6310,14 +6342,12 @@ PyObject *value; PyObject *iter = PyObject_GetIter(obj); PyArrayObject *ret = NULL; - intp i, elsize, elcount; + intp i; + intp elcount = (count < 0) ? 0 : count; char *item, *new_data; if (iter == NULL) goto done; - elcount = (count < 0) ? 0 : count; - elsize = dtype->elsize; - /* We would need to alter the memory RENEW code to decrement any reference counts before throwing away any memory. */ @@ -6329,31 +6359,17 @@ ret = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type, dtype, 1, &elcount, NULL,NULL, 0, NULL); - dtype = NULL; + dtype = NULL; /* dtype is always eaten by PA_NewFromDescr */ if (ret == NULL) goto done; - for (i = 0; (i < count || count == -1) && + for (i = 0; (i < count || count < 0) && (value = PyIter_Next(iter)); i++) { - if (i >= elcount) { - /* - Grow ret->data: - this is similar for the strategy for PyListObject, but we use - 50% overallocation => 0, 4, 8, 14, 23, 36, 56, 86 ... - */ - elcount = (i >> 1) + (i < 4 ? 4 : 2) + i; - if (elcount <= (intp)((~(size_t)0) / elsize)) - new_data = PyDataMem_RENEW(ret->data, elcount * elsize); - else - new_data = NULL; - if (new_data == NULL) { - PyErr_SetString(PyExc_MemoryError, - "cannot allocate array memory"); - Py_DECREF(value); - goto done; - } - ret->data = new_data; + if (i == elcount && !increase_array_size(ret, &elcount)){ + Py_DECREF(value); + goto done; } + ret->dimensions[0] = i+1; if (((item = index2ptr(ret, i)) == NULL) || @@ -6373,13 +6389,13 @@ /* Realloc the data so that don't keep extra memory tied up (assuming realloc is reasonably good about reusing space...) + + If the reallocation fails, it is not a fatal error. */ if (i==0) i = 1; - ret->data = PyDataMem_RENEW(ret->data, i * elsize); - if (ret->data == NULL) { - PyErr_SetString(PyExc_MemoryError, "cannot allocate array memory"); - goto done; - } + new_data = PyDataMem_RENEW(ret->data, i * ret->strides[0]); + if (new_data != NULL) + ret->data = new_data; done: Py_XDECREF(iter); ------------------------------------------------------------------------- And the doctest: >>> import numpy >>> x = [1,2,3] >>> print numpy.fromiter(x, dtype='d') [ 1. 2. 3.] >>> print numpy.fromiter(x, dtype='S1') ['1' '2' '3'] >>> print numpy.fromiter((i * 100 for i in x), dtype='S2') ['10' '20' '30'] >>> print numpy.fromiter(x, dtype='S1', count=-1) ['1' '2' '3'] >>> print numpy.fromiter(x, dtype='S1', count=-2) ['1' '2' '3'] >>> print numpy.fromiter(x, dtype='S1', count=3) ['1' '2' '3'] >>> print numpy.fromiter(x, dtype='S1', count=4) Traceback (most recent call last): File "doctest.py", line 1248, in __run compileflags, 1) in test.globs File "<doctest test_fromiter.py[9]>", line 1, in ? print numpy.fromiter(x, dtype='S1', count=4) ValueError: iterator too short >>> print numpy.fromiter(range(3000000), dtype='S2') # doctest: +ELLIPSIS ['0' ... '29'] >>> print numpy.fromiter(x, dtype='S') ['1' '2' '3'] Does this look OK? Cheers, Zbyszek
--- numpy/core/src/multiarraymodule.c_unmodified 2008-03-28 13:46:38.000000000 +0100 +++ numpy/core/src/multiarraymodule.c 2008-03-28 18:26:51.000000000 +0100 @@ -6301,6 +6301,38 @@ return ret; } +/* + Grow ret->data: + this is similar for the strategy for PyListObject, but we use + 50% overallocation => 0, 4, 8, 16, ... + + Returns 1 on success, 0 on error. +*/ +static int increase_array_size(PyArrayObject* ar, intp* elcount) +{ + char *new_data; + intp elsize = ar->strides[0]; + + /* size_t is unsigned so the behavior on overflow is defined. */ + size_t bufsize; + size_t half = ar->dimensions[0] ? 2 * (size_t)ar->dimensions[0] : 2; + *elcount = half * 2; + bufsize = *elcount * elsize; + + if( *elcount/2 != half || bufsize/elsize != *elcount) + goto error; + + new_data = PyDataMem_RENEW(ar->data, bufsize); + if(!new_data) + goto error; + + ar->data = new_data; + return 1; + +error: + PyErr_SetString(PyExc_MemoryError, "cannot allocate array memory"); + return 0; +} /* steals a reference to dtype (which cannot be NULL) */ /*OBJECT_API */ @@ -6310,14 +6342,12 @@ PyObject *value; PyObject *iter = PyObject_GetIter(obj); PyArrayObject *ret = NULL; - intp i, elsize, elcount; + intp i; + intp elcount = (count < 0) ? 0 : count; char *item, *new_data; if (iter == NULL) goto done; - elcount = (count < 0) ? 0 : count; - elsize = dtype->elsize; - /* We would need to alter the memory RENEW code to decrement any reference counts before throwing away any memory. */ @@ -6329,31 +6359,17 @@ ret = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type, dtype, 1, &elcount, NULL,NULL, 0, NULL); - dtype = NULL; + dtype = NULL; /* dtype is always eaten by PA_NewFromDescr */ if (ret == NULL) goto done; - for (i = 0; (i < count || count == -1) && + for (i = 0; (i < count || count < 0) && (value = PyIter_Next(iter)); i++) { - if (i >= elcount) { - /* - Grow ret->data: - this is similar for the strategy for PyListObject, but we use - 50% overallocation => 0, 4, 8, 14, 23, 36, 56, 86 ... - */ - elcount = (i >> 1) + (i < 4 ? 4 : 2) + i; - if (elcount <= (intp)((~(size_t)0) / elsize)) - new_data = PyDataMem_RENEW(ret->data, elcount * elsize); - else - new_data = NULL; - if (new_data == NULL) { - PyErr_SetString(PyExc_MemoryError, - "cannot allocate array memory"); - Py_DECREF(value); - goto done; - } - ret->data = new_data; + if (i == elcount && !increase_array_size(ret, &elcount)){ + Py_DECREF(value); + goto done; } + ret->dimensions[0] = i+1; if (((item = index2ptr(ret, i)) == NULL) || @@ -6373,13 +6389,13 @@ /* Realloc the data so that don't keep extra memory tied up (assuming realloc is reasonably good about reusing space...) + + If the reallocation fails, it is not a fatal error. */ if (i==0) i = 1; - ret->data = PyDataMem_RENEW(ret->data, i * elsize); - if (ret->data == NULL) { - PyErr_SetString(PyExc_MemoryError, "cannot allocate array memory"); - goto done; - } + new_data = PyDataMem_RENEW(ret->data, i * ret->strides[0]); + if (new_data != NULL) + ret->data = new_data; done: Py_XDECREF(iter);
_______________________________________________ Numpy-discussion mailing list Numpy-discussion@scipy.org http://projects.scipy.org/mailman/listinfo/numpy-discussion