Den 11.06.2010 00:57, skrev David Cournapeau:
Do you have the code for this ? That's something I wanted to do, but
never took the time to do. Faster generic iterator would be nice, but
very hard to do in general.



/* this computes the start adress for every vector along a dimension
(axis) of an ndarray */

typedef PyArrayObject ndarray;

inline char *datapointer(ndarray *a, npy_intp *indices)
{
    char *data = a->data;
    int i;
    npy_intp j;
    for (i=0; i < a->nd; i++) {
        j = indices[i];
        data += j * a->strides[i];
    }
    return data;
}

static double ** _get_axis_pointer(npy_intp *indices, int axis,
                           ndarray *a, double **axis_ptr_array, int dim)
{
    /* recursive axis pointer search for 4 dimensions or more */
    npy_intp i;
    double *axis_ptr;
    if (dim == a->nd) {
        /* done recursing dimensions,
            compute address of this axis */
        axis_ptr = (double *) datapointer(a, indices);
        *axis_ptr_array = axis_ptr;
        return (axis_ptr_array + 1);
    } else if (dim == axis) {
        /* use first element only */
        indices[dim] = 0;
axis_ptr_array = _get_axis_pointer(indices, axis, a, axis_ptr_array, dim+1);
        return axis_ptr_array;
    } else {
        /* iterate range of indices */
        npy_intp length = a->dimensions[dim];
        for (i=0; i < length; i++) {
            indices[dim] = i;
axis_ptr_array = _get_axis_pointer(indices, axis, a, axis_ptr_array, dim+1);
        }
        return axis_ptr_array;
    }
}


static double **get_axis_pointers(ndarray *a, int axis, npy_intp *count)
{
    npy_intp indices[NPY_MAXDIMS];
    double **out, **tmp;
    npy_intp i, j;
    j = 1;
    for (i=0; i < a->nd; i++) {
        if (i != axis)
            j *= a->dimensions[i];
    }
    *count = j;

    out = (double **) malloc(*count * sizeof(double*));
    if (out == NULL) {
        *count = 0;
        return NULL;
    }
    tmp = out;
    switch (a->nd) {
        /* for one dimension we just return the data pointer */
        case 1:
            *tmp = (double *)a->data;
            break;
        /* specialized for two dimensions */
        case 2:
            switch (axis) {
                case 0:
                    for (i=0; i<a->dimensions[1]; i++)
                        *tmp++ = (double *)(a->data + i*a->strides[1]);
                    break;

                case 1:
                    for (i=0; i<a->dimensions[0]; i++)
                        *tmp++ = (double *)(a->data + i*a->strides[0]);
                    break;
            }
            break;
        /* specialized for three dimensions */
        case 3:
            switch (axis) {
                case 0:
                    for (i=0; i<a->dimensions[1]; i++)
                        for (j=0; j<a->dimensions[2]; j++)
*tmp++ = (double *)(a->data + i*a->strides[1] + j*a->strides[2]);
                    break;
                case 1:
                    for (i=0; i<a->dimensions[0]; i++)
                        for (j=0; j<a->dimensions[2]; j++)
*tmp++ = (double *)(a->data + i*a->strides[0] + j*a->strides[2]);
                    break;

                case 2:
                    for (i=0; i<a->dimensions[0]; i++)
                        for (j=0; j<a->dimensions[1]; j++)
*tmp++ = (double *)(a->data + i*a->strides[0] + j*a->strides[1]);

            }
            break;
        /* four or more dimensions: use recursion */
        default:
            for (i=0; i<a->nd; i++) indices[i] = 0;
            _get_axis_pointer(indices, axis, a, out, 0);

    }
done:
    return out;
}


Another thing I did when reimplementing lfilter was "copy-in copy-out"
for strided arrays.
What is copy-in copy out ? I am not familiar with this term ?


Strided memory access is slow. So it often helps to make a temporary copy that are contiguous.

static void copy_in(double *restrict y, double *restrict x, npy_intp n, npy_intp s)
{
    npy_intp i;
    char *tmp;
    if (s == sizeof(double))
        memcpy(y, x, n*sizeof(double));
    else {
        tmp = (char *)x;
        for (i=0; i<n; i++) {
            *y++ = *x;
            tmp += s;
            x = (double *)tmp;
        }
    }
}

static void copy_out(double *restrict y, double *restrict x, npy_intp n, npy_intp s)
{
    npy_intp i;
    char *tmp;
    if (s == sizeof(double))
        memcpy(y, x, n*sizeof(double));
    else {
        tmp = (char *)y;
        for (i=0; i<n; i++) {
            *y = *x++;
            tmp += s;
            y = (double *)tmp;
        }
    }
}



Sturla








_______________________________________________
NumPy-Discussion mailing list
NumPy-Discussion@scipy.org
http://mail.scipy.org/mailman/listinfo/numpy-discussion

Reply via email to