Howdy,

we recently had a discussion about being able to do some common things
like reductions and binary operations on recarrays, and there didn't
seem to be much consensus on it being needed in the core of numpy.

Since  we do actually need this quite pressingly for everyday tasks,
we wrote a very simple version of this today, I'm attaching it here in
case it proves useful to others.

Basically it lets you do reductions and binary operations on record
arrays whose dtype is a simple composite of native ones (example at
the end).  For our needs it's quite useful, so  it may also be to
others.

Cheers,

f

Example:


>>> import recarrutil as ru
>>> dt = np.dtype(dict(names=['x','y'],formats=[float,float]))
>>> x = np.arange(6,dtype=float).reshape(2,3)
>>> y = np.arange(10,16,dtype=float).reshape(2,3)
>>> z = np.empty( (2,3), dt).view(np.recarray)
>>> z.x = x
>>> z.y = y
>>> z
rec.array([[(0.0, 10.0), (1.0, 11.0), (2.0, 12.0)],
       [(3.0, 13.0), (4.0, 14.0), (5.0, 15.0)]],
      dtype=[('x', '<f8'), ('y', '<f8')])
>>> ru.mean(z)
rec.array((2.5, 12.5),
      dtype=[('x', '<f8'), ('y', '<f8')])
>>> ru.mean(z,0)
rec.array([(1.5, 11.5), (2.5, 12.5), (3.5, 13.5)],
      dtype=[('x', '<f8'), ('y', '<f8')])
>>> ru.mean(z,1)
rec.array([(1.0, 11.0), (4.0, 14.0)],
      dtype=[('x', '<f8'), ('y', '<f8')])
>>> ru.add(z,z)
rec.array([[(0.0, 20.0), (2.0, 22.0), (4.0, 24.0)],
       [(6.0, 26.0), (8.0, 28.0), (10.0, 30.0)]],
      dtype=[('x', '<f8'), ('y', '<f8')])
>>> ru.subtract(z,z)
rec.array([[(0.0, 0.0), (0.0, 0.0), (0.0, 0.0)],
       [(0.0, 0.0), (0.0, 0.0), (0.0, 0.0)]],
      dtype=[('x', '<f8'), ('y', '<f8')])
"""Some utilities for manipulating recarrays.
"""

import numpy as np
import numpy.testing as nt

#-----------------------------------------------------------------------------
# Functions and public utilities
#-----------------------------------------------------------------------------

def extrude(arr,flatten=False):
    """Create a view of a recarray with one extra 'extruded' dimension.

    XXX - document more...    
    """

    dt = arr.dtype

    fieldtypes = [ v[0] for v in dt.fields.values() ]
    
    if len(set(fieldtypes)) > 1:
        raise ValueError("dtype of recarray must be uniform")
    newdtype = fieldtypes[0]

    nfields = len(dt.fields)

    # If axis is None, for a normal array this means flatten everything and
    # return a single number. In our case, we actually want to keep the last
    # dimension (the "extruded" one) alive so that we can reconstruct the
    # recarray in the end.
    if flatten:
        newshape = (arr.size,nfields)
    else:
        newshape = arr.shape + (nfields,)

    # Make the new temp array we'll work with
    return np.reshape(arr.view(newdtype),newshape)


def intrude(arr,dtype):
    """Intrude a recarray by 'flattening' its last dimension into a composite
    dtype.

    XXX - finish doc
    """
    outshape = arr.shape[:-1]
    return (np.reshape(arr.view(dtype),outshape)).view(np.recarray)


def offset_axis(axis):
    """Axis handling logic that is generic to all reductions."""
    flatten = axis is None
    if flatten:
        axis = 0
    else:
        if axis < 0:
            # The case of a negative input axis needs compensation, because we
            # are adding a dimension by ourselves
            axis -= 1
    return flatten, axis

    
def reduction(name):
    """Create a reduction operation for a given method name.
    """
    def op(arr, axis=None):

        flatten, axis = offset_axis(axis)
        newarr = extrude(arr,flatten)
        # Do the operation on the new array
        method = getattr(newarr,name)
        result = method(axis)
        # Make the output back into a recarray of the original dtype
        return intrude(result, arr.dtype)

    doc = "%s of a recarray, preserving its structure." % name
    op.__doc__ = doc
    op.func_name = name
    return op


# For methods in the array interface that take an axis argument, the pattern is
# always the same: extrude, operate, intrude.  So we just auto-generate these
# functions here.
reduction_names = ['mean', 'std', 'var', 'min', 'max',
                   'sum', 'cumsum', 'prod', 'cumprod' ]

for fname in reduction_names:
    exec "%s = reduction('%s')" % (fname, fname)

def binop(name):
    """Create a binary operation for a given name.
    """
    def op(a1, a2, out=None):

        new_a1 = extrude(a1)
        new_a2 = extrude(a2)
        if out is not None:
            out = extrude(out)
            
        # Do the operation on the new array
        method = getattr(np,name)
        result = method(new_a1, new_a2, out)
        # Make the output back into a recarray of the original dtype
        return intrude(result, a1.dtype)

    doc = "Binary %s of two recarrays, preserving their structure." % name
    op.__doc__ = doc
    op.func_name = name
    return op


# For methods in the array interface that take an axis argument, the pattern is
# always the same: extrude, operate, intrude.  So we just auto-generate these
# functions here.
binop_names = ['add', 'subtract', 'multiply', 'divide',]

for fname in binop_names:
    exec "%s = binop('%s')" % (fname, fname)


#-----------------------------------------------------------------------------
# Tests
#-----------------------------------------------------------------------------

def test_mean_zero():
    dt = np.dtype(dict(names=['x','y'], formats=[float,float]))
    z = np.zeros((2,3), dt)
    nt.assert_equal(mean(z),z)
    return 1


def mk_xyz():
    """Test utility, make x, y, z arrays."""
    dt = np.dtype(dict(names=['x','y'],formats=[float,float]))
    x = np.arange(6,dtype=float).reshape(2,3)
    y = np.arange(10,16,dtype=float).reshape(2,3)
    z = np.empty( (2,3), dt).view(np.recarray)
    z.x = x
    z.y = y
    return x, y, z


def mk_xyzw():
    """Test utility, make x, y, z, w arrays."""
    x, y, z = mk_xyz()
    w = z.copy()
    w.x *= 2
    w.y *= 2
    return x, y, z, w


def test_reductions():
    x, y, z = mk_xyz()
    for fname in reduction_names:
        reduction = eval(fname)
        xmeth = getattr(x, fname)
        ymeth = getattr(y, fname)
        for axis in [None,0,1,-1,-2]:
            zred = reduction(z,axis)
            yield(nt.assert_equal, zred.x, xmeth(axis))
            yield(nt.assert_equal, zred.y, ymeth(axis))


def test_binops():
    x, y, z, w = mk_xyzw()
    for fname in binop_names:
        op = eval(fname)
        npop = getattr(np, fname)
        opres = op(z,w)
        yield(nt.assert_equal, opres.x, npop(z.x, w.x) )
        yield(nt.assert_equal, opres.y, npop(z.y, w.y) )

# Test support utilities

def eval_tests(testgen):
    """Little utility to consume a nose-compliant test generator.

    Returns
    -------
    The number of executed tests.  An exception is raised if any fails."""
    return len([ t[0](*t[1:]) for t in testgen() ])

# Mark it as not being a test itself, so nose doesn't try to run it
eval_tests.__test__ = False


def run_test_suite():
    """Call all our tests in sequence.

    This lets us run the script as a test suite without needing nose or any
    other test runner for simple cases"""
    from  time  import clock

    # Initialize counters
    ntests = 0
    start = clock()
    
    # Call the tests and count them
    ntests += test_mean_zero()
    ntests += eval_tests(test_reductions)
    ntests += eval_tests(test_binops)

    # Stop clock and summarize
    end = clock()
    print '-'*70
    print "Ran %s tests in %.3f" % (ntests, end-start)
    print '\nOK'
    
run_test_suite.__test__ = False


# If run as a script, just run all the tests and print summary if successful
if __name__ == '__main__':
    run_test_suite()
_______________________________________________
NumPy-Discussion mailing list
NumPy-Discussion@scipy.org
http://mail.scipy.org/mailman/listinfo/numpy-discussion

Reply via email to