Re: [GRASS-user] r.neighbors velocity

Sören Gebbert Sat, 29 Jun 2013 06:51:41 -0700

Hi,
i have implemented a "real" average neighborhood algorithm that runs in
parallel using openmp. The source code and the benchmark shell script is
attached.


The neighbor program computes the average moving window of arbitrary size.
The size of the map rows x cols and the size of the moving window  (odd
number cols==rows) can be specified.

./neighbor rows cols mw_size

IMHO the new program is better for compiler comparison and neighborhood
operation performance.

This is the benchmark on my 5 year old AMD phenom 4 core computer using 1,
2 and 4 threads:

gcc -Wall -fopenmp -lgomp -Ofast main.c -o neighbor
export OMP_NUM_THREADS=1
time ./neighbor 5000 5000 23
real 0m37.211s
user 0m36.998s
sys 0m0.196s

export OMP_NUM_THREADS=2
time ./neighbor 5000 5000 23
real 0m19.907s
user 0m38.890s
sys 0m0.248s

export OMP_NUM_THREADS=4
time ./neighbor 5000 5000 23
real 0m10.170s
user 0m38.466s
sys 0m0.192s

Happy hacking, compiling and testing. :)

Best regards
Soeren




2013/6/29 Markus Metz <markus.metz.gisw...@gmail.com>

> On Sat, Jun 29, 2013 at 1:26 PM, Hamish <hamis...@yahoo.com> wrote:
> > Markus Metz wrote:
> >
> >> Some more results with Sören's test program on a Intel(R) Core(TM) i5
> >> CPU M450 @ 2.40GHz (2 real cores, 4 fake cores) with gcc 4.7.2 and
> >> clang 3.3
> >>
> >> gcc -O3
> >> v is 2.09131e+13
> >>
> >> real    2m0.393s
> >> user    1m57.610s
> >> sys    0m0.003s
> >>
> >> gcc -Ofast
> >> v is 2.09131e+13
> >>
> >> real    0m7.218s
> >> user    0m7.018s
> >> sys    0m0.017s
> >
> >
> > nice. one thing we need to remember though is that it's not entirely
> > free, one thing -Ofast turns on is -ffast-math,
> > """
> >  This option is not turned on by any -O option besides -Ofast since it
> can
> >  result in incorrect output for programs that depend on an exact
> >  implementation of IEEE or ISO rules/specifications for math functions.
> It
> >  may, however, yield faster code for programs that do not require the
> >  guarantees of these specifications.
> > """
> >
> > which may not be fit for our purposes.
> >
> >
> > With the ifort compiler there is '-fp-model precise' which allows only
> > optimizations which don't harm the results. Maybe gcc has something
> > similar.
>
> In gcc, you can turn of -ffoo with -fno-foo, maybe this way you can
> use -Ofast -fno-fast-math to preserve IEEE specifications.
> >
> > Glad to see -floop-parallelize-all in gcc 4.7, it will help us identify
> > places to focus OpenMP work on.
> >
> >
> > Hamish
> >
>

benchmark.sh
Description: Bourne shell script

#include <stdio.h>
#include <stdlib.h>

/* #define DEBUG 1 */

/* Prototypes for gathering and average computation */
static int gather_values(double **input, double *buff, int nrows, 
        int ncols, int mw_size, int col, int row, int dist);

static double average(double *values, int size);

int main(int argc, char **argv)
{
    int nrows, ncols, mw_size, size, dist;
    double **input = NULL, **output = NULL;
    int i, j;

    /* Check and parse the input parameter */
    if(argc != 4)
    {

        fprintf(stderr, "Warning!\n");
        fprintf(stderr, "Please specifiy the number of rows and columns and the "
                "\nsize of the moving window (must be an odd number)\n");
        fprintf(stderr, "\nUsage: neighbor 5000 5000 51\n");
        fprintf(stderr, "\nUsing default values: rows = 5000, cols = 5000, moving window = 51\n");
        nrows = 5000;
        ncols = 5000;
        mw_size = 51;
    } 
    else 
    {

        sscanf(argv[1], "%d", &nrows);
        sscanf(argv[2], "%d", &ncols);
        sscanf(argv[3], "%d", &mw_size);

        if(mw_size%2 == 0) {
            fprintf(stderr,"The size of the moving window must be odd");
            return -1;
        }
    }

    size = mw_size * mw_size;
    dist = mw_size / 2;

    /* Allocate input and output */
    input = (double**)calloc(nrows, sizeof(double*));
    output= (double**)calloc(nrows, sizeof(double*));

    if(input == NULL || output == NULL)
    {
        fprintf(stderr, "Unable to allocate arrays");
        return -1;
    }

    for(i = 0; i < nrows; i++) 
    {
        input[i] = (double*)calloc(ncols, sizeof(double));
        output[i]= (double*)calloc(ncols, sizeof(double));

        if(input[i] == NULL || output[i] == NULL)
        {
            fprintf(stderr, "Unable to allocate arrays");
            return -1;
        }

#ifdef DEBUG
        for(j = 0; j < ncols; j++)
            input[i][j] = i + j;
#endif
    }

#pragma omp parallel for private(i, j)
    for(i = 0; i < nrows; i++) 
    {
        for(j = 0; j < ncols; j++)
        {

        /* Value buffer with maximum size */
        double *buff = NULL;
        buff = (double*)calloc(size, sizeof(double));

        /* Gather value in moving window */
        int num = gather_values(input, buff, nrows, ncols, mw_size, i, j, dist);

        output[i][j] = average(buff, num);

        free(buff);
        }
    }

#ifdef DEBUG
    printf("\nInput\n");
    for(i = 0; i < nrows; i++) 
    {
        for(j = 0; j < ncols; j++)
        {
            printf("%.2f ", input[i][j]);
        }
        printf("\n");
    }

    printf("\nOutput\n");
    for(i = 0; i < nrows; i++) 
    {
        for(j = 0; j < ncols; j++)
        {
            printf("%.2f ", output[i][j]);
        }
        printf("\n");
    }
#endif

    return 0;
}

int gather_values(double **input, double *buff, int nrows, 
        int ncols, int mw_size, int col, int row, int dist)
{
    int i, j, k;

    int start_row = row - dist;
    int start_col = col - dist;
    int end_row = start_row + mw_size;
    int end_col = start_col + mw_size;

    if(start_row < 0)
        start_row = 0;

    if(start_col < 0)
        start_col = 0;

    if(end_row > nrows)
        end_row = nrows;

    if(end_col > ncols)
        end_col = ncols;

    k = 0;

    for(i = start_row; i < end_row; i++)
    {
        for(j = start_col; j < end_col; j++)
        {
            buff[k] = input[i][j];
            k++;
        }
    }

    return k;
}

double average(double *values, int size)
{
    int i;
    double a = 0.0;
    for(i = 0; i < size; i++)
        a += values[i];

    a = a / (double)size;
    return a;
}

_______________________________________________
grass-user mailing list
grass-user@lists.osgeo.org
http://lists.osgeo.org/mailman/listinfo/grass-user

Re: [GRASS-user] r.neighbors velocity

Reply via email to