Dear developers,

in connection with attempts to rewrite some GRASS module with OpenMP we have rewrote the v.kcv module. The new version is much faster that the previous (no OpenMP is now needed - there is only one loop now with direct writing to the resulting layer). We believe that the new version of the module do the same work as original. We are not familiar with GRASS GIS development so the code is included in the attachment of this message. Hope that it could be useful.

Kind regards

Jan Ruzicka and Jan Vandrol
/*
 * from s.kcv
 * Copyright (C) 1993-1994. James Darrell McCauley.
 *
 * Author: James Darrell McCauley darr...@mccauley-usa.com
 *                                http://mccauley-usa.com/
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Modification History:
 * 4.2B <27 Jan 1994>  fixed RAND_MAX for Solaris 2.3
 * <13 Sep 2000> released under GPL
 * 10/2004 Upgrade to 5.7 Radim Blazek
 * 5/2013 Change algorithm to make it faster by Jan Vandrol and Jan Ruzicka
 */
#include <stdlib.h>
#include <unistd.h>
#include <math.h>
#include <grass/gis.h>
#include <grass/dbmi.h>
#include <grass/Vect.h>
#include <grass/glocale.h>
#include "kcv.h"

#ifndef RAND_MAX
#define RAND_MAX (pow(2.0,31.0)-1)
#endif
#if defined(__CYGWIN__) || defined(__APPLE__) || defined(__MINGW32__)
double drand48()
{
    return (rand() / 32767.0);
}

#define srand48(sv) (srand((unsigned)(sv)))
#else
double drand48();
void srand48();
#endif

struct Cell_head window;

int rand_range(int min_n, int max_n)
{
    return rand() % (max_n - min_n + 1) + min_n;
}

int main(int argc, char *argv[])
{

    int line, nlines, nlinks;
    double (*rng) (), max, myrand();
    int nsites, verbose, parts, np, *p, dcmp();
    int *pnt_part, *part_count, *part_available;
    struct Map_info In, Out;
    char *mapset;
    struct GModule *module;
    struct Option *in_opt, *out_opt, *col_opt, *npart_opt;
    struct Flag *drand48_flag, *q_flag;

    /* Attributes */
    struct field_info *Fi;
    dbDriver *Driver;
    char buf[2000];
    dbString sql;

    module = G_define_module();
    module->keywords = _("vector, statistics");
    module->description =
	_("Randomly partition points into test/train sets.");

    in_opt = G_define_standard_option(G_OPT_V_INPUT);
    out_opt = G_define_standard_option(G_OPT_V_OUTPUT);

    npart_opt = G_define_option();
    npart_opt->key = "k";
    npart_opt->type = TYPE_INTEGER;
    npart_opt->required = YES;
    npart_opt->description = _("Number of partitions");
    npart_opt->options = "1-32767";

    col_opt = G_define_option();
    col_opt->key = "column";
    col_opt->type = TYPE_STRING;
    col_opt->required = YES;
    col_opt->multiple = NO;
    col_opt->answer = "part";
    col_opt->description =
	_("Name for new column to which partition number is written");

    drand48_flag = G_define_flag();
    drand48_flag->key = 'd';
    drand48_flag->description = _("Use drand48()");

    /* please remove in GRASS 7 */
    q_flag = G_define_flag();
    q_flag->key = 'q';
    q_flag->description = _("Quiet");

    G_gisinit(argv[0]);

    if (G_parser(argc, argv))
	exit(EXIT_FAILURE);

    verbose = (!q_flag->answer);
    np = atoi(npart_opt->answer);

    if (drand48_flag->answer) {
	rng = drand48;
	max = 1.0;
	srand48((long)getpid());
    }
    else {
	rng = myrand;
	max = RAND_MAX;
	srand(getpid());
    }

    

    /* open input vector */
    if ((mapset = G_find_vector2(in_opt->answer, "")) == NULL) {
	G_fatal_error(_("Vector map <%s> not found"), in_opt->answer);
    }

    Vect_set_open_level(2);
    if (Vect_open_old(&In, in_opt->answer, mapset) < 2) {
	G_fatal_error(_("Unable to open vector map <%s> at topological level %d"),
		      in_opt->answer, 2);
    }

    nsites = Vect_get_num_primitives(&In, GV_POINT);

    if (nsites < np) {
	G_fatal_error("Sites found: %i\nMore partitions than sites.", nsites);
    }

    Vect_set_fatal_error(GV_FATAL_PRINT);
    Vect_open_new(&Out, out_opt->answer, Vect_is_3d(&In));

    Vect_copy_head_data(&In, &Out);
    Vect_hist_copy(&In, &Out);
    Vect_hist_command(&Out);

    /* Copy vector lines */
    Vect_copy_map_lines(&In, &Out);

    /* Copy tables */
    if (Vect_copy_tables(&In, &Out, 0))
	G_warning(_("Failed to copy attribute table to output map"));

    /* Add column */
    db_init_string(&sql);

    /* Check if there is a database for output */
    nlinks = Vect_get_num_dblinks(&Out);
    if (nlinks < 1)
	Fi = Vect_default_field_info(&Out, 1, NULL, GV_1TABLE);
    else
	Fi = Vect_get_field(&Out, 1);
    if (Fi == NULL) {
	Vect_delete(Out.name);
	G_fatal_error(_("Unable to get layer info for vector map <%s>"),
		      out_opt->answer);
    }

    Driver = db_start_driver_open_database(Fi->driver, Fi->database);
    if (Driver == NULL) {
	Vect_delete(Out.name);
	G_fatal_error(_("Unable to open database <%s> by driver <%s>"),
		      Fi->database, Fi->driver);
    }

    if (nlinks < 1)
	sprintf(buf, "create table %s (%s integer, %s integer)", Fi->table,
		Fi->key, col_opt->answer);
    else
	sprintf(buf, "alter table %s add column %s integer", Fi->table,
		col_opt->answer);

    db_set_string(&sql, buf);

    G_debug(3, "SQL: %s", db_get_string(&sql));

    if (db_execute_immediate(Driver, &sql) != DB_OK) {
	Vect_delete(Out.name);
	G_fatal_error(_("Cannot alter table: %s"), db_get_string(&sql));
    }
    if (nlinks < 1)
	Vect_map_add_dblink(&Out, Fi->number, Fi->name, Fi->table, Fi->key,
			    Fi->database, Fi->driver);

    /* nlines - count of records */	
    nlines = Vect_get_num_lines(&In);
    /* pnt_part - not used
    pnt_part = (int *)G_calloc(nlines + 1, sizeof(int));
    */

    /* currentmaxpartindex - current maximal index of array part_available, maximal usable index from array part_available */
    int currentmaxpartindex = np - 1;
    /* maximal count of features in one category, +1 for floating point result of nlines / np */
    //double maxcount = (nlines / np) + 1;

    /* maximal count of features in one category */
    int maxcount = (int) (nlines / np);				
    
    /* array of available categories */
    part_available = (int *)G_calloc(np, sizeof(int));
    /* number of features in categories */
    part_count = (int *)G_calloc(np, sizeof(int));
 
    /* initialization of arrays */
    for (parts = 0; parts < np; parts++) {
	part_available[parts] = parts + 1; //identifiers of categories
	part_count[parts] = 0; 
    }

    /* loop for all features */
    for (line = 1; line <= nlines; line++) {
		int type;
		int cat = rand_range(0, currentmaxpartindex); //randomly chosen category for current feature

		if (nlinks < 1)
		    sprintf(buf, "insert into %s (%s, %s) values (%d, %d)",
			    Fi->table, Fi->key, col_opt->answer, line, part_available[cat]);
		else
		    sprintf(buf, "update %s set %s = %d where %s = %d",
			    Fi->table, col_opt->answer, part_available[cat], Fi->key, line);

		db_set_string(&sql, buf);

		G_debug(3, "SQL: %s", db_get_string(&sql));

		if (db_execute_immediate(Driver, &sql) != DB_OK) {
		    Vect_delete(Out.name);
		    G_fatal_error(_("Unable to insert row: %s"),
				  db_get_string(&sql));
		}

		part_count[cat]++; //increase count of features in category
		/* if the category if full */	
		if (part_count[cat] >= maxcount) {
			int pom_part_count = part_count[cat]; //switch number of features of full category with the end of array
			part_count[cat] = part_count[currentmaxpartindex]; 
		        part_count[currentmaxpartindex] = pom_part_count;
			int pom_part_available = part_available[cat]; //switch full category with the end of array
			part_available[cat] = part_available[currentmaxpartindex]; 
			part_available[currentmaxpartindex] = pom_part_available;
			currentmaxpartindex--; //maximal usable index is decreased, so the full category at the end of array will not be used in the next loops
		}
		if (currentmaxpartindex < 0) {
	  	  maxcount++;
	  	  currentmaxpartindex = np - 1;
		}		
    }

    Vect_close(&In);

    db_close_database_shutdown_driver(Driver);

    Vect_build(&Out);
    Vect_close(&Out);

    exit(EXIT_SUCCESS);
}
_______________________________________________
grass-dev mailing list
grass-dev@lists.osgeo.org
http://lists.osgeo.org/mailman/listinfo/grass-dev

Reply via email to