Changeset: f41a9714b4a3 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=f41a9714b4a3 Modified Files: geom/monetdb5/grid.c geom/sql/Tests/grid.sql Branch: grid Log Message:
Added a test for testing the grid-based distance join diffs (truncated from 534 to 300 lines): diff --git a/geom/monetdb5/grid.c b/geom/monetdb5/grid.c --- a/geom/monetdb5/grid.c +++ b/geom/monetdb5/grid.c @@ -45,7 +45,7 @@ if (maxSize > GDK_oid_max) { goto distancejoin_fail; \ } \ while (maxSize > BATcapacity((r1))) { \ -fprintf(stderr, "maxSize: %zu capacity: %zu\n", maxSize, BATcapacity((r1))); \ +fprintf(stderr, "maxSize: %zu capacity: %zu\n", maxSize, BATcapacity((r1))); \ if ((BATextend((r1), BATgrows((r1))) != GDK_SUCCEED) || \ (BATextend((r2), BATgrows((r2))) != GDK_SUCCEED)) { \ (msg) = createException(MAL, "grid.distance", \ @@ -53,51 +53,53 @@ fprintf(stderr, "maxSize: %zu capacity: goto distancejoin_fail; \ } \ } \ -/* fprintf(stderr, "Comparing [%zu](%zu)-[%zu](%zu)\n", \ - cellR, g1->dir[cellR+1]-g1->dir[cellR], \ - cellS, g2->dir[cellS+1]-g2->dir[cellS]); */ \ } while (0) -#define GRIDcmp(x1Vals, y1Vals, br1, g1, x2Vals, y2Vals, br2, g2, cellR, cellS, r1, r2, msg) \ -do { \ -if ((cellR) > (g1)->cellsNum || (cellS) > (g2)->cellsNum) \ - continue; \ -GRIDextend(g1, g2, cellR, cellS, r1, r2, msg); \ -/* compare points of R in cellR with points of S in cellS */ \ -/* TODO: the outer relation should be the one with more points */ \ -for (size_t m = (g1)->dir[(cellR)]; m < (g1)->dir[(cellR)+1]; m++) { \ - oid oid1 = m; \ - lng x1v = (x1Vals)[oid1]; \ - lng y1v = (y1Vals)[oid1]; \ - for (size_t n = (g2)->dir[(cellS)]; n < (g2)->dir[(cellS)+1]; n++) { \ - size_t oid2 = n; \ - lng x2v = (x2Vals)[oid2]; \ - lng y2v = (y2Vals)[oid2]; \ - double ddist = (x2v-x1v)*(x2v-x1v)+(y2v-y1v)*(y2v-y1v); \ -/* fprintf(stderr, "The distance between [%lld,%lld] and [%lld,%lld] is %f (requested distance: %f)\n",x1v,y1v,x2v,y2v,ddist,distsqr); */ \ +#define GRIDcmp(x1Vals, y1Vals, br1, g1, \ + x2Vals, y2Vals, br2 ,g2, \ + cellR, cellS, r1, r2, seq1, seq2, msg) \ +do { \ +if ((cellR) >= (g1)->cellsNum || (cellS) >= (g2)->cellsNum) \ + continue; \ +GRIDextend(g1, g2, cellR, cellS, r1, r2, msg); \ +/* compare points of R in cellR with points of S in cellS */ \ +/* TODO: the outer relation should be the one with more points */ \ +for (size_t m = (g1)->dir[(cellR)]; m < (g1)->dir[(cellR)+1]; m++) { \ + oid oid1 = m; \ + lng x1v = (x1Vals)[oid1]; \ + lng y1v = (y1Vals)[oid1]; \ + for (size_t n = (g2)->dir[(cellS)]; n < (g2)->dir[(cellS)+1]; n++) { \ + size_t oid2 = n; \ + lng x2v = (x2Vals)[oid2]; \ + lng y2v = (y2Vals)[oid2]; \ + double ddist = (x2v-x1v)*(x2v-x1v)+(y2v-y1v)*(y2v-y1v); \ if (ddist <= distsqr) { \ - bunfastapp_nocheck_inc((r1), (br1), &oid1); \ - bunfastapp_nocheck_inc((r2), (br2), &oid2); \ +/* TODO: Now that you have reserved space in advance, use Tloc+predication */ \ + oid o1 = oid1+seq1; \ + oid o2 = oid2+seq2; \ + bunfastapp_nocheck_inc((r1), (br1), &o1); \ + bunfastapp_nocheck_inc((r2), (br2), &o2); \ } \ } \ } \ } while (0) +typedef struct Grid Grid; struct Grid { - dbl xmin; /* grid universe: minimum X value */ - dbl ymin; /* grid universe: minimum Y value */ - dbl xmax; /* grid universe: maximum X value */ - dbl ymax; /* grid universe: maximum Y value */ - mbr mbb; + dbl xmin; /* minimum X value of input BATs */ + dbl ymin; /* minimum Y value of input BATs */ + dbl xmax; /* maximum X value of input BATs */ + dbl ymax; /* maximum Y value of input BATs */ + mbr mbb; /* grid universe (might differ from the input values) */ bte shift; - size_t cellsNum; /* number of cells */ - size_t cellsPerAxis; /* number of cells per axis */ - size_t cellsX; - size_t cellsY; - bat xbat; /* bat id for X coordinates */ - bat ybat; /* bat id for Y coordinates */ + size_t cellsNum; /* number of cells */ + size_t cellsPerAxis;/* number of cells per axis */ + size_t cellsX; /* number of cells in X axis */ + size_t cellsY; /* number of cells in Y axis */ + bat xbat; /* bat id for X coordinates */ + bat ybat; /* bat id for Y coordinates */ oid * dir; /* the grid directory */ oid * oids; /* heap where the index is stored */ }; @@ -127,8 +129,8 @@ grid_print(Grid * g) fprintf(stderr, "- MBB : [%f %f, %f %f]\n", g->mbb.xmin, g->mbb.ymin, g->mbb.xmax, g->mbb.ymax); fprintf(stderr, "- Cells X: %zu, Y: %zu, total: %zu, per axis: %zu\n", g->cellsX, g->cellsY, g->cellsNum, g->cellsPerAxis); m = ceil(log10(g->dir[g->cellsNum])); - n = ceil(log10(g->cellsY)); - o = ceil(log10(g->cellsX)); + n = ceil(log10(g->cellsY-1)); + o = ceil(log10(g->cellsX-1)); m = m > o ? m : o; fprintf(stderr, "- Directory\n"); @@ -263,16 +265,12 @@ grid_create(BAT *bx, BAT *by) /* TODO: move here the code for compressing the index */ -#ifndef NDEBUG - grid_print(g); -#endif return g; } -static Grid * -grid_create_mbr(BAT *bx, BAT *by, mbr *m, dbl * d) +static str +grid_create_mbr(Grid * g, BAT *bx, BAT *by, mbr *m, dbl * d) { - Grid * g; lng *xVals, *yVals; size_t i, cnt; dbl fxa, fxb, fya, fyb; @@ -281,9 +279,6 @@ grid_create_mbr(BAT *bx, BAT *by, mbr *m assert(BATcount(bx) > 0); assert((*d) > 0); - if ((g = GDKmalloc(sizeof(Grid))) == NULL) - return g; - g->xbat = bx->batCacheid; g->ybat = by->batCacheid; xVals = (lng*)Tloc(bx, BUNfirst(bx)); @@ -298,27 +293,37 @@ grid_create_mbr(BAT *bx, BAT *by, mbr *m g->xmax = m->xmax; g->ymax = m->ymax; +#if 0 #ifndef NDEBUG fprintf(stderr, "grid borders: [%f %f, %f %f]\n", g->mbb.xmin, g->mbb.ymin, g->mbb.xmax, g->mbb.ymax); #endif +#endif /* determine the appropriate number of cells */ - g->cellsX = (m->xmax - m->xmin)/(*d); - g->cellsY = (m->ymax - m->ymin)/(*d); + g->cellsX = (size_t)ceil((m->xmax - m->xmin)/(*d))+1; + g->cellsY = (size_t)ceil((m->ymax - m->ymin)/(*d))+1; + if ((size_t)(m->xmax-m->xmin)/(*d) == (size_t)g->cellsX*(*d)) + g->cellsX++; + if ((m->ymax-m->ymin)/(*d) == g->cellsY*(*d)) + g->cellsY++; + g->mbb.xmax = g->mbb.xmin + g->cellsX*(*d); + g->mbb.ymax = g->mbb.ymin + g->cellsY*(*d); /* how many bits do we need? */ g->shift = (bte)ceil(log2(g->cellsX>g->cellsY?g->cellsX:g->cellsY)); cnt = BATcount(bx); g->cellsNum = g->cellsX * g->cellsY; +#if 0 #ifndef NDEBUG fprintf(stderr, "shift: %d\n", g->shift); fprintf(stderr, "Cells: x=%zu, y=%zu (total: %zu)\n", g->cellsX, g->cellsY, g->cellsNum); #endif +#endif /* allocate space for the directory */ if ((g->dir = GDKmalloc((g->cellsNum+1)*sizeof(oid))) == NULL) { GDKfree(g); g = NULL; - return g; + return createException(MAL, "grid.create_mbr", MAL_MALLOC_FAIL); } for (i = 0; i <= g->cellsNum; i++) g->dir[i] = 0; @@ -327,7 +332,7 @@ grid_create_mbr(BAT *bx, BAT *by, mbr *m if((g->oids = GDKmalloc(BATcount(bx)*sizeof(oid))) == NULL) { GDKfree(g); g = NULL; - return g; + return createException(MAL, "grid.create_mbr", MAL_MALLOC_FAIL); } /* compute the index */ @@ -336,10 +341,12 @@ grid_create_mbr(BAT *bx, BAT *by, mbr *m fxb = (double)g->mbb.xmin*fxa; fya = ((double)g->cellsY/(g->mbb.ymax-g->mbb.ymin)); fyb = (double)g->mbb.ymin*fya; +#if 0 #ifndef NDEBUG fprintf(stderr, "Coefficients: fxa=%f, fxb=%f\n", fxa, fxb); fprintf(stderr, "Coefficients: fya=%f, fyb=%f\n", fya, fyb); #endif +#endif cnt = BATcount(bx); for (i = 0; i < cnt; i++) { @@ -375,8 +382,7 @@ grid_create_mbr(BAT *bx, BAT *by, mbr *m #ifndef NDEBUG grid_print(g); #endif - - return g; + return MAL_SUCCEED; } @@ -387,6 +393,7 @@ grid_create_bats(Grid ** gg1, Grid **gg2 lng *x1Vals, *y1Vals, *x2Vals, *y2Vals; size_t i, cnt1, cnt2; mbr *m1 = NULL, *m2 = NULL, *mi = NULL, *mu = NULL; + str msg = MAL_SUCCEED; assert(BATcount(bx1) == BATcount(by1)); assert(BATcount(bx2) == BATcount(by2)); @@ -399,13 +406,8 @@ grid_create_bats(Grid ** gg1, Grid **gg2 ((m2 = GDKmalloc(sizeof(mbr))) == NULL) || ((g1 = GDKmalloc(sizeof(Grid))) == NULL) || ((g2 = GDKmalloc(sizeof(Grid))) == NULL)) { - if (mi) GDKfree(mi); - if (mu) GDKfree(mu); - if (m1) GDKfree(m1); - if (m2) GDKfree(m2); - if (g1) GDKfree(g1); - if (g2) GDKfree(g2); - return createException(MAL, "grid.create_bats", MAL_MALLOC_FAIL); + msg=createException(MAL, "grid.create_bats", MAL_MALLOC_FAIL); + goto grid_create_bats_fail; } g1->xbat = bx1->batCacheid; @@ -439,10 +441,12 @@ grid_create_bats(Grid ** gg1, Grid **gg2 g1->mbb.ymin=g1->ymin; g1->mbb.xmax=g1->xmax; g1->mbb.ymax=g1->ymax; +#if 0 #ifndef NDEBUG fprintf(stderr, "Outer relation limits: [%f %f, %f %f]\n", g1->mbb.xmin, g1->mbb.ymin, g1->mbb.xmax, g1->mbb.ymax); fprintf(stderr, "Outer relation limits: [%f %f, %f %f]\n", g1->xmin, g1->ymin, g1->xmax, g1->ymax); #endif +#endif cnt2 = BATcount(bx2); g2->xmin = g2->xmax = x2Vals[0]; for (i = 1; i < cnt2; i++) { @@ -464,40 +468,36 @@ grid_create_bats(Grid ** gg1, Grid **gg2 g2->mbb.ymin=g2->ymin; g2->mbb.xmax=g2->xmax; g2->mbb.ymax=g2->ymax; +#if 0 #ifndef NDEBUG fprintf(stderr, "Outer relation limits: [%f %f, %f %f]\n", g2->mbb.xmin, g2->mbb.ymin, g2->mbb.xmax, g2->mbb.ymax); fprintf(stderr, "Outer relation limits: [%f %f, %f %f]\n", g2->xmin, g2->ymin, g2->xmax, g2->ymax); #endif - +#endif m1->xmin = g1->mbb.xmin - *d; m1->ymin = g1->mbb.ymin - *d; m1->xmax = g1->mbb.xmax + *d; m1->ymax = g1->mbb.ymax + *d; m2->xmin = g2->mbb.xmin - *d; m2->ymin = g2->mbb.ymin - *d; m2->xmax = g2->mbb.xmax + *d; m2->ymax = g2->mbb.ymax + *d; /* compute the intersection between the two coverages */ mbrIntersection(&mi, &m1, &m2); /* query the grid only on the intersecting mbr */ mbrUnion(&mu, &m1, &m2); /* create the grid index on the union of the two universes */ +#if 0 #ifndef NDEBUG fprintf(stderr, "outer relation mbb: [%f %f, %f %f]\n", m1->xmin, m1->ymin, m1->xmax, m1->ymax); fprintf(stderr, "inner relation mbb: [%f %f, %f %f]\n", m2->xmin, m2->ymin, m2->xmax, m2->ymax); fprintf(stderr, " intersection mbb: [%f %f, %f %f]\n", mi->xmin, mi->ymin, mi->xmax, mi->ymax); fprintf(stderr, " union mbb: [%f %f, %f %f]\n", mu->xmin, mu->ymin, mu->xmax, mu->ymax); #endif - +#endif /* if the two mbbs do not intersect, return an empty result */ - if (mbr_isnil(mi)) { - GDKfree(mi); - GDKfree(mu); - GDKfree(m1); - GDKfree(m2); - GDKfree(g1); - GDKfree(g2); - g1 = NULL; - g2 = NULL; - return MAL_SUCCEED; - } + if (mbr_isnil(mi)) + goto grid_create_bats_return; /* create grid index using a common grid */ - g1 = grid_create_mbr(bx1, by1, mu, d); - g2 = grid_create_mbr(bx2, by2, mu, d); + if ( grid_create_mbr(g1, bx1, by1, mu, d) != MAL_SUCCEED || + grid_create_mbr(g2, bx2, by2, mu, d) != MAL_SUCCEED) { + msg = createException(MAL, "grid.create_bats", "Could not create the grid index"); + goto grid_create_bats_return; + } /* TODO: move here the code for compressing the index */ _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list