Changeset: 66e1531dd8d5 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=66e1531dd8d5 Modified Files: geom/monetdb5/geom.c geom/monetdb5/geom.h geom/monetdb5/grid.c geom/monetdb5/grid.h geom/monetdb5/grid.mal geom/sql/41_grid.sql Branch: grid Log Message:
Grid based distance join added diffs (truncated from 774 to 300 lines): diff --git a/geom/monetdb5/geom.c b/geom/monetdb5/geom.c --- a/geom/monetdb5/geom.c +++ b/geom/monetdb5/geom.c @@ -1873,7 +1873,7 @@ str geom_epilogue(void *ret) { } /* Check if fixed-sized atom mbr is null */ -static int mbr_isnil(mbr *m) { +int mbr_isnil(mbr *m) { if (!m || m->xmin == flt_nil || m->ymin == flt_nil || m->xmax == flt_nil || m->ymax == flt_nil) return 1; @@ -4357,6 +4357,47 @@ str mbrOverlaps(bit *out, mbr **b1, mbr return MAL_SUCCEED; } +/*returns the intersection of two mbrs */ +str mbrIntersection(mbr **r, mbr **b1, mbr **b2) { + bit overlap; + mbrOverlaps(&overlap, b1, b2); + if (overlap == 0) { + *r = mbr_nil; + } else { + (*r)->xmin = ((*b1)->xmin < (*b2)->xmin) ? (*b2)->xmin : (*b1)->xmin; + (*r)->ymin = ((*b1)->ymin < (*b2)->ymin) ? (*b2)->ymin : (*b1)->ymin; + (*r)->xmax = ((*b1)->xmax > (*b2)->xmax) ? (*b2)->xmax : (*b1)->xmax; + (*r)->ymax = ((*b1)->ymax > (*b2)->ymax) ? (*b2)->ymax : (*b1)->ymax; + } + return MAL_SUCCEED; +} + +/*returns the union of two mbrs */ +str mbrUnion(mbr **r, mbr **b1, mbr **b2) { + bit overlap; + mbrOverlaps(&overlap, b1, b2); + if (mbr_isnil(*b1) && mbr_isnil(*b2)) { + *r = mbr_nil; + } else if (mbr_isnil(*b1)) { + (*r)->xmin = (*b2)->xmin; + (*r)->ymin = (*b2)->ymin; + (*r)->xmax = (*b2)->xmax; + (*r)->ymax = (*b2)->ymax; + } else if (mbr_isnil(*b2)) { + (*r)->xmin = (*b1)->xmin; + (*r)->ymin = (*b1)->ymin; + (*r)->xmax = (*b1)->xmax; + (*r)->ymax = (*b1)->ymax; + } else { + (*r)->xmin = ((*b1)->xmin > (*b2)->xmin) ? (*b2)->xmin : (*b1)->xmin; + (*r)->ymin = ((*b1)->ymin > (*b2)->ymin) ? (*b2)->ymin : (*b1)->ymin; + (*r)->xmax = ((*b1)->xmax < (*b2)->xmax) ? (*b2)->xmax : (*b1)->xmax; + (*r)->ymax = ((*b1)->ymax < (*b2)->ymax) ? (*b2)->ymax : (*b1)->ymax; + } + + return MAL_SUCCEED; +} + /*returns true if the mbrs of the two geometris overlap */ str mbrOverlaps_wkb(bit *out, wkb **geom1WKB, wkb **geom2WKB) { mbr *geom1MBR = NULL, *geom2MBR = NULL; diff --git a/geom/monetdb5/geom.h b/geom/monetdb5/geom.h --- a/geom/monetdb5/geom.h +++ b/geom/monetdb5/geom.h @@ -260,6 +260,8 @@ geom_export str mbrEqual(bit *out, mbr * geom_export str mbrEqual_wkb(bit *out, wkb **geom1WKB, wkb **geom2WKB); geom_export str mbrDistance(dbl *out, mbr **b1, mbr **b2); geom_export str mbrDistance_wkb(dbl *out, wkb **geom1WKB, wkb **geom2WKB); +geom_export str mbrIntersection(mbr **r, mbr **b1, mbr **b2); +geom_export str mbrUnion(mbr **r, mbr **b1, mbr **b2); geom_export str wkbCoordinateFromWKB(dbl*, wkb**, int*); geom_export str wkbCoordinateFromMBR(dbl*, mbr**, int*); @@ -294,3 +296,6 @@ geom_export str wkbCoordinateFromMBR_bat geom_export int geom_catalog_upgrade(void *, int); geom_export str geom_sql_upgrade(int); + +geom_export int mbr_isnil(mbr *m); + diff --git a/geom/monetdb5/grid.c b/geom/monetdb5/grid.c --- a/geom/monetdb5/grid.c +++ b/geom/monetdb5/grid.c @@ -34,19 +34,73 @@ do { max = res + add; \ } while (0) -typedef struct grid { - lng xmin; /* grid universe: minimum X value */ - lng ymin; /* grid universe: minimum Y value */ - lng xmax; /* grid universe: maximum X value */ - lng ymax; /* grid universe: maximum Y value */ +#define GRIDextend(g1, g2, cellR, cellS, r1, r2, msg) \ +do { \ +/* make space to cater for the worst case where all points qualify */ \ +BUN maxSize = BATcount((r1)) + \ + ((g1)->dir[(cellR)+1]-(g1)->dir[(cellR)])* \ + ((g2)->dir[(cellS)+1]-(g2)->dir[(cellS)]); \ +if (maxSize > GDK_oid_max) { \ + (msg) = createException(MAL, "grid.distance", "overflow of head value"); \ + goto distancejoin_fail; \ +} \ +while (maxSize > BATcapacity((r1))) { \ +fprintf(stderr, "maxSize: %zu capacity: %zu\n", maxSize, BATcapacity((r1))); \ + if ((BATextend((r1), BATgrows((r1))) != GDK_SUCCEED) || \ + (BATextend((r2), BATgrows((r2))) != GDK_SUCCEED)) { \ + (msg) = createException(MAL, "grid.distance", \ + "could not extend BATs for storing the join results"); \ + goto distancejoin_fail; \ + } \ +} \ +/* fprintf(stderr, "Comparing [%zu](%zu)-[%zu](%zu)\n", \ + cellR, g1->dir[cellR+1]-g1->dir[cellR], \ + cellS, g2->dir[cellS+1]-g2->dir[cellS]); */ \ +} while (0) + + + +#define GRIDcmp(x1Vals, y1Vals, br1, g1, x2Vals, y2Vals, br2, g2, cellR, cellS, r1, r2, msg) \ +do { \ +if ((cellR) > (g1)->cellsNum || (cellS) > (g2)->cellsNum) \ + continue; \ +GRIDextend(g1, g2, cellR, cellS, r1, r2, msg); \ +/* compare points of R in cellR with points of S in cellS */ \ +/* TODO: the outer relation should be the one with more points */ \ +for (size_t m = (g1)->dir[(cellR)]; m < (g1)->dir[(cellR)+1]; m++) { \ + oid oid1 = m; \ + lng x1v = (x1Vals)[oid1]; \ + lng y1v = (y1Vals)[oid1]; \ + for (size_t n = (g2)->dir[(cellS)]; n < (g2)->dir[(cellS)+1]; n++) { \ + size_t oid2 = n; \ + lng x2v = (x2Vals)[oid2]; \ + lng y2v = (y2Vals)[oid2]; \ + double ddist = (x2v-x1v)*(x2v-x1v)+(y2v-y1v)*(y2v-y1v); \ +/* fprintf(stderr, "The distance between [%lld,%lld] and [%lld,%lld] is %f (requested distance: %f)\n",x1v,y1v,x2v,y2v,ddist,distsqr); */ \ + if (ddist <= distsqr) { \ + bunfastapp_nocheck_inc((r1), (br1), &oid1); \ + bunfastapp_nocheck_inc((r2), (br2), &oid2); \ + } \ + } \ +} \ +} while (0) + +struct Grid { + dbl xmin; /* grid universe: minimum X value */ + dbl ymin; /* grid universe: minimum Y value */ + dbl xmax; /* grid universe: maximum X value */ + dbl ymax; /* grid universe: maximum Y value */ + mbr mbb; bte shift; size_t cellsNum; /* number of cells */ size_t cellsPerAxis; /* number of cells per axis */ + size_t cellsX; + size_t cellsY; bat xbat; /* bat id for X coordinates */ bat ybat; /* bat id for Y coordinates */ - size_t * dir; /* the grid directory */ + oid * dir; /* the grid directory */ oid * oids; /* heap where the index is stored */ -} grid; +}; static size_t countSetBits(uint64_t *resBitvector, size_t vectorSize) @@ -63,10 +117,56 @@ countSetBits(uint64_t *resBitvector, siz return num; } -static grid * +#ifndef NDEBUG +static void +grid_print(Grid * g) +{ + int m = 0, n = 0, o = 0; + fprintf(stderr, "GRID %p (shift %d). batx: %d, baty: %d\n", g, g->shift, g->xbat, g->ybat); + fprintf(stderr, "- Universe: [%f %f, %f %f]\n", g->xmin, g->ymin, g->xmax, g->ymax); + fprintf(stderr, "- MBB : [%f %f, %f %f]\n", g->mbb.xmin, g->mbb.ymin, g->mbb.xmax, g->mbb.ymax); + fprintf(stderr, "- Cells X: %zu, Y: %zu, total: %zu, per axis: %zu\n", g->cellsX, g->cellsY, g->cellsNum, g->cellsPerAxis); + m = ceil(log10(g->dir[g->cellsNum])); + n = ceil(log10(g->cellsY)); + o = ceil(log10(g->cellsX)); + m = m > o ? m : o; + + fprintf(stderr, "- Directory\n"); + for (size_t i = 0; i < g->cellsX*(m+1)+n+4; i++) + fprintf(stderr,"-"); + fprintf(stderr, "\n"); + for (size_t k = 0; k < g->cellsY; k++) { + size_t j = g->cellsY - k - 1; + fprintf(stderr,"||%*zu||",n,j); + for (size_t i = 0; i < g->cellsX; i++) { + oid cell = i + j*g->cellsX; + size_t v = g->dir[cell+1]-g->dir[cell]; + if (v == 0) + fprintf(stderr, "%*s|", m, ""); + else + fprintf(stderr, "%*zu|", m, v); + } + fprintf(stderr,"\n"); + } + for (size_t i = 0; i < g->cellsX*(m+1)+n+4; i++) + fprintf(stderr,"-"); + fprintf(stderr, "\n"); + fprintf(stderr, "||%*s||", n, ""); + for (size_t i = 0; i < g->cellsX; i++) + fprintf(stderr, "%*zu|", m, i); + fprintf(stderr, "\n"); + fprintf(stderr, "- OIDs\n["); + for (size_t i = 0; i < g->dir[g->cellsNum]; i++) { + fprintf(stderr, "%zu,", g->oids[i]); + } + fprintf(stderr, "\r]\n"); +} +#endif + +static Grid * grid_create(BAT *bx, BAT *by) { - grid * g; + Grid * g; lng *xVals, *yVals; size_t i, cnt; dbl fxa, fxb, fya, fyb; @@ -74,7 +174,7 @@ grid_create(BAT *bx, BAT *by) assert(BATcount(bx) == BATcount(by)); assert(BATcount(bx) > 0); - if ((g = GDKmalloc(sizeof(grid))) == NULL) + if ((g = GDKmalloc(sizeof(Grid))) == NULL) return g; g->xbat = bx->batCacheid; @@ -114,7 +214,7 @@ grid_create(BAT *bx, BAT *by) } /* allocate space for the directory */ - if ((g->dir = GDKmalloc((g->cellsNum+1)*sizeof(size_t))) == NULL) { + if ((g->dir = GDKmalloc((g->cellsNum+1)*sizeof(oid))) == NULL) { GDKfree(g); g = NULL; return g; @@ -153,8 +253,6 @@ grid_create(BAT *bx, BAT *by) oid cellx = (double)xVals[i]*fxa - fxb; oid celly = (double)yVals[i]*fya - fyb; oid cell = ((cellx << g->shift) | celly); -// assert(cell < g->cellsNum); -// assert(position < offsetVals[g->cellsNum]); g->oids[g->dir[cell]++] = i; } @@ -165,11 +263,255 @@ grid_create(BAT *bx, BAT *by) /* TODO: move here the code for compressing the index */ +#ifndef NDEBUG + grid_print(g); +#endif return g; } +static Grid * +grid_create_mbr(BAT *bx, BAT *by, mbr *m, dbl * d) +{ + Grid * g; + lng *xVals, *yVals; + size_t i, cnt; + dbl fxa, fxb, fya, fyb; + + assert(BATcount(bx) == BATcount(by)); + assert(BATcount(bx) > 0); + assert((*d) > 0); + + if ((g = GDKmalloc(sizeof(Grid))) == NULL) + return g; + + g->xbat = bx->batCacheid; + g->ybat = by->batCacheid; + xVals = (lng*)Tloc(bx, BUNfirst(bx)); + yVals = (lng*)Tloc(by, BUNfirst(by)); + + g->mbb.xmin = m->xmin; + g->mbb.ymin = m->ymin; + g->mbb.xmax = m->xmax; + g->mbb.ymax = m->ymax; + g->xmin = m->xmin; + g->ymin = m->ymin; + g->xmax = m->xmax; + g->ymax = m->ymax; + +#ifndef NDEBUG + fprintf(stderr, "grid borders: [%f %f, %f %f]\n", g->mbb.xmin, g->mbb.ymin, g->mbb.xmax, g->mbb.ymax); +#endif + /* determine the appropriate number of cells */ + g->cellsX = (m->xmax - m->xmin)/(*d); + g->cellsY = (m->ymax - m->ymin)/(*d); + + /* how many bits do we need? */ + g->shift = (bte)ceil(log2(g->cellsX>g->cellsY?g->cellsX:g->cellsY)); + cnt = BATcount(bx); + g->cellsNum = g->cellsX * g->cellsY; +#ifndef NDEBUG _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list