Changeset: 66e1531dd8d5 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=66e1531dd8d5
Modified Files:
        geom/monetdb5/geom.c
        geom/monetdb5/geom.h
        geom/monetdb5/grid.c
        geom/monetdb5/grid.h
        geom/monetdb5/grid.mal
        geom/sql/41_grid.sql
Branch: grid
Log Message:

Grid based distance join added


diffs (truncated from 774 to 300 lines):

diff --git a/geom/monetdb5/geom.c b/geom/monetdb5/geom.c
--- a/geom/monetdb5/geom.c
+++ b/geom/monetdb5/geom.c
@@ -1873,7 +1873,7 @@ str geom_epilogue(void *ret) {
 }
 
 /* Check if fixed-sized atom mbr is null */
-static int mbr_isnil(mbr *m) {
+int mbr_isnil(mbr *m) {
        if (!m || m->xmin == flt_nil || m->ymin == flt_nil ||
            m->xmax == flt_nil || m->ymax == flt_nil)
                return 1;
@@ -4357,6 +4357,47 @@ str mbrOverlaps(bit *out, mbr **b1, mbr 
        return MAL_SUCCEED;
 }
 
+/*returns the intersection of two mbrs */
+str mbrIntersection(mbr **r, mbr **b1, mbr **b2) {
+       bit overlap;
+       mbrOverlaps(&overlap, b1, b2);
+       if (overlap == 0) {
+               *r = mbr_nil;
+       } else {
+               (*r)->xmin = ((*b1)->xmin < (*b2)->xmin) ? (*b2)->xmin : 
(*b1)->xmin;
+               (*r)->ymin = ((*b1)->ymin < (*b2)->ymin) ? (*b2)->ymin : 
(*b1)->ymin;
+               (*r)->xmax = ((*b1)->xmax > (*b2)->xmax) ? (*b2)->xmax : 
(*b1)->xmax;
+               (*r)->ymax = ((*b1)->ymax > (*b2)->ymax) ? (*b2)->ymax : 
(*b1)->ymax;
+       }
+       return MAL_SUCCEED;
+}
+
+/*returns the union of two mbrs */
+str mbrUnion(mbr **r, mbr **b1, mbr **b2) {
+       bit overlap;
+       mbrOverlaps(&overlap, b1, b2);
+       if (mbr_isnil(*b1) && mbr_isnil(*b2)) {
+               *r = mbr_nil;
+       } else if (mbr_isnil(*b1)) {
+               (*r)->xmin = (*b2)->xmin;
+               (*r)->ymin = (*b2)->ymin;
+               (*r)->xmax = (*b2)->xmax;
+               (*r)->ymax = (*b2)->ymax;
+       } else if (mbr_isnil(*b2)) {
+               (*r)->xmin = (*b1)->xmin;
+               (*r)->ymin = (*b1)->ymin;
+               (*r)->xmax = (*b1)->xmax;
+               (*r)->ymax = (*b1)->ymax;
+       } else {
+               (*r)->xmin = ((*b1)->xmin > (*b2)->xmin) ? (*b2)->xmin : 
(*b1)->xmin;
+               (*r)->ymin = ((*b1)->ymin > (*b2)->ymin) ? (*b2)->ymin : 
(*b1)->ymin;
+               (*r)->xmax = ((*b1)->xmax < (*b2)->xmax) ? (*b2)->xmax : 
(*b1)->xmax;
+               (*r)->ymax = ((*b1)->ymax < (*b2)->ymax) ? (*b2)->ymax : 
(*b1)->ymax;
+       }
+
+       return MAL_SUCCEED;
+}
+
 /*returns true if the mbrs of the two geometris overlap */
 str mbrOverlaps_wkb(bit *out, wkb **geom1WKB, wkb **geom2WKB) {
        mbr *geom1MBR = NULL, *geom2MBR = NULL;
diff --git a/geom/monetdb5/geom.h b/geom/monetdb5/geom.h
--- a/geom/monetdb5/geom.h
+++ b/geom/monetdb5/geom.h
@@ -260,6 +260,8 @@ geom_export str mbrEqual(bit *out, mbr *
 geom_export str mbrEqual_wkb(bit *out, wkb **geom1WKB, wkb **geom2WKB);
 geom_export str mbrDistance(dbl *out, mbr **b1, mbr **b2);
 geom_export str mbrDistance_wkb(dbl *out, wkb **geom1WKB, wkb **geom2WKB);
+geom_export str mbrIntersection(mbr **r, mbr **b1, mbr **b2);
+geom_export str mbrUnion(mbr **r, mbr **b1, mbr **b2);
 
 geom_export str wkbCoordinateFromWKB(dbl*, wkb**, int*);
 geom_export str wkbCoordinateFromMBR(dbl*, mbr**, int*);
@@ -294,3 +296,6 @@ geom_export str wkbCoordinateFromMBR_bat
 
 geom_export int geom_catalog_upgrade(void *, int);
 geom_export str geom_sql_upgrade(int);
+
+geom_export int mbr_isnil(mbr *m);
+
diff --git a/geom/monetdb5/grid.c b/geom/monetdb5/grid.c
--- a/geom/monetdb5/grid.c
+++ b/geom/monetdb5/grid.c
@@ -34,19 +34,73 @@ do {                                    
     max = res + add;                            \
 } while (0)
 
-typedef struct grid {
-       lng xmin;                       /* grid universe: minimum X value */
-       lng ymin;                       /* grid universe: minimum Y value */
-       lng xmax;                       /* grid universe: maximum X value */
-       lng ymax;                       /* grid universe: maximum Y value */
+#define GRIDextend(g1, g2, cellR, cellS, r1, r2, msg)                          
    \
+do {                                                                           
    \
+/* make space to cater for the worst case where all points qualify */          
    \
+BUN maxSize = BATcount((r1)) +                                                 
    \
+              ((g1)->dir[(cellR)+1]-(g1)->dir[(cellR)])*                       
    \
+              ((g2)->dir[(cellS)+1]-(g2)->dir[(cellS)]);                       
    \
+if (maxSize > GDK_oid_max) {                                                   
    \
+               (msg) = createException(MAL, "grid.distance", "overflow of head 
value");   \
+               goto distancejoin_fail;                                         
           \
+}                                                                              
    \
+while (maxSize > BATcapacity((r1))) {                                          
    \
+fprintf(stderr, "maxSize: %zu capacity: %zu\n", maxSize, BATcapacity((r1))); \
+       if ((BATextend((r1), BATgrows((r1))) != GDK_SUCCEED) ||                 
       \
+               (BATextend((r2), BATgrows((r2))) != GDK_SUCCEED)) {             
           \
+               (msg) = createException(MAL, "grid.distance",                   
           \
+                            "could not extend BATs for storing the join 
results"); \
+               goto distancejoin_fail;                                         
           \
+       }                                                                       
       \
+}                                                                              
    \
+/* fprintf(stderr, "Comparing [%zu](%zu)-[%zu](%zu)\n",                        
    \
+                   cellR, g1->dir[cellR+1]-g1->dir[cellR],                     
    \
+                   cellS, g2->dir[cellS+1]-g2->dir[cellS]); */                 
    \
+} while (0)
+
+
+
+#define GRIDcmp(x1Vals, y1Vals, br1, g1, x2Vals, y2Vals, br2, g2, cellR, 
cellS, r1, r2, msg) \
+do {                                                                           
    \
+if ((cellR) > (g1)->cellsNum || (cellS) > (g2)->cellsNum)                      
    \
+       continue;                                                               
       \
+GRIDextend(g1, g2, cellR, cellS, r1, r2, msg);                                 
    \
+/* compare points of R in cellR with points of S in cellS */                   
    \
+/* TODO: the outer relation should be the one with more points */              
    \
+for (size_t m = (g1)->dir[(cellR)]; m < (g1)->dir[(cellR)+1]; m++) {           
    \
+       oid oid1 = m;                                                           
       \
+       lng x1v = (x1Vals)[oid1];                                               
       \
+       lng y1v = (y1Vals)[oid1];                                               
       \
+       for (size_t n = (g2)->dir[(cellS)]; n < (g2)->dir[(cellS)+1]; n++) {    
       \
+               size_t oid2 = n;                                                
           \
+               lng x2v = (x2Vals)[oid2];                                       
           \
+               lng y2v = (y2Vals)[oid2];                                       
           \
+               double ddist = (x2v-x1v)*(x2v-x1v)+(y2v-y1v)*(y2v-y1v);         
           \
+/* fprintf(stderr, "The distance between [%lld,%lld] and [%lld,%lld] is %f 
(requested distance: %f)\n",x1v,y1v,x2v,y2v,ddist,distsqr); */ \
+               if (ddist <= distsqr) {                                         
          \
+                        bunfastapp_nocheck_inc((r1), (br1), &oid1);            
              \
+                        bunfastapp_nocheck_inc((r2), (br2), &oid2);            
              \
+               }                                                               
          \
+       }                                                                       
      \
+}                                                                              
   \
+} while (0)
+
+struct Grid {
+       dbl xmin;                       /* grid universe: minimum X value */
+       dbl ymin;                       /* grid universe: minimum Y value */
+       dbl xmax;                       /* grid universe: maximum X value */
+       dbl ymax;                       /* grid universe: maximum Y value */
+       mbr mbb;
        bte shift;
        size_t cellsNum;                /* number of cells                 */
        size_t cellsPerAxis;    /* number of cells per axis        */
+       size_t cellsX;
+       size_t cellsY;
        bat xbat;                               /* bat id for X coordinates     
   */
        bat ybat;                               /* bat id for Y coordinates     
   */
-       size_t * dir;                   /* the grid directory              */
+       oid * dir;                      /* the grid directory              */
        oid * oids;                     /* heap where the index is stored  */
-} grid;
+};
 
 static size_t
 countSetBits(uint64_t *resBitvector, size_t vectorSize)
@@ -63,10 +117,56 @@ countSetBits(uint64_t *resBitvector, siz
        return num;
 }
 
-static grid *
+#ifndef NDEBUG
+static void
+grid_print(Grid * g)
+{
+       int m = 0, n = 0, o = 0;
+       fprintf(stderr, "GRID %p (shift %d). batx: %d, baty: %d\n", g, 
g->shift, g->xbat, g->ybat);
+       fprintf(stderr, "- Universe: [%f %f, %f %f]\n", g->xmin, g->ymin, 
g->xmax, g->ymax);
+       fprintf(stderr, "- MBB     : [%f %f, %f %f]\n", g->mbb.xmin, 
g->mbb.ymin, g->mbb.xmax, g->mbb.ymax);
+       fprintf(stderr, "- Cells  X: %zu, Y: %zu, total: %zu, per axis: %zu\n", 
g->cellsX, g->cellsY, g->cellsNum, g->cellsPerAxis);
+       m = ceil(log10(g->dir[g->cellsNum]));
+       n = ceil(log10(g->cellsY));
+       o = ceil(log10(g->cellsX));
+       m = m > o ? m : o;
+
+       fprintf(stderr, "- Directory\n");
+       for (size_t i = 0; i < g->cellsX*(m+1)+n+4; i++)
+               fprintf(stderr,"-");
+       fprintf(stderr, "\n");
+       for (size_t k = 0; k < g->cellsY; k++) {
+               size_t j = g->cellsY - k - 1;
+               fprintf(stderr,"||%*zu||",n,j);
+               for (size_t i = 0; i < g->cellsX; i++) {
+                       oid cell = i + j*g->cellsX;
+                       size_t v = g->dir[cell+1]-g->dir[cell];
+                       if (v == 0)
+                               fprintf(stderr, "%*s|", m, "");
+                       else
+                               fprintf(stderr, "%*zu|", m, v);
+               }
+               fprintf(stderr,"\n");
+       }
+       for (size_t i = 0; i < g->cellsX*(m+1)+n+4; i++)
+               fprintf(stderr,"-");
+       fprintf(stderr, "\n");
+       fprintf(stderr, "||%*s||", n, "");
+       for (size_t i = 0; i < g->cellsX; i++)
+               fprintf(stderr, "%*zu|", m, i);
+       fprintf(stderr, "\n");
+       fprintf(stderr, "- OIDs\n[");
+       for (size_t i = 0; i < g->dir[g->cellsNum]; i++) {
+               fprintf(stderr, "%zu,", g->oids[i]);
+       }
+       fprintf(stderr, "\r]\n");
+}
+#endif
+
+static Grid *
 grid_create(BAT *bx, BAT *by)
 {
-       grid * g;
+       Grid * g;
        lng *xVals, *yVals;
        size_t i, cnt;
        dbl fxa, fxb, fya, fyb;
@@ -74,7 +174,7 @@ grid_create(BAT *bx, BAT *by)
        assert(BATcount(bx) == BATcount(by));
        assert(BATcount(bx) > 0);
 
-       if ((g = GDKmalloc(sizeof(grid))) == NULL)
+       if ((g = GDKmalloc(sizeof(Grid))) == NULL)
                return g;
 
        g->xbat = bx->batCacheid;
@@ -114,7 +214,7 @@ grid_create(BAT *bx, BAT *by)
        }
 
        /* allocate space for the directory */
-       if ((g->dir = GDKmalloc((g->cellsNum+1)*sizeof(size_t))) == NULL) {
+       if ((g->dir = GDKmalloc((g->cellsNum+1)*sizeof(oid))) == NULL) {
                GDKfree(g);
                g = NULL;
                return g;
@@ -153,8 +253,6 @@ grid_create(BAT *bx, BAT *by)
                oid cellx = (double)xVals[i]*fxa - fxb;
                oid celly = (double)yVals[i]*fya - fyb;
                oid cell = ((cellx << g->shift) | celly);
-//             assert(cell < g->cellsNum);
-//             assert(position < offsetVals[g->cellsNum]);
                g->oids[g->dir[cell]++] = i;
        }
 
@@ -165,11 +263,255 @@ grid_create(BAT *bx, BAT *by)
 
        /* TODO: move here the code for compressing the index */
 
+#ifndef NDEBUG
+       grid_print(g);
+#endif
        return g;
 }
 
+static Grid *
+grid_create_mbr(BAT *bx, BAT *by, mbr *m, dbl * d)
+{
+       Grid * g;
+       lng *xVals, *yVals;
+       size_t i, cnt;
+       dbl fxa, fxb, fya, fyb;
+
+       assert(BATcount(bx) == BATcount(by));
+       assert(BATcount(bx) > 0);
+       assert((*d) > 0);
+
+       if ((g = GDKmalloc(sizeof(Grid))) == NULL)
+               return g;
+
+       g->xbat = bx->batCacheid;
+       g->ybat = by->batCacheid;
+       xVals = (lng*)Tloc(bx, BUNfirst(bx));
+       yVals = (lng*)Tloc(by, BUNfirst(by));
+
+       g->mbb.xmin = m->xmin;
+       g->mbb.ymin = m->ymin;
+       g->mbb.xmax = m->xmax;
+       g->mbb.ymax = m->ymax;
+       g->xmin = m->xmin;
+       g->ymin = m->ymin;
+       g->xmax = m->xmax;
+       g->ymax = m->ymax;
+
+#ifndef NDEBUG
+       fprintf(stderr, "grid borders: [%f %f, %f %f]\n", g->mbb.xmin, 
g->mbb.ymin, g->mbb.xmax, g->mbb.ymax);
+#endif
+       /* determine the appropriate number of cells */
+       g->cellsX = (m->xmax - m->xmin)/(*d);
+       g->cellsY = (m->ymax - m->ymin)/(*d);
+
+       /* how many bits do we need? */
+       g->shift = (bte)ceil(log2(g->cellsX>g->cellsY?g->cellsX:g->cellsY));
+       cnt = BATcount(bx);
+       g->cellsNum = g->cellsX * g->cellsY;
+#ifndef NDEBUG
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to