Changeset: f41a9714b4a3 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=f41a9714b4a3
Modified Files:
        geom/monetdb5/grid.c
        geom/sql/Tests/grid.sql
Branch: grid
Log Message:

Added a test for testing the grid-based distance join


diffs (truncated from 534 to 300 lines):

diff --git a/geom/monetdb5/grid.c b/geom/monetdb5/grid.c
--- a/geom/monetdb5/grid.c
+++ b/geom/monetdb5/grid.c
@@ -45,7 +45,7 @@ if (maxSize > GDK_oid_max) {            
                goto distancejoin_fail;                                         
           \
 }                                                                              
    \
 while (maxSize > BATcapacity((r1))) {                                          
    \
-fprintf(stderr, "maxSize: %zu capacity: %zu\n", maxSize, BATcapacity((r1))); \
+fprintf(stderr, "maxSize: %zu capacity: %zu\n", maxSize, BATcapacity((r1)));   
    \
        if ((BATextend((r1), BATgrows((r1))) != GDK_SUCCEED) ||                 
       \
                (BATextend((r2), BATgrows((r2))) != GDK_SUCCEED)) {             
           \
                (msg) = createException(MAL, "grid.distance",                   
           \
@@ -53,51 +53,53 @@ fprintf(stderr, "maxSize: %zu capacity: 
                goto distancejoin_fail;                                         
           \
        }                                                                       
       \
 }                                                                              
    \
-/* fprintf(stderr, "Comparing [%zu](%zu)-[%zu](%zu)\n",                        
    \
-                   cellR, g1->dir[cellR+1]-g1->dir[cellR],                     
    \
-                   cellS, g2->dir[cellS+1]-g2->dir[cellS]); */                 
    \
 } while (0)
 
 
 
-#define GRIDcmp(x1Vals, y1Vals, br1, g1, x2Vals, y2Vals, br2, g2, cellR, 
cellS, r1, r2, msg) \
-do {                                                                           
    \
-if ((cellR) > (g1)->cellsNum || (cellS) > (g2)->cellsNum)                      
    \
-       continue;                                                               
       \
-GRIDextend(g1, g2, cellR, cellS, r1, r2, msg);                                 
    \
-/* compare points of R in cellR with points of S in cellS */                   
    \
-/* TODO: the outer relation should be the one with more points */              
    \
-for (size_t m = (g1)->dir[(cellR)]; m < (g1)->dir[(cellR)+1]; m++) {           
    \
-       oid oid1 = m;                                                           
       \
-       lng x1v = (x1Vals)[oid1];                                               
       \
-       lng y1v = (y1Vals)[oid1];                                               
       \
-       for (size_t n = (g2)->dir[(cellS)]; n < (g2)->dir[(cellS)+1]; n++) {    
       \
-               size_t oid2 = n;                                                
           \
-               lng x2v = (x2Vals)[oid2];                                       
           \
-               lng y2v = (y2Vals)[oid2];                                       
           \
-               double ddist = (x2v-x1v)*(x2v-x1v)+(y2v-y1v)*(y2v-y1v);         
           \
-/* fprintf(stderr, "The distance between [%lld,%lld] and [%lld,%lld] is %f 
(requested distance: %f)\n",x1v,y1v,x2v,y2v,ddist,distsqr); */ \
+#define GRIDcmp(x1Vals, y1Vals, br1, g1,                                       
   \
+                x2Vals, y2Vals, br2 ,g2,                                       
   \
+                cellR, cellS, r1, r2, seq1, seq2, msg)                         
   \
+do {                                                                           
   \
+if ((cellR) >= (g1)->cellsNum || (cellS) >= (g2)->cellsNum)                    
   \
+       continue;                                                               
      \
+GRIDextend(g1, g2, cellR, cellS, r1, r2, msg);                                 
   \
+/* compare points of R in cellR with points of S in cellS */                   
   \
+/* TODO: the outer relation should be the one with more points */              
   \
+for (size_t m = (g1)->dir[(cellR)]; m < (g1)->dir[(cellR)+1]; m++) {           
   \
+       oid oid1 = m;                                                           
      \
+       lng x1v = (x1Vals)[oid1];                                               
      \
+       lng y1v = (y1Vals)[oid1];                                               
      \
+       for (size_t n = (g2)->dir[(cellS)]; n < (g2)->dir[(cellS)+1]; n++) {    
      \
+               size_t oid2 = n;                                                
          \
+               lng x2v = (x2Vals)[oid2];                                       
          \
+               lng y2v = (y2Vals)[oid2];                                       
          \
+               double ddist = (x2v-x1v)*(x2v-x1v)+(y2v-y1v)*(y2v-y1v);         
          \
                if (ddist <= distsqr) {                                         
          \
-                        bunfastapp_nocheck_inc((r1), (br1), &oid1);            
              \
-                        bunfastapp_nocheck_inc((r2), (br2), &oid2);            
              \
+/* TODO: Now that you have reserved space in advance, use Tloc+predication */  
   \
+                       oid o1 = oid1+seq1;                                     
              \
+                       oid o2 = oid2+seq2;                                     
              \
+                       bunfastapp_nocheck_inc((r1), (br1), &o1);               
              \
+                       bunfastapp_nocheck_inc((r2), (br2), &o2);               
              \
                }                                                               
          \
        }                                                                       
      \
 }                                                                              
   \
 } while (0)
 
+typedef struct Grid Grid;
 struct Grid {
-       dbl xmin;                       /* grid universe: minimum X value */
-       dbl ymin;                       /* grid universe: minimum Y value */
-       dbl xmax;                       /* grid universe: maximum X value */
-       dbl ymax;                       /* grid universe: maximum Y value */
-       mbr mbb;
+       dbl xmin;                       /* minimum X value of input BATs   */
+       dbl ymin;                       /* minimum Y value of input BATs   */
+       dbl xmax;                       /* maximum X value of input BATs   */
+       dbl ymax;                       /* maximum Y value of input BATs   */
+       mbr mbb;                        /* grid universe (might differ from the 
input values) */
        bte shift;
-       size_t cellsNum;                /* number of cells                 */
-       size_t cellsPerAxis;    /* number of cells per axis        */
-       size_t cellsX;
-       size_t cellsY;
-       bat xbat;                               /* bat id for X coordinates     
   */
-       bat ybat;                               /* bat id for Y coordinates     
   */
+       size_t cellsNum;        /* number of cells                 */
+       size_t cellsPerAxis;/* number of cells per axis        */
+       size_t cellsX;          /* number of cells in X axis       */
+       size_t cellsY;          /* number of cells in Y axis       */
+       bat xbat;                       /* bat id for X coordinates        */
+       bat ybat;                       /* bat id for Y coordinates        */
        oid * dir;                      /* the grid directory              */
        oid * oids;                     /* heap where the index is stored  */
 };
@@ -127,8 +129,8 @@ grid_print(Grid * g)
        fprintf(stderr, "- MBB     : [%f %f, %f %f]\n", g->mbb.xmin, 
g->mbb.ymin, g->mbb.xmax, g->mbb.ymax);
        fprintf(stderr, "- Cells  X: %zu, Y: %zu, total: %zu, per axis: %zu\n", 
g->cellsX, g->cellsY, g->cellsNum, g->cellsPerAxis);
        m = ceil(log10(g->dir[g->cellsNum]));
-       n = ceil(log10(g->cellsY));
-       o = ceil(log10(g->cellsX));
+       n = ceil(log10(g->cellsY-1));
+       o = ceil(log10(g->cellsX-1));
        m = m > o ? m : o;
 
        fprintf(stderr, "- Directory\n");
@@ -263,16 +265,12 @@ grid_create(BAT *bx, BAT *by)
 
        /* TODO: move here the code for compressing the index */
 
-#ifndef NDEBUG
-       grid_print(g);
-#endif
        return g;
 }
 
-static Grid *
-grid_create_mbr(BAT *bx, BAT *by, mbr *m, dbl * d)
+static str
+grid_create_mbr(Grid * g, BAT *bx, BAT *by, mbr *m, dbl * d)
 {
-       Grid * g;
        lng *xVals, *yVals;
        size_t i, cnt;
        dbl fxa, fxb, fya, fyb;
@@ -281,9 +279,6 @@ grid_create_mbr(BAT *bx, BAT *by, mbr *m
        assert(BATcount(bx) > 0);
        assert((*d) > 0);
 
-       if ((g = GDKmalloc(sizeof(Grid))) == NULL)
-               return g;
-
        g->xbat = bx->batCacheid;
        g->ybat = by->batCacheid;
        xVals = (lng*)Tloc(bx, BUNfirst(bx));
@@ -298,27 +293,37 @@ grid_create_mbr(BAT *bx, BAT *by, mbr *m
        g->xmax = m->xmax;
        g->ymax = m->ymax;
 
+#if 0
 #ifndef NDEBUG
        fprintf(stderr, "grid borders: [%f %f, %f %f]\n", g->mbb.xmin, 
g->mbb.ymin, g->mbb.xmax, g->mbb.ymax);
 #endif
+#endif
        /* determine the appropriate number of cells */
-       g->cellsX = (m->xmax - m->xmin)/(*d);
-       g->cellsY = (m->ymax - m->ymin)/(*d);
+       g->cellsX = (size_t)ceil((m->xmax - m->xmin)/(*d))+1;
+       g->cellsY = (size_t)ceil((m->ymax - m->ymin)/(*d))+1;
+       if ((size_t)(m->xmax-m->xmin)/(*d) == (size_t)g->cellsX*(*d))
+               g->cellsX++;
+       if ((m->ymax-m->ymin)/(*d) == g->cellsY*(*d))
+               g->cellsY++;
+       g->mbb.xmax = g->mbb.xmin + g->cellsX*(*d);
+       g->mbb.ymax = g->mbb.ymin + g->cellsY*(*d);
 
        /* how many bits do we need? */
        g->shift = (bte)ceil(log2(g->cellsX>g->cellsY?g->cellsX:g->cellsY));
        cnt = BATcount(bx);
        g->cellsNum = g->cellsX * g->cellsY;
+#if 0
 #ifndef NDEBUG
        fprintf(stderr, "shift: %d\n", g->shift);
        fprintf(stderr, "Cells: x=%zu, y=%zu (total: %zu)\n", g->cellsX, 
g->cellsY, g->cellsNum);
 #endif
+#endif
 
        /* allocate space for the directory */
        if ((g->dir = GDKmalloc((g->cellsNum+1)*sizeof(oid))) == NULL) {
                GDKfree(g);
                g = NULL;
-               return g;
+               return createException(MAL, "grid.create_mbr", MAL_MALLOC_FAIL);
        }
        for (i = 0; i <= g->cellsNum; i++)
                g->dir[i] = 0;
@@ -327,7 +332,7 @@ grid_create_mbr(BAT *bx, BAT *by, mbr *m
        if((g->oids = GDKmalloc(BATcount(bx)*sizeof(oid))) == NULL) {
                GDKfree(g);
                g = NULL;
-               return g;
+               return createException(MAL, "grid.create_mbr", MAL_MALLOC_FAIL);
        }
 
        /* compute the index */
@@ -336,10 +341,12 @@ grid_create_mbr(BAT *bx, BAT *by, mbr *m
        fxb = (double)g->mbb.xmin*fxa;
        fya = ((double)g->cellsY/(g->mbb.ymax-g->mbb.ymin));
        fyb = (double)g->mbb.ymin*fya;
+#if 0
 #ifndef NDEBUG
        fprintf(stderr, "Coefficients: fxa=%f, fxb=%f\n", fxa, fxb);
        fprintf(stderr, "Coefficients: fya=%f, fyb=%f\n", fya, fyb);
 #endif
+#endif
 
        cnt = BATcount(bx);
        for (i = 0; i < cnt; i++) {
@@ -375,8 +382,7 @@ grid_create_mbr(BAT *bx, BAT *by, mbr *m
 #ifndef NDEBUG
        grid_print(g);
 #endif
-
-       return g;
+       return MAL_SUCCEED;
 }
 
 
@@ -387,6 +393,7 @@ grid_create_bats(Grid ** gg1, Grid **gg2
        lng *x1Vals, *y1Vals, *x2Vals, *y2Vals;
        size_t i, cnt1, cnt2;
        mbr *m1 = NULL, *m2 = NULL, *mi = NULL, *mu = NULL;
+       str msg = MAL_SUCCEED;
 
        assert(BATcount(bx1) == BATcount(by1));
        assert(BATcount(bx2) == BATcount(by2));
@@ -399,13 +406,8 @@ grid_create_bats(Grid ** gg1, Grid **gg2
                ((m2 = GDKmalloc(sizeof(mbr))) == NULL) ||
                ((g1 = GDKmalloc(sizeof(Grid))) == NULL) ||
                ((g2 = GDKmalloc(sizeof(Grid))) == NULL)) {
-               if (mi) GDKfree(mi);
-               if (mu) GDKfree(mu);
-               if (m1) GDKfree(m1);
-               if (m2) GDKfree(m2);
-               if (g1) GDKfree(g1);
-               if (g2) GDKfree(g2);
-               return createException(MAL, "grid.create_bats", 
MAL_MALLOC_FAIL);
+               msg=createException(MAL, "grid.create_bats", MAL_MALLOC_FAIL);
+               goto grid_create_bats_fail;
        }
 
        g1->xbat = bx1->batCacheid;
@@ -439,10 +441,12 @@ grid_create_bats(Grid ** gg1, Grid **gg2
        g1->mbb.ymin=g1->ymin;
        g1->mbb.xmax=g1->xmax;
        g1->mbb.ymax=g1->ymax;
+#if 0
 #ifndef NDEBUG
        fprintf(stderr, "Outer relation limits: [%f %f, %f %f]\n", 
g1->mbb.xmin, g1->mbb.ymin, g1->mbb.xmax, g1->mbb.ymax);
        fprintf(stderr, "Outer relation limits: [%f %f, %f %f]\n", g1->xmin, 
g1->ymin, g1->xmax, g1->ymax);
 #endif
+#endif
        cnt2 = BATcount(bx2);
        g2->xmin = g2->xmax = x2Vals[0];
        for (i = 1; i < cnt2; i++) {
@@ -464,40 +468,36 @@ grid_create_bats(Grid ** gg1, Grid **gg2
        g2->mbb.ymin=g2->ymin;
        g2->mbb.xmax=g2->xmax;
        g2->mbb.ymax=g2->ymax;
+#if 0
 #ifndef NDEBUG
        fprintf(stderr, "Outer relation limits: [%f %f, %f %f]\n", 
g2->mbb.xmin, g2->mbb.ymin, g2->mbb.xmax, g2->mbb.ymax);
        fprintf(stderr, "Outer relation limits: [%f %f, %f %f]\n", g2->xmin, 
g2->ymin, g2->xmax, g2->ymax);
 #endif
-
+#endif
        m1->xmin = g1->mbb.xmin - *d; m1->ymin = g1->mbb.ymin - *d; m1->xmax = 
g1->mbb.xmax + *d; m1->ymax = g1->mbb.ymax + *d;
        m2->xmin = g2->mbb.xmin - *d; m2->ymin = g2->mbb.ymin - *d; m2->xmax = 
g2->mbb.xmax + *d; m2->ymax = g2->mbb.ymax + *d;
 
        /* compute the intersection between the two coverages */
        mbrIntersection(&mi, &m1, &m2);  /* query the grid only on the 
intersecting mbr */
        mbrUnion(&mu, &m1, &m2);         /* create the grid index on the union 
of the two universes */
+#if 0
 #ifndef NDEBUG
        fprintf(stderr, "outer relation mbb: [%f %f, %f %f]\n", m1->xmin, 
m1->ymin, m1->xmax, m1->ymax);
        fprintf(stderr, "inner relation mbb: [%f %f, %f %f]\n", m2->xmin, 
m2->ymin, m2->xmax, m2->ymax);
        fprintf(stderr, "  intersection mbb: [%f %f, %f %f]\n", mi->xmin, 
mi->ymin, mi->xmax, mi->ymax);
        fprintf(stderr, "         union mbb: [%f %f, %f %f]\n", mu->xmin, 
mu->ymin, mu->xmax, mu->ymax);
 #endif
-
+#endif
        /* if the two mbbs do not intersect, return an empty result */
-       if (mbr_isnil(mi)) {
-               GDKfree(mi);
-               GDKfree(mu);
-               GDKfree(m1);
-               GDKfree(m2);
-               GDKfree(g1);
-               GDKfree(g2);
-               g1 = NULL;
-               g2 = NULL;
-               return MAL_SUCCEED;
-       }
+       if (mbr_isnil(mi))
+               goto grid_create_bats_return; 
 
        /* create grid index using a common grid */
-       g1 = grid_create_mbr(bx1, by1, mu, d);
-       g2 = grid_create_mbr(bx2, by2, mu, d);
+       if ( grid_create_mbr(g1, bx1, by1, mu, d) != MAL_SUCCEED ||
+                grid_create_mbr(g2, bx2, by2, mu, d) != MAL_SUCCEED) {
+               msg = createException(MAL, "grid.create_bats", "Could not 
create the grid index");
+               goto grid_create_bats_return;
+       }
 
        /* TODO: move here the code for compressing the index */
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to