Changeset: 8671d66745fb for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/8671d66745fb
Modified Files:
        gdk/gdk_strimps.c
Branch: string_imprints
Log Message:

Only one thread should compute the header and allocate the heap


diffs (261 lines):

diff --git a/gdk/gdk_strimps.c b/gdk/gdk_strimps.c
--- a/gdk/gdk_strimps.c
+++ b/gdk/gdk_strimps.c
@@ -216,7 +216,7 @@ STRMPchoosePairs(PairHistogramElem *hist
        const size_t cmin_max = STRIMP_HEADER_SIZE - 1;
        size_t hidx;
 
-       TRC_DEBUG_IF(ALGO) t0 = GDKusec();
+       TRC_DEBUG_IF(ACCELERATOR) t0 = GDKusec();
 
        for(i = 0; i < hist_size; i++) {
                if (max_counts[cmin_max] < hist[i].cnt) {
@@ -234,7 +234,7 @@ STRMPchoosePairs(PairHistogramElem *hist
                cp[i].psize = hist[indices[i]].p->psize;
        }
 
-       TRC_DEBUG(ALGO, LLFMT " usec\n", GDKusec() - t0);
+       TRC_DEBUG(ACCELERATOR, LLFMT " usec\n", GDKusec() - t0);
 }
 
 static bool
@@ -249,7 +249,7 @@ STRMPbuildHeader(BAT *b, CharPair *hpair
        PairIterator pi, *pip;
        CharPair cp, *cpp;
 
-       TRC_DEBUG_IF(ALGO) t0 = GDKusec();
+       TRC_DEBUG_IF(ACCELERATOR) t0 = GDKusec();
        hlen = STRIMP_HISTSIZE;
        if ((hist = (PairHistogramElem *)GDKmalloc(hlen*sizeof(PairHistogramElem))) == NULL) {
                // TODO handle error
@@ -317,7 +317,7 @@ STRMPbuildHeader(BAT *b, CharPair *hpair
        }
        GDKfree(hist);
 
-       TRC_DEBUG(ALGO, LLFMT " usec\n", GDKusec() - t0);
+       TRC_DEBUG(ACCELERATOR, LLFMT " usec\n", GDKusec() - t0);
        return true;
 }
 
@@ -333,39 +333,48 @@ STRMPcreateStrimpHeap(BAT *b)
        CharPair hpairs[STRIMP_HEADER_SIZE];
        const char *nme;
 
-
-       STRMPbuildHeader(b, hpairs);  /* Find the header pairs */
-       sz = 8 + STRIMP_HEADER_SIZE;  /* add 8-bytes for the descriptor */
-       for(i = 0; i < STRIMP_HEADER_SIZE; i++) {
-               sz += hpairs[i].psize;
-       }
+       if (b->tstrimps == NULL) {
+               MT_lock_set(&b->batIdxLock);
+               /* Make sure no other thread got here first */
+                if (b->tstrimps == NULL) {
+                       STRMPbuildHeader(b, hpairs); /* Find the header pairs */
+                       sz = 8 + STRIMP_HEADER_SIZE; /* add 8-bytes for the descriptor */
+                       for (i = 0; i < STRIMP_HEADER_SIZE; i++) {
+                               sz += hpairs[i].psize;
+                       }
 
-       nme = GDKinmemory(b->theap->farmid) ? ":memory:" : BBP_physical(b->batCacheid);
-       /* Allocate the strimps heap */
-       if ((r = GDKzalloc(sizeof(Strimps))) == NULL ||
-           (r->strimps.farmid = BBPselectfarm(b->batRole, b->ttype, strimpheap)) < 0 ||
-           strconcat_len(r->strimps.filename, sizeof(r->strimps.filename),
-                         nme, ".tstrimps", NULL) >= sizeof(r->strimps.filename) ||
-           HEAPalloc(&r->strimps, BATcount(b)*sizeof(uint64_t) + sz, sizeof(uint8_t), 0) != GDK_SUCCEED) {
-               GDKfree(r);
-               return NULL;
+                       nme = GDKinmemory(b->theap->farmid) ? ":memory:" : BBP_physical(b->batCacheid);
+                       /* Allocate the strimps heap */
+                       if ((r = GDKzalloc(sizeof(Strimps))) == NULL ||
+                           (r->strimps.farmid = BBPselectfarm(b->batRole, b->ttype, strimpheap)) < 0 ||
+                           strconcat_len(r->strimps.filename, sizeof(r->strimps.filename), nme,
+                                         ".tstrimps", NULL) >= sizeof(r->strimps.filename) ||
+                           HEAPalloc(&r->strimps, BATcount(b) * sizeof(uint64_t) + sz, sizeof(uint8_t), 0) != GDK_SUCCEED) {
+                               GDKfree(r);
+                               MT_lock_unset(&b->batIdxLock);
+                               return NULL;
+                       }
+
+                       descriptor = STRIMP_VERSION | ((uint64_t)STRIMP_HEADER_SIZE) << 8 | ((uint64_t)sz) << 16;
+
+                       ((uint64_t *)r->strimps.base)[0] = descriptor;
+                       r->sizes_base = h1 = (uint8_t *)r->strimps.base + 8;
+                       r->pairs_base = h2 = (uint8_t *)h1 + STRIMP_HEADER_SIZE;
+
+                       for (i = 0; i < STRIMP_HEADER_SIZE; i++) {
+                               *(h1 + i) = hpairs[i].psize;
+                               memcpy(h2, hpairs[i].pbytes, hpairs[i].psize);
+                               h2 += hpairs[i].psize;
+                       }
+                       r->strimps_base = h2;
+                       r->strimps.free = sz;
+
+                       b->tstrimps = r;
+                       b->batDirtydesc = true;
+               }
+               MT_lock_unset(&b->batIdxLock);
        }
-
-       descriptor =  STRIMP_VERSION | ((uint64_t)STRIMP_HEADER_SIZE) << 8 | ((uint64_t)sz) << 16;
-
-       ((uint64_t *)r->strimps.base)[0] = descriptor;
-       r->sizes_base = h1 = (uint8_t *)r->strimps.base + 8;
-       r->pairs_base = h2 = (uint8_t *)h1 + STRIMP_HEADER_SIZE;
-
-       for (i = 0; i < STRIMP_HEADER_SIZE; i++) {
-               *(h1 + i) = hpairs[i].psize;
-               memcpy(h2, hpairs[i].pbytes, hpairs[i].psize);
-               h2 += hpairs[i].psize;
-       }
-       r->strimps_base = h2;
-       r->strimps.free = sz;
-
-       return r;
+        return b->tstrimps;
 }
 
 static bool
@@ -423,7 +432,7 @@ BATcheckstrimps(BAT *b)
                                                close(fd);
                                                hp->strimps.parentid = b->batCacheid;
                                                b->tstrimps = hp;
-                                               TRC_DEBUG(ALGO, "BATcheckstrimps(" ALGOBATFMT "): reusing persisted strimp\n", ALGOBATPAR(b));
+                                               TRC_DEBUG(ACCELERATOR, "BATcheckstrimps(" ALGOBATFMT "): reusing persisted strimp\n", ALGOBATPAR(b));
                                                MT_lock_unset(&b->batIdxLock);
                                                return true;
                                        }
@@ -437,10 +446,10 @@ BATcheckstrimps(BAT *b)
                        GDKclrerr();    /* we're not currently interested in errors */
                }
                MT_lock_unset(&b->batIdxLock);
-       }
-       ret = b->tstrimps != NULL;
+        }
+        ret = b->tstrimps != NULL;
        if (ret)
-               TRC_DEBUG(ALGO, "BATcheckstrimps(" ALGOBATFMT "): already has strimps, waited " LLFMT " usec\n", ALGOBATPAR(b), GDKusec() - t);
+               TRC_DEBUG(ACCELERATOR, "BATcheckstrimps(" ALGOBATFMT "): already has strimps, waited " LLFMT " usec\n", ALGOBATPAR(b), GDKusec() - t);
 
        return ret;
 }
@@ -496,11 +505,11 @@ BATstrimpsync(void *arg)
        int fd;
        const char *failed = " failed";
 
-       TRC_DEBUG_IF(ALGO) t0 = GDKusec();
+       TRC_DEBUG_IF(ACCELERATOR) t0 = GDKusec();
 
        MT_lock_set(&b->batIdxLock);
        if ((hp = &b->tstrimps->strimps)) {
-               if (HEAPsave(hp, hp->filename, NULL, true) == GDK_SUCCEED) {
+               if (HEAPsave(hp, hp->filename, NULL, true, hp->free) == GDK_SUCCEED) {
                        if (hp->storage == STORE_MEM) {
                                if ((fd = GDKfdlocate(hp->farmid, hp->filename, "rb+", NULL)) >= 0) {
                                        ((uint64_t *)hp->base)[0] |= (uint64_t) 1 << 32;
@@ -531,7 +540,7 @@ BATstrimpsync(void *arg)
                                        failed = "";
                                }
                        }
-                       TRC_DEBUG(ALGO, "BATstrimpsync(%s): strimps persisted"
+                       TRC_DEBUG(ACCELERATOR, "BATstrimpsync(%s): strimps persisted"
                                  " (" LLFMT " usec)%s\n",
                                  BATgetId(b), GDKusec() - t0, failed);
                }
@@ -543,7 +552,7 @@ BATstrimpsync(void *arg)
 static void
 persistStrimp(BAT *b)
 {
-       TRC_DEBUG(ALGO, "zoo: %d\n", (BBP_status(b->batCacheid) & BBPEXISTING));
+       TRC_DEBUG(ACCELERATOR, "zoo: %d\n", (BBP_status(b->batCacheid) & BBPEXISTING));
        if((BBP_status(b->batCacheid) & BBPEXISTING)
           && b->batInserted == b->batCount
           && !b->theap->dirty
@@ -556,7 +565,7 @@ persistStrimp(BAT *b)
                                     MT_THR_DETACHED, name) < 0)
                        BBPunfix(b->batCacheid);
        } else
-               TRC_DEBUG(ALGO, "persistStrimp(" ALGOBATFMT "): NOT persisting strimp\n", ALGOBATPAR(b));
+               TRC_DEBUG(ACCELERATOR, "persistStrimp(" ALGOBATFMT "): NOT persisting strimp\n", ALGOBATPAR(b));
 }
 
 /* Create */
@@ -571,13 +580,14 @@ STRMPcreate(BAT *b)
        uint64_t *dh;
 
        assert(b->ttype == TYPE_str);
-       TRC_DEBUG_IF(ALGO) t0 = GDKusec();
+       TRC_DEBUG_IF(ACCELERATOR) t0 = GDKusec();
 
 
        if (BATcheckstrimps(b))
                return GDK_SUCCEED;
 
-       if (VIEWtparent(b)) {
+       /* Disable this before merging to default */
+       if (isVIEW(b)) {
                assert(b->tstrimps == NULL);
                b = BBPdescriptor(VIEWtparent(b));
        }
@@ -597,7 +607,8 @@ STRMPcreate(BAT *b)
        }
        h->strimps.free += b->batCount*sizeof(uint64_t);
 
-       /* Debug */
+
+#ifndef NDEBUG
        {
                FILE *f = fopen("/tmp/strmp", "wb");
                if (f) {
@@ -605,17 +616,16 @@ STRMPcreate(BAT *b)
                        fclose(f);
                }
        }
+#endif
 
        /* After we have computed the strimp, attempt to write it back
         * to the BAT.
         */
        MT_lock_set(&b->batIdxLock);
-       b->tstrimps = h;
-       b->batDirtydesc = true;
        persistStrimp(b);
        MT_lock_unset(&b->batIdxLock);
 
-       TRC_DEBUG(ALGO, "strimp creation took " LLFMT " usec\n", GDKusec()-t0);
+       TRC_DEBUG(ACCELERATOR, "strimp creation took " LLFMT " usec\n", GDKusec()-t0);
        return GDK_SUCCEED;
 }
 
@@ -667,7 +677,7 @@ STRMPmakehistogramBP(BAT *b, uint64_t *h
        char *ptr, *s;
        /* uint64_t cur_min = 0; */
 
-       TRC_DEBUG_IF(ALGO) t0 = GDKusec();
+       TRC_DEBUG_IF(ACCELERATOR) t0 = GDKusec();
        assert(b->ttype == TYPE_str);
 
        for(hi = 0; hi < hist_size; hi++)
@@ -706,7 +716,7 @@ STRMPmakehistogramBP(BAT *b, uint64_t *h
                }
        }
 
-       TRC_DEBUG(ALGO, LLFMT " usec\n", GDKusec() - t0);
+       TRC_DEBUG(ACCELERATOR, LLFMT " usec\n", GDKusec() - t0);
        GDKtracer_flush_buffer();
        return GDK_SUCCEED;
 }
@@ -793,10 +803,10 @@ STRMPndigrams(BAT *b, size_t *n)
                s = (char *)BUNtail(bi, i);
                 // *n += STRMP_strlen(s) - 1;
                *n += strlen(s) - 1;
-               // TRC_DEBUG(ALGO, "s["LLFMT"]=%s\n", i, s);
+               // TRC_DEBUG(ACCELERATOR, "s["LLFMT"]=%s\n", i, s);
        }
 
-       // TRC_DEBUG(ALGO, LLFMT "usec\n", GDKusec() - t0);
+       // TRC_DEBUG(ACCELERATOR, LLFMT "usec\n", GDKusec() - t0);
        // GDKtracer_flush_buffer();
 
        return GDK_SUCCEED;
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to