Changeset: 6920e02d734f for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=6920e02d734f
Modified Files:
        clients/Tests/exports.stable.out
        gdk/gdk.h
        gdk/gdk_align.c
        gdk/gdk_bat.c
        gdk/gdk_batop.c
        gdk/gdk_bbp.c
        gdk/gdk_cand.c
        gdk/gdk_hash.c
        gdk/gdk_heap.c
        gdk/gdk_imprints.c
        gdk/gdk_orderidx.c
        gdk/gdk_private.h
        gdk/gdk_string.c
        monetdb5/modules/mal/tablet.c
Branch: unlock
Log Message:

Convert string offset heaps to var_t size when starting to mmap them.
This avoids the problem that a view might be looking at an (mmapped)
offset heap that gets widened by the parent when it inserts extra data.


diffs (truncated from 807 to 300 lines):

diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -323,7 +323,7 @@ gdk_return GDKtracer_set_flush_level(con
 gdk_return GDKtracer_set_layer_level(const char *layer, const char *lvl);
 gdk_return GDKtracer_stop(void);
 size_t GDKuniqueid(size_t offset);
-gdk_return GDKupgradevarheap(BAT *b, var_t v, bool copyall, bool mayshare) 
__attribute__((__warn_unused_result__));
+gdk_return GDKupgradevarheap(BAT *b, var_t v, BUN cap, bool copyall) 
__attribute__((__warn_unused_result__));
 lng GDKusec(void);
 const char *GDKversion(void) __attribute__((__const__));
 size_t GDKvm_cursize(void);
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -889,7 +889,7 @@ gdk_export gdk_return BATextend(BAT *b, 
 gdk_export uint8_t ATOMelmshift(int sz)
        __attribute__((__const__));
 
-gdk_export gdk_return GDKupgradevarheap(BAT *b, var_t v, bool copyall, bool 
mayshare)
+gdk_export gdk_return GDKupgradevarheap(BAT *b, var_t v, BUN cap, bool copyall)
        __attribute__((__warn_unused_result__));
 gdk_export gdk_return BUNappend(BAT *b, const void *right, bool force)
        __attribute__((__warn_unused_result__));
@@ -1507,8 +1507,7 @@ Tputvalue(BAT *b, BUN p, const void *v, 
                if (b->twidth < SIZEOF_VAR_T &&
                    (b->twidth <= 2 ? d - GDK_VAROFFSET : d) >= ((size_t) 1 << 
(8 * b->twidth))) {
                        /* doesn't fit in current heap, upgrade it */
-                       rc = GDKupgradevarheap(b, d, copyall,
-                                              b->batRestricted == BAT_READ);
+                       rc = GDKupgradevarheap(b, d, 0, copyall);
                        if (rc != GDK_SUCCEED)
                                return rc;
                }
@@ -1592,8 +1591,7 @@ tfastins_nocheckVAR(BAT *b, BUN p, const
        if (b->twidth < SIZEOF_VAR_T &&
            (b->twidth <= 2 ? d - GDK_VAROFFSET : d) >= ((size_t) 1 << (8 * 
b->twidth))) {
                /* doesn't fit in current heap, upgrade it */
-               rc = GDKupgradevarheap(b, d, false,
-                                      b->batRestricted == BAT_READ);
+               rc = GDKupgradevarheap(b, d, 0, false);
                if (rc != GDK_SUCCEED)
                        return rc;
        }
diff --git a/gdk/gdk_align.c b/gdk/gdk_align.c
--- a/gdk/gdk_align.c
+++ b/gdk/gdk_align.c
@@ -187,7 +187,7 @@ BATmaterialize(BAT *b)
 
        strconcat_len(b->theap->filename, sizeof(b->theap->filename),
                      BBP_physical(b->batCacheid), ".tail", NULL);
-       if (HEAPalloc(tail, cnt, sizeof(oid)) != GDK_SUCCEED) {
+       if (HEAPalloc(tail, cnt, sizeof(oid), 0) != GDK_SUCCEED) {
                return GDK_FAIL;
        }
 
@@ -196,6 +196,7 @@ BATmaterialize(BAT *b)
        assert(ATOMIC_GET(&b->theap->refs) > 0);
        HEAPdecref(b->theap, false);
        b->theap = tail;
+       b->tbaseoff = 0;
        MT_lock_unset(&b->theaplock);
        b->ttype = TYPE_oid;
        BATsetdims(b);
@@ -318,7 +319,7 @@ VIEWreset(BAT *b)
 
                strconcat_len(tail->filename, sizeof(tail->filename),
                              nme, ".tail", NULL);
-               if (b->ttype && HEAPalloc(tail, cnt, Tsize(b)) != GDK_SUCCEED)
+               if (b->ttype && HEAPalloc(tail, cnt, Tsize(b), 
ATOMsize(b->ttype)) != GDK_SUCCEED)
                        goto bailout;
                if (b->tvheap) {
                        th = GDKmalloc(sizeof(Heap));
@@ -363,6 +364,10 @@ VIEWreset(BAT *b)
                b->tkey = BATtkey(v);
 
                /* replace the heaps */
+               if (tail->storage == STORE_MMAP) {
+                       b->twidth = ATOMsize(b->ttype);
+                       b->tshift = ATOMelmshift(Tsize(b));
+               }
                b->theap = tail;
                tail = NULL;
                b->tbaseoff = 0;
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -213,9 +213,13 @@ COLnew(oid hseq, int tt, BUN cap, role_t
        bn->batCapacity = cap;
 
        /* alloc the main heaps */
-       if (tt && HEAPalloc(bn->theap, cap, bn->twidth) != GDK_SUCCEED) {
+       if (tt && HEAPalloc(bn->theap, cap, bn->twidth, ATOMsize(bn->ttype)) != 
GDK_SUCCEED) {
                goto bailout;
        }
+       if (bn->theap->storage == STORE_MMAP) {
+               bn->twidth = ATOMsize(bn->ttype);
+               bn->tshift = ATOMelmshift(Tsize(bn));
+       }
 
        if (bn->tvheap && ATOMheap(tt, bn->tvheap, cap) != GDK_SUCCEED) {
                goto bailout;
@@ -469,6 +473,8 @@ BATextend(BAT *b, BUN newcap)
        if (b->theap->base) {
                TRC_DEBUG(HEAP, "HEAPgrow in BATextend %s %zu %zu\n",
                          b->theap->filename, b->theap->size, theap_size);
+               if (b->ttype == TYPE_str)
+                       return GDKupgradevarheap(b, GDK_VAROFFSET, newcap, 
false);
                Heap *h = HEAPgrow(b->theap, theap_size);
                if (h == NULL)
                        return GDK_FAIL;
@@ -1094,6 +1100,8 @@ BUNappend(BAT *b, const void *t, bool fo
                        b->tseqbase = * (const oid *) t;
                } else if (is_oid_nil(* (oid *) t) ||
                           b->tseqbase + b->batCount != *(const oid *) t) {
+                       /* we need to materialize b; allocate enough capacity */
+                       b->batCapacity = BATcount(b) + 1;
                        if (BATmaterialize(b) != GDK_SUCCEED)
                                return GDK_FAIL;
                }
@@ -1334,7 +1342,7 @@ BUNinplace(BAT *b, BUN p, const void *t,
                if (b->twidth < SIZEOF_VAR_T &&
                    (b->twidth <= 2 ? _d - GDK_VAROFFSET : _d) >= ((size_t) 1 
<< (8 * b->twidth))) {
                        /* doesn't fit in current heap, upgrade it */
-                       if (GDKupgradevarheap(b, _d, false, b->batRestricted == 
BAT_READ) != GDK_SUCCEED)
+                       if (GDKupgradevarheap(b, _d, 0, false) != GDK_SUCCEED)
                                return GDK_FAIL;
                }
                _ptr = BUNtloc(bi, p);
diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -57,7 +57,7 @@ unshare_string_heap(BAT *b)
 #endif
 
 static gdk_return
-insert_string_bat(BAT *b, BAT *n, struct canditer *ci, bool force)
+insert_string_bat(BAT *b, BAT *n, struct canditer *ci)
 {
        BATiter ni;             /* iterator */
        size_t toff = ~(size_t) 0;      /* tail offset */
@@ -193,26 +193,15 @@ insert_string_bat(BAT *b, BAT *n, struct
                                }
                        }
                }
-               if (toff != ~(size_t) 0) {
-                       /* we only have to copy the offsets from n to
-                        * b, possibly with an offset (if toff != 0),
-                        * so set up some variables and set up b's
-                        * tail so that it looks like it's a fixed
-                        * size column.  Of course, we must make sure
-                        * first that the width of b's offset heap can
-                        * accommodate all values. */
-                       if (b->twidth < SIZEOF_VAR_T &&
-                           ((size_t) 1 << 8 * b->twidth) <= (b->twidth <= 2 ? 
b->tvheap->size - GDK_VAROFFSET : b->tvheap->size)) {
-                               /* offsets aren't going to fit, so
-                                * widen offset heap */
-                               if (GDKupgradevarheap(b, (var_t) 
b->tvheap->size, false, force) != GDK_SUCCEED) {
-                                       toff = ~(size_t) 0;
-                                       return GDK_FAIL;
-                               }
-                       }
-               }
        } else if (unshare_string_heap(b) != GDK_SUCCEED)
                return GDK_FAIL;
+
+       /* make sure there is (vertical) space in the offset heap, we
+        * may have to widen the heap later */
+       if (GDKupgradevarheap(b, (var_t) b->tvheap->size, BATcount(b) + cnt,
+                             false) != GDK_SUCCEED)
+               return GDK_FAIL;
+
        if (toff == 0 && n->twidth == b->twidth && ci->tpe == cand_dense) {
                /* we don't need to do any translation of offset
                 * values, so we can use fast memcpy */
@@ -224,6 +213,7 @@ insert_string_bat(BAT *b, BAT *n, struct
                 * string heap at known locations (namely the offset
                 * in n added to toff), so insert offsets from n after
                 * adding toff into b */
+
                /* note the use of the "restrict" qualifier here: all
                 * four pointers below point to the same value, but
                 * only one of them will actually be used, hence we
@@ -286,14 +276,6 @@ insert_string_bat(BAT *b, BAT *n, struct
                        v = (var_t) ((size_t) v + toff);
                        assert(v >= GDK_VAROFFSET);
                        assert((size_t) v < b->tvheap->free);
-                       if (BUNlast(b) >= BATcapacity(b)) {
-                               if (BATcount(b) == BUN_MAX) {
-                                       GDKerror("too many elements to 
accomodate (" BUNFMT ")\n", BUN_MAX);
-                                       goto bunins_failed;
-                               }
-                               if (BATextend(b, BATgrows(b)) != GDK_SUCCEED)
-                                       goto bunins_failed;
-                       }
                        switch (b->twidth) {
                        case 1:
                                assert(v - GDK_VAROFFSET < ((var_t) 1 << 8));
@@ -360,14 +342,6 @@ insert_string_bat(BAT *b, BAT *n, struct
                                 * have to insert a new string into b:
                                 * we can just copy the offset */
                                v = (var_t) off;
-                               if (b->twidth < SIZEOF_VAR_T &&
-                                   ((size_t) 1 << 8 * b->twidth) <= (b->twidth 
<= 2 ? v - GDK_VAROFFSET : v)) {
-                                       /* offset isn't going to fit,
-                                        * so widen offset heap */
-                                       if (GDKupgradevarheap(b, v, false, 
force) != GDK_SUCCEED) {
-                                               goto bunins_failed;
-                                       }
-                               }
                                switch (b->twidth) {
                                case 1:
                                        assert(v - GDK_VAROFFSET < ((var_t) 1 
<< 8));
@@ -425,6 +399,17 @@ append_varsized_bat(BAT *b, BAT *n, stru
        assert(b->twidth == SIZEOF_VAR_T);
        if (cnt == 0)
                return GDK_SUCCEED;
+       if (cnt > BATcapacity(b) - BATcount(b)) {
+               /* if needed space exceeds a normal growth extend just
+                * with what's needed */
+               BUN ncap = BATcount(b) + cnt;
+               BUN grows = BATgrows(b);
+
+               if (ncap > grows)
+                       grows = ncap;
+               if (BATextend(b, grows) != GDK_SUCCEED)
+                       return GDK_FAIL;
+       }
        if (BATcount(b) == 0 &&
            b->batRole == TRANSIENT &&
            n->batRestricted == BAT_READ &&
@@ -432,9 +417,8 @@ append_varsized_bat(BAT *b, BAT *n, stru
                /* if b is still empty, in the transient farm, and n
                 * is read-only, we replace b's vheap with a reference
                 * to n's */
-               /* make sure locking happens in a
-                * predictable order: lowest id
-                * first */
+               /* make sure locking happens in a predictable order:
+                * lowest id first */
                if (b->batCacheid < n->batCacheid) {
                        MT_lock_set(&b->theaplock);
                        MT_lock_set(&n->theaplock);
@@ -532,7 +516,7 @@ BATappend(BAT *b, BAT *n, BAT *s, bool f
        char buf[64];
        lng t0 = 0;
 
-       if (b == NULL || n == NULL || (cnt = BATcount(n)) == 0) {
+       if (b == NULL || n == NULL || BATcount(n) == 0) {
                return GDK_SUCCEED;
        }
        assert(b->batCacheid > 0);
@@ -638,27 +622,17 @@ BATappend(BAT *b, BAT *n, BAT *s, bool f
                b->batCapacity = BATcount(b) + cnt;
                if (BATmaterialize(b) != GDK_SUCCEED)
                        return GDK_FAIL;
-       } else if (cnt > BATcapacity(b) - BATcount(b)) {
-               /* if needed space exceeds a normal growth extend just
-                * with what's needed */
-               BUN ncap = BATcount(b) + cnt;
-               BUN grows = BATgrows(b);
-
-               if (ncap > grows)
-                       grows = ncap;
-               if (BATextend(b, grows) != GDK_SUCCEED)
-                       return GDK_FAIL;
        }
 
        r = BUNlast(b);
 
+       /* property setting */
        if (BATcount(b) == 0) {
                b->tsorted = n->tsorted;
                b->trevsorted = n->trevsorted;
                b->tseqbase = oid_nil;
                b->tnonil = n->tnonil;
                b->tnil = n->tnil && cnt == BATcount(n);
-               b->tseqbase = oid_nil;
                if (ci.tpe == cand_dense) {
                        b->tnosorted = ci.seq - hseq <= n->tnosorted && 
n->tnosorted < ci.seq + cnt - hseq ? n->tnosorted + hseq - ci.seq : 0;
                        b->tnorevsorted = ci.seq - hseq <= n->tnorevsorted && 
n->tnorevsorted < ci.seq + cnt - hseq ? n->tnorevsorted + hseq - ci.seq : 0;
@@ -707,8 +681,9 @@ BATappend(BAT *b, BAT *n, BAT *s, bool f
                b->tnonil &= n->tnonil;
                b->tnil |= n->tnil && cnt == BATcount(n);
        }
+
        if (b->ttype == TYPE_str) {
-               if (insert_string_bat(b, n, &ci, force) != GDK_SUCCEED) {
+               if (insert_string_bat(b, n, &ci) != GDK_SUCCEED) {
                        return GDK_FAIL;
                }
        } else if (ATOMvarsized(b->ttype)) {
@@ -716,6 +691,17 @@ BATappend(BAT *b, BAT *n, BAT *s, bool f
                        return GDK_FAIL;
                }
        } else {
+               if (cnt > BATcapacity(b) - BATcount(b)) {
+                       /* if needed space exceeds a normal growth
+                        * extend just with what's needed */
+                       BUN ncap = BATcount(b) + cnt;
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to