Changeset: 155b1ef95b0e for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/155b1ef95b0e
Modified Files:
        gdk/gdk_batop.c
        gdk/gdk_group.c
        gdk/gdk_join.c
        gdk/gdk_project.c
        sql/backends/monet5/sql.c
Branch: default
Log Message:

Propagate tunique_est a bit more and use it in append (strings). When we
know that only a limited number of (unique) strings is expected, don't
reuse the (largish) vheap.
In group by, set tunique_est based on the group result.


diffs (118 lines):

diff --git a/gdk/gdk_batop.c b/gdk/gdk_batop.c
--- a/gdk/gdk_batop.c
+++ b/gdk/gdk_batop.c
@@ -125,10 +125,13 @@ insert_string_bat(BAT *b, BATiter *ni, s
                        r = (GDK_ELIMLIMIT - GDK_STRHASHSIZE) / (len + 12);
                        /* r is estimate of number of strings in
                         * double-eliminated area */
-                       if (r < ci->ncand)
-                               len = GDK_ELIMLIMIT + (ci->ncand - r) * len;
+                       BUN ecnt = ci->ncand;
+                       if (ni->b->tunique_est > 0 && ecnt > ni->b->tunique_est)
+                               ecnt = ni->b->tunique_est;
+                       if (r < ecnt)
+                               len = GDK_ELIMLIMIT + (ecnt - r) * len;
                        else
-                               len = GDK_STRHASHSIZE + ci->ncand * (len + 12);
+                               len = GDK_STRHASHSIZE + ecnt * (len + 12);
                        /* len is total estimated expected size of vheap */
 
                        if (len > ni->vhfree / 2) {
diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c
--- a/gdk/gdk_group.c
+++ b/gdk/gdk_group.c
@@ -1282,6 +1282,7 @@ BATgroup_internal(BAT **groups, BAT **ex
                en->trevsorted = ngrp == 1;
                en->tnonil = true;
                en->tnil = false;
+               en->tunique_est = ngrp;
                *extents = virtualize(en);
        }
        if (histo) {
@@ -1304,7 +1305,11 @@ BATgroup_internal(BAT **groups, BAT **ex
        gn->tnonil = true;
        gn->tnil = false;
        gn->tmaxpos = maxgrppos;
+       gn->tunique_est = ngrp;
        *groups = gn;
+       if (!g && !e && !s) {
+               b->tunique_est = ngrp;
+       }
        TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",s=" ALGOOPTBATFMT
                  ",g=" ALGOOPTBATFMT ",e=" ALGOOPTBATFMT
                  ",h=" ALGOOPTBATFMT ",subsorted=%s -> groups="
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -3259,6 +3259,7 @@ hashjoin(BAT **r1p, BAT **r2p, BAT **r3p
        }
        /* also set other bits of heap to correct value to indicate size */
        BATsetcount(r1, BATcount(r1));
+       r1->tunique_est = MIN(l->tunique_est, r->tunique_est);
        if (BATcount(r1) <= 1) {
                r1->tsorted = true;
                r1->trevsorted = true;
@@ -3274,11 +3275,13 @@ hashjoin(BAT **r1p, BAT **r2p, BAT **r3p
                        r2->tkey = true;
                        r2->tseqbase = 0;
                }
+               r2->tunique_est = MIN(l->tunique_est, r->tunique_est);
        }
        if (r3) {
                r3->tnonil = !r3->tnil;
                BATsetcount(r3, BATcount(r3));
                assert(BATcount(r1) == BATcount(r3));
+               r3->tunique_est = MIN(l->tunique_est, r->tunique_est);
        }
        if (BATcount(r1) > 0) {
                if (BATtdense(r1))
diff --git a/gdk/gdk_project.c b/gdk/gdk_project.c
--- a/gdk/gdk_project.c
+++ b/gdk/gdk_project.c
@@ -571,6 +571,9 @@ project_str(BATiter *restrict li, struct
        bn->tnil = false;
        bn->tnonil = r1i->nonil & r2i->nonil;
        bn->tkey = false;
+       bn->tunique_est =
+               MIN(li->b->tunique_est?li->b->tunique_est:BATcount(li->b),
+                  r1i->b->tunique_est?r1i->b->tunique_est:BATcount(r1i->b));
        TRC_DEBUG(ALGO, "l=" ALGOBATFMT " r1=" ALGOBATFMT " r2=" ALGOBATFMT
                  " -> " ALGOBATFMT "%s " LLFMT "us\n",
                  ALGOBATPAR(li->b), ALGOBATPAR(r1i->b), ALGOBATPAR(r2i->b),
@@ -820,6 +823,9 @@ BATproject2(BAT *restrict l, BAT *restri
                bn->tascii = r1i.ascii;
        }
 
+       bn->tunique_est =
+               MIN(li.b->tunique_est?li.b->tunique_est:BATcount(li.b),
+                  r1i.b->tunique_est?r1i.b->tunique_est:BATcount(r1i.b));
        if (!BATtdensebi(&r1i) || (r2 && !BATtdensebi(&r2i)))
                BATtseqbase(bn, oid_nil);
 
@@ -1138,6 +1144,15 @@ BATprojectchain(BAT **bats)
        bn->tnonil = nonil & b->tnonil;
        bn->tseqbase = oid_nil;
        bn->tkey = (ba[0].cnt <= 1);
+       double est = 0;
+       for (int i = 0; i < n; i++) {
+               double nest = ba[i].b->tunique_est?ba[i].b->tunique_est:BATcount(ba[i].b);
+               if (est)
+                       est = MIN(est, nest);
+               else
+                       est = nest;
+       }
+       bn->tunique_est = est;
        /* note, b may point to one of the bats in tobedeleted, so
         * reclaim after the last use of b */
        while (ndelete-- > 0)
diff --git a/sql/backends/monet5/sql.c b/sql/backends/monet5/sql.c
--- a/sql/backends/monet5/sql.c
+++ b/sql/backends/monet5/sql.c
@@ -2430,6 +2430,7 @@ SQLtid(Client cntxt, MalBlkPtr mb, MalSt
                nr_parts = *getArgReference_int(stk, pci, 5);
        }
        BAT *b = store->storage_api.bind_cands(tr, t, nr_parts, part_nr);
+       b->tunique_est = BATcount(b);
        if (b) {
                *res = b->batCacheid;
                BBPkeepref(b);
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to