Changeset: 0ffbc159626f for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=0ffbc159626f
Modified Files:
        gdk/gdk_join.c
        gdk/gdk_private.h
        sql/common/sql_types.c
        sql/server/rel_updates.c
        sql/test/mapi/Tests/sql_int128.stable.out.int128
        sql/test/mapi/Tests/sql_int128.test
Branch: unlock
Log Message:

Merge with default branch.


diffs (truncated from 1684 to 300 lines):

diff --git a/MonetDB.spec b/MonetDB.spec
--- a/MonetDB.spec
+++ b/MonetDB.spec
@@ -2698,7 +2698,7 @@ sed -i 's|/var/run|/run|' \
   are equal to 1.1.  (The old code returned 33554432 instead of 1.1e8.)
 
 * Sun Nov  5 2017 Sjoerd Mullender <sjo...@acm.org> - 11.27.9-20171105
-- BZ#6460 - selinux doen't allow mmap
+- BZ#6460: selinux doen't allow mmap
 
 * Mon Oct 23 2017 Sjoerd Mullender <sjo...@acm.org> - 11.27.9-20171023
 - Rebuilt.
diff --git a/debian/changelog b/debian/changelog
--- a/debian/changelog
+++ b/debian/changelog
@@ -2469,6 +2469,12 @@ monetdb (11.27.11) unstable; urgency=low
 
 monetdb (11.27.9) unstable; urgency=low
 
+  * BZ#6460: selinux doen't allow mmap
+
+ -- Sjoerd Mullender <sjo...@acm.org>  Sun, 5 Nov 2017 09:56:39 +0100
+
+monetdb (11.27.9) unstable; urgency=low
+
   * Rebuilt.
   * BZ#6207: identifier ambiguous when grouping and selecting the same
     column twice
diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -2502,7 +2502,8 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
         struct canditer *restrict lci, struct canditer *restrict rci,
         bool nil_matches, bool nil_on_miss, bool semi, bool only_misses,
         bool not_in, bool max_one, bool min_one,
-        BUN estimate, lng t0, bool swapped, bool hash, bool phash,
+        BUN estimate, lng t0, bool swapped,
+        bool hash, bool phash, bool hash_cand,
         const char *reason)
 {
        oid lo, ro;
@@ -2521,7 +2522,6 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
        const char *v = (const char *) &lval;
        bool lskipped = false;  /* whether we skipped values in l */
        Hash *restrict hsh = NULL;
-       bool hash_cand = false;
 
        assert(!BATtvoid(r));
        assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
@@ -2558,7 +2558,24 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
 
        rl = rci->seq - r->hseqbase;
        rh = canditer_last(rci) + 1 - r->hseqbase;
-       if (phash) {
+       if (hash_cand) {
+               /* we need to create a hash on r specific for the
+                * candidate list */
+               char ext[32];
+               assert(rci->s);
+               MT_thread_setalgorithm(swapped ? "hashjoin using candidate hash (swapped)" : "hashjoin using candidate hash");
+               TRC_DEBUG(ALGO, ALGOBATFMT ": creating "
+                         "hash for candidate list " ALGOBATFMT "%s%s\n",
+                         ALGOBATPAR(r), ALGOBATPAR(rci->s),
+                         r->thash ? " ignoring existing hash" : "",
+                         swapped ? " (swapped)" : "");
+               if (snprintf(ext, sizeof(ext), "thshjn%x",
+                            (unsigned) rci->s->batCacheid) >= (int) sizeof(ext))
+                       goto bailout;
+               if ((hsh = BAThash_impl(r, rci, ext)) == NULL) {
+                       goto bailout;
+               }
+       } else if (phash) {
                /* there is a hash on the parent which we should use */
                 MT_thread_setalgorithm(swapped ? "hashjoin using parent hash (swapped)" : "hashjoin using parent hash");
                BAT *b = BBPdescriptor(VIEWtparent(r));
@@ -2580,24 +2597,6 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B
                          "existing hash%s\n",
                          ALGOBATPAR(r),
                          swapped ? " (swapped)" : "");
-       } else if (rci->tpe != cand_dense || rci->ncand != BATcount(r)) {
-               /* we need to create a hash on r specific for the
-                * candidate list */
-               char ext[32];
-               assert(rci->s);
-               MT_thread_setalgorithm(swapped ? "hashjoin using candidate hash (swapped)" : "hashjoin using candidate hash");
-               TRC_DEBUG(ALGO, ALGOBATFMT ": creating "
-                         "hash for candidate list " ALGOBATFMT "%s%s\n",
-                         ALGOBATPAR(r), ALGOBATPAR(rci->s),
-                         r->thash ? " ignoring existing hash" : "",
-                         swapped ? " (swapped)" : "");
-               if (snprintf(ext, sizeof(ext), "thshjn%x",
-                            (unsigned) rci->s->batCacheid) >= (int) sizeof(ext))
-                       goto bailout;
-               if ((hsh = BAThash_impl(r, rci, ext)) == NULL) {
-                       goto bailout;
-               }
-               hash_cand = true;
        } else {
                /* we need to create a hash on r */
                 MT_thread_setalgorithm(swapped ? "hashjoin using new hash (swapped)" : "hashjoin using new hash");
@@ -3078,6 +3077,79 @@ guess_uniques(BAT *b, struct canditer *c
        return B;
 }
 
+/* estimate the cost of doing a hashjoin with a hash on r; return value
+ * is the estimated cost, the last three arguments receive some extra
+ * information */
+static double
+joincost(BAT *r, struct canditer *lci, struct canditer *rci,
+        bool *hash, bool *phash, bool *cand)
+{
+       bool rhash = BATcheckhash(r);
+       bool prhash = false;
+       bool rcand = false;
+       double rcost = 1;
+       bat parent;
+       BAT *b;
+
+       if (rci->nvals > 0) {
+               /* if we need to do binary search on candidate
+                * list, take that into account */
+               rcost += log2((double) rci->nvals);
+       }
+       rcost *= lci->ncand;
+       if (rhash) {
+               /* average chain length */
+               rcost *= (double) BATcount(r) / r->thash->nheads;
+       } else if ((parent = VIEWtparent(r)) != 0 &&
+                  (b = BBPdescriptor(parent)) != NULL &&
+                  BATcheckhash(b)) {
+               rhash = prhash = true;
+               /* average chain length */
+               rcost *= (double) BATcount(b) / b->thash->nheads;
+       } else {
+               PROPrec *prop = BATgetprop(r, GDK_NUNIQUE);
+               if (prop) {
+                       /* we know number of unique values, assume some
+                        * collisions */
+                       rcost *= 1.1 * ((double) BATcount(r) / prop->v.val.oval);
+               } else {
+                       /* guess number of unique value and work with that */
+                       rcost *= 1.1 * ((double) BATcount(r) / guess_uniques(r, &(struct canditer){.tpe=cand_dense, .ncand=BATcount(r)}));
+               }
+#ifdef PERSISTENTHASH
+               /* only count the cost of creating the hash for
+                * non-persistent bats */
+               if (!(BBP_status(r->batCacheid) & BBPEXISTING) || r->theap->dirty || GDKinmemory(r->theap->farmid))
+#endif
+                       rcost += BATcount(r) * 2.0;
+       }
+       if (rci->ncand != BATcount(r)) {
+               /* instead of using the hash on r (cost in rcost), we
+                * can build a new hash on r taking the candidate list
+                * into account */
+               double rccost;
+               PROPrec *prop = BATgetprop(r, GDK_NUNIQUE);
+               if (prop) {
+                       /* we know number of unique values, assume some
+                        * chains */
+                       rccost = 1.1 * ((double) BATcount(r) / prop->v.val.oval);
+               } else {
+                       /* guess number of unique value and work with that */
+                       rccost = 1.1 * ((double) BATcount(r) / guess_uniques(r, rci));
+               }
+               rccost *= lci->ncand;
+               rccost += rci->ncand * 2.0; /* cost of building the hash */
+               if (rccost < rcost) {
+                       rcost = rccost;
+                       rcand = true;
+               }
+       }
+       *hash = rhash;
+       *phash = prhash;
+       *cand = rcand;
+       return rcost;
+}
+
 #define MASK_EQ                1
 #define MASK_LT                2
 #define MASK_GT                4
@@ -3418,7 +3490,7 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B
 {
        BUN lcnt, rcnt;
        struct canditer lci, rci;
-       bool rhash, prhash = false;
+       bool rhash, prhash, rcand;
        bat parent;
        double rcost = 0;
        gdk_return rc;
@@ -3537,114 +3609,16 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B
                                not_in, max_one, min_one, estimate, t0, false, func);
                goto doreturn;
        }
-       rhash = BATcheckhash(r);
-       if (rhash) {
-               /* average chain length */
-               rcost = (double) BATcount(r) / r->thash->nheads;
-       } else if (/* DISABLES CODE */ (0) && (parent = VIEWtparent(r)) != 0) {
-               BAT *b = BBPdescriptor(parent);
-               rhash = prhash = BATcheckhash(b);
-               if (prhash) {
-                       /* average chain length */
-                       rcost = (double) BATcount(b) / b->thash->nheads;
-               }
-       }
-       if (!rhash) {
-               /* no hash table, so cost includes time to build the
-                * hash table (single scan) plus the time to do the
-                * lookups (also single scan, we assume some chains) */
-               PROPrec *prop = BATgetprop(r, GDK_NUNIQUE);
-               if (prop) {
-                       /* we know number of unique values, assume some
-                        * chains */
-                       rcost = lci.ncand * 1.1 * ((double) BATcount(r) / prop->v.val.oval);
-               } else {
-                       /* guess number of unique value and work with that */
-                       rcost = lci.ncand * 1.1 * ((double) BATcount(r) / guess_uniques(r, &rci));
-               }
-#ifdef PERSISTENTHASH
-               /* only count the cost of creating the hash for
-                * non-persistent bats */
-               if (rci.ncand != BATcount(r) || !(BBP_status(r->batCacheid) & BBPEXISTING) || r->theap->dirty || GDKinmemory(r->theap->farmid))
-#endif
-                       rcost += rci.ncand * 2.0;
-       } else {
-               if (rci.nvals > 0) {
-                       /* if we need to do binary search on candidate
-                        * list, take that into account */
-                       rcost *= log2((double) rci.nvals) + 1;
-               }
-               /* all of this so far for each lookup of which we have
-                * rci.ncand */
-               rcost *= lci.ncand;
-               if (rci.ncand < BATcount(r) &&
-                   rci.ncand * 2 + lci.ncand * 1.1 < rcost) {
-                       /* it's cheaper to rebuild the hash table for
-                        * just the candidates (this saves on the
-                        * binary searches), again, assume some
-                        * chains */
-                       rhash = prhash = false;
-                       rcost = rci.ncand * 2 + lci.ncand * 1.1;
-               }
-       }
+       rcost = joincost(r, &lci, &rci, &rhash, &prhash, &rcand);
 
        if (!nil_on_miss && !only_misses && !not_in && !max_one && !min_one) {
                /* maybe do a hash join on the swapped operands; if we
                 * do, we need to sort the output, so we take that into
                 * account as well */
-               bool lhash = BATcheckhash(l);
-               bool plhash = false;
-               double lcost = 0;
-               if (lhash) {
-                       /* average chain length */
-                       lcost = (double) BATcount(l) / l->thash->nheads;
-               } else if (/* DISABLES CODE */ (0) && (parent = VIEWtparent(l)) != 0) {
-                       BAT *b = BBPdescriptor(parent);
-                       lhash = plhash = BATcheckhash(b);
-                       if (plhash) {
-                               /* average chain length */
-                               lcost = (double) BATcount(b) / b->thash->nheads;
-                       }
-               }
-               if (!lhash) {
-                       /* no hash table, so cost includes time to build the
-                        * hash table (single scan) plus the time to do the
-                        * lookups (also single scan, we assume some chains) */
-                       PROPrec *prop = BATgetprop(l, GDK_NUNIQUE);
-                       if (prop) {
-                               /* we know number of unique values, assume some
-                                * chains */
-                               lcost = rci.ncand * 1.1 * ((double) BATcount(l) / prop->v.val.oval);
-                       } else {
-                               /* guess number of unique value and work
-                                * with that */
-                               lcost = rci.ncand * 1.1 * ((double) BATcount(l) / guess_uniques(l, &lci));
-                       }
-#ifdef PERSISTENTHASH
-                       /* only count the cost of creating the hash
-                        * for non-persistent bats */
-                       if (lci.ncand != BATcount(l) || !(BBP_status(l->batCacheid) & BBPEXISTING) || l->theap->dirty || GDKinmemory(l->theap->farmid))
-#endif
-                               lcost += lci.ncand * 2.0;
-               } else {
-                       if (lci.nvals > 0) {
-                               /* if we need to do binary search on candidate
-                                * list, take that into account */
-                               lcost *= log2((double) lci.nvals) + 1;
-                       }
-                       /* all of this so far for each lookup of which we have
-                        * rci.ncand */
-                       lcost *= rci.ncand;
-                       if (lci.ncand < BATcount(l) &&
-                           lci.ncand * 2 + rci.ncand * 1.1 < lcost) {
-                               /* it's cheaper to rebuild the hash table for
-                                * just the candidates (this saves on the
-                                * binary searches), again, assume some
-                                * chains */
-                               lhash = plhash = false;
-                               lcost = lci.ncand * 2 + rci.ncand * 1.1;
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to