Changeset: 655d3eb72e16 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=655d3eb72e16
Modified Files:
clients/Tests/MAL-signatures.stable.out
clients/Tests/MAL-signatures.stable.out.int128
clients/Tests/exports.stable.out
clients/mapiclient/mhelp.c
gdk/gdk_atoms.h
gdk/gdk_cand.c
gdk/gdk_hash.c
gdk/gdk_hash.h
gdk/gdk_join.c
gdk/gdk_private.h
gdk/gdk_system.c
gdk/gdk_tracer.h
gdk/gdk_unique.c
monetdb5/mal/mal_profiler.c
monetdb5/modules/atoms/str.c
monetdb5/modules/kernel/alarm.c
monetdb5/modules/kernel/alarm.mal
monetdb5/modules/mal/sysmon.c
sql/ChangeLog
sql/backends/monet5/sql_upgrades.c
sql/scripts/25_debug.sql
sql/server/sql_parser.y
sql/test/emptydb/Tests/check.stable.out
sql/test/emptydb/Tests/check.stable.out.32bit
sql/test/emptydb/Tests/check.stable.out.int128
sql/test/remote/Tests/creds.SQL.py
sql/test/remote/Tests/different_user.SQL.py
sql/test/remote/Tests/invalid_creds.SQL.py
sql/test/remote/Tests/ssbm.SQL.py
sql/test/sys-schema/Tests/systemfunctions.stable.out
sql/test/sys-schema/Tests/systemfunctions.stable.out.int128
Branch: statistics-analytics
Log Message:
Merged with default
diffs (truncated from 2450 to 300 lines):
diff --git a/clients/Tests/MAL-signatures.stable.out b/clients/Tests/MAL-signatures.stable.out
--- a/clients/Tests/MAL-signatures.stable.out
+++ b/clients/Tests/MAL-signatures.stable.out
@@ -565,7 +565,9 @@ stdout of test 'MAL-signatures` in direc
[ "aggr", "variancep", "command aggr.variancep(b:bat[:any_2]):dbl ",
"ALGvariancep;", "Gives the variance of all tail values" ]
[ "alarm", "ctime", "unsafe command alarm.ctime():str ",
"ALARMctime;", "Return the current time as a C-time string." ]
[ "alarm", "epoch", "unsafe command alarm.epoch():int ",
"ALARMepoch;", "Return time since Jan 1, 1970 in seconds." ]
-[ "alarm", "sleep", "unsafe command alarm.sleep(secs:int):void ",
"ALARMsleep;", "Sleep a few seconds" ]
+[ "alarm", "sleep", "unsafe pattern
alarm.sleep(msecs:bat[:int]):bat[:int] ", "ALARMsleep;", "Sleep a few
milliseconds and return the slept value" ]
+[ "alarm", "sleep", "unsafe pattern alarm.sleep(msecs:int):int ",
"ALARMsleep;", "Sleep a few milliseconds and return the slept value" ]
+[ "alarm", "sleep", "unsafe pattern alarm.sleep(msecs:int):void ",
"ALARMsleep;", "Sleep a few milliseconds" ]
[ "alarm", "time", "unsafe command alarm.time():int ", "ALARMtime;",
"Return time since program start in milliseconds." ]
[ "alarm", "usec", "unsafe command alarm.usec():lng ", "ALARMusec;",
"Return time since Jan 1, 1970 in microseconds." ]
[ "algebra", "antijoin", "function algebra.antijoin(l:bat[:any_1],
r:bat[:any_1], sl:bat[:oid], sr:bat[:oid], nil_matches:bit, estimate:lng)
(X_0:bat[:oid], X_1:bat[:oid]);", "", "" ]
diff --git a/clients/Tests/MAL-signatures.stable.out.int128 b/clients/Tests/MAL-signatures.stable.out.int128
--- a/clients/Tests/MAL-signatures.stable.out.int128
+++ b/clients/Tests/MAL-signatures.stable.out.int128
@@ -674,7 +674,9 @@ stdout of test 'MAL-signatures` in direc
[ "aggr", "variancep", "command aggr.variancep(b:bat[:any_2]):dbl ",
"ALGvariancep;", "Gives the variance of all tail values" ]
[ "alarm", "ctime", "unsafe command alarm.ctime():str ",
"ALARMctime;", "Return the current time as a C-time string." ]
[ "alarm", "epoch", "unsafe command alarm.epoch():int ",
"ALARMepoch;", "Return time since Jan 1, 1970 in seconds." ]
-[ "alarm", "sleep", "unsafe command alarm.sleep(secs:int):void ",
"ALARMsleep;", "Sleep a few seconds" ]
+[ "alarm", "sleep", "unsafe pattern
alarm.sleep(msecs:bat[:int]):bat[:int] ", "ALARMsleep;", "Sleep a few
milliseconds and return the slept value" ]
+[ "alarm", "sleep", "unsafe pattern alarm.sleep(msecs:int):int ",
"ALARMsleep;", "Sleep a few milliseconds and return the slept value" ]
+[ "alarm", "sleep", "unsafe pattern alarm.sleep(msecs:int):void ",
"ALARMsleep;", "Sleep a few milliseconds" ]
[ "alarm", "time", "unsafe command alarm.time():int ", "ALARMtime;",
"Return time since program start in milliseconds." ]
[ "alarm", "usec", "unsafe command alarm.usec():lng ", "ALARMusec;",
"Return time since Jan 1, 1970 in microseconds." ]
[ "algebra", "antijoin", "function algebra.antijoin(l:bat[:any_1],
r:bat[:any_1], sl:bat[:oid], sr:bat[:oid], nil_matches:bit, estimate:lng)
(X_0:bat[:oid], X_1:bat[:oid]);", "", "" ]
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -745,7 +745,7 @@ str AGGRvariance3_dbl(bat *retval, const
str AGGRvariancep3_dbl(bat *retval, const bat *bid, const bat *gid, const bat
*eid);
str ALARMctime(str *res);
str ALARMepoch(int *res);
-str ALARMsleep(void *res, int *secs);
+str ALARMsleep(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci);
str ALARMtime(int *res);
str ALARMusec(lng *ret);
str ALGbandjoin(bat *r1, bat *r2, const bat *lid, const bat *rid, const bat
*slid, const bat *srid, const void *low, const void *high, const bit *li, const
bit *hi, const lng *estimate);
diff --git a/clients/mapiclient/mhelp.c b/clients/mapiclient/mhelp.c
--- a/clients/mapiclient/mhelp.c
+++ b/clients/mapiclient/mhelp.c
@@ -508,7 +508,7 @@ SQLhelp sqlhelp1[] = {
NULL},
{"START TRANSACTION",
"Change transaction mode from auto-commit to user controlled
commit/rollback",
- "START TRANSACTION transactionmode",
+ "{ START | BEGIN } TRANSACTION transactionmode",
"transactionmode,isolevel",
"See also
https://www.monetdb.org/Documentation/Manuals/SQLreference/Transactions"},
{"TABLE JOINS",
diff --git a/gdk/gdk_atoms.h b/gdk/gdk_atoms.h
--- a/gdk/gdk_atoms.h
+++ b/gdk/gdk_atoms.h
@@ -165,8 +165,8 @@ gdk_export const ptr ptr_nil;
* In all algorithms across GDK, you will find switches on the types
* (bte, sht, int, flt, dbl, lng, hge, str). They respectively
* represent an octet, a 16-bit int, a 32-bit int, a 32-bit float, a
- * 64-bit double, a 64-bit int, and a pointer-sized location of a
- * char-buffer (ended by a zero char).
+ * 64-bit double, a 64-bit int, a 128-bit int, and a pointer-sized location
+ * of a char-buffer (ended by a zero char).
*
* In contrast, the types (bit, ptr, bat, oid) are derived types. They
* do not occur in the switches. The ATOMstorage macro maps them
diff --git a/gdk/gdk_cand.c b/gdk/gdk_cand.c
--- a/gdk/gdk_cand.c
+++ b/gdk/gdk_cand.c
@@ -384,7 +384,6 @@ canditer_init(struct canditer *ci, BAT *
} else {
/* why the vheap? */
ci->tpe = cand_dense;
- ci->oids = NULL;
}
} else {
ci->tpe = cand_dense;
@@ -404,23 +403,6 @@ canditer_init(struct canditer *ci, BAT *
ci->tpe = cand_dense;
}
switch (ci->tpe) {
- case cand_dense:
- case_cand_dense:
- if (b != NULL) {
- if (ci->seq + cnt <= b->hseqbase ||
- ci->seq >= b->hseqbase + BATcount(b)) {
- ci->ncand = 0;
- return 0;
- }
- if (b->hseqbase > ci->seq) {
- cnt -= b->hseqbase - ci->seq;
- ci->offset += b->hseqbase - ci->seq;
- ci->seq = b->hseqbase;
- }
- if (ci->seq + cnt > b->hseqbase + BATcount(b))
- cnt = b->hseqbase + BATcount(b) - ci->seq;
- }
- break;
case cand_materialized:
if (b != NULL) {
if (ci->oids[ci->noids - 1] < b->hseqbase) {
@@ -488,14 +470,9 @@ canditer_init(struct canditer *ci, BAT *
while (ci->noids > 0 &&
ci->oids[ci->noids - 1] == ci->seq + cnt + ci->noids - 1)
ci->noids--;
- /* WARNING: don't reset ci->oids to NULL when setting
- * ci->tpe to cand_dense below: BATprojectchain will
- * fail */
- if (ci->noids == 0) {
- ci->tpe = cand_dense;
- goto case_cand_dense;
- }
- if (b != NULL) {
+ if (ci->noids > 0) {
+ if (b == NULL)
+ break;
BUN p;
p = binsearchcand(ci->oids, ci->noids - 1, b->hseqbase);
if (p == ci->noids) {
@@ -505,6 +482,7 @@ canditer_init(struct canditer *ci, BAT *
ci->seq = b->hseqbase;
ci->noids = 0;
ci->tpe = cand_dense;
+ ci->oids = NULL;
break;
}
assert(b->hseqbase > ci->seq || p == 0);
@@ -532,10 +510,26 @@ canditer_init(struct canditer *ci, BAT *
while (ci->noids > 0 &&
ci->oids[ci->noids - 1] == ci->seq + cnt +
ci->noids - 1)
ci->noids--;
- if (ci->noids == 0) {
- ci->tpe = cand_dense;
- goto case_cand_dense;
+ if (ci->noids > 0)
+ break;
+ }
+ ci->tpe = cand_dense;
+ ci->oids = NULL;
+ /* fall through */
+ case cand_dense:
+ if (b != NULL) {
+ if (ci->seq + cnt <= b->hseqbase ||
+ ci->seq >= b->hseqbase + BATcount(b)) {
+ ci->ncand = 0;
+ return 0;
}
+ if (b->hseqbase > ci->seq) {
+ cnt -= b->hseqbase - ci->seq;
+ ci->offset += b->hseqbase - ci->seq;
+ ci->seq = b->hseqbase;
+ }
+ if (ci->seq + cnt > b->hseqbase + BATcount(b))
+ cnt = b->hseqbase + BATcount(b) - ci->seq;
}
break;
}
diff --git a/gdk/gdk_hash.c b/gdk/gdk_hash.c
--- a/gdk/gdk_hash.c
+++ b/gdk/gdk_hash.c
@@ -649,13 +649,13 @@ BAThashsync(void *arg)
} \
HASHputlink(h, p, hget); \
HASHput(h, c, p); \
- o = canditer_next(&ci); \
+ o = canditer_next(ci); \
} \
} while (0)
#define finishhash(TYPE) \
do { \
const TYPE *restrict v = (const TYPE *) BUNtloc(bi, 0); \
- for (; p < cnt; p++) { \
+ for (; p < ci->ncand; p++) { \
c = hash_##TYPE(h, v + o - b->hseqbase); \
c = hash_##TYPE(h, v + o - b->hseqbase); \
hget = HASHget(h, c); \
@@ -669,22 +669,21 @@ BAThashsync(void *arg)
h->nunique += hb == hnil; \
HASHputlink(h, p, hget); \
HASHput(h, c, p); \
- o = canditer_next(&ci); \
+ o = canditer_next(ci); \
} \
} while (0)
-/*
- * The prime routine for the BAT layer is to create a new hash index.
- * Its argument is the element type and the maximum number of BUNs be
- * stored under the hash function.
- */
+/* Internal function to create a hash table for the given BAT b.
+ * If a candidate list s is also given, the hash table is specific for
+ * the combination of the two: only values from b that are referred to
+ * by s are included in the hash table, so if a result is found when
+ * searching the hash table, the result is a candidate. */
Hash *
-BAThash_impl(BAT *b, BAT *s, const char *ext)
+BAThash_impl(BAT *restrict b, struct canditer *restrict ci, const char *restrict ext)
{
lng t0 = 0;
unsigned int tpe = ATOMbasetype(b->ttype);
- BUN cnt, cnt1;
- struct canditer ci;
+ BUN cnt1;
BUN mask, maxmask = 0;
BUN p, c;
oid o;
@@ -693,6 +692,9 @@ BAThash_impl(BAT *b, BAT *s, const char
const char *nme = GDKinmemory() ? ":inmemory" :
BBP_physical(b->batCacheid);
BATiter bi = bat_iterator(b);
PROPrec *prop;
+ bool hascand = ci->tpe != cand_dense || ci->ncand != BATcount(b);
+
+ assert(strcmp(ext, "thash") != 0 || !hascand);
TRC_DEBUG_IF(ACCELERATOR) t0 = GDKusec();
TRC_DEBUG(ACCELERATOR,
@@ -710,8 +712,6 @@ BAThash_impl(BAT *b, BAT *s, const char
tpe = TYPE_void;
}
- cnt = canditer_init(&ci, b, s);
-
if ((h = GDKzalloc(sizeof(*h))) == NULL ||
(h->heaplink.farmid = BBPselectfarm(b->batRole, b->ttype,
hashheap)) < 0 ||
(h->heapbckt.farmid = BBPselectfarm(b->batRole, b->ttype,
hashheap)) < 0) {
@@ -725,12 +725,12 @@ BAThash_impl(BAT *b, BAT *s, const char
nme, ".", ext, "l", NULL);
strconcat_len(h->heapbckt.filename, sizeof(h->heapbckt.filename),
nme, ".", ext, "b", NULL);
- if (HEAPalloc(&h->heaplink, s ? cnt : BATcapacity(b),
+ if (HEAPalloc(&h->heaplink, hascand ? ci->ncand : BATcapacity(b),
h->width) != GDK_SUCCEED) {
GDKfree(h);
return NULL;
}
- h->heaplink.free = cnt * h->width;
+ h->heaplink.free = ci->ncand * h->width;
h->Link = h->heaplink.base;
#ifndef NDEBUG
/* clear unused part of Link array */
@@ -747,35 +747,35 @@ BAThash_impl(BAT *b, BAT *s, const char
} else if (ATOMsize(tpe) == 2) {
/* perfect hash for two-byte sized atoms */
mask = (1 << 16);
- } else if (b->tkey || cnt <= 4096) {
+ } else if (b->tkey || ci->ncand <= 4096) {
/* if key, or if small, don't bother dynamically
* adjusting the hash mask */
- mask = HASHmask(cnt);
- } else if (s == NULL && (prop = BATgetprop_nolock(b, GDK_NUNIQUE)) !=
NULL) {
+ mask = HASHmask(ci->ncand);
+ } else if (!hascand && (prop = BATgetprop_nolock(b, GDK_NUNIQUE)) !=
NULL) {
assert(prop->v.vtype == TYPE_oid);
mask = prop->v.val.oval * 8 / 7;
- } else if (s == NULL && (prop = BATgetprop_nolock(b, GDK_HASH_BUCKETS))
!= NULL) {
+ } else if (!hascand && (prop = BATgetprop_nolock(b, GDK_HASH_BUCKETS))
!= NULL) {
assert(prop->v.vtype == TYPE_oid);
mask = prop->v.val.oval;
- maxmask = HASHmask(cnt);
+ maxmask = HASHmask(ci->ncand);
if (mask > maxmask)
mask = maxmask;
} else {
- /* dynamic hash: we start with HASHmask(cnt)/64, or,
- * if cnt large enough, HASHmask(cnt)/256; if there
- * are too many collisions we try HASHmask(cnt)/64,
- * HASHmask(cnt)/16, HASHmask(cnt)/4, and finally
- * HASHmask(cnt), but we might skip some of these if
+ /* dynamic hash: we start with HASHmask(ci->ncand)/64, or,
+ * if ci->ncand large enough, HASHmask(ci->ncand)/256; if there
+ * are too many collisions we try HASHmask(ci->ncand)/64,
+ * HASHmask(ci->ncand)/16, HASHmask(ci->ncand)/4, and finally
+ * HASHmask(ci->ncand), but we might skip some of these if
* there are many distinct values. */
- maxmask = HASHmask(cnt);
+ maxmask = HASHmask(ci->ncand);
mask = maxmask >> 6;
while (mask > 4096)
mask >>= 2;
/* try out on first 25% of b */
- cnt1 = cnt >> 2;
+ cnt1 = ci->ncand >> 2;
}
- o = canditer_next(&ci); /* always one ahead */
+ o = canditer_next(ci); /* always one ahead */
for (;;) {
lng t1 = 0;
TRC_DEBUG_IF(ACCELERATOR) t1 = GDKusec();
@@ -787,7 +787,7 @@ BAThash_impl(BAT *b, BAT *s, const char
HEAPfree(&h->heapbckt, true);
/* create the hash structures */
if (HASHnew(h, ATOMtype(b->ttype), BATcapacity(b),
- mask, cnt, true) != GDK_SUCCEED) {
+ mask, ci->ncand, true) != GDK_SUCCEED) {
HEAPfree(&h->heaplink, true);
GDKfree(h);
return NULL;
@@ -842,7 +842,7 @@ BAThash_impl(BAT *b, BAT *s, const char
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list