Changeset: ff4639cf258b for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/ff4639cf258b
Modified Files:
gdk/gdk_group.c
Branch: dict
Log Message:
Merge with default branch.
diffs (truncated from 1472 to 300 lines):
diff --git a/CMakeLists.txt b/CMakeLists.txt
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -24,10 +24,6 @@ set(CMAKE_PROJECT_HOMEPAGE_URL "https://
set(C_STANDARD_REQUIRED ON)
set(CMAKE_C_STANDARD 99)
-if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.13.0")
- cmake_policy(SET CMP0076 OLD)
-endif()
-
# We give precedence to UNIX include directories over
# OS X Frameworks directories
set(CMAKE_FIND_FRAMEWORK LAST)
diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c
--- a/gdk/gdk_group.c
+++ b/gdk/gdk_group.c
@@ -73,7 +73,7 @@
if (ngrp == maxgrps) { \
/* we need to extend extents and histo bats, */ \
/* do it at most once */ \
- maxgrps = BATcount(b); \
+ maxgrps = bi.count; \
if (extents) { \
BATsetcount(en, ngrp); \
if (BATextend(en, maxgrps) != GDK_SUCCEED) \
@@ -600,12 +600,10 @@ ctz(oid x)
#define GRP_small_values(BG, BV, GV) \
do { \
- uint##BG##_t *restrict sgrps = GDKmalloc((1 << BG) *
sizeof(uint##BG##_t)); \
+ uint##BG##_t sgrps[1 << BG]; \
const uint##BV##_t *restrict w = (const uint##BV##_t *)
bi.base; \
uint##BG##_t v; \
- if (sgrps == NULL) \
- goto error1; \
- memset(sgrps, 0xFF, (1 << BG) * sizeof(uint##BG##_t)); \
+ memset(sgrps, 0xFF, sizeof(sgrps)); \
if (histo) \
memset(cnts, 0, maxgrps * sizeof(lng)); \
ngrp = 0; \
@@ -647,7 +645,6 @@ ctz(oid x)
} \
TIMEOUT_CHECK(timeoffset, \
GOTO_LABEL_TIMEOUT_HANDLER(error)); \
- GDKfree(sgrps); \
} while (0)
gdk_return
diff --git a/gdk/gdk_unique.c b/gdk/gdk_unique.c
--- a/gdk/gdk_unique.c
+++ b/gdk/gdk_unique.c
@@ -32,17 +32,14 @@ BATunique(BAT *b, BAT *s)
const char *vars;
int width;
oid i, o;
- uint16_t *seen = NULL;
const char *nme;
Hash *hs = NULL;
BUN hb;
- BATiter bi;
int (*cmp)(const void *, const void *);
struct canditer ci;
const char *algomsg = "";
lng t0 = 0;
- size_t counter = 0;
lng timeoffset = 0;
QryCtx *qry_ctx = MT_thread_get_qry_ctx();
if (qry_ctx != NULL) {
@@ -81,62 +78,46 @@ BATunique(BAT *b, BAT *s)
assert(b->ttype != TYPE_void);
+ BATiter bi = bat_iterator(b);
BUN initsize = BUN_NONE;
if (s == NULL) {
MT_rwlock_rdlock(&b->thashlock);
if (b->thash != NULL && b->thash != (Hash *) 1)
initsize = b->thash->nunique;
MT_rwlock_rdunlock(&b->thashlock);
- if (initsize == BUN_NONE) {
- MT_lock_set(&b->theaplock);
- if (b->tunique_est != 0)
- initsize = (BUN) b->tunique_est;
- MT_lock_unset(&b->theaplock);
- }
+ if (initsize == BUN_NONE && bi.unique_est != 0)
+ initsize = (BUN) bi.unique_est;
}
if (initsize == BUN_NONE)
initsize = 1024;
bn = COLnew(0, TYPE_oid, initsize, TRANSIENT);
- if (bn == NULL)
+ if (bn == NULL) {
+ bat_iterator_end(&bi);
return NULL;
- bi = bat_iterator(b);
+ }
vals = bi.base;
- if (b->tvarsized && b->ttype)
+ if (b->tvarsized && bi.type)
vars = bi.vh->base;
else
vars = NULL;
width = bi.width;
- cmp = ATOMcompare(b->ttype);
+ cmp = ATOMcompare(bi.type);
- if (BATordered(b) || BATordered_rev(b)) {
- const void *prev = NULL;
- algomsg = "unique: sorted";
- for (i = 0; i < cnt; i++) {
- GDK_CHECK_TIMEOUT(timeoffset, counter,
-
GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed));
- o = canditer_next(&ci);
- v = VALUE(o - b->hseqbase);
- if (prev == NULL || (*cmp)(v, prev) != 0) {
- if (bunfastappTYPE(oid, bn, &o) != GDK_SUCCEED)
- goto bunins_failed;
- }
- prev = v;
- }
- } else if (ATOMbasetype(b->ttype) == TYPE_bte) {
- unsigned char val;
+ if (ATOMbasetype(bi.type) == TYPE_bte ||
+ (bi.width == 1 &&
+ ATOMstorage(bi.type) == TYPE_str &&
+ GDK_ELIMDOUBLES(bi.vh))) {
+ uint8_t val;
algomsg = "unique: byte-sized atoms";
- assert(vars == NULL);
- seen = GDKzalloc((256 / 16) * sizeof(seen[0]));
- if (seen == NULL)
- goto bunins_failed;
- for (i = 0; i < cnt; i++) {
- GDK_CHECK_TIMEOUT(timeoffset, counter,
-
GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed));
+ uint32_t seen[256 >> 5];
+ memset(seen, 0, sizeof(seen));
+ TIMEOUT_LOOP_IDX(i, cnt, timeoffset) {
o = canditer_next(&ci);
- val = ((const unsigned char *) vals)[o - b->hseqbase];
- if (!(seen[val >> 4] & (1U << (val & 0xF)))) {
- seen[val >> 4] |= 1U << (val & 0xF);
+ val = ((const uint8_t *) vals)[o - b->hseqbase];
+ uint32_t m = UINT32_C(1) << (val & 0x1F);
+ if (!(seen[val >> 5] & m)) {
+ seen[val >> 5] |= m;
if (bunfastappTYPE(oid, bn, &o) != GDK_SUCCEED)
goto bunins_failed;
if (bn->batCount == 256) {
@@ -146,23 +127,23 @@ BATunique(BAT *b, BAT *s)
}
}
}
- GDKfree(seen);
- seen = NULL;
- } else if (ATOMbasetype(b->ttype) == TYPE_sht) {
- unsigned short val;
+ TIMEOUT_CHECK(timeoffset,
+ GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed));
+ } else if (ATOMbasetype(bi.type) == TYPE_sht ||
+ (bi.width == 2 &&
+ ATOMstorage(bi.type) == TYPE_str &&
+ GDK_ELIMDOUBLES(bi.vh))) {
+ uint16_t val;
algomsg = "unique: short-sized atoms";
- assert(vars == NULL);
- seen = GDKzalloc((65536 / 16) * sizeof(seen[0]));
- if (seen == NULL)
- goto bunins_failed;
- for (i = 0; i < cnt; i++) {
- GDK_CHECK_TIMEOUT(timeoffset, counter,
-
GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed));
+ uint32_t seen[65536 >> 5];
+ memset(seen, 0, sizeof(seen));
+ TIMEOUT_LOOP_IDX(i, cnt, timeoffset) {
o = canditer_next(&ci);
- val = ((const unsigned short *) vals)[o - b->hseqbase];
- if (!(seen[val >> 4] & (1U << (val & 0xF)))) {
- seen[val >> 4] |= 1U << (val & 0xF);
+ val = ((const uint16_t *) vals)[o - b->hseqbase];
+ uint32_t m = UINT32_C(1) << (val & 0x1F);
+ if (!(seen[val >> 5] & m)) {
+ seen[val >> 5] |= m;
if (bunfastappTYPE(oid, bn, &o) != GDK_SUCCEED)
goto bunins_failed;
if (bn->batCount == 65536) {
@@ -172,11 +153,25 @@ BATunique(BAT *b, BAT *s)
}
}
}
- GDKfree(seen);
- seen = NULL;
+ TIMEOUT_CHECK(timeoffset,
+ GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed));
+ } else if (BATordered(b) || BATordered_rev(b)) {
+ const void *prev = NULL;
+ algomsg = "unique: sorted";
+ TIMEOUT_LOOP_IDX(i, cnt, timeoffset) {
+ o = canditer_next(&ci);
+ v = VALUE(o - b->hseqbase);
+ if (prev == NULL || (*cmp)(v, prev) != 0) {
+ if (bunfastappTYPE(oid, bn, &o) != GDK_SUCCEED)
+ goto bunins_failed;
+ }
+ prev = v;
+ }
+ TIMEOUT_CHECK(timeoffset,
+ GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed));
} else if (BATcheckhash(b) ||
(!b->batTransient &&
- cnt == BATcount(b) &&
+ cnt == bi.count &&
BAThash(b) == GDK_SUCCEED)) {
BUN lo = 0;
oid seq;
@@ -192,9 +187,7 @@ BATunique(BAT *b, BAT *s)
MT_rwlock_rdunlock(&b->thashlock);
goto lost_hash;
}
- for (i = 0; i < cnt; i++) {
- GDK_CHECK_TIMEOUT(timeoffset, counter,
-
GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed));
+ TIMEOUT_LOOP_IDX(i, cnt, timeoffset) {
BUN p;
o = canditer_next(&ci);
@@ -220,6 +213,8 @@ BATunique(BAT *b, BAT *s)
}
}
MT_rwlock_rdunlock(&b->thashlock);
+ TIMEOUT_CHECK(timeoffset,
+ GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed));
} else {
BUN prb;
BUN p;
@@ -229,10 +224,10 @@ BATunique(BAT *b, BAT *s)
GDKclrerr(); /* not interested in BAThash errors */
algomsg = "unique: new partial hash";
nme = BBP_physical(b->batCacheid);
- if (ATOMbasetype(b->ttype) == TYPE_bte) {
+ if (ATOMbasetype(bi.type) == TYPE_bte) {
mask = (BUN) 1 << 8;
cmp = NULL; /* no compare needed, "hash" is perfect */
- } else if (ATOMbasetype(b->ttype) == TYPE_sht) {
+ } else if (ATOMbasetype(bi.type) == TYPE_sht) {
mask = (BUN) 1 << 16;
cmp = NULL; /* no compare needed, "hash" is perfect */
} else {
@@ -244,19 +239,17 @@ BATunique(BAT *b, BAT *s)
GDKerror("cannot allocate hash table\n");
goto bunins_failed;
}
- if ((hs->heaplink.farmid = BBPselectfarm(TRANSIENT, b->ttype,
hashheap)) < 0 ||
- (hs->heapbckt.farmid = BBPselectfarm(TRANSIENT, b->ttype,
hashheap)) < 0 ||
+ if ((hs->heaplink.farmid = BBPselectfarm(TRANSIENT, bi.type,
hashheap)) < 0 ||
+ (hs->heapbckt.farmid = BBPselectfarm(TRANSIENT, bi.type,
hashheap)) < 0 ||
snprintf(hs->heaplink.filename,
sizeof(hs->heaplink.filename), "%s.thshunil%x", nme, (unsigned) THRgettid()) >=
(int) sizeof(hs->heaplink.filename) ||
snprintf(hs->heapbckt.filename,
sizeof(hs->heapbckt.filename), "%s.thshunib%x", nme, (unsigned) THRgettid()) >=
(int) sizeof(hs->heapbckt.filename) ||
- HASHnew(hs, b->ttype, BUNlast(b), mask, BUN_NONE, false) !=
GDK_SUCCEED) {
+ HASHnew(hs, bi.type, BUNlast(b), mask, BUN_NONE, false) !=
GDK_SUCCEED) {
GDKfree(hs);
hs = NULL;
GDKerror("cannot allocate hash table\n");
goto bunins_failed;
}
- for (i = 0; i < cnt; i++) {
- GDK_CHECK_TIMEOUT(timeoffset, counter,
-
GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed));
+ TIMEOUT_LOOP_IDX(i, cnt, timeoffset) {
o = canditer_next(&ci);
v = VALUE(o - b->hseqbase);
prb = HASHprobe(hs, v);
@@ -278,6 +271,8 @@ BATunique(BAT *b, BAT *s)
HEAPfree(&hs->heaplink, true);
HEAPfree(&hs->heapbckt, true);
GDKfree(hs);
+ TIMEOUT_CHECK(timeoffset,
+ GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed));
}
bat_iterator_end(&bi);
@@ -287,7 +282,7 @@ BATunique(BAT *b, BAT *s)
bn->tkey = true;
bn->tnil = false;
bn->tnonil = true;
- if (BATcount(bn) == BATcount(b)) {
+ if (BATcount(bn) == bi.count) {
/* it turns out all values are distinct */
assert(b->tnokey[0] == 0);
assert(b->tnokey[1] == 0);
@@ -305,8 +300,6 @@ BATunique(BAT *b, BAT *s)
bunins_failed:
bat_iterator_end(&bi);
- if (seen)
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list