Changeset: 1d5a05156550 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/1d5a05156550
Modified Files:
gdk/gdk_unique.c
Branch: default
Log Message:
Use knowledge about double-eliminated strings.
diffs (175 lines):
diff --git a/gdk/gdk_unique.c b/gdk/gdk_unique.c
--- a/gdk/gdk_unique.c
+++ b/gdk/gdk_unique.c
@@ -32,7 +32,6 @@ BATunique(BAT *b, BAT *s)
const char *vars;
int width;
oid i, o;
- uint16_t *seen = NULL;
const char *nme;
Hash *hs = NULL;
BUN hb;
@@ -42,7 +41,6 @@ BATunique(BAT *b, BAT *s)
const char *algomsg = "";
lng t0 = 0;
- size_t counter = 0;
lng timeoffset = 0;
QryCtx *qry_ctx = MT_thread_get_qry_ctx();
if (qry_ctx != NULL) {
@@ -108,35 +106,19 @@ BATunique(BAT *b, BAT *s)
width = bi.width;
cmp = ATOMcompare(b->ttype);
- if (BATordered(b) || BATordered_rev(b)) {
- const void *prev = NULL;
- algomsg = "unique: sorted";
- for (i = 0; i < cnt; i++) {
- GDK_CHECK_TIMEOUT(timeoffset, counter,
-
GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed));
- o = canditer_next(&ci);
- v = VALUE(o - b->hseqbase);
- if (prev == NULL || (*cmp)(v, prev) != 0) {
- if (bunfastappTYPE(oid, bn, &o) != GDK_SUCCEED)
- goto bunins_failed;
- }
- prev = v;
- }
- } else if (ATOMbasetype(b->ttype) == TYPE_bte) {
- unsigned char val;
+ if (ATOMbasetype(b->ttype) == TYPE_bte ||
+ (b->twidth == 1 && ATOMstorage(b->ttype) == TYPE_str &&
GDK_ELIMDOUBLES(b->tvheap))) {
+ uint8_t val;
algomsg = "unique: byte-sized atoms";
- assert(vars == NULL);
- seen = GDKzalloc((256 / 16) * sizeof(seen[0]));
- if (seen == NULL)
- goto bunins_failed;
- for (i = 0; i < cnt; i++) {
- GDK_CHECK_TIMEOUT(timeoffset, counter,
-
GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed));
+ uint32_t seen[256 >> 5];
+ memset(seen, 0, sizeof(seen));
+ TIMEOUT_LOOP_IDX(i, cnt, timeoffset) {
o = canditer_next(&ci);
- val = ((const unsigned char *) vals)[o - b->hseqbase];
- if (!(seen[val >> 4] & (1U << (val & 0xF)))) {
- seen[val >> 4] |= 1U << (val & 0xF);
+ val = ((const uint8_t *) vals)[o - b->hseqbase];
+ uint32_t m = UINT32_C(1) << (val & 0x1F);
+ if (!(seen[val >> 5] & m)) {
+ seen[val >> 5] |= m;
if (bunfastappTYPE(oid, bn, &o) != GDK_SUCCEED)
goto bunins_failed;
if (bn->batCount == 256) {
@@ -146,23 +128,21 @@ BATunique(BAT *b, BAT *s)
}
}
}
- GDKfree(seen);
- seen = NULL;
- } else if (ATOMbasetype(b->ttype) == TYPE_sht) {
- unsigned short val;
+ TIMEOUT_CHECK(timeoffset,
+ GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed));
+ } else if (ATOMbasetype(b->ttype) == TYPE_sht ||
+ (b->twidth == 2 && ATOMstorage(b->ttype) == TYPE_str &&
GDK_ELIMDOUBLES(b->tvheap))) {
+ uint16_t val;
algomsg = "unique: short-sized atoms";
- assert(vars == NULL);
- seen = GDKzalloc((65536 / 16) * sizeof(seen[0]));
- if (seen == NULL)
- goto bunins_failed;
- for (i = 0; i < cnt; i++) {
- GDK_CHECK_TIMEOUT(timeoffset, counter,
-
GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed));
+ uint32_t seen[65536 >> 5];
+ memset(seen, 0, sizeof(seen));
+ TIMEOUT_LOOP_IDX(i, cnt, timeoffset) {
o = canditer_next(&ci);
- val = ((const unsigned short *) vals)[o - b->hseqbase];
- if (!(seen[val >> 4] & (1U << (val & 0xF)))) {
- seen[val >> 4] |= 1U << (val & 0xF);
+ val = ((const uint16_t *) vals)[o - b->hseqbase];
+ uint32_t m = UINT32_C(1) << (val & 0x1F);
+ if (!(seen[val >> 5] & m)) {
+ seen[val >> 5] |= m;
if (bunfastappTYPE(oid, bn, &o) != GDK_SUCCEED)
goto bunins_failed;
if (bn->batCount == 65536) {
@@ -172,8 +152,22 @@ BATunique(BAT *b, BAT *s)
}
}
}
- GDKfree(seen);
- seen = NULL;
+ TIMEOUT_CHECK(timeoffset,
+ GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed));
+ } else if (BATordered(b) || BATordered_rev(b)) {
+ const void *prev = NULL;
+ algomsg = "unique: sorted";
+ TIMEOUT_LOOP_IDX(i, cnt, timeoffset) {
+ o = canditer_next(&ci);
+ v = VALUE(o - b->hseqbase);
+ if (prev == NULL || (*cmp)(v, prev) != 0) {
+ if (bunfastappTYPE(oid, bn, &o) != GDK_SUCCEED)
+ goto bunins_failed;
+ }
+ prev = v;
+ }
+ TIMEOUT_CHECK(timeoffset,
+ GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed));
} else if (BATcheckhash(b) ||
(!b->batTransient &&
cnt == BATcount(b) &&
@@ -192,9 +186,7 @@ BATunique(BAT *b, BAT *s)
MT_rwlock_rdunlock(&b->thashlock);
goto lost_hash;
}
- for (i = 0; i < cnt; i++) {
- GDK_CHECK_TIMEOUT(timeoffset, counter,
-
GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed));
+ TIMEOUT_LOOP_IDX(i, cnt, timeoffset) {
BUN p;
o = canditer_next(&ci);
@@ -220,6 +212,8 @@ BATunique(BAT *b, BAT *s)
}
}
MT_rwlock_rdunlock(&b->thashlock);
+ TIMEOUT_CHECK(timeoffset,
+ GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed));
} else {
BUN prb;
BUN p;
@@ -254,9 +248,7 @@ BATunique(BAT *b, BAT *s)
GDKerror("cannot allocate hash table\n");
goto bunins_failed;
}
- for (i = 0; i < cnt; i++) {
- GDK_CHECK_TIMEOUT(timeoffset, counter,
-
GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed));
+ TIMEOUT_LOOP_IDX(i, cnt, timeoffset) {
o = canditer_next(&ci);
v = VALUE(o - b->hseqbase);
prb = HASHprobe(hs, v);
@@ -278,6 +270,8 @@ BATunique(BAT *b, BAT *s)
HEAPfree(&hs->heaplink, true);
HEAPfree(&hs->heapbckt, true);
GDKfree(hs);
+ TIMEOUT_CHECK(timeoffset,
+ GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed));
}
bat_iterator_end(&bi);
@@ -305,8 +299,6 @@ BATunique(BAT *b, BAT *s)
bunins_failed:
bat_iterator_end(&bi);
- if (seen)
- GDKfree(seen);
if (hs != NULL) {
HEAPfree(&hs->heaplink, true);
HEAPfree(&hs->heapbckt, true);
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list