Changeset: a7d6f1b2be59 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/a7d6f1b2be59
Branch: string_imprints
Log Message:

Merge with default branch


diffs (truncated from 560 to 300 lines):

diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c
--- a/gdk/gdk_group.c
+++ b/gdk/gdk_group.c
@@ -73,7 +73,7 @@
                if (ngrp == maxgrps) {                                  \
                        /* we need to extend extents and histo bats, */ \
                        /* do it at most once */                        \
-                       maxgrps = BATcount(b);                          \
+                       maxgrps = bi.count;                             \
                        if (extents) {                                  \
                                BATsetcount(en, ngrp);                  \
                                if (BATextend(en, maxgrps) != GDK_SUCCEED) \
@@ -965,15 +965,13 @@ BATgroup_internal(BAT **groups, BAT **ex
                /* byte-sized values, use 256 entry array to keep
                 * track of doled out group ids; note that we can't
                 * possibly have more than 256 groups, so the group id
-                * fits in an unsigned char */
-               unsigned char *restrict bgrps = GDKmalloc(256);
-               const unsigned char *restrict w = (const unsigned char *) bi.base;
-               unsigned char v;
+                * fits in an uint8_t */
+               uint8_t bgrps[256];
+               const uint8_t *restrict w = (const uint8_t *) bi.base;
+               uint8_t v;
 
                algomsg = "byte-sized groups -- ";
-               if (bgrps == NULL)
-                       goto error1;
-               memset(bgrps, 0xFF, 256);
+               memset(bgrps, 0xFF, sizeof(bgrps));
                if (histo)
                        memset(cnts, 0, maxgrps * sizeof(lng));
                ngrp = 0;
@@ -982,7 +980,7 @@ BATgroup_internal(BAT **groups, BAT **ex
                        oid o = canditer_next(&ci);
                        p = o - b->hseqbase;
                        if ((v = bgrps[w[p]]) == 0xFF && ngrp < 256) {
-                               bgrps[w[p]] = v = (unsigned char) ngrp++;
+                               bgrps[w[p]] = v = (uint8_t) ngrp++;
                                maxgrppos = r;
                                if (extents)
                                        exts[v] = o;
@@ -995,20 +993,19 @@ BATgroup_internal(BAT **groups, BAT **ex
                }
                TIMEOUT_CHECK(timeoffset,
                              GOTO_LABEL_TIMEOUT_HANDLER(error));
-               GDKfree(bgrps);
        } else if (g == NULL && t == TYPE_sht) {
                /* short-sized values, use 65536 entry array to keep
                 * track of doled out group ids; note that we can't
                 * possibly have more than 65536 groups, so the group
-                * id fits in an unsigned short */
-               unsigned short *restrict sgrps = GDKmalloc(65536 * sizeof(short));
-               const unsigned short *restrict w = (const unsigned short *) bi.base;
-               unsigned short v;
+                * id fits in an uint16_t */
+               uint16_t *restrict sgrps = GDKmalloc(65536 * sizeof(short));
+               const uint16_t *restrict w = (const uint16_t *) bi.base;
+               uint16_t v;
 
                algomsg = "short-sized groups -- ";
                if (sgrps == NULL)
                        goto error1;
-               memset(sgrps, 0xFF, 65536 * sizeof(short));
+               memset(sgrps, 0xFF, 65536 * sizeof(uint16_t));
                if (histo)
                        memset(cnts, 0, maxgrps * sizeof(lng));
                ngrp = 0;
@@ -1017,7 +1014,7 @@ BATgroup_internal(BAT **groups, BAT **ex
                        oid o = canditer_next(&ci);
                        p = o - b->hseqbase;
                        if ((v = sgrps[w[p]]) == 0xFFFF && ngrp < 65536) {
-                               sgrps[w[p]] = v = (unsigned short) ngrp++;
+                               sgrps[w[p]] = v = (uint16_t) ngrp++;
                                maxgrppos = r;
                                if (extents)
                                        exts[v] = o;
@@ -1190,7 +1187,7 @@ BATgroup_internal(BAT **groups, BAT **ex
                                const bte *w = (bte *) bi.base;
                                GRP_create_partial_hash_table_core(
                                        (void) 0,
-                                       (v = ((ulng)grps[r]<<8)|(unsigned char)w[p], hash_lng(hs, &v)),
+                                       (v = ((ulng)grps[r]<<8)|(uint8_t)w[p], hash_lng(hs, &v)),
                                        w[p] == w[hb] && grps[r] == grps[q],
                                        (void) 0,
                                        NOGRPTST);
@@ -1207,7 +1204,7 @@ BATgroup_internal(BAT **groups, BAT **ex
                                const sht *w = (sht *) bi.base;
                                GRP_create_partial_hash_table_core(
                                        (void) 0,
-                                       (v = ((ulng)grps[r]<<16)|(unsigned short)w[p], hash_lng(hs, &v)),
+                                       (v = ((ulng)grps[r]<<16)|(uint16_t)w[p], hash_lng(hs, &v)),
                                        w[p] == w[hb] && grps[r] == grps[q],
                                        (void) 0,
                                        NOGRPTST);
diff --git a/gdk/gdk_unique.c b/gdk/gdk_unique.c
--- a/gdk/gdk_unique.c
+++ b/gdk/gdk_unique.c
@@ -32,17 +32,14 @@ BATunique(BAT *b, BAT *s)
        const char *vars;
        int width;
        oid i, o;
-       uint16_t *seen = NULL;
        const char *nme;
        Hash *hs = NULL;
        BUN hb;
-       BATiter bi;
        int (*cmp)(const void *, const void *);
        struct canditer ci;
        const char *algomsg = "";
        lng t0 = 0;
 
-       size_t counter = 0;
        lng timeoffset = 0;
        QryCtx *qry_ctx = MT_thread_get_qry_ctx();
        if (qry_ctx != NULL) {
@@ -81,62 +78,46 @@ BATunique(BAT *b, BAT *s)
 
        assert(b->ttype != TYPE_void);
 
+       BATiter bi = bat_iterator(b);
        BUN initsize = BUN_NONE;
        if (s == NULL) {
                MT_rwlock_rdlock(&b->thashlock);
                if (b->thash != NULL && b->thash != (Hash *) 1)
                        initsize = b->thash->nunique;
                MT_rwlock_rdunlock(&b->thashlock);
-               if (initsize == BUN_NONE) {
-                       MT_lock_set(&b->theaplock);
-                       if (b->tunique_est != 0)
-                               initsize = (BUN) b->tunique_est;
-                       MT_lock_unset(&b->theaplock);
-               }
+               if (initsize == BUN_NONE && bi.unique_est != 0)
+                       initsize = (BUN) bi.unique_est;
        }
        if (initsize == BUN_NONE)
                initsize = 1024;
        bn = COLnew(0, TYPE_oid, initsize, TRANSIENT);
-       if (bn == NULL)
+       if (bn == NULL) {
+               bat_iterator_end(&bi);
                return NULL;
-       bi = bat_iterator(b);
+       }
        vals = bi.base;
-       if (b->tvarsized && b->ttype)
+       if (b->tvarsized && bi.type)
                vars = bi.vh->base;
        else
                vars = NULL;
        width = bi.width;
-       cmp = ATOMcompare(b->ttype);
+       cmp = ATOMcompare(bi.type);
 
-       if (BATordered(b) || BATordered_rev(b)) {
-               const void *prev = NULL;
-               algomsg = "unique: sorted";
-               for (i = 0; i < cnt; i++) {
-                       GDK_CHECK_TIMEOUT(timeoffset, counter,
-                                       GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed));
-                       o = canditer_next(&ci);
-                       v = VALUE(o - b->hseqbase);
-                       if (prev == NULL || (*cmp)(v, prev) != 0) {
-                               if (bunfastappTYPE(oid, bn, &o) != GDK_SUCCEED)
-                                       goto bunins_failed;
-                       }
-                       prev = v;
-               }
-       } else if (ATOMbasetype(b->ttype) == TYPE_bte) {
-               unsigned char val;
+       if (ATOMbasetype(bi.type) == TYPE_bte ||
+           (bi.width == 1 &&
+            ATOMstorage(bi.type) == TYPE_str &&
+            GDK_ELIMDOUBLES(bi.vh))) {
+               uint8_t val;
 
                algomsg = "unique: byte-sized atoms";
-               assert(vars == NULL);
-               seen = GDKzalloc((256 / 16) * sizeof(seen[0]));
-               if (seen == NULL)
-                       goto bunins_failed;
-               for (i = 0; i < cnt; i++) {
-                       GDK_CHECK_TIMEOUT(timeoffset, counter,
-                                       GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed));
+               uint32_t seen[256 >> 5];
+               memset(seen, 0, sizeof(seen));
+               TIMEOUT_LOOP_IDX(i, cnt, timeoffset) {
                        o = canditer_next(&ci);
-                       val = ((const unsigned char *) vals)[o - b->hseqbase];
-                       if (!(seen[val >> 4] & (1U << (val & 0xF)))) {
-                               seen[val >> 4] |= 1U << (val & 0xF);
+                       val = ((const uint8_t *) vals)[o - b->hseqbase];
+                       uint32_t m = UINT32_C(1) << (val & 0x1F);
+                       if (!(seen[val >> 5] & m)) {
+                               seen[val >> 5] |= m;
                                if (bunfastappTYPE(oid, bn, &o) != GDK_SUCCEED)
                                        goto bunins_failed;
                                if (bn->batCount == 256) {
@@ -146,23 +127,23 @@ BATunique(BAT *b, BAT *s)
                                }
                        }
                }
-               GDKfree(seen);
-               seen = NULL;
-       } else if (ATOMbasetype(b->ttype) == TYPE_sht) {
-               unsigned short val;
+               TIMEOUT_CHECK(timeoffset,
+                             GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed));
+       } else if (ATOMbasetype(bi.type) == TYPE_sht ||
+                  (bi.width == 2 &&
+                   ATOMstorage(bi.type) == TYPE_str &&
+                   GDK_ELIMDOUBLES(bi.vh))) {
+               uint16_t val;
 
                algomsg = "unique: short-sized atoms";
-               assert(vars == NULL);
-               seen = GDKzalloc((65536 / 16) * sizeof(seen[0]));
-               if (seen == NULL)
-                       goto bunins_failed;
-               for (i = 0; i < cnt; i++) {
-                       GDK_CHECK_TIMEOUT(timeoffset, counter,
-                                       GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed));
+               uint32_t seen[65536 >> 5];
+               memset(seen, 0, sizeof(seen));
+               TIMEOUT_LOOP_IDX(i, cnt, timeoffset) {
                        o = canditer_next(&ci);
-                       val = ((const unsigned short *) vals)[o - b->hseqbase];
-                       if (!(seen[val >> 4] & (1U << (val & 0xF)))) {
-                               seen[val >> 4] |= 1U << (val & 0xF);
+                       val = ((const uint16_t *) vals)[o - b->hseqbase];
+                       uint32_t m = UINT32_C(1) << (val & 0x1F);
+                       if (!(seen[val >> 5] & m)) {
+                               seen[val >> 5] |= m;
                                if (bunfastappTYPE(oid, bn, &o) != GDK_SUCCEED)
                                        goto bunins_failed;
                                if (bn->batCount == 65536) {
@@ -172,11 +153,25 @@ BATunique(BAT *b, BAT *s)
                                }
                        }
                }
-               GDKfree(seen);
-               seen = NULL;
+               TIMEOUT_CHECK(timeoffset,
+                             GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed));
+       } else if (BATordered(b) || BATordered_rev(b)) {
+               const void *prev = NULL;
+               algomsg = "unique: sorted";
+               TIMEOUT_LOOP_IDX(i, cnt, timeoffset) {
+                       o = canditer_next(&ci);
+                       v = VALUE(o - b->hseqbase);
+                       if (prev == NULL || (*cmp)(v, prev) != 0) {
+                               if (bunfastappTYPE(oid, bn, &o) != GDK_SUCCEED)
+                                       goto bunins_failed;
+                       }
+                       prev = v;
+               }
+               TIMEOUT_CHECK(timeoffset,
+                             GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed));
        } else if (BATcheckhash(b) ||
                   (!b->batTransient &&
-                   cnt == BATcount(b) &&
+                   cnt == bi.count &&
                    BAThash(b) == GDK_SUCCEED)) {
                BUN lo = 0;
                oid seq;
@@ -192,9 +187,7 @@ BATunique(BAT *b, BAT *s)
                        MT_rwlock_rdunlock(&b->thashlock);
                        goto lost_hash;
                }
-               for (i = 0; i < cnt; i++) {
-                       GDK_CHECK_TIMEOUT(timeoffset, counter,
-                                       GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed));
+               TIMEOUT_LOOP_IDX(i, cnt, timeoffset) {
                        BUN p;
 
                        o = canditer_next(&ci);
@@ -220,6 +213,8 @@ BATunique(BAT *b, BAT *s)
                        }
                }
                MT_rwlock_rdunlock(&b->thashlock);
+               TIMEOUT_CHECK(timeoffset,
+                             GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed));
        } else {
                BUN prb;
                BUN p;
@@ -229,10 +224,10 @@ BATunique(BAT *b, BAT *s)
                GDKclrerr();    /* not interested in BAThash errors */
                algomsg = "unique: new partial hash";
                nme = BBP_physical(b->batCacheid);
-               if (ATOMbasetype(b->ttype) == TYPE_bte) {
+               if (ATOMbasetype(bi.type) == TYPE_bte) {
                        mask = (BUN) 1 << 8;
                        cmp = NULL; /* no compare needed, "hash" is perfect */
-               } else if (ATOMbasetype(b->ttype) == TYPE_sht) {
+               } else if (ATOMbasetype(bi.type) == TYPE_sht) {
                        mask = (BUN) 1 << 16;
                        cmp = NULL; /* no compare needed, "hash" is perfect */
                } else {
@@ -244,19 +239,17 @@ BATunique(BAT *b, BAT *s)
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to