Changeset: ff4639cf258b for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/ff4639cf258b
Modified Files:
        gdk/gdk_group.c
Branch: dict
Log Message:

Merge with default branch.


diffs (truncated from 1472 to 300 lines):

diff --git a/CMakeLists.txt b/CMakeLists.txt
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -24,10 +24,6 @@ set(CMAKE_PROJECT_HOMEPAGE_URL "https://
 set(C_STANDARD_REQUIRED ON)
 set(CMAKE_C_STANDARD 99)
 
-if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.13.0")
-  cmake_policy(SET CMP0076 OLD)
-endif()
-
 # We give precedence to UNIX include directories over
 # OS X Frameworks directories
 set(CMAKE_FIND_FRAMEWORK LAST)
diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c
--- a/gdk/gdk_group.c
+++ b/gdk/gdk_group.c
@@ -73,7 +73,7 @@
                if (ngrp == maxgrps) {                                  \
                        /* we need to extend extents and histo bats, */ \
                        /* do it at most once */                        \
-                       maxgrps = BATcount(b);                          \
+                       maxgrps = bi.count;                             \
                        if (extents) {                                  \
                                BATsetcount(en, ngrp);                  \
                                if (BATextend(en, maxgrps) != GDK_SUCCEED) \
@@ -600,12 +600,10 @@ ctz(oid x)
 
 #define GRP_small_values(BG, BV, GV)                                   \
        do {                                                            \
-               uint##BG##_t *restrict sgrps = GDKmalloc((1 << BG) * sizeof(uint##BG##_t)); \
+               uint##BG##_t sgrps[1 << BG];                            \
                const uint##BV##_t *restrict w = (const uint##BV##_t *) bi.base; \
                uint##BG##_t v;                                         \
-               if (sgrps == NULL)                                      \
-                       goto error1;                                    \
-               memset(sgrps, 0xFF, (1 << BG) * sizeof(uint##BG##_t));  \
+               memset(sgrps, 0xFF, sizeof(sgrps));                     \
                if (histo)                                              \
                        memset(cnts, 0, maxgrps * sizeof(lng));         \
                ngrp = 0;                                               \
@@ -647,7 +645,6 @@ ctz(oid x)
                }                                                       \
                TIMEOUT_CHECK(timeoffset,                               \
                              GOTO_LABEL_TIMEOUT_HANDLER(error));       \
-               GDKfree(sgrps);                                         \
        } while (0)
 
 gdk_return
diff --git a/gdk/gdk_unique.c b/gdk/gdk_unique.c
--- a/gdk/gdk_unique.c
+++ b/gdk/gdk_unique.c
@@ -32,17 +32,14 @@ BATunique(BAT *b, BAT *s)
        const char *vars;
        int width;
        oid i, o;
-       uint16_t *seen = NULL;
        const char *nme;
        Hash *hs = NULL;
        BUN hb;
-       BATiter bi;
        int (*cmp)(const void *, const void *);
        struct canditer ci;
        const char *algomsg = "";
        lng t0 = 0;
 
-       size_t counter = 0;
        lng timeoffset = 0;
        QryCtx *qry_ctx = MT_thread_get_qry_ctx();
        if (qry_ctx != NULL) {
@@ -81,62 +78,46 @@ BATunique(BAT *b, BAT *s)
 
        assert(b->ttype != TYPE_void);
 
+       BATiter bi = bat_iterator(b);
        BUN initsize = BUN_NONE;
        if (s == NULL) {
                MT_rwlock_rdlock(&b->thashlock);
                if (b->thash != NULL && b->thash != (Hash *) 1)
                        initsize = b->thash->nunique;
                MT_rwlock_rdunlock(&b->thashlock);
-               if (initsize == BUN_NONE) {
-                       MT_lock_set(&b->theaplock);
-                       if (b->tunique_est != 0)
-                               initsize = (BUN) b->tunique_est;
-                       MT_lock_unset(&b->theaplock);
-               }
+               if (initsize == BUN_NONE && bi.unique_est != 0)
+                       initsize = (BUN) bi.unique_est;
        }
        if (initsize == BUN_NONE)
                initsize = 1024;
        bn = COLnew(0, TYPE_oid, initsize, TRANSIENT);
-       if (bn == NULL)
+       if (bn == NULL) {
+               bat_iterator_end(&bi);
                return NULL;
-       bi = bat_iterator(b);
+       }
        vals = bi.base;
-       if (b->tvarsized && b->ttype)
+       if (b->tvarsized && bi.type)
                vars = bi.vh->base;
        else
                vars = NULL;
        width = bi.width;
-       cmp = ATOMcompare(b->ttype);
+       cmp = ATOMcompare(bi.type);
 
-       if (BATordered(b) || BATordered_rev(b)) {
-               const void *prev = NULL;
-               algomsg = "unique: sorted";
-               for (i = 0; i < cnt; i++) {
-                       GDK_CHECK_TIMEOUT(timeoffset, counter,
-                                       GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed));
-                       o = canditer_next(&ci);
-                       v = VALUE(o - b->hseqbase);
-                       if (prev == NULL || (*cmp)(v, prev) != 0) {
-                               if (bunfastappTYPE(oid, bn, &o) != GDK_SUCCEED)
-                                       goto bunins_failed;
-                       }
-                       prev = v;
-               }
-       } else if (ATOMbasetype(b->ttype) == TYPE_bte) {
-               unsigned char val;
+       if (ATOMbasetype(bi.type) == TYPE_bte ||
+           (bi.width == 1 &&
+            ATOMstorage(bi.type) == TYPE_str &&
+            GDK_ELIMDOUBLES(bi.vh))) {
+               uint8_t val;
 
                algomsg = "unique: byte-sized atoms";
-               assert(vars == NULL);
-               seen = GDKzalloc((256 / 16) * sizeof(seen[0]));
-               if (seen == NULL)
-                       goto bunins_failed;
-               for (i = 0; i < cnt; i++) {
-                       GDK_CHECK_TIMEOUT(timeoffset, counter,
-                                       GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed));
+               uint32_t seen[256 >> 5];
+               memset(seen, 0, sizeof(seen));
+               TIMEOUT_LOOP_IDX(i, cnt, timeoffset) {
                        o = canditer_next(&ci);
-                       val = ((const unsigned char *) vals)[o - b->hseqbase];
-                       if (!(seen[val >> 4] & (1U << (val & 0xF)))) {
-                               seen[val >> 4] |= 1U << (val & 0xF);
+                       val = ((const uint8_t *) vals)[o - b->hseqbase];
+                       uint32_t m = UINT32_C(1) << (val & 0x1F);
+                       if (!(seen[val >> 5] & m)) {
+                               seen[val >> 5] |= m;
                                if (bunfastappTYPE(oid, bn, &o) != GDK_SUCCEED)
                                        goto bunins_failed;
                                if (bn->batCount == 256) {
@@ -146,23 +127,23 @@ BATunique(BAT *b, BAT *s)
                                }
                        }
                }
-               GDKfree(seen);
-               seen = NULL;
-       } else if (ATOMbasetype(b->ttype) == TYPE_sht) {
-               unsigned short val;
+               TIMEOUT_CHECK(timeoffset,
+                             GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed));
+       } else if (ATOMbasetype(bi.type) == TYPE_sht ||
+                  (bi.width == 2 &&
+                   ATOMstorage(bi.type) == TYPE_str &&
+                   GDK_ELIMDOUBLES(bi.vh))) {
+               uint16_t val;
 
                algomsg = "unique: short-sized atoms";
-               assert(vars == NULL);
-               seen = GDKzalloc((65536 / 16) * sizeof(seen[0]));
-               if (seen == NULL)
-                       goto bunins_failed;
-               for (i = 0; i < cnt; i++) {
-                       GDK_CHECK_TIMEOUT(timeoffset, counter,
-                                       GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed));
+               uint32_t seen[65536 >> 5];
+               memset(seen, 0, sizeof(seen));
+               TIMEOUT_LOOP_IDX(i, cnt, timeoffset) {
                        o = canditer_next(&ci);
-                       val = ((const unsigned short *) vals)[o - b->hseqbase];
-                       if (!(seen[val >> 4] & (1U << (val & 0xF)))) {
-                               seen[val >> 4] |= 1U << (val & 0xF);
+                       val = ((const uint16_t *) vals)[o - b->hseqbase];
+                       uint32_t m = UINT32_C(1) << (val & 0x1F);
+                       if (!(seen[val >> 5] & m)) {
+                               seen[val >> 5] |= m;
                                if (bunfastappTYPE(oid, bn, &o) != GDK_SUCCEED)
                                        goto bunins_failed;
                                if (bn->batCount == 65536) {
@@ -172,11 +153,25 @@ BATunique(BAT *b, BAT *s)
                                }
                        }
                }
-               GDKfree(seen);
-               seen = NULL;
+               TIMEOUT_CHECK(timeoffset,
+                             GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed));
+       } else if (BATordered(b) || BATordered_rev(b)) {
+               const void *prev = NULL;
+               algomsg = "unique: sorted";
+               TIMEOUT_LOOP_IDX(i, cnt, timeoffset) {
+                       o = canditer_next(&ci);
+                       v = VALUE(o - b->hseqbase);
+                       if (prev == NULL || (*cmp)(v, prev) != 0) {
+                               if (bunfastappTYPE(oid, bn, &o) != GDK_SUCCEED)
+                                       goto bunins_failed;
+                       }
+                       prev = v;
+               }
+               TIMEOUT_CHECK(timeoffset,
+                             GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed));
        } else if (BATcheckhash(b) ||
                   (!b->batTransient &&
-                   cnt == BATcount(b) &&
+                   cnt == bi.count &&
                    BAThash(b) == GDK_SUCCEED)) {
                BUN lo = 0;
                oid seq;
@@ -192,9 +187,7 @@ BATunique(BAT *b, BAT *s)
                        MT_rwlock_rdunlock(&b->thashlock);
                        goto lost_hash;
                }
-               for (i = 0; i < cnt; i++) {
-                       GDK_CHECK_TIMEOUT(timeoffset, counter,
-                                       GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed));
+               TIMEOUT_LOOP_IDX(i, cnt, timeoffset) {
                        BUN p;
 
                        o = canditer_next(&ci);
@@ -220,6 +213,8 @@ BATunique(BAT *b, BAT *s)
                        }
                }
                MT_rwlock_rdunlock(&b->thashlock);
+               TIMEOUT_CHECK(timeoffset,
+                             GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed));
        } else {
                BUN prb;
                BUN p;
@@ -229,10 +224,10 @@ BATunique(BAT *b, BAT *s)
                GDKclrerr();    /* not interested in BAThash errors */
                algomsg = "unique: new partial hash";
                nme = BBP_physical(b->batCacheid);
-               if (ATOMbasetype(b->ttype) == TYPE_bte) {
+               if (ATOMbasetype(bi.type) == TYPE_bte) {
                        mask = (BUN) 1 << 8;
                        cmp = NULL; /* no compare needed, "hash" is perfect */
-               } else if (ATOMbasetype(b->ttype) == TYPE_sht) {
+               } else if (ATOMbasetype(bi.type) == TYPE_sht) {
                        mask = (BUN) 1 << 16;
                        cmp = NULL; /* no compare needed, "hash" is perfect */
                } else {
@@ -244,19 +239,17 @@ BATunique(BAT *b, BAT *s)
                        GDKerror("cannot allocate hash table\n");
                        goto bunins_failed;
                }
-               if ((hs->heaplink.farmid = BBPselectfarm(TRANSIENT, b->ttype, hashheap)) < 0 ||
-                   (hs->heapbckt.farmid = BBPselectfarm(TRANSIENT, b->ttype, hashheap)) < 0 ||
+               if ((hs->heaplink.farmid = BBPselectfarm(TRANSIENT, bi.type, hashheap)) < 0 ||
+                   (hs->heapbckt.farmid = BBPselectfarm(TRANSIENT, bi.type, hashheap)) < 0 ||
                    snprintf(hs->heaplink.filename, sizeof(hs->heaplink.filename), "%s.thshunil%x", nme, (unsigned) THRgettid()) >= (int) sizeof(hs->heaplink.filename) ||
                    snprintf(hs->heapbckt.filename, sizeof(hs->heapbckt.filename), "%s.thshunib%x", nme, (unsigned) THRgettid()) >= (int) sizeof(hs->heapbckt.filename) ||
-                   HASHnew(hs, b->ttype, BUNlast(b), mask, BUN_NONE, false) != GDK_SUCCEED) {
+                   HASHnew(hs, bi.type, BUNlast(b), mask, BUN_NONE, false) != GDK_SUCCEED) {
                        GDKfree(hs);
                        hs = NULL;
                        GDKerror("cannot allocate hash table\n");
                        goto bunins_failed;
                }
-               for (i = 0; i < cnt; i++) {
-                       GDK_CHECK_TIMEOUT(timeoffset, counter,
-                                       GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed));
+               TIMEOUT_LOOP_IDX(i, cnt, timeoffset) {
                        o = canditer_next(&ci);
                        v = VALUE(o - b->hseqbase);
                        prb = HASHprobe(hs, v);
@@ -278,6 +271,8 @@ BATunique(BAT *b, BAT *s)
                HEAPfree(&hs->heaplink, true);
                HEAPfree(&hs->heapbckt, true);
                GDKfree(hs);
+               TIMEOUT_CHECK(timeoffset,
+                             GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed));
        }
        bat_iterator_end(&bi);
 
@@ -287,7 +282,7 @@ BATunique(BAT *b, BAT *s)
        bn->tkey = true;
        bn->tnil = false;
        bn->tnonil = true;
-       if (BATcount(bn) == BATcount(b)) {
+       if (BATcount(bn) == bi.count) {
                /* it turns out all values are distinct */
                assert(b->tnokey[0] == 0);
                assert(b->tnokey[1] == 0);
@@ -305,8 +300,6 @@ BATunique(BAT *b, BAT *s)
 
   bunins_failed:
        bat_iterator_end(&bi);
-       if (seen)
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to