Changeset: 9ee3b0fb88c4 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=9ee3b0fb88c4
Modified Files:
	gdk/gdk_firstn.c
	gdk/gdk_group.c
	gdk/gdk_imprints.c
	gdk/gdk_logger.c
	gdk/gdk_select.c
	monetdb5/extras/rapi/rapi.c
	monetdb5/modules/atoms/json.c
	monetdb5/modules/kernel/aggr.c
	monetdb5/modules/kernel/algebra.c
	monetdb5/modules/kernel/bat5.c
	monetdb5/modules/kernel/batmmath.c
	monetdb5/modules/kernel/batstr.c
	monetdb5/modules/kernel/group.c
	monetdb5/modules/kernel/microbenchmark.c
	monetdb5/modules/mal/batExtensions.c
	monetdb5/modules/mal/batcalc.c
	monetdb5/modules/mal/mat.c
	monetdb5/modules/mal/pcre.c
	monetdb5/modules/mal/sample.c
	monetdb5/modules/mal/txtsim.c
	sql/backends/monet5/UDF/pyapi/connection.c
	sql/backends/monet5/generator/generator.c
	sql/backends/monet5/sql.c
	sql/backends/monet5/sql_rank.c
	sql/backends/monet5/sql_result.c
	sql/backends/monet5/vaults/bam/bam_lib.c
	sql/storage/bat/bat_storage.c
	sql/test/leaks/Tests/check1.stable.out
	sql/test/leaks/Tests/check1.stable.out.int128
	sql/test/leaks/Tests/check2.stable.out
	sql/test/leaks/Tests/check3.stable.out
	sql/test/leaks/Tests/check4.stable.out
	sql/test/leaks/Tests/check5.stable.out
	sql/test/leaks/Tests/select1.stable.out
	sql/test/leaks/Tests/select1.stable.out.int128
	sql/test/leaks/Tests/select2.stable.out
	sql/test/leaks/Tests/select2.stable.out.int128
	sql/test/leaks/Tests/temp1.stable.out
Branch: default
Log Message:
Merge with Jul2017 branch. diffs (truncated from 3035 to 300 lines): diff --git a/gdk/gdk_firstn.c b/gdk/gdk_firstn.c --- a/gdk/gdk_firstn.c +++ b/gdk/gdk_firstn.c @@ -102,7 +102,7 @@ #define shuffle_unique(TYPE, OP) \ do { \ - const TYPE *restrict vals = (const TYPE *) Tloc(b, 0); \ + const TYPE *restrict vals = (const TYPE *) Tloc(b, 0); \ heapify(OP##fix, SWAP1); \ while (cand ? cand < candend : start < end) { \ i = cand ? *cand++ : start++ + b->hseqbase; \ @@ -121,7 +121,7 @@ * multiple equal values to take us past N, we return a subset of those. */ static BAT * -BATfirstn_unique(BAT *b, BAT *s, BUN n, int asc) +BATfirstn_unique(BAT *b, BAT *s, BUN n, int asc, oid *lastp) { BAT *bn; BATiter bi = bat_iterator(b); @@ -140,6 +140,8 @@ BATfirstn_unique(BAT *b, BAT *s, BUN n, if (n >= (BUN) (candend - cand)) { /* trivial: return the candidate list (the * part that refers to b, that is) */ + if (lastp) + *lastp = 0; return BATslice(s, (BUN) (cand - (const oid *) Tloc(s, 0)), (BUN) (candend - (const oid *) Tloc(s, 0))); @@ -151,6 +153,8 @@ BATfirstn_unique(BAT *b, BAT *s, BUN n, return NULL; BATsetcount(bn, cnt); BATtseqbase(bn, start + b->hseqbase); + if (lastp) + *lastp = 0; return bn; } /* note, we want to do both calls */ @@ -163,11 +167,15 @@ BATfirstn_unique(BAT *b, BAT *s, BUN n, /* return copy of first relevant part * of candidate list */ i = (BUN) (cand - (const oid *) Tloc(s, 0)); + if (lastp) + *lastp = cand[n - 1]; return BATslice(s, i, i + n); } /* return copy of last relevant part of * candidate list */ i = (BUN) (candend - (const oid *) Tloc(s, 0)); + if (lastp) + *lastp = candend[-(ssize_t)n]; return BATslice(s, i - n, i); } bn = COLnew(0, TYPE_void, n, TRANSIENT); @@ -177,9 +185,13 @@ BATfirstn_unique(BAT *b, BAT *s, BUN n, if (asc ? 
b->tsorted : b->trevsorted) { /* first n entries from b */ BATtseqbase(bn, start + b->hseqbase); + if (lastp) + *lastp = start + b->hseqbase + n - 1; } else { /* last n entries from b */ BATtseqbase(bn, start + cnt + b->hseqbase - n); + if (lastp) + *lastp = start + cnt + b->hseqbase - n; } return bn; } @@ -293,6 +305,8 @@ BATfirstn_unique(BAT *b, BAT *s, BUN n, break; } } + if (lastp) + *lastp = oids[0]; /* store id of largest value */ /* output must be sorted since it's a candidate list */ GDKqsort(oids, NULL, NULL, (size_t) n, sizeof(oid), 0, TYPE_oid); bn->tsorted = 1; @@ -357,7 +371,7 @@ BATfirstn_unique(BAT *b, BAT *s, BUN n, } while (0) static BAT * -BATfirstn_unique_with_groups(BAT *b, BAT *s, BAT *g, BUN n, int asc) +BATfirstn_unique_with_groups(BAT *b, BAT *s, BAT *g, BUN n, int asc, oid *lastp, oid *lastgp) { BAT *bn; BATiter bi = bat_iterator(b); @@ -533,6 +547,10 @@ BATfirstn_unique_with_groups(BAT *b, BAT break; } } + if (lastp) + *lastp = oids[0]; + if (lastgp) + *lastgp = goids[0]; GDKfree(goids); /* output must be sorted since it's a candidate list */ GDKqsort(oids, NULL, NULL, (size_t) n, sizeof(oid), 0, TYPE_oid); @@ -545,597 +563,238 @@ BATfirstn_unique_with_groups(BAT *b, BAT return bn; } -#define shuffle_grouped1_body(COMPARE, EQUAL) \ - do { \ - for (i = cand ? *cand++ - b->hseqbase : start; \ - i < end; \ - cand < candend ? 
(i = *cand++ - b->hseqbase) : i++) { \ - for (j = 0; j < n; j++) { \ - if (j == top) { \ - assert(top < n); \ - groups[top].cnt = 1; \ - groups[top++].bun = i; \ - break; \ - } else { \ - assert(j < top); \ - assert(groups[j].bun < i); \ - if (COMPARE) { \ - if (top < n) \ - top++; \ - for (k = top - 1; k > j; k--) { \ - groups[k] = groups[k - 1]; \ - } \ - groups[j].bun = i; \ - groups[j].cnt = 1; \ - break; \ - } else if (EQUAL) { \ - groups[j].cnt++; \ - break; \ - } \ - } \ - } \ - } \ - } while (0) - -#define shuffle_grouped1(TYPE, OPER) \ - do { \ - const TYPE *restrict v = (const TYPE *) Tloc(b, 0); \ - shuffle_grouped1_body(OPER(v[i], v[groups[j].bun]), \ - v[i] == v[groups[j].bun]); \ - } while (0) - -#define shuffle_grouped2(TYPE) \ - do { \ - const TYPE *restrict v = (const TYPE *) Tloc(b, 0); \ - TYPE lastval = v[groups[top - 1].bun]; \ - for (i = cand ? *cand++ - b->hseqbase : start; \ - i < end; \ - cand < candend ? (i = *cand++ - b->hseqbase) : i++) { \ - if (asc ? v[i] > lastval : v[i] < lastval) \ - continue; \ - for (j = 0; j < top; j++) { \ - if (v[i] == v[groups[j].bun]) { \ - if (bp) \ - *bp++ = i + b->hseqbase; \ - *gp++ = j; \ - break; \ - } \ - } \ - } \ - } while (0) - static gdk_return BATfirstn_grouped(BAT **topn, BAT **gids, BAT *b, BAT *s, BUN n, int asc, int distinct) { - BAT *bn, *gn; - BATiter bi = bat_iterator(b); - oid *restrict bp, *restrict gp; - BUN top, i, j, k, cnt, start, end; - const oid *restrict cand, *candend, *oldcand; - int tpe = b->ttype; - int c; - int (*cmp)(const void *, const void *); - BUN ncnt; - struct group { - BUN bun; - BUN cnt; - } *restrict groups; + BAT *bn, *gn, *su = NULL; + oid last; + gdk_return rc; - assert(topn); - - CANDINIT(b, s, start, end, cnt, cand, candend); - - if (n > cnt) - n = cnt; - if (cand && n > (BUN) (candend - cand)) - n = (BUN) (candend - cand); - - if (n == 0) { - /* candidate list might refer only to values outside - * of the bat and hence be effectively empty */ - bn = COLnew(0, 
TYPE_void, 0, TRANSIENT); - if (bn == NULL) + if (distinct && !b->tkey) { + su = s; + s = BATunique(b, s); + if (s == NULL) return GDK_FAIL; - BATtseqbase(bn, 0); + } + bn = BATfirstn_unique(b, s, n, asc, &last); + if (bn == NULL) + return GDK_FAIL; + if (BATcount(bn) == 0) { if (gids) { gn = COLnew(0, TYPE_void, 0, TRANSIENT); if (gn == NULL) { - BBPreclaim(bn); + BBPunfix(bn->batCacheid); return GDK_FAIL; } - BATtseqbase(gn, 0); *gids = gn; } *topn = bn; return GDK_SUCCEED; } + if (!b->tkey) { + if (distinct) { + BAT *bn1; - top = 0; - cmp = ATOMcompare(b->ttype); - /* if base type has same comparison function as type itself, we - * can use the base type */ - tpe = ATOMbasetype(tpe); /* takes care of oid */ - groups = GDKmalloc(sizeof(*groups) * n); - if (groups == NULL) - return GDK_FAIL; - oldcand = cand; - if (asc) { - switch (tpe) { - case TYPE_void: - shuffle_grouped1_body(i < groups[j].bun, - i == groups[j].bun); - break; - case TYPE_bte: - shuffle_grouped1(bte, LT); - break; - case TYPE_sht: - shuffle_grouped1(sht, LT); - break; - case TYPE_int: - shuffle_grouped1(int, LT); - break; - case TYPE_lng: - shuffle_grouped1(lng, LT); - break; -#ifdef HAVE_HGE - case TYPE_hge: - shuffle_grouped1(hge, LT); - break; -#endif - case TYPE_flt: - shuffle_grouped1(flt, LT); - break; - case TYPE_dbl: - shuffle_grouped1(dbl, LT); - break; - default: - shuffle_grouped1_body( - (c = cmp(BUNtail(bi, i), - BUNtail(bi, groups[j].bun))) < 0, - c == 0); - break; - } - } else { - switch (tpe) { - case TYPE_void: - shuffle_grouped1_body(i > groups[j].bun, - i == groups[j].bun); - break; - case TYPE_bte: - shuffle_grouped1(bte, GT); - break; - case TYPE_sht: - shuffle_grouped1(sht, GT); - break; - case TYPE_int: - shuffle_grouped1(int, GT); - break; - case TYPE_lng: - shuffle_grouped1(lng, GT); - break; -#ifdef HAVE_HGE - case TYPE_hge: - shuffle_grouped1(hge, GT); - break; -#endif - case TYPE_flt: - shuffle_grouped1(flt, GT); - break; - case TYPE_dbl: - shuffle_grouped1(dbl, GT); 
- break; - default: - shuffle_grouped1_body( - (c = cmp(BUNtail(bi, i), _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list