Changeset: 626f65a1893e for MonetDB URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=626f65a1893e Modified Files: clients/Tests/MAL-signatures.stable.out clients/Tests/MAL-signatures.stable.out.int128 clients/Tests/exports.stable.out gdk/gdk.h gdk/gdk_join.c monetdb5/modules/kernel/algebra.c Branch: unlock Log Message:
Implemented extra flag for algebra.outerjoin to match exactly one. diffs (truncated from 538 to 300 lines): diff --git a/clients/Tests/MAL-signatures.stable.out b/clients/Tests/MAL-signatures.stable.out --- a/clients/Tests/MAL-signatures.stable.out +++ b/clients/Tests/MAL-signatures.stable.out @@ -676,12 +676,12 @@ stdout of test 'MAL-signatures` in direc [ "algebra", "not_like", "command algebra.not_like(X_1:str, X_2:str):bit ", "PCREnotlike2;", "" ] [ "algebra", "not_like", "command algebra.not_like(X_1:str, X_2:str, X_3:str):bit ", "PCREnotlike3;", "" ] [ "algebra", "orderidx", "command algebra.orderidx(X_1:bat[:any_1], X_2:bit):bat[:any_1] ", "OIDXorderidx;", "" ] -[ "algebra", "outerjoin", "command algebra.outerjoin(X_1:bat[:any_1], X_2:bat[:any_1], X_3:bat[:cnd], X_4:bat[:cnd], X_5:bit, X_6:lng):bat[:oid] ", "ALGouterjoin1;", "" ] -[ "algebra", "outerjoin", "command algebra.outerjoin(X_1:bat[:any_1], X_2:bat[:msk], X_3:bat[:cnd], X_4:bat[:cnd], X_5:bit, X_6:lng):bat[:oid] ", "ALGouterjoin1;", "" ] -[ "algebra", "outerjoin", "command algebra.outerjoin(X_1:bat[:msk], X_2:bat[:any_1], X_3:bat[:cnd], X_4:bat[:cnd], X_5:bit, X_6:lng):bat[:oid] ", "ALGouterjoin1;", "" ] -[ "algebra", "outerjoin", "command algebra.outerjoin(X_2:bat[:any_1], X_3:bat[:any_1], X_4:bat[:cnd], X_5:bat[:cnd], X_6:bit, X_7:lng) (X_0:bat[:oid], X_1:bat[:oid]) ", "ALGouterjoin;", "" ] -[ "algebra", "outerjoin", "command algebra.outerjoin(X_2:bat[:any_1], X_3:bat[:msk], X_4:bat[:cnd], X_5:bat[:cnd], X_6:bit, X_7:lng) (X_0:bat[:oid], X_1:bat[:oid]) ", "ALGouterjoin;", "" ] -[ "algebra", "outerjoin", "command algebra.outerjoin(X_2:bat[:msk], X_3:bat[:any_1], X_4:bat[:cnd], X_5:bat[:cnd], X_6:bit, X_7:lng) (X_0:bat[:oid], X_1:bat[:oid]) ", "ALGouterjoin;", "" ] +[ "algebra", "outerjoin", "command algebra.outerjoin(X_1:bat[:any_1], X_2:bat[:any_1], X_3:bat[:cnd], X_4:bat[:cnd], X_5:bit, X_6:bit, X_7:lng):bat[:oid] ", "ALGouterjoin1;", "" ] +[ "algebra", "outerjoin", "command algebra.outerjoin(X_1:bat[:any_1], X_2:bat[:msk], X_3:bat[:cnd], X_4:bat[:cnd], X_5:bit, X_6:bit, X_7:lng):bat[:oid] ", "ALGouterjoin1;", "" ] +[ "algebra", "outerjoin", "command algebra.outerjoin(X_1:bat[:msk], X_2:bat[:any_1], X_3:bat[:cnd], X_4:bat[:cnd], X_5:bit, X_6:bit, X_7:lng):bat[:oid] ", "ALGouterjoin1;", "" ] +[ "algebra", "outerjoin", "command algebra.outerjoin(X_2:bat[:any_1], X_3:bat[:any_1], X_4:bat[:cnd], X_5:bat[:cnd], X_6:bit, X_7:bit, X_8:lng) (X_0:bat[:oid], X_1:bat[:oid]) ", "ALGouterjoin;", "" ] +[ "algebra", "outerjoin", "command algebra.outerjoin(X_2:bat[:any_1], X_3:bat[:msk], X_4:bat[:cnd], X_5:bat[:cnd], X_6:bit, X_7:bit, X_8:lng) (X_0:bat[:oid], X_1:bat[:oid]) ", "ALGouterjoin;", "" ] +[ "algebra", "outerjoin", "command algebra.outerjoin(X_2:bat[:msk], X_3:bat[:any_1], X_4:bat[:cnd], X_5:bat[:cnd], X_6:bit, X_7:bit, X_8:lng) (X_0:bat[:oid], X_1:bat[:oid]) ", "ALGouterjoin;", "" ] [ "algebra", "project", "pattern algebra.project(X_1:bat[:any_1], X_2:any_3):bat[:any_3] ", "ALGprojecttail;", "" ] [ "algebra", "projection", "command algebra.projection(X_1:bat[:msk], X_2:bat[:any_3], X_3:bat[:any_3]):bat[:any_3] ", "ALGprojection2;", "" ] [ "algebra", "projection", "command algebra.projection(X_1:bat[:oid], X_2:bat[:any_3], X_3:bat[:any_3]):bat[:any_3] ", "ALGprojection2;", "" ] diff --git a/clients/Tests/MAL-signatures.stable.out.int128 b/clients/Tests/MAL-signatures.stable.out.int128 --- a/clients/Tests/MAL-signatures.stable.out.int128 +++ b/clients/Tests/MAL-signatures.stable.out.int128 @@ -793,12 +793,12 @@ stdout of test 'MAL-signatures` in direc [ "algebra", "not_like", "command algebra.not_like(X_1:str, X_2:str):bit ", "PCREnotlike2;", "" ] [ "algebra", "not_like", "command algebra.not_like(X_1:str, X_2:str, X_3:str):bit ", "PCREnotlike3;", "" ] [ "algebra", "orderidx", "command algebra.orderidx(X_1:bat[:any_1], X_2:bit):bat[:any_1] ", "OIDXorderidx;", "" ] -[ "algebra", "outerjoin", "command algebra.outerjoin(X_1:bat[:any_1], X_2:bat[:any_1], X_3:bat[:cnd], X_4:bat[:cnd], X_5:bit, X_6:lng):bat[:oid] ", "ALGouterjoin1;", "" ] -[ "algebra", "outerjoin", "command algebra.outerjoin(X_1:bat[:any_1], X_2:bat[:msk], X_3:bat[:cnd], X_4:bat[:cnd], X_5:bit, X_6:lng):bat[:oid] ", "ALGouterjoin1;", "" ] -[ "algebra", "outerjoin", "command algebra.outerjoin(X_1:bat[:msk], X_2:bat[:any_1], X_3:bat[:cnd], X_4:bat[:cnd], X_5:bit, X_6:lng):bat[:oid] ", "ALGouterjoin1;", "" ] -[ "algebra", "outerjoin", "command algebra.outerjoin(X_2:bat[:any_1], X_3:bat[:any_1], X_4:bat[:cnd], X_5:bat[:cnd], X_6:bit, X_7:lng) (X_0:bat[:oid], X_1:bat[:oid]) ", "ALGouterjoin;", "" ] -[ "algebra", "outerjoin", "command algebra.outerjoin(X_2:bat[:any_1], X_3:bat[:msk], X_4:bat[:cnd], X_5:bat[:cnd], X_6:bit, X_7:lng) (X_0:bat[:oid], X_1:bat[:oid]) ", "ALGouterjoin;", "" ] -[ "algebra", "outerjoin", "command algebra.outerjoin(X_2:bat[:msk], X_3:bat[:any_1], X_4:bat[:cnd], X_5:bat[:cnd], X_6:bit, X_7:lng) (X_0:bat[:oid], X_1:bat[:oid]) ", "ALGouterjoin;", "" ] +[ "algebra", "outerjoin", "command algebra.outerjoin(X_1:bat[:any_1], X_2:bat[:any_1], X_3:bat[:cnd], X_4:bat[:cnd], X_5:bit, X_6:bit, X_7:lng):bat[:oid] ", "ALGouterjoin1;", "" ] +[ "algebra", "outerjoin", "command algebra.outerjoin(X_1:bat[:any_1], X_2:bat[:msk], X_3:bat[:cnd], X_4:bat[:cnd], X_5:bit, X_6:bit, X_7:lng):bat[:oid] ", "ALGouterjoin1;", "" ] +[ "algebra", "outerjoin", "command algebra.outerjoin(X_1:bat[:msk], X_2:bat[:any_1], X_3:bat[:cnd], X_4:bat[:cnd], X_5:bit, X_6:bit, X_7:lng):bat[:oid] ", "ALGouterjoin1;", "" ] +[ "algebra", "outerjoin", "command algebra.outerjoin(X_2:bat[:any_1], X_3:bat[:any_1], X_4:bat[:cnd], X_5:bat[:cnd], X_6:bit, X_7:bit, X_8:lng) (X_0:bat[:oid], X_1:bat[:oid]) ", "ALGouterjoin;", "" ] +[ "algebra", "outerjoin", "command algebra.outerjoin(X_2:bat[:any_1], X_3:bat[:msk], X_4:bat[:cnd], X_5:bat[:cnd], X_6:bit, X_7:bit, X_8:lng) (X_0:bat[:oid], X_1:bat[:oid]) ", "ALGouterjoin;", "" ] +[ "algebra", "outerjoin", "command algebra.outerjoin(X_2:bat[:msk], X_3:bat[:any_1], X_4:bat[:cnd], X_5:bat[:cnd], X_6:bit, X_7:bit, X_8:lng) (X_0:bat[:oid], X_1:bat[:oid]) ", "ALGouterjoin;", "" ] [ "algebra", "project", "pattern algebra.project(X_1:bat[:any_1], X_2:any_3):bat[:any_3] ", "ALGprojecttail;", "" ] [ "algebra", "projection", "command algebra.projection(X_1:bat[:msk], X_2:bat[:any_3], X_3:bat[:any_3]):bat[:any_3] ", "ALGprojection2;", "" ] [ "algebra", "projection", "command algebra.projection(X_1:bat[:oid], X_2:bat[:any_3], X_3:bat[:any_3]):bat[:any_3] ", "ALGprojection2;", "" ] diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out --- a/clients/Tests/exports.stable.out +++ b/clients/Tests/exports.stable.out @@ -173,7 +173,7 @@ BAT *BATnil_grp(BAT *l, BAT *g, BAT *e, bool BATordered(BAT *b); bool BATordered_rev(BAT *b); gdk_return BATorderidx(BAT *b, bool stable); -gdk_return BATouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate) __attribute__((__warn_unused_result__)); +gdk_return BATouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool match_one, BUN estimate) __attribute__((__warn_unused_result__)); gdk_return BATprint(stream *s, BAT *b); gdk_return BATprintcolumns(stream *s, int argc, BAT *argv[]); gdk_return BATprod(void *res, int tp, BAT *b, BAT *s, bool skip_nils, bool abort_on_error, bool nil_if_empty); diff --git a/gdk/gdk.h b/gdk/gdk.h --- a/gdk/gdk.h +++ b/gdk/gdk.h @@ -2116,7 +2116,7 @@ gdk_export gdk_return BATsubcross(BAT ** gdk_export gdk_return BATleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate) __attribute__((__warn_unused_result__)); -gdk_export gdk_return BATouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate) +gdk_export gdk_return BATouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool match_one, BUN estimate) __attribute__((__warn_unused_result__)); gdk_export gdk_return BATthetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, int op, bool nil_matches, BUN estimate) __attribute__((__warn_unused_result__)); diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c --- a/gdk/gdk_join.c +++ b/gdk/gdk_join.c @@ -95,7 +95,8 @@ joinparamcheck(BAT *l, BAT *r1, BAT *r2, * number of outputs that could possibly be generated. */ static BUN joininitresults(BAT **r1p, BAT **r2p, BUN lcnt, BUN rcnt, bool lkey, bool rkey, - bool semi, bool nil_on_miss, bool only_misses, BUN estimate) + bool semi, bool nil_on_miss, bool only_misses, bool min_one, + BUN estimate) { BAT *r1, *r2; BUN maxsize, size; @@ -151,6 +152,8 @@ joininitresults(BAT **r1p, BAT **r2p, BU * once */ size = maxsize; } + if (min_one && size < lcnt) + size = lcnt; if (maxsize == 0) { r1 = BATdense(0, 0, 0); @@ -790,7 +793,7 @@ mergejoin_int(BAT **r1p, BAT **r2p, BAT if ((maxsize = joininitresults(r1p, r2p, BATcount(l), BATcount(r), l->tkey, r->tkey, false, false, - false, estimate)) == BUN_NONE) + false, false, estimate)) == BUN_NONE) return GDK_FAIL; r1 = *r1p; r2 = r2p ? *r2p : NULL; @@ -1089,7 +1092,7 @@ mergejoin_lng(BAT **r1p, BAT **r2p, BAT if ((maxsize = joininitresults(r1p, r2p, BATcount(l), BATcount(r), l->tkey, r->tkey, false, false, - false, estimate)) == BUN_NONE) + false, false, estimate)) == BUN_NONE) return GDK_FAIL; r1 = *r1p; r2 = r2p ? *r2p : NULL; @@ -1396,7 +1399,7 @@ mergejoin_cand(BAT **r1p, BAT **r2p, BAT if ((maxsize = joininitresults(r1p, r2p, BATcount(l), BATcount(r), l->tkey, r->tkey, false, false, - false, estimate)) == BUN_NONE) + false, false, estimate)) == BUN_NONE) return GDK_FAIL; r1 = *r1p; r2 = r2p ? *r2p : NULL; @@ -1648,7 +1651,8 @@ static gdk_return mergejoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, struct canditer *restrict lci, struct canditer *restrict rci, bool nil_matches, bool nil_on_miss, bool semi, bool only_misses, - bool not_in, bool max_one, BUN estimate, lng t0, bool swapped, + bool not_in, bool max_one, bool min_one, BUN estimate, + lng t0, bool swapped, const char *reason) { /* [lr]scan determine how far we look ahead in l/r in order to @@ -1680,7 +1684,8 @@ mergejoin(BAT **r1p, BAT **r2p, BAT *l, if (lci->tpe == cand_dense && lci->ncand == BATcount(l) && rci->tpe == cand_dense && rci->ncand == BATcount(r) && - !nil_on_miss && !semi && !max_one && !only_misses && !not_in && + !nil_on_miss && !semi && !max_one && !min_one && !only_misses && + !not_in && l->tsorted && r->tsorted) { /* special cases with far fewer options */ if (r->ttype == TYPE_void && r->tvheap) @@ -1757,7 +1762,8 @@ mergejoin(BAT **r1p, BAT **r2p, BAT *l, BUN maxsize = joininitresults(r1p, r2p, lci->ncand, rci->ncand, l->tkey, r->tkey, semi | max_one, - nil_on_miss, only_misses, estimate); + nil_on_miss, only_misses, min_one, + estimate); if (maxsize == BUN_NONE) return GDK_FAIL; BAT *r1 = *r1p; @@ -2138,6 +2144,10 @@ mergejoin(BAT **r1p, BAT **r2p, BAT *l, if (nr == 0) { /* no entries in r found */ if (!(nil_on_miss | only_misses)) { + if (min_one) { + GDKerror("not enough matches"); + goto bailout; + } if (lscan > 0 && (equal_order ? rci->next == rci->ncand : rci->next == 0)) { /* nothing more left to match @@ -2450,6 +2460,9 @@ mergejoin(BAT **r1p, BAT **r2p, BAT *l, goto bailout; \ APPEND(r1, lo); \ APPEND(r2, oid_nil); \ + } else if (min_one) { \ + GDKerror("not enough matches"); \ + goto bailout; \ } else { \ lskipped = BATcount(r1) > 0; \ } \ @@ -2482,7 +2495,7 @@ static gdk_return hashjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, struct canditer *restrict lci, struct canditer *restrict rci, bool nil_matches, bool nil_on_miss, bool semi, bool only_misses, - bool not_in, bool max_one, + bool not_in, bool max_one, bool min_one, BUN estimate, lng t0, bool swapped, bool hash, bool phash, const char *reason) { @@ -2530,7 +2543,8 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B BUN maxsize = joininitresults(r1p, r2p, lci->ncand, rci->ncand, l->tkey, r->tkey, semi | max_one, - nil_on_miss, only_misses, estimate); + nil_on_miss, only_misses, min_one, + estimate); if (maxsize == BUN_NONE) return GDK_FAIL; @@ -2721,6 +2735,9 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B goto bailout; APPEND(r1, lo); APPEND(r2, oid_nil); + } else if (min_one) { + GDKerror("not enough matches"); + goto bailout; } else { lskipped = BATcount(r1) > 0; } @@ -2784,7 +2801,7 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B TRC_DEBUG(ALGO, "l=" ALGOBATFMT "," "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT "," "sr=" ALGOOPTBATFMT "," "nil_matches=%s,nil_on_miss=%s,semi=%s,only_misses=%s," - "not_in=%s,max_one=%s;%s %s -> " ALGOBATFMT "," ALGOOPTBATFMT + "not_in=%s,max_one=%s,min_one=%s;%s %s -> " ALGOBATFMT "," ALGOOPTBATFMT " (" LLFMT "usec)\n", ALGOBATPAR(l), ALGOBATPAR(r), ALGOOPTBATPAR(lci->s), ALGOOPTBATPAR(rci->s), @@ -2794,6 +2811,7 @@ hashjoin(BAT **r1p, BAT **r2p, BAT *l, B only_misses ? "true" : "false", not_in ? "true" : "false", max_one ? "true" : "false", + min_one ? "true" : "false", swapped ? " swapped" : "", reason, ALGOBATPAR(r1), ALGOOPTBATPAR(r2), GDKusec() - t0); @@ -2874,7 +2892,7 @@ thetajoin(BAT **r1p, BAT **r2p, BAT *l, } BUN maxsize = joininitresults(r1p, r2p, lcnt, rcnt, false, false, - false, false, false, estimate); + false, false, false, false, estimate); if (maxsize == BUN_NONE) return GDK_FAIL; BAT *r1 = *r1p; @@ -3146,7 +3164,8 @@ bitmaskjoin(BAT *l, BAT *r, static gdk_return leftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool nil_on_miss, bool semi, bool only_misses, - bool not_in, bool max_one, BUN estimate, const char *func, lng t0) + bool not_in, bool max_one, bool min_one, BUN estimate, + const char *func, lng t0) { BUN lcnt, rcnt; struct canditer lci, rci; @@ -3208,18 +3227,18 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT "," "sr=" ALGOOPTBATFMT ",nil_matches=%d," "nil_on_miss=%d,semi=%d,only_misses=%d," - "not_in=%d,max_one=%d)\n", + "not_in=%d,max_one=%d,min_one=%d)\n", func, ALGOBATPAR(l), ALGOBATPAR(r), ALGOOPTBATPAR(sl), ALGOOPTBATPAR(sr), nil_matches, nil_on_miss, semi, only_misses, - not_in, max_one); + not_in, max_one, min_one); rc = nomatch(r1p, r2p, l, r, &lci, nil_on_miss, only_misses, func, t0); goto doreturn; } - if (!nil_on_miss && !semi && !max_one && !only_misses && !not_in && + if (!nil_on_miss && !semi && !max_one && !min_one && !only_misses && !not_in && (lcnt == 1 || (BATordered(l) && BATordered_rev(l)) || (l->ttype == TYPE_void && is_oid_nil(l->tseqbase)))) { /* single value to join, use select */ @@ -3238,6 +3257,7 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B && rci.tpe == cand_dense && !semi && !max_one + && !min_one && !nil_matches && !only_misses && !not_in @@ -3252,7 +3272,8 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B && (semi || only_misses) && !nil_on_miss && !not_in - && !max_one) { + && !max_one + && !min_one) { *r1p = bitmaskjoin(l, r, &lci, &rci, only_misses, func, t0); rc = *r1p == NULL ? GDK_FAIL : GDK_SUCCEED; goto doreturn; @@ -3264,7 +3285,7 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B || BATcount(r) * (Tsize(r) + (r->tvheap ? r->tvheap->size : 0) + 2 * sizeof(BUN)) > GDK_mem_maxsize / (GDKnr_threads ? GDKnr_threads : 1))) { rc = mergejoin(r1p, r2p, l, r, &lci, &rci, nil_matches, nil_on_miss, semi, only_misses, - not_in, max_one, estimate, t0, false, func); + not_in, max_one, min_one, estimate, t0, false, func); goto doreturn; } rhash = BATcheckhash(r); @@ -3310,7 +3331,7 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B } } - if (!nil_on_miss && !only_misses && !not_in && !max_one) { + if (!nil_on_miss && !only_misses && !not_in && !max_one && !min_one) { /* maybe do a hash join on the swapped operands; if we * do, we need to sort the output, so we take that into * account as well */ @@ -3376,7 +3397,7 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B canditer_init(&rci, r, sr); } rc = hashjoin(&r2, &r1, r, l, &rci, &lci, nil_matches, - false, false, false, false, false, estimate, + false, false, false, false, false, false, estimate, t0, true, lhash, plhash, func); if (semi) BBPunfix(sr->batCacheid); _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list