Changeset: 4582a46fc658 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=4582a46fc658 Modified Files: gdk/gdk_aggr.c monetdb5/modules/kernel/aggr.c monetdb5/modules/kernel/aggr.mal monetdb5/modules/kernel/aggr.mal.sh sql/scripts/39_analytics.sql Branch: default Log Message:
Quantiles now available from SQL. diffs (truncated from 357 to 300 lines): diff --git a/gdk/gdk_aggr.c b/gdk/gdk_aggr.c --- a/gdk/gdk_aggr.c +++ b/gdk/gdk_aggr.c @@ -2224,10 +2224,11 @@ BAT * GDKerror("BATgroupquantile: cannot determine quantile for p=%f (p has to be in [0,1])\n",quantile); return NULL; } + assert(quantile >=0 && quantile <=1); if (BATcount(b) == 0 || ngrp == 0) { - /* trivial: no medians, so return bat aligned with e with - * nil in the tail */ + /* trivial: no values, thus also no quantiles, + * so return bat aligned with e with nil in the tail */ bn = BATconstant(tp, ATOMnilptr(tp), ngrp); BATseqbase(bn, ngrp == 0 ? 0 : min); return bn; @@ -2254,12 +2255,12 @@ BAT * } } - /* we want to sort b so that we can figure out the median, but + /* we want to sort b so that we can figure out the quantile, but * if g is given, sort g and subsort b so that we can get the - * median for each group */ + * quantile for each group */ if (g) { if (BATtdense(g)) { - /* singleton groups, so calculating medians is + /* singleton groups, so calculating quantile is * easy */ bn = BATcopy(b, TYPE_void, b->ttype, 0); BATseqbase(bn, g->tseqbase); @@ -2291,15 +2292,18 @@ BAT * nil = ATOMnilptr(b->ttype); atomcmp = BATatoms[b->ttype].atomCmp; - if (g) { + if (g) { /* we have to do this by group */ const oid *grps; oid prev; BUN p, q, r; grps = (const oid *) Tloc(g, BUNfirst(g)); prev = grps[0]; + /* for each group (r and p are the beginning and end of the current group, respectively) */ for (r = 0, p = 1, q = BATcount(g); p <= q; p++) { - if (p == q || grps[p] != prev) { + assert(r < p); + if ( p == q || grps[p] != prev) { + BUN qindex; if (skip_nils) { while (r < p && (*atomcmp)(BUNtail(bi, BUNfirst(b) + r), nil) == 0) r++; @@ -2309,17 +2313,14 @@ BAT * nil, 0, Tsize(bn)); nils++; } - if (r == p) { - bunfastins_nocheck(bn, BUNlast(bn), 0, - nil, 0, Tsize(bn)); - nils++; - } else { - // actual selection of quantile value for groups - v = BUNtail(bi, (oid)( BUNfirst(b) + (r + p - 1) * quantile)); - bunfastins_nocheck(bn, BUNlast(bn), 0, - v, 0, Tsize(bn)); - nils += (*atomcmp)(v, nil) == 0; - } + qindex = BUNfirst(b) + (BUN) (r + (p-r-1) * quantile); + // be a little paranoid about the index + assert(qindex >= (BUNfirst(b) + r )); + assert(qindex < (BUNfirst(b) + p)); + v = BUNtail(bi, qindex); + bunfastins_nocheck(bn, BUNlast(bn), 0, v, 0, Tsize(bn)); + nils += (*atomcmp)(v, nil) == 0; + r = p; if (p < q) prev = grps[p]; @@ -2330,9 +2331,10 @@ BAT * nil, 0, Tsize(bn)); } BATseqbase(bn, min); - } else { - // actual selection of quantile value - v = BUNtail(bi, (oid) (BUNfirst(b) + (BATcount(b) - 1) * quantile)); + } else { /* quantiles for entire BAT b, EZ */ + + BUN index = BUNfirst(b) + (BUN) ((BATcount(b) - 1) * quantile); + v = BUNtail(bi, index); BUNappend(bn, v, FALSE); BATseqbase(bn, 0); nils += (*atomcmp)(v, nil) == 0; diff --git a/monetdb5/modules/kernel/aggr.c b/monetdb5/modules/kernel/aggr.c --- a/monetdb5/modules/kernel/aggr.c +++ b/monetdb5/modules/kernel/aggr.c @@ -39,11 +39,12 @@ AGGRgrouped(bat *retval1, bat *retval2, BAT *(*grpfunc1)(BAT *, BAT *, BAT *, BAT *, int, int, int), gdk_return (*grpfunc2)(BAT **, BAT **, BAT *, BAT *, BAT *, BAT *, int, int, int), BAT *(*quantilefunc)(BAT *, BAT *, BAT *, BAT *, int, double, int, int), - double quantile, + BAT *quantile, int skip_nils, const char *malfunc) { BAT *bn, *cnts = NULL, *t, *map; + double qvalue; /* one of grpfunc1, grpfunc2 and quantilefunc is non-NULL and the others are */ assert((grpfunc1 != NULL && grpfunc2 == NULL && quantilefunc == NULL) || @@ -51,6 +52,7 @@ AGGRgrouped(bat *retval1, bat *retval2, (grpfunc1 == NULL && grpfunc2 == NULL && quantilefunc != NULL) ); /* if retval2 is non-NULL, we must have grpfunc2 */ assert(retval2 == NULL || grpfunc2 != NULL); + assert(quantile == NULL || quantilefunc != NULL); if (b == NULL || g == NULL || e == NULL) { if (b) @@ -115,8 +117,17 @@ AGGRgrouped(bat *retval1, bat *retval2, } if (grpfunc1) bn = (*grpfunc1)(b, g, e, NULL, tp, skip_nils, 1); - if (quantilefunc) - bn = (*quantilefunc)(b, g, e, NULL, tp, quantile, skip_nils, 1); + if (quantilefunc) { + assert(BATcount(quantile)>0); + assert(quantile->ttype == TYPE_dbl); + qvalue = ((const double *)Tloc(quantile, BUNfirst(quantile)))[0]; + if (qvalue < 0|| qvalue > 1) { + char *s; + s = createException(MAL, malfunc, "quantile value of %f is not in range [0,1]", qvalue); + return s; + } + bn = (*quantilefunc)(b, g, e, NULL, tp, qvalue, skip_nils, 1); + } if (grpfunc2 && (*grpfunc2)(&bn, retval2 ? &cnts : NULL, b, g, e, NULL, tp, skip_nils, 1) == GDK_FAIL) bn = NULL; if (bn != NULL && (grpfunc1 == BATgroupmin || grpfunc1 == BATgroupmax)) { @@ -609,16 +620,18 @@ AGGRmedian3(bat *retval, bat *bid, bat * // XXX: when are these functions called? -aggr_export str AGGRquantile3(bat *retval, bat *bid, bat *gid, bat *eid, double *quantile); +aggr_export str AGGRquantile3(bat *retval, bat *bid, bat *gid, bat *eid, bat *quantile); str -AGGRquantile3(bat *retval, bat *bid, bat *gid, bat *eid, double *quantile) +AGGRquantile3(bat *retval, bat *bid, bat *gid, bat *eid, bat *quantile) { // this is inlined from AGGRgrouped3 to avoid changing all the other functions for now - BAT *b, *g, *e; + BAT *b, *g, *e, *q; b = BATdescriptor(*bid); /* [head,value] */ g = BATdescriptor(*gid); /* [head,gid] */ e = BATdescriptor(*eid); /* [gid,any] */ - return AGGRgrouped(retval, NULL, b, g, e, TYPE_any, NULL, NULL, BATgroupquantile, *quantile, 0, "aggr.quantile"); + e = BATdescriptor(*eid); /* [gid,any] */ + q = BATdescriptor(*quantile); + return AGGRgrouped(retval, NULL, b, g, e, TYPE_any, NULL, NULL, BATgroupquantile, q, 0, "aggr.quantile"); } static str @@ -627,10 +640,11 @@ AGGRsubgroupedExt(bat *retval1, bat *ret BAT *(*grpfunc1)(BAT *, BAT *, BAT *, BAT *, int, int, int), gdk_return (*grpfunc2)(BAT **, BAT **, BAT *, BAT *, BAT *, BAT *, int, int, int), BAT *(*quantilefunc)(BAT *, BAT *, BAT *, BAT *, int, double, int, int), - double quantile, + bat *quantile, const char *malfunc) { - BAT *b, *g, *e, *s, *bn, *cnts = NULL; + BAT *b, *g, *e, *s, *bn, *cnts, *q = NULL; + double qvalue; /* one of grpfunc1, grpfunc2 and quantilefunc is non-NULL and the others are */ assert((grpfunc1 && grpfunc2 == NULL && quantilefunc == NULL) || @@ -643,6 +657,8 @@ AGGRsubgroupedExt(bat *retval1, bat *ret b = BATdescriptor(*bid); g = gid ? BATdescriptor(*gid) : NULL; e = eid ? BATdescriptor(*eid) : NULL; + q = quantile ? BATdescriptor(*quantile) : NULL; + if (b == NULL || (gid != NULL && g == NULL) || (eid != NULL && e == NULL)) { if (b) BBPreleaseref(b->batCacheid); @@ -677,8 +693,17 @@ AGGRsubgroupedExt(bat *retval1, bat *ret } if (grpfunc1) bn = (*grpfunc1)(b, g, e, s, tp, skip_nils, abort_on_error); - if (quantilefunc) - bn = (*quantilefunc)(b, g, e, s, tp, quantile, skip_nils, abort_on_error); + if (quantilefunc) { + assert(BATcount(q)>0); + assert(q->ttype == TYPE_dbl); + qvalue = ((const double *)Tloc(q, BUNfirst(q)))[0]; + if (qvalue < 0|| qvalue > 1) { + char *s; + s = createException(MAL, malfunc, "quantile value of %f is not in range [0,1]", qvalue); + return s; + } + bn = (*quantilefunc)(b, g, e, s, tp, qvalue, skip_nils, abort_on_error); + } if (grpfunc2 && (*grpfunc2)(&bn, retval2 ? &cnts : NULL, b, g, e, s, tp, skip_nils, abort_on_error) == GDK_FAIL) bn = NULL; @@ -1204,26 +1229,26 @@ AGGRsubmediancand(bat *retval, bat *bid, } /* quantile functions, could make median functions obsolete completely */ -aggr_export str AGGRquantile(bat *retval, bat *bid, double *quantile, bit *skip_nils); +aggr_export str AGGRquantile(bat *retval, bat *bid, bat *quantile, bit *skip_nils); str -AGGRquantile(bat *retval, bat *bid, double *quantile, bit *skip_nils) +AGGRquantile(bat *retval, bat *bid, bat *quantile, bit *skip_nils) { return AGGRsubgroupedExt(retval, NULL, bid, NULL, NULL, NULL, *skip_nils, - 0, TYPE_any, NULL, NULL,BATgroupquantile, *quantile ,"aggr.subquantile"); + 0, TYPE_any, NULL, NULL,BATgroupquantile, quantile ,"aggr.subquantile"); } -aggr_export str AGGRsubquantile(bat *retval, bat *bid, bat *gid, bat *eid, double *quantile, bit *skip_nils); +aggr_export str AGGRsubquantile(bat *retval, bat *bid,bat *quantile,bat *gid, bat *eid, bit *skip_nils); str -AGGRsubquantile(bat *retval, bat *bid, bat *gid, bat *eid, double *quantile, bit *skip_nils) +AGGRsubquantile(bat *retval, bat *bid,bat *quantile, bat *gid, bat *eid, bit *skip_nils) { return AGGRsubgroupedExt(retval, NULL, bid, gid, eid, NULL, *skip_nils, - 0, TYPE_any, NULL, NULL, BATgroupquantile, *quantile , "aggr.subquantile"); + 0, TYPE_any, NULL, NULL, BATgroupquantile, quantile , "aggr.subquantile"); } -aggr_export str AGGRsubquantilecand(bat *retval, bat *bid, bat *gid, bat *eid, bat *sid, double *quantile, bit *skip_nils); +aggr_export str AGGRsubquantilecand(bat *retval, bat *bid, bat *quantile, bat *gid, bat *eid, bat *sid, bit *skip_nils); str -AGGRsubquantilecand(bat *retval, bat *bid, bat *gid, bat *eid, bat *sid, double *quantile, bit *skip_nils) +AGGRsubquantilecand(bat *retval, bat *bid, bat *quantile, bat *gid, bat *eid, bat *sid, bit *skip_nils) { return AGGRsubgroupedExt(retval, NULL, bid, gid, eid, sid, *skip_nils, - 0, TYPE_any, NULL, NULL,BATgroupquantile,*quantile, "aggr.subquantile"); + 0, TYPE_any, NULL, NULL,BATgroupquantile, quantile, "aggr.subquantile"); } diff --git a/monetdb5/modules/kernel/aggr.mal b/monetdb5/modules/kernel/aggr.mal --- a/monetdb5/modules/kernel/aggr.mal +++ b/monetdb5/modules/kernel/aggr.mal @@ -1456,24 +1456,24 @@ address AGGRsubmediancand comment "Grouped median aggregate with candidate list"; -command quantile(b:bat[:oid,:any_1],g:bat[:oid,:oid],e:bat[:oid,:any_2],q:dbl) :bat[:oid,:any_1] +command quantile(b:bat[:oid,:any_1],g:bat[:oid,:oid],e:bat[:oid,:any_2],q:bat[:oid,:dbl]) :bat[:oid,:any_1] address AGGRquantile3 comment "Grouped quantile aggregate"; -function quantile(b:bat[:oid,:any_1],q:dbl) :any_1; +function quantile(b:bat[:oid,:any_1],q:bat[:oid,:dbl]) :any_1; bn := subquantile(b, q, false); return algebra.fetch(bn, 0); end aggr.quantile; -command subquantile(b:bat[:oid,:any_1],q:dbl,skip_nils:bit) :bat[:oid,:any_1] +command subquantile(b:bat[:oid,:any_1],q:bat[:oid,:dbl],skip_nils:bit) :bat[:oid,:any_1] address AGGRquantile comment "Quantile aggregate"; -command subquantile(b:bat[:oid,:any_1],g:bat[:oid,:oid],e:bat[:oid,:any_2],q:dbl,skip_nils:bit) :bat[:oid,:any_1] +command subquantile(b:bat[:oid,:any_1],q:bat[:oid,:dbl],g:bat[:oid,:oid],e:bat[:oid,:any_2],skip_nils:bit) :bat[:oid,:any_1] address AGGRsubquantile comment "Grouped quantile aggregate"; -command subquantile(b:bat[:oid,:any_1],g:bat[:oid,:oid],e:bat[:oid,:any_2],s:bat[:oid,:oid],q:dbl,skip_nils:bit) :bat[:oid,:any_1] +command subquantile(b:bat[:oid,:any_1],q:bat[:oid,:dbl],g:bat[:oid,:oid],e:bat[:oid,:any_2],s:bat[:oid,:oid],skip_nils:bit) :bat[:oid,:any_1] address AGGRsubquantilecand comment "Grouped median quantile with candidate list"; diff --git a/monetdb5/modules/kernel/aggr.mal.sh b/monetdb5/modules/kernel/aggr.mal.sh --- a/monetdb5/modules/kernel/aggr.mal.sh +++ b/monetdb5/modules/kernel/aggr.mal.sh @@ -320,24 +320,24 @@ address AGGRsubmediancand comment "Grouped median aggregate with candidate list"; -command quantile(b:bat[:oid,:any_1],g:bat[:oid,:oid],e:bat[:oid,:any_2],q:dbl) :bat[:oid,:any_1] +command quantile(b:bat[:oid,:any_1],g:bat[:oid,:oid],e:bat[:oid,:any_2],q:bat[:oid,:dbl]) :bat[:oid,:any_1] address AGGRquantile3 comment "Grouped quantile aggregate"; -function quantile(b:bat[:oid,:any_1],q:dbl) :any_1; +function quantile(b:bat[:oid,:any_1],q:bat[:oid,:dbl]) :any_1; bn := subquantile(b, q, false); return algebra.fetch(bn, 0); end aggr.quantile; -command subquantile(b:bat[:oid,:any_1],q:dbl,skip_nils:bit) :bat[:oid,:any_1] +command subquantile(b:bat[:oid,:any_1],q:bat[:oid,:dbl],skip_nils:bit) :bat[:oid,:any_1] address AGGRquantile comment "Quantile aggregate"; -command subquantile(b:bat[:oid,:any_1],g:bat[:oid,:oid],e:bat[:oid,:any_2],q:dbl,skip_nils:bit) :bat[:oid,:any_1] +command subquantile(b:bat[:oid,:any_1],q:bat[:oid,:dbl],g:bat[:oid,:oid],e:bat[:oid,:any_2],skip_nils:bit) :bat[:oid,:any_1] address AGGRsubquantile _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list