Changeset: 4582a46fc658 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=4582a46fc658
Modified Files:
        gdk/gdk_aggr.c
        monetdb5/modules/kernel/aggr.c
        monetdb5/modules/kernel/aggr.mal
        monetdb5/modules/kernel/aggr.mal.sh
        sql/scripts/39_analytics.sql
Branch: default
Log Message:

Quantiles now available from SQL.


diffs (truncated from 357 to 300 lines):

diff --git a/gdk/gdk_aggr.c b/gdk/gdk_aggr.c
--- a/gdk/gdk_aggr.c
+++ b/gdk/gdk_aggr.c
@@ -2224,10 +2224,11 @@ BAT *
                GDKerror("BATgroupquantile: cannot determine quantile for p=%f 
(p has to be in [0,1])\n",quantile);
                return NULL;
        }
+       assert(quantile >=0 && quantile <=1);
 
        if (BATcount(b) == 0 || ngrp == 0) {
-               /* trivial: no medians, so return bat aligned with e with
-                * nil in the tail */
+               /* trivial: no values, thus also no quantiles,
+                * so return bat aligned with e with nil in the tail */
                bn = BATconstant(tp, ATOMnilptr(tp), ngrp);
                BATseqbase(bn, ngrp == 0 ? 0 : min);
                return bn;
@@ -2254,12 +2255,12 @@ BAT *
                }
        }
 
-       /* we want to sort b so that we can figure out the median, but
+       /* we want to sort b so that we can figure out the quantile, but
         * if g is given, sort g and subsort b so that we can get the
-        * median for each group */
+        * quantile for each group */
        if (g) {
                if (BATtdense(g)) {
-                       /* singleton groups, so calculating medians is
+                       /* singleton groups, so calculating quantile is
                         * easy */
                        bn = BATcopy(b, TYPE_void, b->ttype, 0);
                        BATseqbase(bn, g->tseqbase);
@@ -2291,15 +2292,18 @@ BAT *
        nil = ATOMnilptr(b->ttype);
        atomcmp = BATatoms[b->ttype].atomCmp;
 
-       if (g) {
+       if (g) { /* we have to do this by group */
                const oid *grps;
                oid prev;
                BUN p, q, r;
 
                grps = (const oid *) Tloc(g, BUNfirst(g));
                prev = grps[0];
+                /* for each group (r and p are the beginning and end of the current group, respectively) */
                for (r = 0, p = 1, q = BATcount(g); p <= q; p++) {
-                       if (p == q || grps[p] != prev) {
+                       assert(r < p);
+                       if ( p == q || grps[p] != prev) {
+                               BUN qindex;
                                if (skip_nils) {
                                        while (r < p && (*atomcmp)(BUNtail(bi, 
BUNfirst(b) + r), nil) == 0)
                                                r++;
@@ -2309,17 +2313,14 @@ BAT *
                                                           nil, 0, Tsize(bn));
                                        nils++;
                                }
-                               if (r == p) {
-                                       bunfastins_nocheck(bn, BUNlast(bn), 0,
-                                                          nil, 0, Tsize(bn));
-                                       nils++;
-                               } else {
-                                       // actual selection of quantile value for groups
-                                       v = BUNtail(bi, (oid)( BUNfirst(b) + (r 
+ p - 1)  * quantile));
-                                       bunfastins_nocheck(bn, BUNlast(bn), 0,
-                                                          v, 0, Tsize(bn));
-                                       nils += (*atomcmp)(v, nil) == 0;
-                               }
+                               qindex = BUNfirst(b) + (BUN) (r + (p-r-1) * 
quantile);
+                               // be a little paranoid about the index
+                               assert(qindex >= (BUNfirst(b) + r ));
+                               assert(qindex <  (BUNfirst(b) + p));
+                               v = BUNtail(bi, qindex);
+                               bunfastins_nocheck(bn, BUNlast(bn), 0, v, 0, 
Tsize(bn));
+                               nils += (*atomcmp)(v, nil) == 0;
+
                                r = p;
                                if (p < q)
                                        prev = grps[p];
@@ -2330,9 +2331,10 @@ BAT *
                                           nil, 0, Tsize(bn));
                }
                BATseqbase(bn, min);
-       } else {
-               // actual selection of quantile value
-               v = BUNtail(bi, (oid) (BUNfirst(b) + (BATcount(b) - 1)  * 
quantile));
+       } else { /* quantiles for entire BAT b, EZ */
+
+               BUN index = BUNfirst(b) + (BUN) ((BATcount(b) - 1)  * quantile);
+               v = BUNtail(bi, index);
                BUNappend(bn, v, FALSE);
                BATseqbase(bn, 0);
                nils += (*atomcmp)(v, nil) == 0;
diff --git a/monetdb5/modules/kernel/aggr.c b/monetdb5/modules/kernel/aggr.c
--- a/monetdb5/modules/kernel/aggr.c
+++ b/monetdb5/modules/kernel/aggr.c
@@ -39,11 +39,12 @@ AGGRgrouped(bat *retval1, bat *retval2, 
                        BAT *(*grpfunc1)(BAT *, BAT *, BAT *, BAT *, int, int, 
int),
                        gdk_return (*grpfunc2)(BAT **, BAT **, BAT *, BAT *, 
BAT *, BAT *, int, int, int),
                        BAT *(*quantilefunc)(BAT *, BAT *, BAT *, BAT *, int, 
double, int, int),
-                       double quantile,
+                       BAT *quantile,
                        int skip_nils,
                        const char *malfunc)
 {
        BAT *bn, *cnts = NULL, *t, *map;
+       double qvalue;
 
    /* one of grpfunc1, grpfunc2 and quantilefunc is non-NULL and the others are */
        assert((grpfunc1 != NULL && grpfunc2 == NULL && quantilefunc == NULL) ||
@@ -51,6 +52,7 @@ AGGRgrouped(bat *retval1, bat *retval2, 
                        (grpfunc1 == NULL && grpfunc2 == NULL && quantilefunc 
!= NULL) );
        /* if retval2 is non-NULL, we must have grpfunc2 */
        assert(retval2 == NULL || grpfunc2 != NULL);
+       assert(quantile == NULL || quantilefunc != NULL);
 
        if (b == NULL || g == NULL || e == NULL) {
                if (b)
@@ -115,8 +117,17 @@ AGGRgrouped(bat *retval1, bat *retval2, 
        }
        if (grpfunc1)
                bn = (*grpfunc1)(b, g, e, NULL, tp, skip_nils, 1);
-       if (quantilefunc)
-               bn = (*quantilefunc)(b, g, e, NULL, tp, quantile, skip_nils, 1);
+       if (quantilefunc) {
+               assert(BATcount(quantile)>0);
+               assert(quantile->ttype == TYPE_dbl);
+               qvalue = ((const double *)Tloc(quantile, 
BUNfirst(quantile)))[0];
+               if (qvalue <  0|| qvalue > 1) {
+                       char *s;
+                       s = createException(MAL, malfunc, "quantile value of %f 
is not in range [0,1]", qvalue);
+                       return s;
+               }
+               bn = (*quantilefunc)(b, g, e, NULL, tp, qvalue, skip_nils, 1);
+       }
        if (grpfunc2 && (*grpfunc2)(&bn, retval2 ? &cnts : NULL, b, g, e, NULL, 
tp, skip_nils, 1) == GDK_FAIL)
                bn = NULL;
        if (bn != NULL && (grpfunc1 == BATgroupmin || grpfunc1 == BATgroupmax)) 
{
@@ -609,16 +620,18 @@ AGGRmedian3(bat *retval, bat *bid, bat *
 
 
 // XXX: when are these functions called?
-aggr_export str AGGRquantile3(bat *retval, bat *bid, bat *gid, bat *eid, 
double *quantile);
+aggr_export str AGGRquantile3(bat *retval, bat *bid, bat *gid, bat *eid, bat 
*quantile);
 str
-AGGRquantile3(bat *retval, bat *bid, bat *gid, bat *eid, double *quantile)
+AGGRquantile3(bat *retval, bat *bid, bat *gid, bat *eid, bat *quantile)
 {
        // this is inlined from AGGRgrouped3 to avoid changing all the other functions for now
-       BAT *b, *g, *e;
+       BAT *b, *g, *e, *q;
        b = BATdescriptor(*bid);        /* [head,value] */
        g = BATdescriptor(*gid);        /* [head,gid] */
        e = BATdescriptor(*eid);        /* [gid,any] */
-       return AGGRgrouped(retval, NULL, b, g, e, TYPE_any, NULL, NULL, 
BATgroupquantile, *quantile, 0,  "aggr.quantile");
+       e = BATdescriptor(*eid);        /* [gid,any] */
+       q = BATdescriptor(*quantile);
+       return AGGRgrouped(retval, NULL, b, g, e, TYPE_any, NULL, NULL, 
BATgroupquantile, q, 0,  "aggr.quantile");
 }
 
 static str
@@ -627,10 +640,11 @@ AGGRsubgroupedExt(bat *retval1, bat *ret
                           BAT *(*grpfunc1)(BAT *, BAT *, BAT *, BAT *, int, 
int, int),
                           gdk_return (*grpfunc2)(BAT **, BAT **, BAT *, BAT *, 
BAT *, BAT *, int, int, int),
                           BAT *(*quantilefunc)(BAT *, BAT *, BAT *, BAT *, 
int, double, int, int),
-                          double quantile,
+                          bat *quantile,
                           const char *malfunc)
 {
-       BAT *b, *g, *e, *s, *bn, *cnts = NULL;
+       BAT *b, *g, *e, *s, *bn, *cnts, *q = NULL;
+       double qvalue;
 
    /* one of grpfunc1, grpfunc2 and quantilefunc is non-NULL and the others are */
        assert((grpfunc1 && grpfunc2 == NULL && quantilefunc == NULL) ||
@@ -643,6 +657,8 @@ AGGRsubgroupedExt(bat *retval1, bat *ret
        b = BATdescriptor(*bid);
        g = gid ? BATdescriptor(*gid) : NULL;
        e = eid ? BATdescriptor(*eid) : NULL;
+       q = quantile ? BATdescriptor(*quantile) : NULL;
+
        if (b == NULL || (gid != NULL && g == NULL) || (eid != NULL && e == 
NULL)) {
                if (b)
                        BBPreleaseref(b->batCacheid);
@@ -677,8 +693,17 @@ AGGRsubgroupedExt(bat *retval1, bat *ret
        }
        if (grpfunc1)
                bn = (*grpfunc1)(b, g, e, s, tp, skip_nils, abort_on_error);
-       if (quantilefunc)
-                       bn = (*quantilefunc)(b, g, e, s, tp, quantile, 
skip_nils, abort_on_error);
+       if (quantilefunc) {
+               assert(BATcount(q)>0);
+               assert(q->ttype == TYPE_dbl);
+               qvalue = ((const double *)Tloc(q, BUNfirst(q)))[0];
+               if (qvalue <  0|| qvalue > 1) {
+                       char *s;
+                       s = createException(MAL, malfunc, "quantile value of %f 
is not in range [0,1]", qvalue);
+                       return s;
+               }
+               bn = (*quantilefunc)(b, g, e, s, tp, qvalue, skip_nils, 
abort_on_error);
+       }
        if (grpfunc2 && (*grpfunc2)(&bn, retval2 ? &cnts : NULL, b, g, e, s, 
tp, skip_nils, abort_on_error) == GDK_FAIL)
                bn = NULL;
 
@@ -1204,26 +1229,26 @@ AGGRsubmediancand(bat *retval, bat *bid,
 }
 
 /* quantile functions, could make median functions obsolete completely */
-aggr_export str AGGRquantile(bat *retval, bat *bid, double *quantile, bit 
*skip_nils);
+aggr_export str AGGRquantile(bat *retval, bat *bid, bat *quantile, bit 
*skip_nils);
 str
-AGGRquantile(bat *retval, bat *bid, double *quantile, bit *skip_nils)
+AGGRquantile(bat *retval, bat *bid, bat *quantile, bit *skip_nils)
 {
        return AGGRsubgroupedExt(retval, NULL, bid, NULL, NULL, NULL, 
*skip_nils,
-                                                 0, TYPE_any, NULL, 
NULL,BATgroupquantile, *quantile ,"aggr.subquantile");
+                                                 0, TYPE_any, NULL, 
NULL,BATgroupquantile, quantile ,"aggr.subquantile");
 }
 
-aggr_export str AGGRsubquantile(bat *retval, bat *bid, bat *gid, bat *eid,  
double *quantile, bit *skip_nils);
+aggr_export str AGGRsubquantile(bat *retval, bat *bid,bat *quantile,bat *gid, 
bat *eid,   bit *skip_nils);
 str
-AGGRsubquantile(bat *retval, bat *bid, bat *gid, bat *eid, double *quantile, 
bit *skip_nils)
+AGGRsubquantile(bat *retval, bat *bid,bat *quantile,  bat *gid, bat *eid, bit 
*skip_nils)
 {
        return AGGRsubgroupedExt(retval, NULL, bid, gid, eid, NULL, *skip_nils,
-                                                 0, TYPE_any, NULL, NULL, 
BATgroupquantile, *quantile , "aggr.subquantile");
+                                                 0, TYPE_any, NULL, NULL, 
BATgroupquantile, quantile , "aggr.subquantile");
 }
 
-aggr_export str AGGRsubquantilecand(bat *retval, bat *bid, bat *gid, bat *eid, 
bat *sid, double *quantile, bit *skip_nils);
+aggr_export str AGGRsubquantilecand(bat *retval, bat *bid, bat *quantile, bat 
*gid, bat *eid, bat *sid,  bit *skip_nils);
 str
-AGGRsubquantilecand(bat *retval, bat *bid, bat *gid, bat *eid, bat *sid, 
double *quantile,  bit *skip_nils)
+AGGRsubquantilecand(bat *retval, bat *bid, bat *quantile, bat *gid, bat *eid, 
bat *sid,   bit *skip_nils)
 {
        return AGGRsubgroupedExt(retval, NULL, bid, gid, eid, sid, *skip_nils,
-                                                 0, TYPE_any, NULL, 
NULL,BATgroupquantile,*quantile, "aggr.subquantile");
+                                                 0, TYPE_any, NULL, 
NULL,BATgroupquantile, quantile, "aggr.subquantile");
 }
diff --git a/monetdb5/modules/kernel/aggr.mal b/monetdb5/modules/kernel/aggr.mal
--- a/monetdb5/modules/kernel/aggr.mal
+++ b/monetdb5/modules/kernel/aggr.mal
@@ -1456,24 +1456,24 @@ address AGGRsubmediancand
 comment "Grouped median aggregate with candidate list";
 
 
-command quantile(b:bat[:oid,:any_1],g:bat[:oid,:oid],e:bat[:oid,:any_2],q:dbl) 
:bat[:oid,:any_1]
+command 
quantile(b:bat[:oid,:any_1],g:bat[:oid,:oid],e:bat[:oid,:any_2],q:bat[:oid,:dbl])
 :bat[:oid,:any_1]
 address AGGRquantile3
 comment "Grouped quantile aggregate";
 
-function quantile(b:bat[:oid,:any_1],q:dbl) :any_1;
+function quantile(b:bat[:oid,:any_1],q:bat[:oid,:dbl]) :any_1;
        bn := subquantile(b, q, false);
        return algebra.fetch(bn, 0);
 end aggr.quantile;
 
-command subquantile(b:bat[:oid,:any_1],q:dbl,skip_nils:bit) :bat[:oid,:any_1]
+command subquantile(b:bat[:oid,:any_1],q:bat[:oid,:dbl],skip_nils:bit) 
:bat[:oid,:any_1]
 address AGGRquantile
 comment "Quantile aggregate";
 
-command 
subquantile(b:bat[:oid,:any_1],g:bat[:oid,:oid],e:bat[:oid,:any_2],q:dbl,skip_nils:bit)
 :bat[:oid,:any_1]
+command 
subquantile(b:bat[:oid,:any_1],q:bat[:oid,:dbl],g:bat[:oid,:oid],e:bat[:oid,:any_2],skip_nils:bit)
 :bat[:oid,:any_1]
 address AGGRsubquantile
 comment "Grouped quantile aggregate";
 
-command 
subquantile(b:bat[:oid,:any_1],g:bat[:oid,:oid],e:bat[:oid,:any_2],s:bat[:oid,:oid],q:dbl,skip_nils:bit)
 :bat[:oid,:any_1]
+command 
subquantile(b:bat[:oid,:any_1],q:bat[:oid,:dbl],g:bat[:oid,:oid],e:bat[:oid,:any_2],s:bat[:oid,:oid],skip_nils:bit)
 :bat[:oid,:any_1]
 address AGGRsubquantilecand
 comment "Grouped median quantile with candidate list";
 
diff --git a/monetdb5/modules/kernel/aggr.mal.sh 
b/monetdb5/modules/kernel/aggr.mal.sh
--- a/monetdb5/modules/kernel/aggr.mal.sh
+++ b/monetdb5/modules/kernel/aggr.mal.sh
@@ -320,24 +320,24 @@ address AGGRsubmediancand
 comment "Grouped median aggregate with candidate list";
 
 
-command quantile(b:bat[:oid,:any_1],g:bat[:oid,:oid],e:bat[:oid,:any_2],q:dbl) 
:bat[:oid,:any_1]
+command 
quantile(b:bat[:oid,:any_1],g:bat[:oid,:oid],e:bat[:oid,:any_2],q:bat[:oid,:dbl])
 :bat[:oid,:any_1]
 address AGGRquantile3
 comment "Grouped quantile aggregate";
 
-function quantile(b:bat[:oid,:any_1],q:dbl) :any_1;
+function quantile(b:bat[:oid,:any_1],q:bat[:oid,:dbl]) :any_1;
        bn := subquantile(b, q, false);
        return algebra.fetch(bn, 0);
 end aggr.quantile;
 
-command subquantile(b:bat[:oid,:any_1],q:dbl,skip_nils:bit) :bat[:oid,:any_1]
+command subquantile(b:bat[:oid,:any_1],q:bat[:oid,:dbl],skip_nils:bit) 
:bat[:oid,:any_1]
 address AGGRquantile
 comment "Quantile aggregate";
 
-command 
subquantile(b:bat[:oid,:any_1],g:bat[:oid,:oid],e:bat[:oid,:any_2],q:dbl,skip_nils:bit)
 :bat[:oid,:any_1]
+command 
subquantile(b:bat[:oid,:any_1],q:bat[:oid,:dbl],g:bat[:oid,:oid],e:bat[:oid,:any_2],skip_nils:bit)
 :bat[:oid,:any_1]
 address AGGRsubquantile
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to