Changeset: 433a8d810061 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=433a8d810061
Modified Files:
        monetdb5/modules/kernel/algebra.mx
        sql/backends/monet5/sql.mx
        sql/backends/monet5/sql_gencode.c
        sql/common/sql_types.c
        sql/common/sql_types.h
        sql/server/rel_bin.c
        sql/server/rel_exp.c
        sql/server/rel_exp.h
        sql/server/rel_select.c
        sql/server/sql_parser.y
        sql/server/sql_scan.c
        sql/server/sql_statement.c
        sql/server/sql_statement.h
        sql/test/leaks/Tests/check0.stable.out
        sql/test/leaks/Tests/check1.stable.out
        sql/test/leaks/Tests/check2.stable.out
        sql/test/leaks/Tests/check3.stable.out
        sql/test/leaks/Tests/check4.stable.out
        sql/test/leaks/Tests/check5.stable.out
        sql/test/mapi/Tests/php_monetdb.stable.out
Branch: default
Log Message:

implemented corr(elation) which required multi column aggregates


diffs (truncated from 1183 to 300 lines):

diff --git a/monetdb5/modules/kernel/algebra.mx 
b/monetdb5/modules/kernel/algebra.mx
--- a/monetdb5/modules/kernel/algebra.mx
+++ b/monetdb5/modules/kernel/algebra.mx
@@ -701,6 +701,7 @@ comment "Return the lowest tail value or
 command max(b:bat[:any_1,:any_2]):any_2 
 address ALGmaxany
 comment "Return the highest tail value or nil.";
+
 # @+ Type-Specific Sum, Prod, Max and Min
 # For X in @{ bte,sht,int,wrd,flt,dbl,lng @},  we generate the
 # aggregate functions using a macro.
@@ -2110,7 +2111,7 @@ ALGmaxany(ptr result, int *bid)
        BAT *b;
 
        if ((b = BATdescriptor(*bid)) == NULL) {
-               throw(MAL, "algebra.min", RUNTIME_OBJECT_MISSING);
+               throw(MAL, "algebra.max", RUNTIME_OBJECT_MISSING);
        }
        if (CMDmaxany(result, b) == GDK_SUCCEED) {
                BBPreleaseref(b->batCacheid);
@@ -2120,7 +2121,6 @@ ALGmaxany(ptr result, int *bid)
        throw(MAL, "algebra.max", GDK_EXCEPTION);
 }
 
-
 str
 ALGtopN(int *res, int *bid, lng *top)
 {
diff --git a/sql/backends/monet5/sql.mx b/sql/backends/monet5/sql.mx
--- a/sql/backends/monet5/sql.mx
+++ b/sql/backends/monet5/sql.mx
@@ -7067,3 +7067,113 @@ function aggr.median(a:bat[:oid,:any_1],
        sid := algebra.leftjoin(id, mv);
        return algebra.leftjoin(sid, a);
 end aggr.median;
+
+# assume nil's are removed
+# (sum (e1 * e2) - (sum(e1) * sum(e2)/count(e1))) / count(e1)
+
+@= analytic
+function aggr.covar(e1:bat[:oid,:@1], e2:bat[:oid,:@1]):@1;
+       e0:bat[:oid,:@1] := batcalc.*(e1,e2);
+       s0:@1 := aggr.sum(e0);
+       s1:@1 := aggr.sum(e1);
+       s2:@1 := aggr.sum(e2);
+       v2:@1 := calc.*(s1,s2);
+       c := aggr.count(e1);
+       n:@1 := calc.@1(c);
+       v3:@1 := calc./(v2,n);
+       v1:@1 := calc.-(s0,v3);
+       v:@1 := calc./(v1,n);
+       return v;
+end aggr.covar;
+
+function aggr.stddev(e:bat[:oid,:@1]):@1;
+       a:dbl := aggr.avg(e);
+       ed := batcalc.dbl(e);
+       ea := batcalc.-(ed,a);
+       ea2 := batcalc.*(ea,ea);
+       es := aggr.sum(ea2);
+       c := aggr.count(e);
+       v:dbl := calc./(es,c);
+       r := mmath.sqrt(v);
+       res:@1 := calc.@1(r);
+       return res;
+end aggr.stddev;
+
+function aggr.corr(e1:bat[:oid,:@1], e2:bat[:oid,:@1]):@1;
+       cv:@1 := aggr.covar(e1,e2);
+       sd1:@1 := aggr.stddev(e1);
+       sd2:@1 := aggr.stddev(e2);
+       sd := calc.*(sd1,sd2);
+       r := calc./(cv,sd);
+       res := calc.@1(r);
+       return res;
+end aggr.corr;
+
+function aggr.covar(e1:bat[:oid,:@1], e2:bat[:oid,:@1], g:bat[:oid,:oid], 
e:bat[:oid,:any_2]):bat[:oid,:@1];
+       e0:bat[:oid,:@1] := batcalc.*(e1,e2);
+       s0:bat[:oid,:@1] := aggr.sum(e0,g,e);
+       s1:bat[:oid,:@1] := aggr.sum(e1,g,e);
+       s2:bat[:oid,:@1] := aggr.sum(e2,g,e);
+       v2:bat[:oid,:@1] := batcalc.*(s1,s2);
+       c := aggr.count(e1,g,e);
+       n:bat[:oid,:@1] := batcalc.@1(c);
+       v3:bat[:oid,:@1] := batcalc./(v2,n);
+       v1:bat[:oid,:@1] := batcalc.-(s0,v3);
+       v:bat[:oid,:@1] := batcalc./(v1,n);
+       return v;
+end aggr.covar;
+
+function aggr.stddev(v:bat[:oid,:@1], g:bat[:oid,:oid], 
e:bat[:oid,:any_2]):bat[:oid,:@1];
+       a:bat[:oid,:dbl] := aggr.avg(v,g,e);
+       ed := batcalc.dbl(v);
+       ea := algebra.leftjoin(g,a);
+       va := batcalc.-(ed,ea);
+       ea2 := batcalc.*(va,va);
+       es := aggr.sum(ea2,g,e);
+       c := aggr.count(v,g,e);
+       cr := batcalc.dbl(c);
+       ev:bat[:oid,:dbl] := batcalc./(es,cr);
+       r := batmmath.sqrt(ev);
+       res:bat[:oid,:@1] := batcalc.@1(r);
+       return res;
+end aggr.stddev;
+
+# var_pop(expr) = (sum(expr) - sum(expr)/count(expr)) /count(expr)
+# stddev_pop(expr) = sqrt(var_pop(expr))
+function aggr.stddev_pop(v:bat[:oid,:@1], g:bat[:oid,:oid], 
e:bat[:oid,:any_2]):bat[:oid,:@1];
+       v2 := batcalc.*(v,v);
+       a1 := aggr.sum(v2,g,e);
+       a := aggr.sum(v,g,e);
+       a2 := batcalc.*(a,a);
+       c := aggr.count(v,g,e);
+       ad1 := batcalc.dbl(a1);
+       ad2 := batcalc.dbl(a2);
+       cd := batcalc.dbl(c);
+       sc := batcalc./(ad2,cd);
+       ssc := batcalc.-(ad1,sc);
+       sc2 := batcalc./(ssc,cd);
+       r := batmmath.sqrt(sc2);
+       res:bat[:oid,:@1] := batcalc.@1(r);
+       return res;
+end aggr.stddev_pop;
+
+function aggr.corr(e1:bat[:oid,:@1], e2:bat[:oid,:@1], 
g:bat[:oid,:oid],e:bat[:oid,:any_2]):bat[:oid,:@1];
+       cv:bat[:oid,:@1] := aggr.covar(e1,e2,g,e);
+       sd1:bat[:oid,:@1] := aggr.stddev_pop(e1,g,e);
+       sd2:bat[:oid,:@1] := aggr.stddev_pop(e2,g,e);
+       sd := batcalc.*(sd1,sd2);
+       sdn := batcalc.==(sd,0:@1);
+       sds := batcalc.ifthenelse(sdn,1:@1,sd);
+       r := batcalc./(cv,sds);
+       res := batcalc.@1(r);
+       return res;
+end aggr.corr;
+
+@mal
+@:analytic(bte)@
+@:analytic(sht)@
+@:analytic(int)@
+@:analytic(lng)@
+@:analytic(flt)@
+@:analytic(dbl)@
+
diff --git a/sql/backends/monet5/sql_gencode.c 
b/sql/backends/monet5/sql_gencode.c
--- a/sql/backends/monet5/sql_gencode.c
+++ b/sql/backends/monet5/sql_gencode.c
@@ -1705,7 +1705,7 @@ _dumpstmt(backend *sql, MalBlkPtr mb, st
                        s->rewritten = (void*)q;
                } break;
                case st_aggr:{
-                       int l = _dumpstmt(sql, mb, s->op1);
+                       int g = 0, e = 0, l = _dumpstmt(sql, mb, s->op1); /* 
maybe a list */
                        char *mod, *aggrfunc;
                        int restype = s->op4.aggrval->res.type->localtype;
                        int output_type_needed = 0;
@@ -1714,33 +1714,35 @@ _dumpstmt(backend *sql, MalBlkPtr mb, st
                        mod = s->op4.aggrval->aggr->mod;
                        aggrfunc = s->op4.aggrval->aggr->imp;
                        if (strcmp(aggrfunc, "sum") == 0 ||
-                                       strcmp(aggrfunc, "prod") == 0)
+                           strcmp(aggrfunc, "prod") == 0)
                                output_type_needed = 1;
 
-                       if (s->flag) {
-                               int l2 = _dumpstmt(sql, mb, s->op2);
-
-                               q = newStmt(mb, mod, aggrfunc);
-                               q = pushArgument(mb, q, l);
-                               q = pushArgument(mb, q, l2);
-
-                       } else if (s->op3) {
-                               int g = _dumpstmt(sql, mb, s->op2);
-                               int e = _dumpstmt(sql, mb, s->op3);
+                       if (s->op3) {
+                               g = _dumpstmt(sql, mb, s->op2);
+                               e = _dumpstmt(sql, mb, s->op3);
 
                                q = newStmt(mb, mod, aggrfunc);
                                setVarType(mb, getArg(q, 0), 
newBatType(TYPE_any, restype));
                                setVarUDFtype(mb, getArg(q, 0));
-                               q = pushArgument(mb, q, l);
-                               q = pushArgument(mb, q, g);
-                               q = pushArgument(mb, q, e);
                        } else {
                                q = newStmt(mb, mod, aggrfunc);
                                if (output_type_needed){
                                        setVarType(mb, getArg(q, 0), restype);
                                        setVarUDFtype(mb, getArg(q, 0));
                                }
+                       }
+                       if (s->op1->type != st_list) {
                                q = pushArgument(mb, q, l);
+                       } else {
+                               for (n = s->op1->op4.lval->h; n; n = n->next) {
+                                       stmt *op = n->data;
+
+                                       q = pushArgument(mb, q, op->nr);
+                               }
+                       }
+                       if (g) {
+                               q = pushArgument(mb, q, g);
+                               q = pushArgument(mb, q, e);
                        }
                        s->nr = getDestVar(q);
                }
diff --git a/sql/common/sql_types.c b/sql/common/sql_types.c
--- a/sql/common/sql_types.c
+++ b/sql/common/sql_types.c
@@ -398,6 +398,14 @@ subfunc_cmp( sql_subfunc *f1, sql_subfun
        return -1;
 }
 
+static int
+arg_subtype_cmp(sql_arg *a, sql_subtype *t)
+{
+       if (a->type.type->eclass == EC_ANY)
+               return 0;
+       return (is_subtype(t, &a->type )?0:-1);
+}
+
 sql_subaggr *
 sql_bind_aggr(sql_allocator *sa, sql_schema *s, char *sqlaname, sql_subtype 
*type)
 {
@@ -475,6 +483,76 @@ sql_bind_aggr(sql_allocator *sa, sql_sch
 }
 
 sql_subaggr *
+sql_bind_aggr_(sql_allocator *sa, sql_schema *s, char *sqlaname, list *ops)
+{
+       node *n = aggrs->h;
+       sql_subtype *type = NULL;
+
+       if (ops->h)
+               type = ops->h->data;
+
+       while (n) {
+               sql_func *a = n->data;
+
+               if (strcmp(a->base.name, sqlaname) == 0 &&  
+                   list_cmp(a->ops, ops, (fcmp) &arg_subtype_cmp) == 0) { 
+                       int scale = 0;
+                       int digits = 0;
+                       sql_subaggr *ares = SA_ZNEW(sa, sql_subaggr);
+
+                       ares->aggr = a;
+                       digits = a->res.digits;
+                       scale = a->res.scale;
+                       /* same scale as the input */
+                       if (type) {
+                               digits = type->digits;
+                               scale = type->scale;
+                       }
+                       /* same type as the input */
+                       if (a->res.type->eclass == EC_ANY) 
+                               sql_init_subtype(&ares->res, type->type, 
digits, scale);
+                       else
+                               sql_init_subtype(&ares->res, a->res.type, 
digits, scale);
+                       return ares;
+               }
+               n = n->next;
+       }
+       if (s) {
+               node *n;
+
+               if (s->funcs.set) for (n=s->funcs.set->h; n; n = n->next) {
+                       sql_func *a = n->data;
+
+                       if ((!IS_AGGR(a) || !a->res.type))
+                               continue;
+
+                       if (strcmp(a->base.name, sqlaname) == 0 && 
+                           list_cmp(a->ops, ops, (fcmp) &arg_subtype_cmp) == 
0) { 
+                               int scale = 0;
+                               int digits = 0;
+                               sql_subaggr *ares = SA_ZNEW(sa, sql_subaggr);
+               
+                               ares->aggr = a;
+                               digits = a->res.digits;
+                               scale = a->res.scale;
+                               /* same scale as the input */
+                               if (type) {
+                                       digits = type->digits;
+                                       scale = type->scale;
+                               }
+                               /* same type as the input */
+                               if (a->res.type->eclass == EC_ANY) 
+                                       sql_init_subtype(&ares->res, 
type->type, digits, scale);
+                               else
+                                       sql_init_subtype(&ares->res, 
a->res.type, digits, scale);
+                               return ares;
+                       }
+               }
+       }
+       return NULL;
+}
+
+sql_subaggr *
 sql_find_aggr(sql_allocator *sa, sql_schema *s, char *sqlaname)
_______________________________________________
Checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to