Changeset: 433a8d810061 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=433a8d810061
Modified Files:
monetdb5/modules/kernel/algebra.mx
sql/backends/monet5/sql.mx
sql/backends/monet5/sql_gencode.c
sql/common/sql_types.c
sql/common/sql_types.h
sql/server/rel_bin.c
sql/server/rel_exp.c
sql/server/rel_exp.h
sql/server/rel_select.c
sql/server/sql_parser.y
sql/server/sql_scan.c
sql/server/sql_statement.c
sql/server/sql_statement.h
sql/test/leaks/Tests/check0.stable.out
sql/test/leaks/Tests/check1.stable.out
sql/test/leaks/Tests/check2.stable.out
sql/test/leaks/Tests/check3.stable.out
sql/test/leaks/Tests/check4.stable.out
sql/test/leaks/Tests/check5.stable.out
sql/test/mapi/Tests/php_monetdb.stable.out
Branch: default
Log Message:
implemented corr(elation) which required multi column aggregates
diffs (truncated from 1183 to 300 lines):
diff --git a/monetdb5/modules/kernel/algebra.mx
b/monetdb5/modules/kernel/algebra.mx
--- a/monetdb5/modules/kernel/algebra.mx
+++ b/monetdb5/modules/kernel/algebra.mx
@@ -701,6 +701,7 @@ comment "Return the lowest tail value or
command max(b:bat[:any_1,:any_2]):any_2
address ALGmaxany
comment "Return the highest tail value or nil.";
+
# @+ Type-Specific Sum, Prod, Max and Min
# For X in @{ bte,sht,int,wrd,flt,dbl,lng @}, we generate the
# aggregate functions using a macro.
@@ -2110,7 +2111,7 @@ ALGmaxany(ptr result, int *bid)
BAT *b;
if ((b = BATdescriptor(*bid)) == NULL) {
- throw(MAL, "algebra.min", RUNTIME_OBJECT_MISSING);
+ throw(MAL, "algebra.max", RUNTIME_OBJECT_MISSING);
}
if (CMDmaxany(result, b) == GDK_SUCCEED) {
BBPreleaseref(b->batCacheid);
@@ -2120,7 +2121,6 @@ ALGmaxany(ptr result, int *bid)
throw(MAL, "algebra.max", GDK_EXCEPTION);
}
-
str
ALGtopN(int *res, int *bid, lng *top)
{
diff --git a/sql/backends/monet5/sql.mx b/sql/backends/monet5/sql.mx
--- a/sql/backends/monet5/sql.mx
+++ b/sql/backends/monet5/sql.mx
@@ -7067,3 +7067,113 @@ function aggr.median(a:bat[:oid,:any_1],
sid := algebra.leftjoin(id, mv);
return algebra.leftjoin(sid, a);
end aggr.median;
+
+# assume nil's are removed
+# (sum (e1 * e2) - (sum(e1) * sum(e2)/count(e1))) / count(e1)
+
+@= analytic
+function aggr.covar(e1:bat[:oid,:@1], e2:bat[:oid,:@1]):@1;
+ e0:bat[:oid,:@1] := batcalc.*(e1,e2);
+ s0:@1 := aggr.sum(e0);
+ s1:@1 := aggr.sum(e1);
+ s2:@1 := aggr.sum(e2);
+ v2:@1 := calc.*(s1,s2);
+ c := aggr.count(e1);
+ n:@1 := calc.@1(c);
+ v3:@1 := calc./(v2,n);
+ v1:@1 := calc.-(s0,v3);
+ v:@1 := calc./(v1,n);
+ return v;
+end aggr.covar;
+
+function aggr.stddev(e:bat[:oid,:@1]):@1;
+ a:dbl := aggr.avg(e);
+ ed := batcalc.dbl(e);
+ ea := batcalc.-(ed,a);
+ ea2 := batcalc.*(ea,ea);
+ es := aggr.sum(ea2);
+ c := aggr.count(e);
+ v:dbl := calc./(es,c);
+ r := mmath.sqrt(v);
+ res:@1 := calc.@1(r);
+ return res;
+end aggr.stddev;
+
+function aggr.corr(e1:bat[:oid,:@1], e2:bat[:oid,:@1]):@1;
+ cv:@1 := aggr.covar(e1,e2);
+ sd1:@1 := aggr.stddev(e1);
+ sd2:@1 := aggr.stddev(e2);
+ sd := calc.*(sd1,sd2);
+ r := calc./(cv,sd);
+ res := calc.@1(r);
+ return res;
+end aggr.corr;
+
+function aggr.covar(e1:bat[:oid,:@1], e2:bat[:oid,:@1], g:bat[:oid,:oid],
e:bat[:oid,:any_2]):bat[:oid,:@1];
+ e0:bat[:oid,:@1] := batcalc.*(e1,e2);
+ s0:bat[:oid,:@1] := aggr.sum(e0,g,e);
+ s1:bat[:oid,:@1] := aggr.sum(e1,g,e);
+ s2:bat[:oid,:@1] := aggr.sum(e2,g,e);
+ v2:bat[:oid,:@1] := batcalc.*(s1,s2);
+ c := aggr.count(e1,g,e);
+ n:bat[:oid,:@1] := batcalc.@1(c);
+ v3:bat[:oid,:@1] := batcalc./(v2,n);
+ v1:bat[:oid,:@1] := batcalc.-(s0,v3);
+ v:bat[:oid,:@1] := batcalc./(v1,n);
+ return v;
+end aggr.covar;
+
+function aggr.stddev(v:bat[:oid,:@1], g:bat[:oid,:oid],
e:bat[:oid,:any_2]):bat[:oid,:@1];
+ a:bat[:oid,:dbl] := aggr.avg(v,g,e);
+ ed := batcalc.dbl(v);
+ ea := algebra.leftjoin(g,a);
+ va := batcalc.-(ed,ea);
+ ea2 := batcalc.*(va,va);
+ es := aggr.sum(ea2,g,e);
+ c := aggr.count(v,g,e);
+ cr := batcalc.dbl(c);
+ ev:bat[:oid,:dbl] := batcalc./(es,cr);
+ r := batmmath.sqrt(ev);
+ res:bat[:oid,:@1] := batcalc.@1(r);
+ return res;
+end aggr.stddev;
+
+# var_pop(expr) = (sum(expr) - sum(expr)/count(expr)) /count(expr)
+# stddev_pop(expr) = sqrt(var_pop(expr))
+function aggr.stddev_pop(v:bat[:oid,:@1], g:bat[:oid,:oid],
e:bat[:oid,:any_2]):bat[:oid,:@1];
+ v2 := batcalc.*(v,v);
+ a1 := aggr.sum(v2,g,e);
+ a := aggr.sum(v,g,e);
+ a2 := batcalc.*(a,a);
+ c := aggr.count(v,g,e);
+ ad1 := batcalc.dbl(a1);
+ ad2 := batcalc.dbl(a2);
+ cd := batcalc.dbl(c);
+ sc := batcalc./(ad2,cd);
+ ssc := batcalc.-(ad1,sc);
+ sc2 := batcalc./(ssc,cd);
+ r := batmmath.sqrt(sc2);
+ res:bat[:oid,:@1] := batcalc.@1(r);
+ return res;
+end aggr.stddev_pop;
+
+function aggr.corr(e1:bat[:oid,:@1], e2:bat[:oid,:@1],
g:bat[:oid,:oid],e:bat[:oid,:any_2]):bat[:oid,:@1];
+ cv:bat[:oid,:@1] := aggr.covar(e1,e2,g,e);
+ sd1:bat[:oid,:@1] := aggr.stddev_pop(e1,g,e);
+ sd2:bat[:oid,:@1] := aggr.stddev_pop(e2,g,e);
+ sd := batcalc.*(sd1,sd2);
+ sdn := batcalc.==(sd,0:@1);
+ sds := batcalc.ifthenelse(sdn,1:@1,sd);
+ r := batcalc./(cv,sds);
+ res := batcalc.@1(r);
+ return res;
+end aggr.corr;
+
+@mal
+@:analytic(bte)@
+@:analytic(sht)@
+@:analytic(int)@
+@:analytic(lng)@
+@:analytic(flt)@
+@:analytic(dbl)@
+
diff --git a/sql/backends/monet5/sql_gencode.c
b/sql/backends/monet5/sql_gencode.c
--- a/sql/backends/monet5/sql_gencode.c
+++ b/sql/backends/monet5/sql_gencode.c
@@ -1705,7 +1705,7 @@ _dumpstmt(backend *sql, MalBlkPtr mb, st
s->rewritten = (void*)q;
} break;
case st_aggr:{
- int l = _dumpstmt(sql, mb, s->op1);
+ int g = 0, e = 0, l = _dumpstmt(sql, mb, s->op1); /*
maybe a list */
char *mod, *aggrfunc;
int restype = s->op4.aggrval->res.type->localtype;
int output_type_needed = 0;
@@ -1714,33 +1714,35 @@ _dumpstmt(backend *sql, MalBlkPtr mb, st
mod = s->op4.aggrval->aggr->mod;
aggrfunc = s->op4.aggrval->aggr->imp;
if (strcmp(aggrfunc, "sum") == 0 ||
- strcmp(aggrfunc, "prod") == 0)
+ strcmp(aggrfunc, "prod") == 0)
output_type_needed = 1;
- if (s->flag) {
- int l2 = _dumpstmt(sql, mb, s->op2);
-
- q = newStmt(mb, mod, aggrfunc);
- q = pushArgument(mb, q, l);
- q = pushArgument(mb, q, l2);
-
- } else if (s->op3) {
- int g = _dumpstmt(sql, mb, s->op2);
- int e = _dumpstmt(sql, mb, s->op3);
+ if (s->op3) {
+ g = _dumpstmt(sql, mb, s->op2);
+ e = _dumpstmt(sql, mb, s->op3);
q = newStmt(mb, mod, aggrfunc);
setVarType(mb, getArg(q, 0),
newBatType(TYPE_any, restype));
setVarUDFtype(mb, getArg(q, 0));
- q = pushArgument(mb, q, l);
- q = pushArgument(mb, q, g);
- q = pushArgument(mb, q, e);
} else {
q = newStmt(mb, mod, aggrfunc);
if (output_type_needed){
setVarType(mb, getArg(q, 0), restype);
setVarUDFtype(mb, getArg(q, 0));
}
+ }
+ if (s->op1->type != st_list) {
q = pushArgument(mb, q, l);
+ } else {
+ for (n = s->op1->op4.lval->h; n; n = n->next) {
+ stmt *op = n->data;
+
+ q = pushArgument(mb, q, op->nr);
+ }
+ }
+ if (g) {
+ q = pushArgument(mb, q, g);
+ q = pushArgument(mb, q, e);
}
s->nr = getDestVar(q);
}
diff --git a/sql/common/sql_types.c b/sql/common/sql_types.c
--- a/sql/common/sql_types.c
+++ b/sql/common/sql_types.c
@@ -398,6 +398,14 @@ subfunc_cmp( sql_subfunc *f1, sql_subfun
return -1;
}
+static int
+arg_subtype_cmp(sql_arg *a, sql_subtype *t)
+{
+ if (a->type.type->eclass == EC_ANY)
+ return 0;
+ return (is_subtype(t, &a->type )?0:-1);
+}
+
sql_subaggr *
sql_bind_aggr(sql_allocator *sa, sql_schema *s, char *sqlaname, sql_subtype
*type)
{
@@ -475,6 +483,76 @@ sql_bind_aggr(sql_allocator *sa, sql_sch
}
sql_subaggr *
+sql_bind_aggr_(sql_allocator *sa, sql_schema *s, char *sqlaname, list *ops)
+{
+ node *n = aggrs->h;
+ sql_subtype *type = NULL;
+
+ if (ops->h)
+ type = ops->h->data;
+
+ while (n) {
+ sql_func *a = n->data;
+
+ if (strcmp(a->base.name, sqlaname) == 0 &&
+ list_cmp(a->ops, ops, (fcmp) &arg_subtype_cmp) == 0) {
+ int scale = 0;
+ int digits = 0;
+ sql_subaggr *ares = SA_ZNEW(sa, sql_subaggr);
+
+ ares->aggr = a;
+ digits = a->res.digits;
+ scale = a->res.scale;
+ /* same scale as the input */
+ if (type) {
+ digits = type->digits;
+ scale = type->scale;
+ }
+ /* same type as the input */
+ if (a->res.type->eclass == EC_ANY)
+ sql_init_subtype(&ares->res, type->type,
digits, scale);
+ else
+ sql_init_subtype(&ares->res, a->res.type,
digits, scale);
+ return ares;
+ }
+ n = n->next;
+ }
+ if (s) {
+ node *n;
+
+ if (s->funcs.set) for (n=s->funcs.set->h; n; n = n->next) {
+ sql_func *a = n->data;
+
+ if ((!IS_AGGR(a) || !a->res.type))
+ continue;
+
+ if (strcmp(a->base.name, sqlaname) == 0 &&
+ list_cmp(a->ops, ops, (fcmp) &arg_subtype_cmp) ==
0) {
+ int scale = 0;
+ int digits = 0;
+ sql_subaggr *ares = SA_ZNEW(sa, sql_subaggr);
+
+ ares->aggr = a;
+ digits = a->res.digits;
+ scale = a->res.scale;
+ /* same scale as the input */
+ if (type) {
+ digits = type->digits;
+ scale = type->scale;
+ }
+ /* same type as the input */
+ if (a->res.type->eclass == EC_ANY)
+ sql_init_subtype(&ares->res,
type->type, digits, scale);
+ else
+ sql_init_subtype(&ares->res,
a->res.type, digits, scale);
+ return ares;
+ }
+ }
+ }
+ return NULL;
+}
+
+sql_subaggr *
sql_find_aggr(sql_allocator *sa, sql_schema *s, char *sqlaname)
_______________________________________________
Checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list