Changeset: d19e20576c8b for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/d19e20576c8b
Modified Files:
        sql/server/rel_optimizer.c
        sql/server/rel_rel.c
        sql/server/rel_rel.h
        sql/server/rel_unnest.c
        sql/test/miscellaneous/Tests/simple_plans.test
Branch: default
Log Message:

Be less restrict pushing down selections under projections.

Push down selections before projections requiring distinct get rewritten into 
groupings. Small cleanup

If I get a performance regression I will backout this.


diffs (202 lines):

diff --git a/sql/server/rel_optimizer.c b/sql/server/rel_optimizer.c
--- a/sql/server/rel_optimizer.c
+++ b/sql/server/rel_optimizer.c
@@ -25,7 +25,8 @@
 typedef struct global_props {
        int cnt[ddl_maxops];
        uint8_t
-               has_mergetable:1;
+               has_mergetable:1,
+               needs_distinct:1;
 } global_props;
 
 static int
@@ -337,6 +338,7 @@ rel_properties(mvc *sql, global_props *g
        case op_topn:
        case op_sample:
        case op_truncate:
+               gp->needs_distinct |= need_distinct(rel);
                if (rel->l)
                        rel_properties(sql, gp, rel->l);
                break;
@@ -2593,7 +2595,7 @@ is_fk_column_of_pk(mvc *sql, sql_rel *re
        return NULL;
 }
 
-static inline sql_rel *
+static sql_rel *
 rel_distinct_project2groupby(visitor *v, sql_rel *rel)
 {
        sql_rel *l = rel->l;
@@ -2894,7 +2896,7 @@ rel_merge_projects(visitor *v, sql_rel *
            prj && prj->op == op_project && !(rel_is_ref(prj)) && !prj->r) {
                int all = 1;
 
-               if (project_unsafe(rel,0,0) || project_unsafe(prj,0,0) || 
exps_share_expensive_exp(rel->exps, prj->exps))
+               if (project_unsafe(rel,0) || project_unsafe(prj,0) || 
exps_share_expensive_exp(rel->exps, prj->exps))
                        return rel;
 
                /* here we need to fix aliases */
@@ -3396,8 +3398,8 @@ rel_merge_union(visitor *v, sql_rel *rel
        sql_rel *ref = NULL;
 
        if (is_union(rel->op) &&
-           l && is_project(l->op) && !project_unsafe(l,0,0) &&
-           r && is_project(r->op) && !project_unsafe(r,0,0) &&
+           l && is_project(l->op) && !project_unsafe(l,0) &&
+           r && is_project(r->op) && !project_unsafe(r,0) &&
            (ref = rel_find_ref(l)) != NULL && ref == rel_find_ref(r)) {
                /* Find selects and try to merge */
                sql_rel *ls = rel_find_select(l);
@@ -4686,8 +4688,8 @@ rel_push_select_down(visitor *v, sql_rel
 
        if (is_select(rel->op) && r && r->op == op_project && !rel_is_ref(r) && 
!is_single(r)){
                sql_rel *pl = r->l;
-               /* we cannot push through rank (row_number etc) functions or 
projects with distinct */
-               if (pl && !project_unsafe(r, 1, 1)) {
+               /* we cannot push through window functions (for safety I 
disabled projects over DDL too) */
+               if (pl && pl->op != op_ddl && !exps_have_unsafe(r->exps, 1)) {
                        /* introduce selects under the project (if needed) */
                        set_processed(pl);
                        for (n = exps->h; n;) {
@@ -5552,7 +5554,7 @@ rel_push_project_down_union(visitor *v, 
                sql_rel *ul = u->l;
                sql_rel *ur = u->r;
 
-               if (!u || !is_union(u->op) || need_distinct(u) || !u->exps || 
rel_is_ref(u) || project_unsafe(rel,0,0))
+               if (!u || !is_union(u->op) || need_distinct(u) || !u->exps || 
rel_is_ref(u) || project_unsafe(rel,0))
                        return rel;
                /* don't push project down union of single values */
                if ((is_project(ul->op) && !ul->l) || (is_project(ur->op) && 
!ur->l))
@@ -6360,8 +6362,8 @@ rel_push_project_up(visitor *v, sql_rel 
                   (is_left(rel->op) && (rel->flag&MERGE_LEFT) /* can't push 
projections above merge statments left joins */) ||
                   (is_select(rel->op) && l->op != op_project) ||
                   (is_join(rel->op) && ((l->op != op_project && r->op != 
op_project) || is_topn(r->op) || is_sample(r->op))) ||
-                 ((l->op == op_project && (!l->l || l->r || 
project_unsafe(l,is_select(rel->op),0))) ||
-                  (is_join(rel->op) && (r->op == op_project && (!r->l || r->r 
|| project_unsafe(r,0,0))))))
+                 ((l->op == op_project && (!l->l || l->r || 
project_unsafe(l,is_select(rel->op)))) ||
+                  (is_join(rel->op) && (r->op == op_project && (!r->l || r->r 
|| project_unsafe(r,0))))))
                        return rel;
 
                if (l->op == op_project && l->l) {
@@ -6687,7 +6689,7 @@ rel_exps_mark_used(sql_allocator *sa, sq
        }
        /* for count/rank we need atleast one column */
        if (!nr && subrel && (is_project(subrel->op) || is_base(subrel->op)) && 
!list_empty(subrel->exps) &&
-               (is_simple_project(rel->op) && project_unsafe(rel, 0, 0))) {
+               (is_simple_project(rel->op) && project_unsafe(rel, 0))) {
                sql_exp *e = subrel->exps->h->data;
                e->used = 1;
        }
@@ -9504,7 +9506,6 @@ static sql_rel *
 rel_first_level_optimizations(visitor *v, sql_rel *rel)
 {
        rel = rel_distinct_aggregate_on_unique_values(v, rel);
-       rel = rel_distinct_project2groupby(v, rel);
        /* rel_simplify_math optimizer requires to clear the hash, so make sure 
it runs last in this batch */
        if (v->value_based_opt)
                rel = rel_simplify_math(v, rel);
@@ -9798,6 +9799,9 @@ optimize_rel(mvc *sql, sql_rel *rel, int
        if (gp.cnt[op_topn] || gp.cnt[op_sample])
                rel = rel_visitor_topdown(&v, rel, 
&rel_push_topn_and_sample_down);
 
+       if (level <= 0 && gp.needs_distinct && gp.cnt[op_project])
+               rel = rel_visitor_bottomup(&v, rel, 
&rel_distinct_project2groupby);
+
        if (gp.has_mergetable)
                rel = rel_visitor_topdown(&v, rel, &rel_merge_table_rewrite);
 
diff --git a/sql/server/rel_rel.c b/sql/server/rel_rel.c
--- a/sql/server/rel_rel.c
+++ b/sql/server/rel_rel.c
@@ -25,25 +25,24 @@ rel_set_exps(sql_rel *rel, list *exps)
 
 /* some projections results are order dependend (row_number etc) */
 int
-project_unsafe(sql_rel *rel, int allow_identity, int allow_self_reference)
+project_unsafe(sql_rel *rel, int allow_identity)
 {
        sql_rel *sub = rel->l;
-       node *n;
 
        if (need_distinct(rel) || rel->r /* order by */)
                return 1;
-       if (!rel->exps)
+       if (list_empty(rel->exps))
                return 0;
        /* projects without sub and projects around ddl's cannot be changed */
-       if (!sub || (sub && sub->op == op_ddl))
+       if (!sub || sub->op == op_ddl)
                return 1;
-       for(n = rel->exps->h; n; n = n->next) {
+       for(node *n = rel->exps->h; n; n = n->next) {
                sql_exp *e = n->data, *ne;
 
                /* aggr func in project ! */
                if (exp_unsafe(e, allow_identity))
                        return 1;
-               if (!allow_self_reference && (ne = rel_find_exp(rel, e)) && ne 
!= e)
+               if ((ne = rel_find_exp(rel, e)) && ne != e)
                        return 1; /* no self referencing */
        }
        return 0;
diff --git a/sql/server/rel_rel.h b/sql/server/rel_rel.h
--- a/sql/server/rel_rel.h
+++ b/sql/server/rel_rel.h
@@ -52,7 +52,7 @@
 #define is_sql_merge(X)        ((X & sql_merge) == sql_merge)
 
 extern void rel_set_exps(sql_rel *rel, list *exps);
-extern int project_unsafe(sql_rel *rel, int allow_identity, int 
allow_self_reference);
+extern int project_unsafe(sql_rel *rel, int allow_identity);
 extern const char *rel_name( sql_rel *r );
 extern sql_rel *rel_distinct(sql_rel *l);
 
diff --git a/sql/server/rel_unnest.c b/sql/server/rel_unnest.c
--- a/sql/server/rel_unnest.c
+++ b/sql/server/rel_unnest.c
@@ -3296,7 +3296,7 @@ rewrite_remove_xp_project(visitor *v, sq
        if (rel->op == op_join && list_empty(rel->exps)) {
                sql_rel *r = rel->r;
 
-               if (is_simple_project(r->op) && r->l && !project_unsafe(r, 1, 
0)) {
+               if (is_simple_project(r->op) && r->l && !project_unsafe(r, 1)) {
                        sql_rel *rl = r->l;
 
                        if (is_simple_project(rl->op) && !rl->l && 
list_length(rl->exps) == 1) {
diff --git a/sql/test/miscellaneous/Tests/simple_plans.test 
b/sql/test/miscellaneous/Tests/simple_plans.test
--- a/sql/test/miscellaneous/Tests/simple_plans.test
+++ b/sql/test/miscellaneous/Tests/simple_plans.test
@@ -561,6 +561,32 @@ project (
 | ) [ "x"."x", "x"."y" ]
 ) [ "x"."x", "x"."y" ]
 
+query T nosort
+plan select x, y from (select a as b, a / 3 as c from t1 union distinct select 
a as z, a / 5 as d from t2) x(x,y) where x > 2
+----
+project (
+| distinct union (
+| | project (
+| | | group by (
+| | | | project (
+| | | | | select (
+| | | | | | table("sys"."t1") [ "t1"."a" ] COUNT 
+| | | | | ) [ "t1"."a" > int "2" ]
+| | | | ) [ "t1"."a" as "b", "sys"."sql_div"("t1"."a", tinyint "3") as "c" ]
+| | | ) [ "b", "c" ] [ "b", "c" ]
+| | ) [ "b" as "x"."x", "c" as "x"."y" ],
+| | project (
+| | | group by (
+| | | | project (
+| | | | | select (
+| | | | | | table("sys"."t2") [ "t2"."a" ] COUNT 
+| | | | | ) [ "t2"."a" > int "2" ]
+| | | | ) [ "t2"."a" as "z", "sys"."sql_div"("t2"."a", tinyint "5") as "d" ]
+| | | ) [ "z", "d" ] [ "z", "d" ]
+| | ) [ "z" as "x"."x", "d" as "x"."y" ]
+| ) [ "x"."x", "x"."y" ]
+) [ "x"."x", "x"."y" ]
+
 statement ok
 rollback
 
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to