Changeset: 4ea9c4d4ff60 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=4ea9c4d4ff60
Modified Files:
        sql/server/rel_statistics.c
Branch: properties
Log Message:

Reduce number of iterations in the AST, also prune intersect relations if 
that's the case


diffs (truncated from 402 to 300 lines):

diff --git a/sql/server/rel_statistics.c b/sql/server/rel_statistics.c
--- a/sql/server/rel_statistics.c
+++ b/sql/server/rel_statistics.c
@@ -44,6 +44,9 @@ rel_propagate_column_ref_statistics(mvc 
                case op_semi: {
                        bool found_without_semantics = false, found_left = 
false, found_right = false;
 
+                       if ((is_innerjoin(rel->op) || is_select(rel->op)) && 
list_length(rel->exps) == 1 && exp_is_false(rel->exps->h->data))
+                               return NULL; /* nothing will pass, skip */
+
                        /* propagate from the bottom first */
                        if (rel_propagate_column_ref_statistics(sql, rel->l, e))
                                found_left = true;
@@ -232,28 +235,33 @@ rel_basetable_get_statistics(visitor *v,
        return e;
 }
 
-static void
+static bool
 rel_setop_get_statistics(mvc *sql, sql_rel *rel, list *lexps, list *rexps, 
sql_exp *e, int i)
 {
        sql_exp *le = list_fetch(lexps, i), *re = list_fetch(rexps, i);
-       atom *lval, *rval;
+       atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = 
find_prop_and_get(le->p, PROP_MAX),
+                *rval_min = find_prop_and_get(re->p, PROP_MIN), *rval_max = 
find_prop_and_get(re->p, PROP_MAX);
 
-       assert(le && re && e);
-       if ((lval = find_prop_and_get(le->p, PROP_MAX)) && (rval = 
find_prop_and_get(re->p, PROP_MAX))) {
+       if (is_inter(rel->op) && exp_is_not_null(le) && exp_is_not_null(re) &&
+               lval_min && rval_min && lval_max && rval_max && 
!lval_min->isnull && !rval_min->isnull && !lval_max->isnull && 
!rval_max->isnull &&
+               (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, 
lval_max) > 0))
+               return true;
+
+       if (lval_min && rval_min) {
                if (is_union(rel->op))
-                       set_property(sql, e, PROP_MAX, statistics_atom_max(sql, 
lval, rval)); /* for union the new max will be the max of the two */
+                       set_property(sql, e, PROP_MIN, statistics_atom_min(sql, 
lval_min, rval_min)); /* for union the new min will be the min of the two */
                else if (is_inter(rel->op))
-                       set_property(sql, e, PROP_MAX, statistics_atom_min(sql, 
lval, rval)); /* for intersect the new max will be the min of the two */
+                       set_property(sql, e, PROP_MIN, statistics_atom_max(sql, 
lval_min, rval_min)); /* for intersect the new min will be the max of the two */
                else /* except */
-                       set_property(sql, e, PROP_MAX, lval);
+                       set_property(sql, e, PROP_MIN, lval_min);
        }
-       if ((lval = find_prop_and_get(le->p, PROP_MIN)) && (rval = 
find_prop_and_get(re->p, PROP_MIN))) {
+       if (lval_max && rval_max) {
                if (is_union(rel->op))
-                       set_property(sql, e, PROP_MIN, statistics_atom_min(sql, 
lval, rval)); /* for union the new min will be the min of the two */
+                       set_property(sql, e, PROP_MAX, statistics_atom_max(sql, 
lval_max, rval_max)); /* for union the new max will be the max of the two */
                else if (is_inter(rel->op))
-                       set_property(sql, e, PROP_MIN, statistics_atom_max(sql, 
lval, rval)); /* for intersect the new min will be the max of the two */
+                       set_property(sql, e, PROP_MAX, statistics_atom_min(sql, 
lval_max, rval_max)); /* for intersect the new max will be the min of the two */
                else /* except */
-                       set_property(sql, e, PROP_MIN, lval);
+                       set_property(sql, e, PROP_MAX, lval_max);
        }
 
        if (is_union(rel->op)) {
@@ -267,6 +275,7 @@ rel_setop_get_statistics(mvc *sql, sql_r
                if (!has_nil(le))
                        set_has_no_nil(e);
        }
+       return false;
 }
 
 static sql_exp *
@@ -407,6 +416,130 @@ rel_propagate_statistics(visitor *v, sql
        return e;
 }
 
+static list * /* Remove predicates always false from min/max values */
+rel_prune_predicates(visitor *v, sql_rel *rel)
+{
+       for (node *n = rel->exps->h ; n ; n = n->next) {
+               sql_exp *e = n->data;
+
+               if (e->type == e_cmp && (is_theta_exp(e->flag) || e->f)) {
+                       sql_exp *le = e->l, *re = e->r, *fe = e->f;
+                       atom *lval_min = find_prop_and_get(le->p, PROP_MIN), 
*lval_max = find_prop_and_get(le->p, PROP_MAX),
+                               *rval_min = find_prop_and_get(re->p, PROP_MIN), 
*rval_max = find_prop_and_get(re->p, PROP_MAX);
+                       bool always_false = false, always_true = false;
+
+                       if (fe && !(e->flag & CMP_SYMMETRIC)) {
+                               atom *fval_min = find_prop_and_get(fe->p, 
PROP_MIN), *fval_max = find_prop_and_get(fe->p, PROP_MAX);
+                               comp_type lower = range2lcompare(e->flag), 
higher = range2rcompare(e->flag);
+                               int not_int1 = rval_min && lval_max && 
!rval_min->isnull && !lval_max->isnull && /* the middle and left intervals 
don't overlap */
+                                       (!e->anti && (lower == cmp_gte ? 
atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0)),
+                                       not_int2 = lval_min && fval_max && 
!lval_min->isnull && !fval_max->isnull && /* the middle and right intervals 
don't overlap */
+                                       (!e->anti && (higher == cmp_lte ? 
atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, fval_max) >= 0)),
+                                       not_int3 = rval_min && fval_max && 
!rval_min->isnull && !fval_max->isnull && /* the left interval is after the 
right one */
+                                       (!e->anti && (atom_cmp(rval_min, 
fval_max) > 0));
+
+                               always_false |= not_int1 || not_int2 || 
not_int3;
+                               /* for anti the middle must be before the left 
or after the right or the right after the left, for the other the middle must 
be always between the left and right intervals */
+                               always_true |= exp_is_not_null(le) && 
exp_is_not_null(re) && exp_is_not_null(fe) &&
+                                       lval_min && lval_max && rval_min && 
rval_max && fval_min && fval_max && !lval_min->isnull && !lval_max->isnull && 
!rval_min->isnull && !rval_max->isnull && !fval_min->isnull && 
!fval_max->isnull &&
+                                       (e->anti ? ((lower == cmp_gte ? 
atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) >= 0) || 
(higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, 
fval_max) >= 0) || atom_cmp(rval_min, fval_max) > 0) :
+                                       ((lower == cmp_gte ? atom_cmp(lval_min, 
rval_max) >= 0 : atom_cmp(lval_min, rval_max) > 0) && (higher == cmp_lte ? 
atom_cmp(fval_min, lval_max) >= 0 : atom_cmp(fval_min, lval_max) > 0)));
+                       } else {
+                               switch (e->flag) {
+                               case cmp_equal:
+                                       if (lval_min && lval_max && rval_min && 
rval_max && !lval_min->isnull && !lval_max->isnull && !rval_min->isnull && 
!rval_max->isnull)
+                                               always_false |= (!e->anti && 
(atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0)) || 
(e->anti && atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) 
<= 0);
+                                       if (is_semantics(e))
+                                               always_false |= is_semantics(e) 
?
+                                                       e->anti ? 
(exp_is_null(le) && exp_is_null(re)) || (exp_is_not_null(le) && 
exp_is_not_null(re)) : (exp_is_not_null(le) && exp_is_null(re)) || 
(exp_is_null(le) && exp_is_not_null(re)) :
+                                                       e->anti ? 
exp_is_not_null(le) && exp_is_not_null(re) : (exp_is_null(le) && 
exp_is_null(re)) || (exp_is_not_null(le) && exp_is_null(re)) || 
(exp_is_null(le) && exp_is_not_null(re));
+                                       break;
+                               case cmp_gt:
+                                       if (lval_max && rval_min && 
!lval_max->isnull && !rval_min->isnull)
+                                               always_false |= e->anti ? 
atom_cmp(lval_max, rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0;
+                                       if (lval_min && rval_max && 
!lval_min->isnull && !rval_max->isnull)
+                                               always_true |= 
exp_is_not_null(le) && exp_is_not_null(re) && (e->anti ? atom_cmp(lval_min, 
rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0);
+                                       break;
+                               case cmp_gte:
+                                       if (lval_max && rval_min && 
!lval_max->isnull && !rval_min->isnull)
+                                               always_false |= e->anti ? 
atom_cmp(lval_max, rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0;
+                                       if (lval_min && rval_max && 
!lval_min->isnull && !rval_max->isnull)
+                                               always_true |= 
exp_is_not_null(le) && exp_is_not_null(re) && (e->anti ? atom_cmp(lval_min, 
rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0);
+                                       break;
+                               case cmp_lt:
+                                       if (lval_min && rval_max && 
!lval_min->isnull && !rval_max->isnull)
+                                               always_false |= e->anti ? 
atom_cmp(lval_min, rval_max) < 0 : atom_cmp(lval_min, rval_max) >= 0;
+                                       if (lval_max && rval_min && 
!lval_max->isnull && !rval_min->isnull)
+                                               always_true |= 
exp_is_not_null(le) && exp_is_not_null(re) && (e->anti ? atom_cmp(lval_max, 
rval_min) >= 0 : atom_cmp(lval_max, rval_min) < 0);
+                                       break;
+                               case cmp_lte:
+                                       if (lval_min && rval_max && 
!lval_min->isnull && !rval_max->isnull)
+                                               always_false |= e->anti ? 
atom_cmp(lval_min, rval_max) <= 0 : atom_cmp(lval_min, rval_max) > 0;
+                                       if (lval_max && rval_min && 
!lval_max->isnull && !rval_min->isnull)
+                                               always_true |= 
exp_is_not_null(le) && exp_is_not_null(re) && (e->anti ? atom_cmp(lval_max, 
rval_min) > 0 : atom_cmp(lval_max, rval_min) <= 0);
+                                       break;
+                               default: /* Maybe later I can do cmp_in and 
cmp_notin */
+                                       break;
+                               }
+                       }
+                       assert(!always_false || !always_true);
+                       if (always_false || always_true) {
+                               sql_exp *ne = exp_atom_bool(v->sql->sa, 
always_true);
+                               if (exp_name(e))
+                                       exp_prop_alias(v->sql->sa, ne, e);
+                               n->data = ne;
+                               v->changes++;
+                       }
+               }
+       }
+       return rel->exps;
+}
+
+static list*
+rel_simplify_count(visitor *v, sql_rel *rel)
+{
+       mvc *sql = v->sql;
+       int ncountstar = 0;
+
+       /* Convert count(no null) into count(*) */
+       for (node *n = rel->exps->h; n ; n = n->next) {
+               sql_exp *e = n->data;
+
+               if (exp_aggr_is_count(e) && !need_distinct(e)) {
+                       if (list_length(e->l) == 0) {
+                               ncountstar++;
+                       } else if (list_length(e->l) == 1 && 
!has_nil((sql_exp*)((list*)e->l)->h->data)) {
+                               sql_subfunc *cf = sql_bind_func(sql, "sys", 
"count", sql_bind_localtype("void"), NULL, F_AGGR);
+                               sql_exp *ne = exp_aggr(sql->sa, NULL, cf, 0, 0, 
e->card, 0);
+                               if (exp_name(e))
+                                       exp_prop_alias(sql->sa, ne, e);
+                               n->data = ne;
+                               ncountstar++;
+                               v->changes++;
+                       }
+               }
+       }
+       /* With multiple count(*), use exp_ref to reduce the number of calls to 
this aggregate */
+       if (ncountstar > 1) {
+               sql_exp *count_star = NULL;
+               for (node *n = rel->exps->h; n ; n = n->next) {
+                       sql_exp *e = n->data;
+
+                       if (exp_aggr_is_count(e) && !need_distinct(e) && 
list_length(e->l) == 0) {
+                               if (!count_star) {
+                                       count_star = e;
+                               } else {
+                                       sql_exp *ne = exp_ref(sql, count_star);
+                                       if (exp_name(e))
+                                               exp_prop_alias(sql->sa, ne, e);
+                                       n->data = ne;
+                               }
+                       }
+               }
+       }
+       return rel->exps;
+}
+
 static sql_rel *
 rel_get_statistics(visitor *v, sql_rel *rel)
 {
@@ -417,6 +550,7 @@ rel_get_statistics(visitor *v, sql_rel *
        case op_union:
        case op_inter:
        case op_except: {
+               bool can_be_pruned = false;
                int i = 0;
                sql_rel *l = rel->l, *r = rel->r;
 
@@ -434,10 +568,24 @@ rel_get_statistics(visitor *v, sql_rel *
                        r->exps = exps_exp_visitor_bottomup(v, r, r->exps, 0, 
&rel_propagate_statistics, false);
                }
 
-               for (node *n = rel->exps->h ; n ; n = n->next) {
-                       rel_setop_get_statistics(v->sql, rel, l->exps, r->exps, 
n->data, i);
+               for (node *n = rel->exps->h ; n && !can_be_pruned ; n = 
n->next) {
+                       can_be_pruned |= rel_setop_get_statistics(v->sql, rel, 
l->exps, r->exps, n->data, i);
                        i++;
                }
+               if (can_be_pruned) {
+                       rel_destroy(rel->l);
+                       rel->l = NULL;
+                       rel_destroy(rel->r);
+                       rel->r = NULL;
+                       for (node *n = rel->exps->h ; n ; n = n->next) {
+                               sql_exp *e = n->data, *a = exp_atom(v->sql->sa, 
atom_general(v->sql->sa, exp_subtype(e), NULL));
+                               exp_prop_alias(v->sql->sa, a, e);
+                               n->data = a;
+                       }
+                       rel = rel_project(v->sql->sa, NULL, rel->exps);
+                       rel = rel_select(v->sql->sa, rel, 
exp_atom_bool(v->sql->sa, 0));
+                       rel = rel_project(v->sql->sa, rel, 
rel_projections(v->sql, rel, NULL, 1, 1));
+               }
        } break;
        case op_join:
        case op_left:
@@ -450,8 +598,13 @@ rel_get_statistics(visitor *v, sql_rel *
        case op_groupby:
        case op_ddl:
                rel->exps = exps_exp_visitor_bottomup(v, rel, rel->exps, 0, 
&rel_propagate_statistics, false);
-               if ((is_simple_project(rel->op) || is_groupby(rel->op)) && 
!list_empty(rel->r))
+               if (is_simple_project(rel->op) && !list_empty(rel->r))
                        rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, 
&rel_propagate_statistics, false);
+               /* The following optimizations can only be applied after 
propagating the statistics to rel->exps */
+               if (is_groupby(rel->op) && rel->exps)
+                       rel->exps = rel_simplify_count(v, rel);
+               if ((is_join(rel->op) || is_select(rel->op)) && rel->exps)
+                       rel->exps = rel_prune_predicates(v, rel);
                break;
        /*These relations are less important for now
        case op_table:
@@ -468,131 +621,6 @@ rel_get_statistics(visitor *v, sql_rel *
        return rel;
 }
 
-static sql_rel *
-rel_simplify_count(visitor *v, sql_rel *rel)
-{
-       mvc *sql = v->sql;
-
-       if (is_groupby(rel->op)) {
-               int ncountstar = 0;
-
-               /* Convert count(no null) into count(*) */
-               for (node *n = rel->exps->h; n ; n = n->next) {
-                       sql_exp *e = n->data;
-
-                       if (exp_aggr_is_count(e) && !need_distinct(e)) {
-                               if (list_length(e->l) == 0) {
-                                       ncountstar++;
-                               } else if (list_length(e->l) == 1 && 
!has_nil((sql_exp*)((list*)e->l)->h->data)) {
-                                       sql_subfunc *cf = sql_bind_func(sql, 
"sys", "count", sql_bind_localtype("void"), NULL, F_AGGR);
-                                       sql_exp *ne = exp_aggr(sql->sa, NULL, 
cf, 0, 0, e->card, 0);
-                                       if (exp_name(e))
-                                               exp_prop_alias(sql->sa, ne, e);
-                                       n->data = ne;
-                                       ncountstar++;
-                                       v->changes++;
-                               }
-                       }
-               }
-               /* With multiple count(*), use exp_ref to reduce the number of 
calls to this aggregate */
-               if (ncountstar > 1) {
-                       sql_exp *count_star = NULL;
-                       for (node *n = rel->exps->h; n ; n = n->next) {
-                               sql_exp *e = n->data;
-
-                               if (exp_aggr_is_count(e) && !need_distinct(e) 
&& list_length(e->l) == 0) {
-                                       if (!count_star) {
-                                               count_star = e;
-                                       } else {
-                                               sql_exp *ne = exp_ref(sql, 
count_star);
-                                               if (exp_name(e))
-                                                       exp_prop_alias(sql->sa, 
ne, e);
-                                               n->data = ne;
-                                       }
-                               }
-                       }
-               }
-       }
-       return rel;
-}
-
-static sql_exp * /* Remove predicates always false from min/max values */
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to