On Tue, Sep 8, 2020 at 12:59 PM Surafel Temesgen <surafel3...@gmail.com> wrote:
> Hi Tom > > On Tue, Sep 8, 2020 at 4:46 AM Tom Lane <t...@sss.pgh.pa.us> wrote: > > >> The "check_null_side" code you're proposing seems really horrid. >> For one thing, it seems quite out of place for eval_const_expressions >> to be doing that. For another, it's wrong in principle because >> eval_const_expressions doesn't know which part of the query tree >> it's being invoked on, so it cannot know whether outer-join >> nullability is an issue. For another, doing that work over again >> from scratch every time we see a potentially optimizable NullTest >> looks expensive. (I wonder whether you have tried to measure the >> performance penalty imposed by this patch in cases where it fails >> to make any proof.) >> >> > I was thinking about collecting data about joins only once at the start of > eval_const_expressions but I assume most queries don't have NULL check > expressions and postpone it until we find one. Thinking about it again I > think it can be done better by storing check_null_side_state into > eval_const_expressions_context to use it for subsequent evaluation. > > Attached patch does like the above and includes NOT NULL constraint column. regards Surafel
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 750586fceb..5fe4d88b5d 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -20,8 +20,10 @@ #include "postgres.h" #include "access/htup_details.h" +#include "access/table.h" #include "catalog/pg_aggregate.h" #include "catalog/pg_class.h" +#include "catalog/pg_constraint.h" #include "catalog/pg_language.h" #include "catalog/pg_operator.h" #include "catalog/pg_proc.h" @@ -39,6 +41,7 @@ #include "optimizer/plancat.h" #include "optimizer/planmain.h" #include "parser/analyze.h" +#include "parser/parsetree.h" #include "parser/parse_agg.h" #include "parser/parse_coerce.h" #include "parser/parse_func.h" @@ -50,6 +53,7 @@ #include "utils/fmgroids.h" #include "utils/lsyscache.h" #include "utils/memutils.h" +#include "utils/rel.h" #include "utils/syscache.h" #include "utils/typcache.h" @@ -61,6 +65,14 @@ typedef struct AggClauseCosts *costs; } get_agg_clause_costs_context; +typedef struct check_null_side_state +{ + Relids relids; /* base relids within this subtree */ + bool contains_outer; /* does subtree contain outer join(s)? */ + JoinType jointype; /* type of join */ + List *sub_states; /* List of states for subtree components */ +} check_null_side_state; + typedef struct { ParamListInfo boundParams; @@ -68,6 +80,7 @@ typedef struct List *active_fns; Node *case_val; bool estimate; + check_null_side_state *state; } eval_const_expressions_context; typedef struct @@ -156,6 +169,9 @@ static Query *substitute_actual_srf_parameters(Query *expr, int nargs, List *args); static Node *substitute_actual_srf_parameters_mutator(Node *node, substitute_actual_srf_parameters_context *context); +static bool check_null_side(PlannerInfo *root, int relid, eval_const_expressions_context *context); +static check_null_side_state * collect_jointree_data(Node *jtnode); + /***************************************************************************** @@ -2296,6 +2312,7 @@ eval_const_expressions(PlannerInfo *root, Node *node) context.active_fns = NIL; /* nothing being recursively simplified */ context.case_val = NULL; /* no CASE being examined */ context.estimate = false; /* safe transformations only */ + context.state= NULL; return eval_const_expressions_mutator(node, &context); } @@ -2626,6 +2643,7 @@ eval_const_expressions_mutator(Node *node, { has_null_input |= ((Const *) lfirst(arg))->constisnull; all_null_input &= ((Const *) lfirst(arg))->constisnull; + } else has_nonconst_input = true; @@ -3382,7 +3400,52 @@ eval_const_expressions_mutator(Node *node, return makeBoolConst(result, false); } + if (IsA(arg, Var) && + ((Var *) arg)->varlevelsup == 0 && context->root) + { + /* + * Evaluate the test if it is on NOT NULL Constraint column and the + * relation is not mentioned on nullable side of outer + * join + */ + Var *var = (Var *) arg; + Query *parse = context->root->parse; + int relid; + RangeTblEntry *rte; + relid = var->varno; + rte = rt_fetch(relid, parse->rtable); + if (rte->relkind ==RELKIND_RELATION) + { + Relation rel; + Form_pg_attribute att; + rel = table_open(rte->relid, NoLock); + att = TupleDescAttr(rel->rd_att, var->varattno - 1); + + if (att->attnotnull && !check_null_side(context->root, relid, context)) + { + bool result; + + switch (ntest->nulltesttype) + { + case IS_NULL: + result = false; + break; + case IS_NOT_NULL: + result = true; + break; + default: + elog(ERROR, "unrecognized nulltesttype: %d", + (int) ntest->nulltesttype); + result = false; /* keep compiler quiet */ + break; + } + table_close(rel,NoLock); + return makeBoolConst(result, false); + } + table_close(rel,NoLock); + } + } newntest = makeNode(NullTest); newntest->arg = (Expr *) arg; newntest->nulltesttype = ntest->nulltesttype; @@ -3572,6 +3635,131 @@ eval_const_expressions_mutator(Node *node, return ece_generic_processing(node); } +/* + * Check a relation attributes on nullable side of the outer join. + */ +static bool +check_null_side(PlannerInfo *root, int relid, eval_const_expressions_context *context) +{ + ListCell *l; + check_null_side_state *state = context->state; + + if (!state) + { + state = collect_jointree_data((Node *) root->parse->jointree); + context->state = state; + } + + /* if no outer joins the relation is not on nullable side */ + if (state == NULL || !state->contains_outer) + return false; + + /* scan the state and check relation on nullable outer join side */ + foreach(l, state->sub_states) + { + check_null_side_state *sub_state = (check_null_side_state *) lfirst(l); + + if (sub_state->contains_outer) + { + if (sub_state->jointype == JOIN_LEFT) + { + check_null_side_state *right_state = (check_null_side_state *) lsecond(sub_state->sub_states); + + if (bms_is_member(relid, right_state->relids)) + return true; + } + + if (sub_state->jointype == JOIN_RIGHT) + { + check_null_side_state *left_state = (check_null_side_state *) linitial(sub_state->sub_states); + + if (bms_is_member(relid, left_state->relids)) + return true; + } + + if (sub_state->jointype == JOIN_FULL) + { + check_null_side_state *left_state = (check_null_side_state *) linitial(sub_state->sub_states); + check_null_side_state *right_state = (check_null_side_state *) lsecond(sub_state->sub_states); + + if (bms_is_member(relid, left_state->relids) ||bms_is_member(relid, right_state->relids)) + return true; + } + + } + } + return false; +} + +/* + * Collecting data about join. this function is similar to reduce_outer_joins_pass1 + * + * Returns a state node describing the given jointree node. + */ +static check_null_side_state * +collect_jointree_data(Node *jtnode) +{ + check_null_side_state *result; + + result = (check_null_side_state *) + palloc(sizeof(check_null_side_state)); + result->relids = NULL; + result->contains_outer = false; + result->sub_states = NIL; + + if (jtnode == NULL) + return result; + if (IsA(jtnode, RangeTblRef)) + { + int varno = ((RangeTblRef *) jtnode)->rtindex; + + result->relids = bms_make_singleton(varno); + } + else if (IsA(jtnode, FromExpr)) + { + FromExpr *f = (FromExpr *) jtnode; + ListCell *l; + + foreach(l, f->fromlist) + { + check_null_side_state *sub_state; + + sub_state = collect_jointree_data(lfirst(l)); + result->relids = bms_add_members(result->relids, + sub_state->relids); + result->contains_outer |= sub_state->contains_outer; + result->sub_states = lappend(result->sub_states, sub_state); + } + } + else if (IsA(jtnode, JoinExpr)) + { + JoinExpr *j = (JoinExpr *) jtnode; + check_null_side_state *sub_state; + + /* join's own RT index is not wanted in result->relids */ + if (IS_OUTER_JOIN(j->jointype)) + result->contains_outer = true; + + sub_state = collect_jointree_data(j->larg); + result->jointype = j->jointype; + result->relids = bms_add_members(result->relids, + sub_state->relids); + sub_state->contains_outer |= sub_state->contains_outer; + result->sub_states = lappend(result->sub_states, sub_state); + + sub_state = collect_jointree_data(j->rarg); + result->jointype = j->jointype; + result->relids = bms_add_members(result->relids, + sub_state->relids); + result->contains_outer |= sub_state->contains_outer; + result->sub_states = lappend(result->sub_states, sub_state); + } + else + elog(ERROR, "unrecognized node type: %d", + (int) nodeTag(jtnode)); + return result; +} + /* * Subroutine for eval_const_expressions: check for non-Const nodes. * diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index a46b1573bd..a42f58c9f8 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -4577,6 +4577,32 @@ SELECT b.* FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0 1 | (1 row) +explain (costs off) +SELECT * FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0); + QUERY PLAN +------------------------------------------ + Hash Left Join + Hash Cond: (b.a_id = a.id) + Filter: ((a.id IS NULL) OR (a.id > 0)) + -> Seq Scan on b + -> Hash + -> Seq Scan on a +(6 rows) + +explain (costs off) +SELECT * FROM b RIGHT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0); + QUERY PLAN +----------------------------------------------- + Hash Right Join + Hash Cond: (b.a_id = a.id) + -> Seq Scan on b + -> Hash + -> Bitmap Heap Scan on a + Recheck Cond: (id > 0) + -> Bitmap Index Scan on a_pkey + Index Cond: (id > 0) +(8 rows) + rollback; -- another join removal bug: this is not optimizable, either begin; diff --git a/src/test/regress/expected/select.out b/src/test/regress/expected/select.out index c441049f41..ef2df889bd 100644 --- a/src/test/regress/expected/select.out +++ b/src/test/regress/expected/select.out @@ -903,6 +903,37 @@ select unique1, unique2 from onek2 0 | 998 (2 rows) +CREATE TEMP TABLE a (id int PRIMARY KEY, name text NOT NULL); +explain (costs off) +SELECT * FROM a WHERE id IS NULL; + QUERY PLAN +-------------------------- + Result + One-Time Filter: false +(2 rows) + +explain (costs off) +SELECT * FROM a WHERE id IS NOT NULL; + QUERY PLAN +--------------- + Seq Scan on a +(1 row) + +explain (costs off) +SELECT * FROM a WHERE name IS NOT NULL; + QUERY PLAN +--------------- + Seq Scan on a +(1 row) + +explain (costs off) +SELECT * FROM a WHERE name IS NULL; + QUERY PLAN +-------------------------- + Result + One-Time Filter: false +(2 rows) + -- -- Test some corner cases that have been known to confuse the planner -- diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 1403e0ffe7..4146562c3b 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -1589,6 +1589,11 @@ INSERT INTO b VALUES (0, 0), (1, NULL); SELECT * FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0); SELECT b.* FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0); +explain (costs off) +SELECT * FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0); + +explain (costs off) +SELECT * FROM b RIGHT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0); rollback; -- another join removal bug: this is not optimizable, either diff --git a/src/test/regress/sql/select.sql b/src/test/regress/sql/select.sql index b5929b2eca..13c777863e 100644 --- a/src/test/regress/sql/select.sql +++ b/src/test/regress/sql/select.sql @@ -234,6 +234,19 @@ select unique1, unique2 from onek2 select unique1, unique2 from onek2 where (unique2 = 11 and stringu1 < 'B') or unique1 = 0; +CREATE TEMP TABLE a (id int PRIMARY KEY, name text NOT NULL); +explain (costs off) +SELECT * FROM a WHERE id IS NULL; + +explain (costs off) +SELECT * FROM a WHERE id IS NOT NULL; + +explain (costs off) +SELECT * FROM a WHERE name IS NOT NULL; + +explain (costs off) +SELECT * FROM a WHERE name IS NULL; + -- -- Test some corner cases that have been known to confuse the planner --