On Tue, Sep 8, 2020 at 12:59 PM Surafel Temesgen <[email protected]>
wrote:
> Hi Tom
>
> On Tue, Sep 8, 2020 at 4:46 AM Tom Lane <[email protected]> wrote:
>
>
>> The "check_null_side" code you're proposing seems really horrid.
>> For one thing, it seems quite out of place for eval_const_expressions
>> to be doing that. For another, it's wrong in principle because
>> eval_const_expressions doesn't know which part of the query tree
>> it's being invoked on, so it cannot know whether outer-join
>> nullability is an issue. For another, doing that work over again
>> from scratch every time we see a potentially optimizable NullTest
>> looks expensive. (I wonder whether you have tried to measure the
>> performance penalty imposed by this patch in cases where it fails
>> to make any proof.)
>>
>>
> I was thinking about collecting data about joins only once at the start of
> eval_const_expressions but I assume most queries don't have NULL check
> expressions and postpone it until we find one. Thinking about it again I
> think it can be done better by storing check_null_side_state into
> eval_const_expressions_context to use it for subsequent evaluation.
>
>
Attached patch does like the above and includes NOT NULL constraint column.
regards
Surafel
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index 750586fceb..5fe4d88b5d 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -20,8 +20,10 @@
#include "postgres.h"
#include "access/htup_details.h"
+#include "access/table.h"
#include "catalog/pg_aggregate.h"
#include "catalog/pg_class.h"
+#include "catalog/pg_constraint.h"
#include "catalog/pg_language.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_proc.h"
@@ -39,6 +41,7 @@
#include "optimizer/plancat.h"
#include "optimizer/planmain.h"
#include "parser/analyze.h"
+#include "parser/parsetree.h"
#include "parser/parse_agg.h"
#include "parser/parse_coerce.h"
#include "parser/parse_func.h"
@@ -50,6 +53,7 @@
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
+#include "utils/rel.h"
#include "utils/syscache.h"
#include "utils/typcache.h"
@@ -61,6 +65,14 @@ typedef struct
AggClauseCosts *costs;
} get_agg_clause_costs_context;
+typedef struct check_null_side_state
+{
+ Relids relids; /* base relids within this subtree */
+ bool contains_outer; /* does subtree contain outer join(s)? */
+ JoinType jointype; /* type of join */
+ List *sub_states; /* List of states for subtree components */
+} check_null_side_state;
+
typedef struct
{
ParamListInfo boundParams;
@@ -68,6 +80,7 @@ typedef struct
List *active_fns;
Node *case_val;
bool estimate;
+ check_null_side_state *state;
} eval_const_expressions_context;
typedef struct
@@ -156,6 +169,9 @@ static Query *substitute_actual_srf_parameters(Query *expr,
int nargs, List *args);
static Node *substitute_actual_srf_parameters_mutator(Node *node,
substitute_actual_srf_parameters_context *context);
+static bool check_null_side(PlannerInfo *root, int relid, eval_const_expressions_context *context);
+static check_null_side_state * collect_jointree_data(Node *jtnode);
+
/*****************************************************************************
@@ -2296,6 +2312,7 @@ eval_const_expressions(PlannerInfo *root, Node *node)
context.active_fns = NIL; /* nothing being recursively simplified */
context.case_val = NULL; /* no CASE being examined */
context.estimate = false; /* safe transformations only */
+ context.state= NULL;
return eval_const_expressions_mutator(node, &context);
}
@@ -2626,6 +2643,7 @@ eval_const_expressions_mutator(Node *node,
{
has_null_input |= ((Const *) lfirst(arg))->constisnull;
all_null_input &= ((Const *) lfirst(arg))->constisnull;
+
}
else
has_nonconst_input = true;
@@ -3382,7 +3400,52 @@ eval_const_expressions_mutator(Node *node,
return makeBoolConst(result, false);
}
+ if (IsA(arg, Var) &&
+ ((Var *) arg)->varlevelsup == 0 && context->root)
+ {
+ /*
+ * Evaluate the test if it is on NOT NULL Constraint column and the
+ * relation is not mentioned on nullable side of outer
+ * join
+ */
+ Var *var = (Var *) arg;
+ Query *parse = context->root->parse;
+ int relid;
+ RangeTblEntry *rte;
+ relid = var->varno;
+ rte = rt_fetch(relid, parse->rtable);
+ if (rte->relkind ==RELKIND_RELATION)
+ {
+ Relation rel;
+ Form_pg_attribute att;
+ rel = table_open(rte->relid, NoLock);
+ att = TupleDescAttr(rel->rd_att, var->varattno - 1);
+
+ if (att->attnotnull && !check_null_side(context->root, relid, context))
+ {
+ bool result;
+
+ switch (ntest->nulltesttype)
+ {
+ case IS_NULL:
+ result = false;
+ break;
+ case IS_NOT_NULL:
+ result = true;
+ break;
+ default:
+ elog(ERROR, "unrecognized nulltesttype: %d",
+ (int) ntest->nulltesttype);
+ result = false; /* keep compiler quiet */
+ break;
+ }
+ table_close(rel,NoLock);
+ return makeBoolConst(result, false);
+ }
+ table_close(rel,NoLock);
+ }
+ }
newntest = makeNode(NullTest);
newntest->arg = (Expr *) arg;
newntest->nulltesttype = ntest->nulltesttype;
@@ -3572,6 +3635,131 @@ eval_const_expressions_mutator(Node *node,
return ece_generic_processing(node);
}
+/*
+ * Check a relation attributes on nullable side of the outer join.
+ */
+static bool
+check_null_side(PlannerInfo *root, int relid, eval_const_expressions_context *context)
+{
+ ListCell *l;
+ check_null_side_state *state = context->state;
+
+ if (!state)
+ {
+ state = collect_jointree_data((Node *) root->parse->jointree);
+ context->state = state;
+ }
+
+ /* if no outer joins the relation is not on nullable side */
+ if (state == NULL || !state->contains_outer)
+ return false;
+
+ /* scan the state and check relation on nullable outer join side */
+ foreach(l, state->sub_states)
+ {
+ check_null_side_state *sub_state = (check_null_side_state *) lfirst(l);
+
+ if (sub_state->contains_outer)
+ {
+ if (sub_state->jointype == JOIN_LEFT)
+ {
+ check_null_side_state *right_state = (check_null_side_state *) lsecond(sub_state->sub_states);
+
+ if (bms_is_member(relid, right_state->relids))
+ return true;
+ }
+
+ if (sub_state->jointype == JOIN_RIGHT)
+ {
+ check_null_side_state *left_state = (check_null_side_state *) linitial(sub_state->sub_states);
+
+ if (bms_is_member(relid, left_state->relids))
+ return true;
+ }
+
+ if (sub_state->jointype == JOIN_FULL)
+ {
+ check_null_side_state *left_state = (check_null_side_state *) linitial(sub_state->sub_states);
+ check_null_side_state *right_state = (check_null_side_state *) lsecond(sub_state->sub_states);
+
+ if (bms_is_member(relid, left_state->relids) ||bms_is_member(relid, right_state->relids))
+ return true;
+ }
+
+ }
+ }
+ return false;
+}
+
+/*
+ * Collecting data about join. this function is similar to reduce_outer_joins_pass1
+ *
+ * Returns a state node describing the given jointree node.
+ */
+static check_null_side_state *
+collect_jointree_data(Node *jtnode)
+{
+ check_null_side_state *result;
+
+ result = (check_null_side_state *)
+ palloc(sizeof(check_null_side_state));
+ result->relids = NULL;
+ result->contains_outer = false;
+ result->sub_states = NIL;
+
+ if (jtnode == NULL)
+ return result;
+ if (IsA(jtnode, RangeTblRef))
+ {
+ int varno = ((RangeTblRef *) jtnode)->rtindex;
+
+ result->relids = bms_make_singleton(varno);
+ }
+ else if (IsA(jtnode, FromExpr))
+ {
+ FromExpr *f = (FromExpr *) jtnode;
+ ListCell *l;
+
+ foreach(l, f->fromlist)
+ {
+ check_null_side_state *sub_state;
+
+ sub_state = collect_jointree_data(lfirst(l));
+ result->relids = bms_add_members(result->relids,
+ sub_state->relids);
+ result->contains_outer |= sub_state->contains_outer;
+ result->sub_states = lappend(result->sub_states, sub_state);
+ }
+ }
+ else if (IsA(jtnode, JoinExpr))
+ {
+ JoinExpr *j = (JoinExpr *) jtnode;
+ check_null_side_state *sub_state;
+
+ /* join's own RT index is not wanted in result->relids */
+ if (IS_OUTER_JOIN(j->jointype))
+ result->contains_outer = true;
+
+ sub_state = collect_jointree_data(j->larg);
+ result->jointype = j->jointype;
+ result->relids = bms_add_members(result->relids,
+ sub_state->relids);
+ sub_state->contains_outer |= sub_state->contains_outer;
+ result->sub_states = lappend(result->sub_states, sub_state);
+
+ sub_state = collect_jointree_data(j->rarg);
+ result->jointype = j->jointype;
+ result->relids = bms_add_members(result->relids,
+ sub_state->relids);
+ result->contains_outer |= sub_state->contains_outer;
+ result->sub_states = lappend(result->sub_states, sub_state);
+ }
+ else
+ elog(ERROR, "unrecognized node type: %d",
+ (int) nodeTag(jtnode));
+ return result;
+}
+
/*
* Subroutine for eval_const_expressions: check for non-Const nodes.
*
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index a46b1573bd..a42f58c9f8 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -4577,6 +4577,32 @@ SELECT b.* FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0
1 |
(1 row)
+explain (costs off)
+SELECT * FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0);
+ QUERY PLAN
+------------------------------------------
+ Hash Left Join
+ Hash Cond: (b.a_id = a.id)
+ Filter: ((a.id IS NULL) OR (a.id > 0))
+ -> Seq Scan on b
+ -> Hash
+ -> Seq Scan on a
+(6 rows)
+
+explain (costs off)
+SELECT * FROM b RIGHT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0);
+ QUERY PLAN
+-----------------------------------------------
+ Hash Right Join
+ Hash Cond: (b.a_id = a.id)
+ -> Seq Scan on b
+ -> Hash
+ -> Bitmap Heap Scan on a
+ Recheck Cond: (id > 0)
+ -> Bitmap Index Scan on a_pkey
+ Index Cond: (id > 0)
+(8 rows)
+
rollback;
-- another join removal bug: this is not optimizable, either
begin;
diff --git a/src/test/regress/expected/select.out b/src/test/regress/expected/select.out
index c441049f41..ef2df889bd 100644
--- a/src/test/regress/expected/select.out
+++ b/src/test/regress/expected/select.out
@@ -903,6 +903,37 @@ select unique1, unique2 from onek2
0 | 998
(2 rows)
+CREATE TEMP TABLE a (id int PRIMARY KEY, name text NOT NULL);
+explain (costs off)
+SELECT * FROM a WHERE id IS NULL;
+ QUERY PLAN
+--------------------------
+ Result
+ One-Time Filter: false
+(2 rows)
+
+explain (costs off)
+SELECT * FROM a WHERE id IS NOT NULL;
+ QUERY PLAN
+---------------
+ Seq Scan on a
+(1 row)
+
+explain (costs off)
+SELECT * FROM a WHERE name IS NOT NULL;
+ QUERY PLAN
+---------------
+ Seq Scan on a
+(1 row)
+
+explain (costs off)
+SELECT * FROM a WHERE name IS NULL;
+ QUERY PLAN
+--------------------------
+ Result
+ One-Time Filter: false
+(2 rows)
+
--
-- Test some corner cases that have been known to confuse the planner
--
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index 1403e0ffe7..4146562c3b 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -1589,6 +1589,11 @@ INSERT INTO b VALUES (0, 0), (1, NULL);
SELECT * FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0);
SELECT b.* FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0);
+explain (costs off)
+SELECT * FROM b LEFT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0);
+
+explain (costs off)
+SELECT * FROM b RIGHT JOIN a ON (b.a_id = a.id) WHERE (a.id IS NULL OR a.id > 0);
rollback;
-- another join removal bug: this is not optimizable, either
diff --git a/src/test/regress/sql/select.sql b/src/test/regress/sql/select.sql
index b5929b2eca..13c777863e 100644
--- a/src/test/regress/sql/select.sql
+++ b/src/test/regress/sql/select.sql
@@ -234,6 +234,19 @@ select unique1, unique2 from onek2
select unique1, unique2 from onek2
where (unique2 = 11 and stringu1 < 'B') or unique1 = 0;
+CREATE TEMP TABLE a (id int PRIMARY KEY, name text NOT NULL);
+explain (costs off)
+SELECT * FROM a WHERE id IS NULL;
+
+explain (costs off)
+SELECT * FROM a WHERE id IS NOT NULL;
+
+explain (costs off)
+SELECT * FROM a WHERE name IS NOT NULL;
+
+explain (costs off)
+SELECT * FROM a WHERE name IS NULL;
+
--
-- Test some corner cases that have been known to confuse the planner
--