Hi, while writting a book in 2025, i read an played with the postgresql code. now my book is finished and i think that maybe a next step is to try to patch postgresql for some optimisations. I'm a beginner about postgresql code (but i patched a lot of opensource programs while working on a linux distro).
this first email aim is to discuss about the fact that this kind of code (just a draft here) have a chance to be commited or not (once comments added, reident, cleaning, ...). Details about each implementation could be discussed later. There are some optimisations at the planner level that are not mandatory when you know how it works, but all the year, i get query to optimize because people doesn't know the pg internals or write not well written queries. So, the patches i would like to suggest are more "non mandatory optimisations". like: - detect anti join on "a left join b where x is null" where x is a non null var b (b being a rte) this is the object of the attached patched. it is not finished, but working for a demonstation (this is a quick and dirty patch just to try if i were able to do it). it shows me that it has drawbacks : for example : it requires to know the details on some tables sooner on the planner, (and thus, sometimes, before we detect that we could just remove a table => so we build some tables for nothing except optimisations) - remove unrequirered distinct, group by (select distinct id_unique from people;) - remove double order (select * from (select * from a order by x) order by y) (where * doesn't containt functions based on row nums) - detect anti join on "not in(...)" - have a way to view the rewritten query ? (like explain) and so on.
From 5149b2e2c2fec6d172c4997271c9fde1ebc01c86 Mon Sep 17 00:00:00 2001 From: Nicolas Adenis-Lamarre <[email protected]> Date: Wed, 31 Dec 2025 10:35:01 +0100 Subject: [PATCH] planner: anti join on left joins Signed-off-by: Nicolas Adenis-Lamarre <[email protected]> --- src/backend/optimizer/plan/planner.c | 4 ++ src/backend/optimizer/prep/prepjointree.c | 33 ++++++++----- src/backend/optimizer/util/clauses.c | 59 ++++++++++++++++++++++- src/include/optimizer/clauses.h | 1 + 4 files changed, 84 insertions(+), 13 deletions(-) diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 1268ea92b6f..32211c2ffc3 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -1228,6 +1228,10 @@ subquery_planner(PlannerGlobal *glob, Query *parse, char *plan_name, } parse->havingQual = (Node *) newHaving; + // nae temporary for tests + /* Set up RTE/RelOptInfo arrays */ + setup_simple_rel_arrays(root); + /* * If we have any outer joins, try to reduce them to plain inner joins. * This step is most easily done after we've done expression diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index c3b726e93e7..b4b9428acb7 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -3450,20 +3450,29 @@ reduce_outer_joins_pass2(Node *jtnode, */ if (jointype == JOIN_LEFT) { - List *nonnullable_vars; - Bitmapset *overlap; + if(forced_null_vars != NIL) { + List *nonnullable_vars; + List *nonnullable_cols = NULL; + Bitmapset *overlap; - /* Find Vars in j->quals that must be non-null in joined rows */ - nonnullable_vars = find_nonnullable_vars(j->quals); + /* Find Vars in j->quals that must be non-null in joined rows */ + nonnullable_vars = find_nonnullable_vars(j->quals); - /* - * It's not sufficient to check whether nonnullable_vars and - * forced_null_vars overlap: we need to know if the overlap - * includes any RHS variables. - */ - overlap = mbms_overlap_sets(nonnullable_vars, forced_null_vars); - if (bms_overlap(overlap, right_state->relids)) - jointype = JOIN_ANTI; + /* + * It's not sufficient to check whether nonnullable_vars and + * forced_null_vars overlap: we need to know if the overlap + * includes any RHS variables. + */ + + nonnullable_cols = find_nonnullable_cols(root, j->rarg); + if(nonnullable_cols != NIL) { + mbms_add_members(nonnullable_vars, nonnullable_cols); + } + overlap = mbms_overlap_sets(nonnullable_vars, forced_null_vars); + + if (bms_overlap(overlap, right_state->relids)) + jointype = JOIN_ANTI; + } } /* diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 67b7de16fc5..a00d4bfa9a7 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -112,6 +112,7 @@ static bool contain_context_dependent_node_walker(Node *node, int *flags); static bool contain_leaked_vars_walker(Node *node, void *context); static Relids find_nonnullable_rels_walker(Node *node, bool top_level); static List *find_nonnullable_vars_walker(Node *node, bool top_level); +static List *find_nonnullable_cols_walker(PlannerInfo *root, Node *node); static bool is_strict_saop(ScalarArrayOpExpr *expr, bool falseOK); static bool convert_saop_to_hashed_saop_walker(Node *node, void *context); static Node *eval_const_expressions_mutator(Node *node, @@ -1736,10 +1737,11 @@ find_nonnullable_vars_walker(Node *node, bool top_level) { Var *var = (Var *) node; - if (var->varlevelsup == 0) + if (var->varlevelsup == 0) { result = mbms_add_member(result, var->varno, var->varattno - FirstLowInvalidHeapAttributeNumber); + } } else if (IsA(node, List)) { @@ -5906,3 +5908,58 @@ make_SAOP_expr(Oid oper, Node *leftexpr, Oid coltype, Oid arraycollid, return saopexpr; } + +List * +find_nonnullable_cols(PlannerInfo *root, Node *node) +{ + return find_nonnullable_cols_walker(root, node); +} + +/* + * find_nonnullable_cols + * Determine which Vars are forced nonnullable by given clause. + * + */ +static List * +find_nonnullable_cols_walker(PlannerInfo *root, Node *node) +{ + List *result = NIL; + int x; + + if (node == NULL) + return NIL; + + if (IsA(node, RangeTblRef)) { + ListCell *lc; + + int varno = ((RangeTblRef *) node)->rtindex; + (void) build_simple_rel(root, varno, NULL); + + RelOptInfo *rel; + rel = root->simple_rel_array[varno]; + + x = -1; + while ((x = bms_next_member(rel->notnullattnums, x)) >= 0) { + result = mbms_add_member(result, + varno, + x - FirstLowInvalidHeapAttributeNumber); + } + } + + else if (IsA(node, JoinExpr)) { + JoinExpr *j = (JoinExpr *) node; + + // we could consider as non null variables from other side via join clause expression, + // but not needed while this expression is analyzed too later + + if(j->jointype == JOIN_INNER || j->jointype == JOIN_LEFT) { + result = list_concat(result, find_nonnullable_cols_walker(root, j->larg)); + } + + if(j->jointype == JOIN_INNER || j->jointype == JOIN_RIGHT) { + result = list_concat(result, find_nonnullable_cols_walker(root, j->rarg)); + } + } + + return result; +} diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h index fc38eae5c5a..cfc5821fecd 100644 --- a/src/include/optimizer/clauses.h +++ b/src/include/optimizer/clauses.h @@ -40,6 +40,7 @@ extern bool contain_leaked_vars(Node *clause); extern Relids find_nonnullable_rels(Node *clause); extern List *find_nonnullable_vars(Node *clause); +extern List *find_nonnullable_cols(PlannerInfo *root, Node *clause); extern List *find_forced_null_vars(Node *node); extern Var *find_forced_null_var(Node *node); -- 2.34.1
