This is an automated email from the ASF dual-hosted git repository. maxyang pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit 745b89a8e83f8f4ebcbf19dd8b65b47db4c2e7f2 Author: Zhenghua Lyu <[email protected]> AuthorDate: Mon Jun 5 14:23:32 2023 +0000 Refactor cdbpullup_missingVarWalker. Previously the API cdbpullup_missingVarWalker() is used to check if an expression can be represented by a targetlist. There are two problems for the previous design: * the API return bool type, the walker help function also return bool, but the walker's return value is to control expression_tree_walker's termination, it is not good to use walker's return value as the API's return value. * how to handle the case if there are no Vars in the node expression. This commit fixes the above two issues: * use context to store result needed by users, not the walker's return value * if there are no vars in the node expression, take this case as missing (cannot represented via targetlist). Fix Github Issue #15079. --- src/backend/cdb/cdbpullup.c | 112 +++++++++++++++------ src/test/regress/expected/bfv_subquery.out | 10 ++ .../regress/expected/bfv_subquery_optimizer.out | 10 ++ src/test/regress/sql/bfv_subquery.sql | 5 + 4 files changed, 104 insertions(+), 33 deletions(-) diff --git a/src/backend/cdb/cdbpullup.c b/src/backend/cdb/cdbpullup.c index c6f243ae08..ae5945d0b6 100644 --- a/src/backend/cdb/cdbpullup.c +++ b/src/backend/cdb/cdbpullup.c @@ -27,9 +27,18 @@ #include "cdb/cdbpullup.h" /* me */ -static bool cdbpullup_missingVarWalker(Expr *node, void *targetlist); +static bool cdbpullup_missingVarCheck(Expr *node, List *targetlist); +static List *cdbpullup_getall_vars(Node *node); +static bool cdbpullup_getall_vars_walker(Node *node, void *context); +static bool cdbpullup_missingVarCheckWalker(Node *node, void *context); static Expr *cdbpullup_make_expr(Index varno, AttrNumber varattno, Expr *oldexpr, bool modifyOld); + +typedef struct MissingVarContext +{ + List *vars; + List *findvars; +} MissingVarContext; /* * cdbpullup_expr * @@ -317,10 +326,8 @@ cdbpullup_findEclassInTargetList(EquivalenceClass *eclass, List *targetlist, /* Return this item if all referenced Vars are in targetlist. */ if (!IsA(key, Var) && - !cdbpullup_missingVarWalker(key, targetlist)) - { + !cdbpullup_missingVarCheck(key, targetlist)) return key; - } } return NULL; @@ -414,46 +421,85 @@ cdbpullup_make_expr(Index varno, AttrNumber varattno, Expr *oldexpr, bool modify } } /* cdbpullup_make_var */ - /* - * cdbpullup_missingVarWalker - * - * Returns true if some Var in expr is not in targetlist. - * 'targetlist' is either a List of TargetEntry, or a plain List of Expr. + * cdbpullup_missingVarCheck + * Given an expression and a targetlist, check if the expression + * can be represented by the exprs in the targetlist. * - * NB: A Var in the expr is considered as matching a Var in the targetlist - * without regard for whether or not there is a RelabelType node atop the - * targetlist Var. + * This function return true when Expr node cannot be represented + * based on targetlist (which matches the name that we have missing vars). * + * The implementaion is as below: + * 1. build the list of all Vars in the Expr node, it is a set (created + * by list_append_unique_ptr in the helper function cdbpullup_getall_vars) + * 2. walk the targetlist to build a set (created by list_append_unique_ptr) + * for all Vars in targetlist that is member of Varset built in 1st step, + * here membership test is using list_member which call equal to test. + * 3. if the size of the 2nd step's var set is smaller than the 1st step's, + * it means the targetlist misses some vars. */ static bool -cdbpullup_missingVarWalker(Expr *node, void *targetlist) +cdbpullup_missingVarCheck(Expr *node, List *targetlist) { - if (!node) + List *vars = NIL; + MissingVarContext context; + bool missing; + + vars = cdbpullup_getall_vars((Node *) node); + if (list_length(vars) == 0) + return true; + + context.vars = vars; + context.findvars = NIL; + (void) expression_tree_walker((Node *) targetlist, + cdbpullup_missingVarCheckWalker, + (void *) &context); + missing = (list_length(context.findvars) < list_length(vars)); + list_free(vars); + list_free(context.findvars); + return missing; +} + +static List * +cdbpullup_getall_vars(Node *node) +{ + List *vars = NIL; + + (void) expression_tree_walker(node, cdbpullup_getall_vars_walker, (void *) &vars); + return vars; +} + +static bool +cdbpullup_getall_vars_walker(Node *node, void *context) +{ + List **vars = context; + + if (node == NULL) return false; - /* - * Should also consider PlaceHolderVar in the targetlist. - * See github issue: https://github.com/greenplum-db/gpdb/issues/10315 - */ - if (IsA(node, Var) || IsA(node, PlaceHolderVar)) + if (IsA(node, Var)) { - if (!targetlist) - return true; + *vars = list_append_unique_ptr(*vars, node); + return false; + } - /* is targetlist a List of TargetEntry? */ - if (IsA(linitial(targetlist), TargetEntry)) - { - if (tlist_member_ignore_relabel(node, (List *) targetlist)) - return false; /* this Var ok - go on to check rest of expr */ - } + return expression_tree_walker(node, cdbpullup_getall_vars_walker, (void *) vars); +} + +static bool +cdbpullup_missingVarCheckWalker(Node *node, void *context) +{ + MissingVarContext *ctx = context; - /* targetlist must be a List of Expr */ - else if (list_member((List *) targetlist, node)) - return false; /* this Var ok - go on to check rest of expr */ + if (node == NULL) + return false; - return true; /* Var is not in targetlist - quit the walk */ + if (IsA(node, Var)) + { + if (list_member(ctx->vars, node)) + ctx->findvars = list_append_unique_ptr(ctx->findvars, node); + return false; } - return expression_tree_walker((Node *) node, cdbpullup_missingVarWalker, targetlist); -} /* cdbpullup_missingVarWalker */ + return expression_tree_walker(node, cdbpullup_missingVarCheckWalker, (void *) ctx); +} diff --git a/src/test/regress/expected/bfv_subquery.out b/src/test/regress/expected/bfv_subquery.out index 741211b3b3..ff9f04adc2 100644 --- a/src/test/regress/expected/bfv_subquery.out +++ b/src/test/regress/expected/bfv_subquery.out @@ -555,3 +555,13 @@ limit 15; 2 | 102 (15 rows) +-- test case for Github Issue 15079 +create table if not exists issue_15079(c0 real, c1 boolean); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c0' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +select * from (select bool_or(issue_15079.c1) from issue_15079 group by random()) as result; + bool_or +--------- +(0 rows) + +drop table issue_15079; diff --git a/src/test/regress/expected/bfv_subquery_optimizer.out b/src/test/regress/expected/bfv_subquery_optimizer.out index f89dd20935..db858dd038 100644 --- a/src/test/regress/expected/bfv_subquery_optimizer.out +++ b/src/test/regress/expected/bfv_subquery_optimizer.out @@ -550,3 +550,13 @@ limit 15; 2 | 102 (15 rows) +-- test case for Github Issue 15079 +create table if not exists issue_15079(c0 real, c1 boolean); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c0' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +select * from (select bool_or(issue_15079.c1) from issue_15079 group by random()) as result; + bool_or +--------- +(0 rows) + +drop table issue_15079; diff --git a/src/test/regress/sql/bfv_subquery.sql b/src/test/regress/sql/bfv_subquery.sql index e03fcad15b..3e968e3e74 100644 --- a/src/test/regress/sql/bfv_subquery.sql +++ b/src/test/regress/sql/bfv_subquery.sql @@ -325,3 +325,8 @@ select with_test2.* from with_test2 where value < all (select total from my_group_sum where my_group_sum.i = with_test2.i) order by 1,2 limit 15; + +-- test case for Github Issue 15079 +create table if not exists issue_15079(c0 real, c1 boolean); +select * from (select bool_or(issue_15079.c1) from issue_15079 group by random()) as result; +drop table issue_15079; --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
