This is an automated email from the ASF dual-hosted git repository. maxyang pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit 2de0e285f5e8f2b580cbe4f3e4e200f0855f5e68 Author: Zhenghua Lyu <[email protected]> AuthorDate: Sat Jun 17 02:30:51 2023 +0000 Revert "Refactor cdbpullup_missingVarWalker." This reverts commit aff2a34333ab31d86e65505cefd3e6144dbe67da. The commit aff2a343 fixes some issues but introduces some new failures: * Github Issue 15794 * Github Issue 15767 * Github Issue 15793 I think it is better to revert it now and do a new design and and refactor later. --- src/backend/cdb/cdbpullup.c | 112 ++++++--------------- src/test/regress/expected/bfv_subquery.out | 10 -- .../regress/expected/bfv_subquery_optimizer.out | 10 -- src/test/regress/sql/bfv_subquery.sql | 5 - 4 files changed, 33 insertions(+), 104 deletions(-) diff --git a/src/backend/cdb/cdbpullup.c b/src/backend/cdb/cdbpullup.c index ae5945d0b6..c6f243ae08 100644 --- a/src/backend/cdb/cdbpullup.c +++ b/src/backend/cdb/cdbpullup.c @@ -27,18 +27,9 @@ #include "cdb/cdbpullup.h" /* me */ -static bool cdbpullup_missingVarCheck(Expr *node, List *targetlist); -static List *cdbpullup_getall_vars(Node *node); -static bool cdbpullup_getall_vars_walker(Node *node, void *context); -static bool cdbpullup_missingVarCheckWalker(Node *node, void *context); +static bool cdbpullup_missingVarWalker(Expr *node, void *targetlist); static Expr *cdbpullup_make_expr(Index varno, AttrNumber varattno, Expr *oldexpr, bool modifyOld); - -typedef struct MissingVarContext -{ - List *vars; - List *findvars; -} MissingVarContext; /* * cdbpullup_expr * @@ -326,8 +317,10 @@ cdbpullup_findEclassInTargetList(EquivalenceClass *eclass, List *targetlist, /* Return this item if all referenced Vars are in targetlist. */ if (!IsA(key, Var) && - !cdbpullup_missingVarCheck(key, targetlist)) + !cdbpullup_missingVarWalker(key, targetlist)) + { return key; + } } return NULL; @@ -421,85 +414,46 @@ cdbpullup_make_expr(Index varno, AttrNumber varattno, Expr *oldexpr, bool modify } } /* cdbpullup_make_var */ + /* - * cdbpullup_missingVarCheck - * Given an expression and a targetlist, check if the expression - * can be represented by the exprs in the targetlist. + * cdbpullup_missingVarWalker + * + * Returns true if some Var in expr is not in targetlist. + * 'targetlist' is either a List of TargetEntry, or a plain List of Expr. * - * This function return true when Expr node cannot be represented - * based on targetlist (which matches the name that we have missing vars). + * NB: A Var in the expr is considered as matching a Var in the targetlist + * without regard for whether or not there is a RelabelType node atop the + * targetlist Var. * - * The implementaion is as below: - * 1. build the list of all Vars in the Expr node, it is a set (created - * by list_append_unique_ptr in the helper function cdbpullup_getall_vars) - * 2. walk the targetlist to build a set (created by list_append_unique_ptr) - * for all Vars in targetlist that is member of Varset built in 1st step, - * here membership test is using list_member which call equal to test. - * 3. if the size of the 2nd step's var set is smaller than the 1st step's, - * it means the targetlist misses some vars. */ static bool -cdbpullup_missingVarCheck(Expr *node, List *targetlist) +cdbpullup_missingVarWalker(Expr *node, void *targetlist) { - List *vars = NIL; - MissingVarContext context; - bool missing; - - vars = cdbpullup_getall_vars((Node *) node); - if (list_length(vars) == 0) - return true; - - context.vars = vars; - context.findvars = NIL; - (void) expression_tree_walker((Node *) targetlist, - cdbpullup_missingVarCheckWalker, - (void *) &context); - missing = (list_length(context.findvars) < list_length(vars)); - list_free(vars); - list_free(context.findvars); - return missing; -} - -static List * -cdbpullup_getall_vars(Node *node) -{ - List *vars = NIL; - - (void) expression_tree_walker(node, cdbpullup_getall_vars_walker, (void *) &vars); - return vars; -} - -static bool -cdbpullup_getall_vars_walker(Node *node, void *context) -{ - List **vars = context; - - if (node == NULL) + if (!node) return false; - if (IsA(node, Var)) + /* + * Should also consider PlaceHolderVar in the targetlist. + * See github issue: https://github.com/greenplum-db/gpdb/issues/10315 + */ + if (IsA(node, Var) || IsA(node, PlaceHolderVar)) { - *vars = list_append_unique_ptr(*vars, node); - return false; - } + if (!targetlist) + return true; - return expression_tree_walker(node, cdbpullup_getall_vars_walker, (void *) vars); -} - -static bool -cdbpullup_missingVarCheckWalker(Node *node, void *context) -{ - MissingVarContext *ctx = context; + /* is targetlist a List of TargetEntry? */ + if (IsA(linitial(targetlist), TargetEntry)) + { + if (tlist_member_ignore_relabel(node, (List *) targetlist)) + return false; /* this Var ok - go on to check rest of expr */ + } - if (node == NULL) - return false; + /* targetlist must be a List of Expr */ + else if (list_member((List *) targetlist, node)) + return false; /* this Var ok - go on to check rest of expr */ - if (IsA(node, Var)) - { - if (list_member(ctx->vars, node)) - ctx->findvars = list_append_unique_ptr(ctx->findvars, node); - return false; + return true; /* Var is not in targetlist - quit the walk */ } - return expression_tree_walker(node, cdbpullup_missingVarCheckWalker, (void *) ctx); -} + return expression_tree_walker((Node *) node, cdbpullup_missingVarWalker, targetlist); +} /* cdbpullup_missingVarWalker */ diff --git a/src/test/regress/expected/bfv_subquery.out b/src/test/regress/expected/bfv_subquery.out index ff9f04adc2..741211b3b3 100644 --- a/src/test/regress/expected/bfv_subquery.out +++ b/src/test/regress/expected/bfv_subquery.out @@ -555,13 +555,3 @@ limit 15; 2 | 102 (15 rows) --- test case for Github Issue 15079 -create table if not exists issue_15079(c0 real, c1 boolean); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c0' as the Greenplum Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. -select * from (select bool_or(issue_15079.c1) from issue_15079 group by random()) as result; - bool_or ---------- -(0 rows) - -drop table issue_15079; diff --git a/src/test/regress/expected/bfv_subquery_optimizer.out b/src/test/regress/expected/bfv_subquery_optimizer.out index db858dd038..f89dd20935 100644 --- a/src/test/regress/expected/bfv_subquery_optimizer.out +++ b/src/test/regress/expected/bfv_subquery_optimizer.out @@ -550,13 +550,3 @@ limit 15; 2 | 102 (15 rows) --- test case for Github Issue 15079 -create table if not exists issue_15079(c0 real, c1 boolean); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c0' as the Greenplum Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. -select * from (select bool_or(issue_15079.c1) from issue_15079 group by random()) as result; - bool_or ---------- -(0 rows) - -drop table issue_15079; diff --git a/src/test/regress/sql/bfv_subquery.sql b/src/test/regress/sql/bfv_subquery.sql index 3e968e3e74..e03fcad15b 100644 --- a/src/test/regress/sql/bfv_subquery.sql +++ b/src/test/regress/sql/bfv_subquery.sql @@ -325,8 +325,3 @@ select with_test2.* from with_test2 where value < all (select total from my_group_sum where my_group_sum.i = with_test2.i) order by 1,2 limit 15; - --- test case for Github Issue 15079 -create table if not exists issue_15079(c0 real, c1 boolean); -select * from (select bool_or(issue_15079.c1) from issue_15079 group by random()) as result; -drop table issue_15079; --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
