This is an automated email from the ASF dual-hosted git repository.

maxyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit 2de0e285f5e8f2b580cbe4f3e4e200f0855f5e68
Author: Zhenghua Lyu <[email protected]>
AuthorDate: Sat Jun 17 02:30:51 2023 +0000

    Revert "Refactor cdbpullup_missingVarWalker."
    
    This reverts commit aff2a34333ab31d86e65505cefd3e6144dbe67da.
    
    The commit aff2a343  fixes some issues but introduces some new
    failures:
    
      * Github Issue 15794
      * Github Issue 15767
      * Github Issue 15793
    
    I think it is better to revert it now and do a new design and
    and refactor later.
---
 src/backend/cdb/cdbpullup.c                        | 112 ++++++---------------
 src/test/regress/expected/bfv_subquery.out         |  10 --
 .../regress/expected/bfv_subquery_optimizer.out    |  10 --
 src/test/regress/sql/bfv_subquery.sql              |   5 -
 4 files changed, 33 insertions(+), 104 deletions(-)

diff --git a/src/backend/cdb/cdbpullup.c b/src/backend/cdb/cdbpullup.c
index ae5945d0b6..c6f243ae08 100644
--- a/src/backend/cdb/cdbpullup.c
+++ b/src/backend/cdb/cdbpullup.c
@@ -27,18 +27,9 @@
 #include "cdb/cdbpullup.h"             /* me */
 
 
-static bool  cdbpullup_missingVarCheck(Expr *node, List *targetlist);
-static List *cdbpullup_getall_vars(Node *node);
-static bool  cdbpullup_getall_vars_walker(Node *node, void *context);
-static bool  cdbpullup_missingVarCheckWalker(Node *node, void *context);
+static bool cdbpullup_missingVarWalker(Expr *node, void *targetlist);
 static Expr *cdbpullup_make_expr(Index varno, AttrNumber varattno, Expr 
*oldexpr, bool modifyOld);
 
-
-typedef struct MissingVarContext
-{
-       List   *vars;
-       List   *findvars;
-} MissingVarContext;
 /*
  * cdbpullup_expr
  *
@@ -326,8 +317,10 @@ cdbpullup_findEclassInTargetList(EquivalenceClass *eclass, 
List *targetlist,
 
                /* Return this item if all referenced Vars are in targetlist. */
                if (!IsA(key, Var) &&
-                       !cdbpullup_missingVarCheck(key, targetlist))
+                       !cdbpullup_missingVarWalker(key, targetlist))
+               {
                        return key;
+               }
        }
 
        return NULL;
@@ -421,85 +414,46 @@ cdbpullup_make_expr(Index varno, AttrNumber varattno, 
Expr *oldexpr, bool modify
        }
 }                                                              /* 
cdbpullup_make_var */
 
+
 /*
- * cdbpullup_missingVarCheck
- *   Given an expression and a targetlist, check if the expression
- *   can be represented by the exprs in the targetlist.
+ * cdbpullup_missingVarWalker
+ *
+ * Returns true if some Var in expr is not in targetlist.
+ * 'targetlist' is either a List of TargetEntry, or a plain List of Expr.
  *
- *   This function return true when Expr node cannot be represented
- *   based on targetlist (which matches the name that we have missing vars).
+ * NB:  A Var in the expr is considered as matching a Var in the targetlist
+ * without regard for whether or not there is a RelabelType node atop the
+ * targetlist Var.
  *
- *   The implementaion is as below:
- *     1. build the list of all Vars in the Expr node, it is a set (created
- *        by list_append_unique_ptr in the helper function 
cdbpullup_getall_vars)
- *     2. walk the targetlist to build a set (created by 
list_append_unique_ptr)
- *        for all Vars in targetlist that is member of Varset built in 1st 
step,
- *        here membership test is using list_member which call equal to test.
- *     3. if the size of the 2nd step's var set is smaller than the 1st step's,
- *        it means the targetlist misses some vars.
  */
 static bool
-cdbpullup_missingVarCheck(Expr *node, List *targetlist)
+cdbpullup_missingVarWalker(Expr *node, void *targetlist)
 {
-       List             *vars    = NIL;
-       MissingVarContext context;
-       bool              missing;
-
-       vars = cdbpullup_getall_vars((Node *) node);
-       if (list_length(vars) == 0)
-               return true;
-
-       context.vars = vars;
-       context.findvars = NIL;
-       (void) expression_tree_walker((Node *) targetlist,
-                                                                 
cdbpullup_missingVarCheckWalker,
-                                                                 (void *) 
&context);
-       missing = (list_length(context.findvars) < list_length(vars));
-       list_free(vars);
-       list_free(context.findvars);
-       return missing;
-}
-
-static List *
-cdbpullup_getall_vars(Node *node)
-{
-       List *vars = NIL;
-
-       (void) expression_tree_walker(node, cdbpullup_getall_vars_walker, (void 
*) &vars);
-       return vars;
-}
-
-static bool
-cdbpullup_getall_vars_walker(Node *node, void *context)
-{
-       List **vars = context;
-
-       if (node == NULL)
+       if (!node)
                return false;
 
-       if (IsA(node, Var))
+       /*
+        * Should also consider PlaceHolderVar in the targetlist.
+        * See github issue: https://github.com/greenplum-db/gpdb/issues/10315
+        */
+       if (IsA(node, Var) || IsA(node, PlaceHolderVar))
        {
-               *vars = list_append_unique_ptr(*vars, node);
-               return false;
-       }
+               if (!targetlist)
+                       return true;
 
-       return expression_tree_walker(node, cdbpullup_getall_vars_walker, (void 
*) vars);
-}
-
-static bool
-cdbpullup_missingVarCheckWalker(Node *node, void *context)
-{
-       MissingVarContext *ctx = context;
+               /* is targetlist a List of TargetEntry? */
+               if (IsA(linitial(targetlist), TargetEntry))
+               {
+                       if (tlist_member_ignore_relabel(node, (List *) 
targetlist))
+                               return false;   /* this Var ok - go on to check 
rest of expr */
+               }
 
-       if (node == NULL)
-               return false;
+               /* targetlist must be a List of Expr */
+               else if (list_member((List *) targetlist, node))
+                       return false;           /* this Var ok - go on to check 
rest of expr */
 
-       if (IsA(node, Var))
-       {
-               if (list_member(ctx->vars, node))
-                       ctx->findvars = list_append_unique_ptr(ctx->findvars, 
node);
-               return false;
+               return true;                    /* Var is not in targetlist - 
quit the walk */
        }
 
-       return expression_tree_walker(node, cdbpullup_missingVarCheckWalker, 
(void *) ctx);
-}
+       return expression_tree_walker((Node *) node, 
cdbpullup_missingVarWalker, targetlist);
+}                                                              /* 
cdbpullup_missingVarWalker */
diff --git a/src/test/regress/expected/bfv_subquery.out 
b/src/test/regress/expected/bfv_subquery.out
index ff9f04adc2..741211b3b3 100644
--- a/src/test/regress/expected/bfv_subquery.out
+++ b/src/test/regress/expected/bfv_subquery.out
@@ -555,13 +555,3 @@ limit 15;
  2 |   102
 (15 rows)
 
--- test case for Github Issue 15079
-create table if not exists issue_15079(c0 real, c1 boolean);
-NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c0' 
as the Greenplum Database data distribution key for this table.
-HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
-select * from (select bool_or(issue_15079.c1) from issue_15079 group by 
random()) as result;
- bool_or 
----------
-(0 rows)
-
-drop table issue_15079;
diff --git a/src/test/regress/expected/bfv_subquery_optimizer.out 
b/src/test/regress/expected/bfv_subquery_optimizer.out
index db858dd038..f89dd20935 100644
--- a/src/test/regress/expected/bfv_subquery_optimizer.out
+++ b/src/test/regress/expected/bfv_subquery_optimizer.out
@@ -550,13 +550,3 @@ limit 15;
  2 |   102
 (15 rows)
 
--- test case for Github Issue 15079
-create table if not exists issue_15079(c0 real, c1 boolean);
-NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c0' 
as the Greenplum Database data distribution key for this table.
-HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
-select * from (select bool_or(issue_15079.c1) from issue_15079 group by 
random()) as result;
- bool_or 
----------
-(0 rows)
-
-drop table issue_15079;
diff --git a/src/test/regress/sql/bfv_subquery.sql 
b/src/test/regress/sql/bfv_subquery.sql
index 3e968e3e74..e03fcad15b 100644
--- a/src/test/regress/sql/bfv_subquery.sql
+++ b/src/test/regress/sql/bfv_subquery.sql
@@ -325,8 +325,3 @@ select with_test2.* from with_test2
 where value < all (select total from my_group_sum where my_group_sum.i = 
with_test2.i)
 order by 1,2
 limit 15;
-
--- test case for Github Issue 15079
-create table if not exists issue_15079(c0 real, c1 boolean);
-select * from (select bool_or(issue_15079.c1) from issue_15079 group by 
random()) as result;
-drop table issue_15079;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to