This is an automated email from the ASF dual-hosted git repository.

reshke pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git


The following commit(s) were added to refs/heads/main by this push:
     new f5dcd91b847 Check MergeAppend node in share input mutator (#1204)
f5dcd91b847 is described below

commit f5dcd91b84757f4afcfe274abb38128ad72191a7
Author: reshke <[email protected]>
AuthorDate: Wed Jul 2 18:31:42 2025 +0500

    Check MergeAppend node in share input mutator (#1204)
    
    * Check MergeAppend node in share input mutator
    
    Do recursive call in plan walker for MergeAppend type of plan.
    
    When the planner decides to merge two sorted sub-plans, one
    of which has a Share Input Scan node, the executor fails to execute the
    plan because of wrongly aligned internal structures.
    
    It turns out the proper recursive call was missing
    in the shareinput tree walker.
---
 src/backend/cdb/cdbmutate.c                        |  8 ++++
 src/test/regress/expected/with_clause.out          | 50 ++++++++++++++++++++++
 .../regress/expected/with_clause_optimizer.out     | 50 ++++++++++++++++++++++
 src/test/regress/sql/with_clause.sql               | 29 +++++++++++++
 4 files changed, 137 insertions(+)

diff --git a/src/backend/cdb/cdbmutate.c b/src/backend/cdb/cdbmutate.c
index 9672ce25b59..1b5820587ef 100644
--- a/src/backend/cdb/cdbmutate.c
+++ b/src/backend/cdb/cdbmutate.c
@@ -403,6 +403,14 @@ shareinput_walker(SHAREINPUT_MUTATOR f, Node *node, 
PlannerInfo *root)
                        foreach(cell, app->appendplans)
                                shareinput_walker(f, (Node *) lfirst(cell), 
root);
                }
+               else if (IsA(node, MergeAppend))
+               {
+                       ListCell   *cell;
+                       MergeAppend *mapp = (MergeAppend *) node;
+
+                       foreach(cell, mapp->mergeplans)
+                               shareinput_walker(f, (Node *) lfirst(cell), 
root);
+               }
                /* GPDB_14_MERGE_FIXME: double check on following logics. */
                // else if (IsA(node, ModifyTable))
                // {
diff --git a/src/test/regress/expected/with_clause.out 
b/src/test/regress/expected/with_clause.out
index 2e574f1a146..2c3aff9951f 100644
--- a/src/test/regress/expected/with_clause.out
+++ b/src/test/regress/expected/with_clause.out
@@ -2355,3 +2355,53 @@ UNION ALL
  c      | 1
 (7 rows)
 
+-- Test issue from PR#1204
+-- Executor used to fail to execute this query
+CREATE TABLE foo_issue_1204_test (a INT, b INT, c INT, d INT, e INT, f INT);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' 
as the Apache Cloudberry data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+INSERT INTO foo_issue_1204_test SELECT generate_series(1,10000);
+ANALYZE foo_issue_1204_test;
+-- Assert that plan uses Merge Append strategy, and has Share Input Scan node.
+-- Also we are not actually interested in output so discard it using SELECT 
EXISTS() hack
+EXPLAIN (COSTS OFF, TIMING OFF, BUFFERS OFF)
+SELECT EXISTS(
+       with inp as MATERIALIZED (select * from  foo_issue_1204_test ) select 
a,b,c, count(distinct d), count(distinct e), count(distinct f) from inp group 
by 1,2,3
+       UNION ALL
+       select a,b,c, count(distinct d), count(distinct e), count(distinct f) 
from foo_issue_1204_test group by 1,2,3
+       ORDER BY 1
+);
+                                                   QUERY PLAN                  
                                  
+-----------------------------------------------------------------------------------------------------------------
+ Result
+   InitPlan 1 (returns $0)  (slice1)
+     ->  Gather Motion 3:1  (slice2; segments: 3)
+           ->  Merge Append
+                 Sort Key: share0_ref1.a
+                 ->  GroupAggregate
+                       Group Key: share0_ref1.a, share0_ref1.b, share0_ref1.c
+                       ->  Sort
+                             Sort Key: share0_ref1.a, share0_ref1.b, 
share0_ref1.c
+                             ->  Shared Scan (share slice:id 2:0)
+                                   ->  Seq Scan on foo_issue_1204_test
+                 ->  GroupAggregate
+                       Group Key: foo_issue_1204_test_1.a, 
foo_issue_1204_test_1.b, foo_issue_1204_test_1.c
+                       ->  Sort
+                             Sort Key: foo_issue_1204_test_1.a, 
foo_issue_1204_test_1.b, foo_issue_1204_test_1.c
+                             ->  Seq Scan on foo_issue_1204_test 
foo_issue_1204_test_1
+ Optimizer: Postgres query optimizer
+(17 rows)
+
+-- Check execution is ok
+SELECT EXISTS(
+       with inp as MATERIALIZED (select * from  foo_issue_1204_test ) select 
a,b,c, count(distinct d), count(distinct e), count(distinct f) from inp group 
by 1,2,3
+       UNION ALL
+       select a,b,c, count(distinct d), count(distinct e), count(distinct f) 
from foo_issue_1204_test group by 1,2,3
+       ORDER BY 1
+);
+ exists 
+--------
+ t
+(1 row)
+
+DROP TABLE foo_issue_1204_test;
diff --git a/src/test/regress/expected/with_clause_optimizer.out 
b/src/test/regress/expected/with_clause_optimizer.out
index 3fbea887ef7..b95ef3ac3a6 100644
--- a/src/test/regress/expected/with_clause_optimizer.out
+++ b/src/test/regress/expected/with_clause_optimizer.out
@@ -2364,3 +2364,53 @@ UNION ALL
  c      | 1
 (7 rows)
 
+-- Test issue from PR#1204
+-- Executor used to fail to execute this query
+CREATE TABLE foo_issue_1204_test (a INT, b INT, c INT, d INT, e INT, f INT);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' 
as the Apache Cloudberry data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+INSERT INTO foo_issue_1204_test SELECT generate_series(1,10000);
+ANALYZE foo_issue_1204_test;
+-- Assert that plan uses Merge Append strategy, and has Share Input Scan node.
+-- Also we are not actually interested in output so discard it using SELECT 
EXISTS() hack
+EXPLAIN (COSTS OFF, TIMING OFF, BUFFERS OFF)
+SELECT EXISTS(
+       with inp as MATERIALIZED (select * from  foo_issue_1204_test ) select 
a,b,c, count(distinct d), count(distinct e), count(distinct f) from inp group 
by 1,2,3
+       UNION ALL
+       select a,b,c, count(distinct d), count(distinct e), count(distinct f) 
from foo_issue_1204_test group by 1,2,3
+       ORDER BY 1
+);
+                                                   QUERY PLAN                  
                                  
+-----------------------------------------------------------------------------------------------------------------
+ Result
+   InitPlan 1 (returns $0)  (slice1)
+     ->  Gather Motion 3:1  (slice2; segments: 3)
+           ->  Merge Append
+                 Sort Key: share0_ref1.a
+                 ->  GroupAggregate
+                       Group Key: share0_ref1.a, share0_ref1.b, share0_ref1.c
+                       ->  Sort
+                             Sort Key: share0_ref1.a, share0_ref1.b, 
share0_ref1.c
+                             ->  Shared Scan (share slice:id 2:0)
+                                   ->  Seq Scan on foo_issue_1204_test
+                 ->  GroupAggregate
+                       Group Key: foo_issue_1204_test_1.a, 
foo_issue_1204_test_1.b, foo_issue_1204_test_1.c
+                       ->  Sort
+                             Sort Key: foo_issue_1204_test_1.a, 
foo_issue_1204_test_1.b, foo_issue_1204_test_1.c
+                             ->  Seq Scan on foo_issue_1204_test 
foo_issue_1204_test_1
+ Optimizer: Postgres query optimizer
+(17 rows)
+
+-- Check execution is ok
+SELECT EXISTS(
+       with inp as MATERIALIZED (select * from  foo_issue_1204_test ) select 
a,b,c, count(distinct d), count(distinct e), count(distinct f) from inp group 
by 1,2,3
+       UNION ALL
+       select a,b,c, count(distinct d), count(distinct e), count(distinct f) 
from foo_issue_1204_test group by 1,2,3
+       ORDER BY 1
+);
+ exists 
+--------
+ t
+(1 row)
+
+DROP TABLE foo_issue_1204_test;
diff --git a/src/test/regress/sql/with_clause.sql 
b/src/test/regress/sql/with_clause.sql
index 0ef209415ab..149edfe92af 100644
--- a/src/test/regress/sql/with_clause.sql
+++ b/src/test/regress/sql/with_clause.sql
@@ -462,3 +462,32 @@ UNION ALL
   SELECT 'sleep', 1 where pg_sleep(1) is not null
 UNION ALL
   SELECT 'c', j FROM cte;
+
+
+-- Test issue from PR#1204
+-- Executor used to fail to execute this query
+
+CREATE TABLE foo_issue_1204_test (a INT, b INT, c INT, d INT, e INT, f INT);
+INSERT INTO foo_issue_1204_test SELECT generate_series(1,10000);
+
+ANALYZE foo_issue_1204_test;
+
+-- Assert that plan uses Merge Append strategy, and has Share Input Scan node.
+-- Also we are not actually interested in output so discard it using SELECT 
EXISTS() hack
+EXPLAIN (COSTS OFF, TIMING OFF, BUFFERS OFF)
+SELECT EXISTS(
+       with inp as MATERIALIZED (select * from  foo_issue_1204_test ) select 
a,b,c, count(distinct d), count(distinct e), count(distinct f) from inp group 
by 1,2,3
+       UNION ALL
+       select a,b,c, count(distinct d), count(distinct e), count(distinct f) 
from foo_issue_1204_test group by 1,2,3
+       ORDER BY 1
+);
+
+-- Check execution is ok
+SELECT EXISTS(
+       with inp as MATERIALIZED (select * from  foo_issue_1204_test ) select 
a,b,c, count(distinct d), count(distinct e), count(distinct f) from inp group 
by 1,2,3
+       UNION ALL
+       select a,b,c, count(distinct d), count(distinct e), count(distinct f) 
from foo_issue_1204_test group by 1,2,3
+       ORDER BY 1
+);
+
+DROP TABLE foo_issue_1204_test;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to