This is an automated email from the ASF dual-hosted git repository. maxyang pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit 2bbaddcdb2795356e33a7011e7b061aa97ac146c Author: Haotian Chen <[email protected]> AuthorDate: Fri Mar 10 16:58:05 2023 +0800 Fix bug of wrong idx position in targetlist as ExecTupleSplit (#14954) Targetlists are allowed to project non-group columns which are not wrapped in an Aggregate function when the group-by clause includes a primary key constraint, such as: ``` select count(distinct a), count(distinct b), e from t1 group by a, b; if (a, b) is a primary key of table t1, column e could be projected directly. ``` As in the case above, non-group columns appear not only in the final targetlist but also in the targetlists of subplans, such as the TupleSplit node. This commit fixes the inconsistent relative position of those columns in the TupleSplit node's targetlist. We now store the relative position of a column within the targetlist (te->resno) counting from one instead of zero for the whole lifetime of ExecTupleSplit. --- src/backend/executor/nodeTupleSplit.c | 7 +++- src/test/regress/expected/gp_dqa.out | 48 ++++++++++++++++++++++++ src/test/regress/expected/gp_dqa_optimizer.out | 52 ++++++++++++++++++++++++++ src/test/regress/sql/gp_dqa.sql | 7 ++++ 4 files changed, 112 insertions(+), 2 deletions(-) diff --git a/src/backend/executor/nodeTupleSplit.c b/src/backend/executor/nodeTupleSplit.c index 90d14d3622..85a9f3ecab 100644 --- a/src/backend/executor/nodeTupleSplit.c +++ b/src/backend/executor/nodeTupleSplit.c @@ -118,8 +118,11 @@ ExecInitTupleSplit(TupleSplit *node, EState *estate, int eflags) * fetch all columns which is not referenced by all DQAs */ Bitmapset *all_input_attr_bms = NULL; - for (int id = 0; id < list_length(outerPlan(node)->targetlist); id++) - all_input_attr_bms = bms_add_member(all_input_attr_bms, id); + foreach(lc, outerPlan(node)->targetlist) + { + TargetEntry *te = (TargetEntry *)lfirst(lc); + all_input_attr_bms = bms_add_member(all_input_attr_bms, te->resno); + } Bitmapset *dqa_not_used_bms = all_input_attr_bms; for (int id = 0; id < tup_spl_state->numDisDQAs; id++) diff --git 
a/src/test/regress/expected/gp_dqa.out b/src/test/regress/expected/gp_dqa.out index 09c8aed08b..af9ee92998 100644 --- a/src/test/regress/expected/gp_dqa.out +++ b/src/test/regress/expected/gp_dqa.out @@ -2199,6 +2199,54 @@ select count(distinct a), sum(b), sum(c) from dqa_f1; 17 | 2000 | 1000 (1 row) +-- multi DQA with primary key +create table dqa_unique(a int, b int, c int, d int, primary key(a, b)); +insert into dqa_unique select i%3, i%5, i%7, i%9 from generate_series(1, 10) i; +explain(verbose on, costs off) select count(distinct a), count(distinct d), c from dqa_unique group by a, b; + QUERY PLAN +------------------------------------------------------------------------------------------------ + Finalize HashAggregate + Output: count(a), count(d), c, a, b + Group Key: dqa_unique.a, dqa_unique.b + -> Gather Motion 3:1 (slice1; segments: 3) + Output: a, b, (PARTIAL count(a)), (PARTIAL count(d)), c + -> Partial HashAggregate + Output: a, b, PARTIAL count(a), PARTIAL count(d), c + Group Key: dqa_unique.a, dqa_unique.b + -> HashAggregate + Output: a, b, d, c, (AggExprId) + Group Key: (AggExprId), dqa_unique.d, dqa_unique.a, dqa_unique.b + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: a, b, d, c, (AggExprId) + Hash Key: a, b, d, (AggExprId) + -> Streaming HashAggregate + Output: a, b, d, c, (AggExprId) + Group Key: AggExprId, dqa_unique.d, dqa_unique.a, dqa_unique.b + -> TupleSplit + Output: a, b, d, c, AggExprId + Split by Col: (dqa_unique.a), (dqa_unique.d) + Group Key: dqa_unique.a, dqa_unique.b + -> Seq Scan on public.dqa_unique + Output: a, b, c, d + Optimizer: Postgres query optimizer + Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1' +(25 rows) + +select count(distinct a), count(distinct d), c from dqa_unique group by a, b; + count | count | c +-------+-------+--- + 1 | 1 | 5 + 1 | 1 | 6 + 1 | 1 | 0 + 1 | 1 | 2 + 1 | 1 | 1 + 1 | 1 | 1 + 1 | 1 | 2 + 1 | 1 | 4 + 1 | 1 | 3 + 1 | 1 | 3 +(10 rows) + -- multi DQA with type conversions 
create table dqa_f3(a character varying, b bigint) distributed by (a); insert into dqa_f3 values ('123', 2), ('213', 0), ('231', 2), ('312', 0), ('321', 2), ('132', 1), ('4', 0); diff --git a/src/test/regress/expected/gp_dqa_optimizer.out b/src/test/regress/expected/gp_dqa_optimizer.out index cb090292d3..2cf2943d35 100644 --- a/src/test/regress/expected/gp_dqa_optimizer.out +++ b/src/test/regress/expected/gp_dqa_optimizer.out @@ -2332,6 +2332,58 @@ select count(distinct a), sum(b), sum(c) from dqa_f1; 17 | 2000 | 1000 (1 row) +-- multi DQA with primary key +create table dqa_unique(a int, b int, c int, d int, primary key(a, b)); +insert into dqa_unique select i%3, i%5, i%7, i%9 from generate_series(1, 10) i; +explain(verbose on, costs off) select count(distinct a), count(distinct d), c from dqa_unique group by a, b; +INFO: GPORCA failed to produce a plan, falling back to planner +DETAIL: Query-to-DXL Translation: No attribute entry found due to incorrect normalization of query + QUERY PLAN +------------------------------------------------------------------------------------------------ + Finalize HashAggregate + Output: count(a), count(d), c, a, b + Group Key: dqa_unique.a, dqa_unique.b + -> Gather Motion 3:1 (slice1; segments: 3) + Output: a, b, (PARTIAL count(a)), (PARTIAL count(d)), c + -> Partial HashAggregate + Output: a, b, PARTIAL count(a), PARTIAL count(d), c + Group Key: dqa_unique.a, dqa_unique.b + -> HashAggregate + Output: a, b, d, c, (AggExprId) + Group Key: (AggExprId), dqa_unique.d, dqa_unique.a, dqa_unique.b + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: a, b, d, c, (AggExprId) + Hash Key: a, b, d, (AggExprId) + -> Streaming HashAggregate + Output: a, b, d, c, (AggExprId) + Group Key: AggExprId, dqa_unique.d, dqa_unique.a, dqa_unique.b + -> TupleSplit + Output: a, b, d, c, AggExprId + Split by Col: (dqa_unique.a), (dqa_unique.d) + Group Key: dqa_unique.a, dqa_unique.b + -> Seq Scan on public.dqa_unique + Output: a, b, c, d + Optimizer: 
Postgres query optimizer + Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1' +(25 rows) + +select count(distinct a), count(distinct d), c from dqa_unique group by a, b; +INFO: GPORCA failed to produce a plan, falling back to planner +DETAIL: Query-to-DXL Translation: No attribute entry found due to incorrect normalization of query + count | count | c +-------+-------+--- + 1 | 1 | 5 + 1 | 1 | 6 + 1 | 1 | 0 + 1 | 1 | 2 + 1 | 1 | 1 + 1 | 1 | 1 + 1 | 1 | 2 + 1 | 1 | 4 + 1 | 1 | 3 + 1 | 1 | 3 +(10 rows) + -- multi DQA with type conversions create table dqa_f3(a character varying, b bigint) distributed by (a); insert into dqa_f3 values ('123', 2), ('213', 0), ('231', 2), ('312', 0), ('321', 2), ('132', 1), ('4', 0); diff --git a/src/test/regress/sql/gp_dqa.sql b/src/test/regress/sql/gp_dqa.sql index 75a113c6f7..fcd2408d84 100644 --- a/src/test/regress/sql/gp_dqa.sql +++ b/src/test/regress/sql/gp_dqa.sql @@ -360,6 +360,13 @@ explain select count(distinct a) filter (where a > 3),count( distinct b) filter explain select count(distinct a), sum(b), sum(c) from dqa_f1; select count(distinct a), sum(b), sum(c) from dqa_f1; +-- multi DQA with primary key +create table dqa_unique(a int, b int, c int, d int, primary key(a, b)); +insert into dqa_unique select i%3, i%5, i%7, i%9 from generate_series(1, 10) i; + +explain(verbose on, costs off) select count(distinct a), count(distinct d), c from dqa_unique group by a, b; +select count(distinct a), count(distinct d), c from dqa_unique group by a, b; + -- multi DQA with type conversions create table dqa_f3(a character varying, b bigint) distributed by (a); insert into dqa_f3 values ('123', 2), ('213', 0), ('231', 2), ('312', 0), ('321', 2), ('132', 1), ('4', 0); --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
