This is an automated email from the ASF dual-hosted git repository. maxyang pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit 2bbaddcdb2795356e33a7011e7b061aa97ac146c Author: Haotian Chen <[email protected]> AuthorDate: Fri Mar 10 16:58:05 2023 +0800 Fix bug of wrong idx position in targetlist as ExecTupleSplit (#14954) Targetlists are allowed to project non-group columns which are not wrapped in an Aggregate function when the group-by clause includes a primary key constraint, such as: ``` select count(distinct a), count(distinct b), e from t1 group by a, b; if (a, b) is a primary key of table t1, column e could be projected directly. ``` As in the case above, non-group columns appear not only in the final targetlist but also in the targetlists of subplans, such as the TupleSplit node. This commit fixes the inconsistent relative position of those columns in the TupleSplit node's targetlist. We now store the relative position of a column within the targetlist (te->resno) counting from one instead of zero for the whole lifetime of ExecTupleSplit. --- src/backend/executor/nodeTupleSplit.c | 7 +++- src/test/regress/expected/gp_dqa.out | 48 ++++++++++++++++++++++++ src/test/regress/expected/gp_dqa_optimizer.out | 52 ++++++++++++++++++++++++++ src/test/regress/sql/gp_dqa.sql | 7 ++++ 4 files changed, 112 insertions(+), 2 deletions(-) diff --git a/src/backend/executor/nodeTupleSplit.c b/src/backend/executor/nodeTupleSplit.c index 90d14d3622..85a9f3ecab 100644 --- a/src/backend/executor/nodeTupleSplit.c +++ b/src/backend/executor/nodeTupleSplit.c @@ -118,8 +118,11 @@ ExecInitTupleSplit(TupleSplit *node, EState *estate, int eflags) * fetch all columns which is not referenced by all DQAs */ Bitmapset *all_input_attr_bms = NULL; - for (int id = 0; id < list_length(outerPlan(node)->targetlist); id++) - all_input_attr_bms = bms_add_member(all_input_attr_bms, id); + foreach(lc, outerPlan(node)->targetlist) + { + TargetEntry *te = (TargetEntry *)lfirst(lc); + all_input_attr_bms = bms_add_member(all_input_attr_bms, te->resno); + } Bitmapset *dqa_not_used_bms = all_input_attr_bms; for (int id = 0; id < tup_spl_state->numDisDQAs; id++) diff --git 
a/src/test/regress/expected/gp_dqa.out b/src/test/regress/expected/gp_dqa.out index 09c8aed08b..af9ee92998 100644 --- a/src/test/regress/expected/gp_dqa.out +++ b/src/test/regress/expected/gp_dqa.out @@ -2199,6 +2199,54 @@ select count(distinct a), sum(b), sum(c) from dqa_f1; 17 | 2000 | 1000 (1 row) +-- multi DQA with primary key +create table dqa_unique(a int, b int, c int, d int, primary key(a, b)); +insert into dqa_unique select i%3, i%5, i%7, i%9 from generate_series(1, 10) i; +explain(verbose on, costs off) select count(distinct a), count(distinct d), c from dqa_unique group by a, b; + QUERY PLAN +------------------------------------------------------------------------------------------------ + Finalize HashAggregate + Output: count(a), count(d), c, a, b + Group Key: dqa_unique.a, dqa_unique.b + -> Gather Motion 3:1 (slice1; segments: 3) + Output: a, b, (PARTIAL count(a)), (PARTIAL count(d)), c + -> Partial HashAggregate + Output: a, b, PARTIAL count(a), PARTIAL count(d), c + Group Key: dqa_unique.a, dqa_unique.b + -> HashAggregate + Output: a, b, d, c, (AggExprId) + Group Key: (AggExprId), dqa_unique.d, dqa_unique.a, dqa_unique.b + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: a, b, d, c, (AggExprId) + Hash Key: a, b, d, (AggExprId) + -> Streaming HashAggregate + Output: a, b, d, c, (AggExprId) + Group Key: AggExprId, dqa_unique.d, dqa_unique.a, dqa_unique.b + -> TupleSplit + Output: a, b, d, c, AggExprId + Split by Col: (dqa_unique.a), (dqa_unique.d) + Group Key: dqa_unique.a, dqa_unique.b + -> Seq Scan on public.dqa_unique + Output: a, b, c, d + Optimizer: Postgres query optimizer + Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1' +(25 rows) + +select count(distinct a), count(distinct d), c from dqa_unique group by a, b; + count | count | c +-------+-------+--- + 1 | 1 | 5 + 1 | 1 | 6 + 1 | 1 | 0 + 1 | 1 | 2 + 1 | 1 | 1 + 1 | 1 | 1 + 1 | 1 | 2 + 1 | 1 | 4 + 1 | 1 | 3 + 1 | 1 | 3 +(10 rows) + -- multi DQA with type conversions 
create table dqa_f3(a character varying, b bigint) distributed by (a); insert into dqa_f3 values ('123', 2), ('213', 0), ('231', 2), ('312', 0), ('321', 2), ('132', 1), ('4', 0); diff --git a/src/test/regress/expected/gp_dqa_optimizer.out b/src/test/regress/expected/gp_dqa_optimizer.out index cb090292d3..2cf2943d35 100644 --- a/src/test/regress/expected/gp_dqa_optimizer.out +++ b/src/test/regress/expected/gp_dqa_optimizer.out @@ -2332,6 +2332,58 @@ select count(distinct a), sum(b), sum(c) from dqa_f1; 17 | 2000 | 1000 (1 row) +-- multi DQA with primary key +create table dqa_unique(a int, b int, c int, d int, primary key(a, b)); +insert into dqa_unique select i%3, i%5, i%7, i%9 from generate_series(1, 10) i; +explain(verbose on, costs off) select count(distinct a), count(distinct d), c from dqa_unique group by a, b; +INFO: GPORCA failed to produce a plan, falling back to planner +DETAIL: Query-to-DXL Translation: No attribute entry found due to incorrect normalization of query + QUERY PLAN +------------------------------------------------------------------------------------------------ + Finalize HashAggregate + Output: count(a), count(d), c, a, b + Group Key: dqa_unique.a, dqa_unique.b + -> Gather Motion 3:1 (slice1; segments: 3) + Output: a, b, (PARTIAL count(a)), (PARTIAL count(d)), c + -> Partial HashAggregate + Output: a, b, PARTIAL count(a), PARTIAL count(d), c + Group Key: dqa_unique.a, dqa_unique.b + -> HashAggregate + Output: a, b, d, c, (AggExprId) + Group Key: (AggExprId), dqa_unique.d, dqa_unique.a, dqa_unique.b + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: a, b, d, c, (AggExprId) + Hash Key: a, b, d, (AggExprId) + -> Streaming HashAggregate + Output: a, b, d, c, (AggExprId) + Group Key: AggExprId, dqa_unique.d, dqa_unique.a, dqa_unique.b + -> TupleSplit + Output: a, b, d, c, AggExprId + Split by Col: (dqa_unique.a), (dqa_unique.d) + Group Key: dqa_unique.a, dqa_unique.b + -> Seq Scan on public.dqa_unique + Output: a, b, c, d + Optimizer: 
Postgres query optimizer + Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1' +(25 rows) + +select count(distinct a), count(distinct d), c from dqa_unique group by a, b; +INFO: GPORCA failed to produce a plan, falling back to planner +DETAIL: Query-to-DXL Translation: No attribute entry found due to incorrect normalization of query + count | count | c +-------+-------+--- + 1 | 1 | 5 + 1 | 1 | 6 + 1 | 1 | 0 + 1 | 1 | 2 + 1 | 1 | 1 + 1 | 1 | 1 + 1 | 1 | 2 + 1 | 1 | 4 + 1 | 1 | 3 + 1 | 1 | 3 +(10 rows) + -- multi DQA with type conversions create table dqa_f3(a character varying, b bigint) distributed by (a); insert into dqa_f3 values ('123', 2), ('213', 0), ('231', 2), ('312', 0), ('321', 2), ('132', 1), ('4', 0); diff --git a/src/test/regress/sql/gp_dqa.sql b/src/test/regress/sql/gp_dqa.sql index 75a113c6f7..fcd2408d84 100644 --- a/src/test/regress/sql/gp_dqa.sql +++ b/src/test/regress/sql/gp_dqa.sql @@ -360,6 +360,13 @@ explain select count(distinct a) filter (where a > 3),count( distinct b) filter explain select count(distinct a), sum(b), sum(c) from dqa_f1; select count(distinct a), sum(b), sum(c) from dqa_f1; +-- multi DQA with primary key +create table dqa_unique(a int, b int, c int, d int, primary key(a, b)); +insert into dqa_unique select i%3, i%5, i%7, i%9 from generate_series(1, 10) i; + +explain(verbose on, costs off) select count(distinct a), count(distinct d), c from dqa_unique group by a, b; +select count(distinct a), count(distinct d), c from dqa_unique group by a, b; + -- multi DQA with type conversions create table dqa_f3(a character varying, b bigint) distributed by (a); insert into dqa_f3 values ('123', 2), ('213', 0), ('231', 2), ('312', 0), ('321', 2), ('132', 1), ('4', 0); --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
