This is an automated email from the ASF dual-hosted git repository. maxyang pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit 87b3b32999774fd53f264955a89943e34dc8e278 Author: Haotian Chen <[email protected]> AuthorDate: Fri Dec 9 16:06:10 2022 +0800 Fix crash of AggNode in executor caused by ORCA plan (#14577) Use the Aggref split type of the output targetlist instead of the AggNode split type when building the trans/combine function in the executor. --- src/backend/executor/execExpr.c | 3 +- src/backend/executor/nodeAgg.c | 2 +- src/test/regress/expected/gp_dqa.out | 10 ++-- src/test/regress/expected/gp_dqa_optimizer.out | 74 +++++++++++++------------- src/test/regress/sql/gp_dqa.sql | 2 + 5 files changed, 49 insertions(+), 42 deletions(-) diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c index 428b1be04b..03fe797e93 100644 --- a/src/backend/executor/execExpr.c +++ b/src/backend/executor/execExpr.c @@ -3576,7 +3576,6 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase, ExprState *state = makeNode(ExprState); PlanState *parent = &aggstate->ss.ps; ExprEvalStep scratch = {0}; - bool isCombine = DO_AGGSPLIT_COMBINE(aggstate->aggsplit); LastAttnumInfo deform = {0, 0, 0}; state->expr = (Expr *) aggstate; @@ -3626,6 +3625,8 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase, ListCell *bail; if (!bms_is_member(transno, aggstate->aggs_used)) continue; + + bool isCombine = DO_AGGSPLIT_COMBINE(pertrans->aggref->aggsplit); /* * If filter present, emit. Do so before evaluating the input, to * avoid potentially unneeded computations, or even worse, unintended diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index 75f160856c..790821324a 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -4233,7 +4233,7 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans, * transfn and transfn_oid fields of pertrans refer to the combine * function rather than the transition function. 
*/ - if (DO_AGGSPLIT_COMBINE(aggstate->aggsplit)) + if (DO_AGGSPLIT_COMBINE(aggref->aggsplit)) { Expr *combinefnexpr; size_t numTransArgs; diff --git a/src/test/regress/expected/gp_dqa.out b/src/test/regress/expected/gp_dqa.out index 0e731124f7..5ed20b2f3e 100644 --- a/src/test/regress/expected/gp_dqa.out +++ b/src/test/regress/expected/gp_dqa.out @@ -2409,6 +2409,8 @@ select count(distinct a) from t_issue_659; reset gp_eager_distinct_dedup; reset optimizer_force_three_stage_scalar_dqa; +reset optimizer_enable_use_distribution_in_dqa; +drop table t_issue_659; -- fix dqa bug when optimizer_force_multistage_agg is on set optimizer_force_multistage_agg = on; create table multiagg1(a int, b bigint, c int); @@ -2435,8 +2437,8 @@ explain (verbose, costs off) select count(distinct b), sum(c) from multiagg1; Hash Key: b -> Seq Scan on public.multiagg1 Output: b, c - Settings: enable_groupagg = 'off', enable_hashagg = 'on', gp_motion_cost_per_row = '2', optimizer = 'off' Optimizer: Postgres query optimizer + Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1', optimizer_force_multistage_agg = 'on' (13 rows) select count(distinct b), sum(c) from multiagg1; @@ -2446,8 +2448,8 @@ select count(distinct b), sum(c) from multiagg1; (1 row) explain (verbose, costs off) select count(distinct b), sum(c) from multiagg2; - QUERY PLAN ------------------------------------------------------------------------------------------------------------ + QUERY PLAN +-------------------------------------------------------------------------------------------------------- Finalize Aggregate Output: count(DISTINCT b), sum(c) -> Gather Motion 3:1 (slice1; segments: 3) @@ -2459,8 +2461,8 @@ explain (verbose, costs off) select count(distinct b), sum(c) from multiagg2; Hash Key: b -> Seq Scan on public.multiagg2 Output: b, c - Settings: enable_groupagg = 'off', enable_hashagg = 'on', gp_motion_cost_per_row = '2', optimizer = 'off' Optimizer: Postgres query optimizer + Settings: enable_groupagg 
= 'off', gp_motion_cost_per_row = '1', optimizer_force_multistage_agg = 'on' (13 rows) select count(distinct b), sum(c) from multiagg2; diff --git a/src/test/regress/expected/gp_dqa_optimizer.out b/src/test/regress/expected/gp_dqa_optimizer.out index c870359c83..787180b791 100644 --- a/src/test/regress/expected/gp_dqa_optimizer.out +++ b/src/test/regress/expected/gp_dqa_optimizer.out @@ -2559,13 +2559,15 @@ select count(distinct a) from t_issue_659; reset gp_eager_distinct_dedup; reset optimizer_force_three_stage_scalar_dqa; +reset optimizer_enable_use_distribution_in_dqa; +drop table t_issue_659; -- fix dqa bug when optimizer_force_multistage_agg is on set optimizer_force_multistage_agg = on; create table multiagg1(a int, b bigint, c int); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Cloudberry Database data distribution key for this table. +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table multiagg2(a int, b bigint, c numeric(8, 4)); -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Cloudberry Database data distribution key for this table. +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
insert into multiagg1 values(generate_series(1, 10), generate_series(1, 10), generate_series(1, 10)); INFO: GPORCA failed to produce a plan, falling back to planner @@ -2576,56 +2578,56 @@ DETAIL: Feature not supported: Unexpected target list entries in ProjectSet nod analyze multiagg1; analyze multiagg2; explain (verbose, costs off) select count(distinct b), sum(c) from multiagg1; -INFO: GPORCA failed to produce a plan, falling back to planner -DETAIL: GPDB Expression type: GPDB_96_MERGE_FIXME: Intermediate aggregate stage not implemented not supported in DXL - QUERY PLAN ----------------------------------------------------------------------------------------- - Finalize Aggregate - Output: count(DISTINCT b), sum(c) + QUERY PLAN +-------------------------------------------------------------------------------------------------------- + Aggregate + Output: count(b), sum(c) -> Gather Motion 3:1 (slice1; segments: 3) - Output: (PARTIAL count(DISTINCT b)), (PARTIAL sum(c)) - -> Partial Aggregate - Output: PARTIAL count(DISTINCT b), PARTIAL sum(c) + Output: b, (PARTIAL sum(c)) + -> Partial HashAggregate + Output: b, PARTIAL sum(c) + Group Key: multiagg1.b -> Redistribute Motion 3:3 (slice2; segments: 3) - Output: b, c + Output: b, (PARTIAL sum(c)) Hash Key: b - -> Seq Scan on public.multiagg1 - Output: b, c - Settings: enable_groupagg = 'off', enable_hashagg = 'on', gp_motion_cost_per_row = '2' - Optimizer: Postgres query optimizer -(13 rows) + -> Streaming Partial HashAggregate + Output: b, PARTIAL sum(c) + Group Key: multiagg1.b + -> Seq Scan on public.multiagg1 + Output: b, c + Optimizer: Pivotal Optimizer (GPORCA) + Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1', optimizer_force_multistage_agg = 'on' +(17 rows) select count(distinct b), sum(c) from multiagg1; -INFO: GPORCA failed to produce a plan, falling back to planner -DETAIL: GPDB Expression type: GPDB_96_MERGE_FIXME: Intermediate aggregate stage not implemented not supported in DXL count | sum 
-------+----- 10 | 55 (1 row) explain (verbose, costs off) select count(distinct b), sum(c) from multiagg2; -INFO: GPORCA failed to produce a plan, falling back to planner -DETAIL: GPDB Expression type: GPDB_96_MERGE_FIXME: Intermediate aggregate stage not implemented not supported in DXL - QUERY PLAN ----------------------------------------------------------------------------------------- - Finalize Aggregate - Output: count(DISTINCT b), sum(c) + QUERY PLAN +-------------------------------------------------------------------------------------------------------- + Aggregate + Output: count(b), sum(c) -> Gather Motion 3:1 (slice1; segments: 3) - Output: (PARTIAL count(DISTINCT b)), (PARTIAL sum(c)) - -> Partial Aggregate - Output: PARTIAL count(DISTINCT b), PARTIAL sum(c) + Output: b, (PARTIAL sum(c)) + -> Partial HashAggregate + Output: b, PARTIAL sum(c) + Group Key: multiagg2.b -> Redistribute Motion 3:3 (slice2; segments: 3) - Output: b, c + Output: b, (PARTIAL sum(c)) Hash Key: b - -> Seq Scan on public.multiagg2 - Output: b, c - Settings: enable_groupagg = 'off', enable_hashagg = 'on', gp_motion_cost_per_row = '2' - Optimizer: Postgres query optimizer -(13 rows) + -> Streaming Partial HashAggregate + Output: b, PARTIAL sum(c) + Group Key: multiagg2.b + -> Seq Scan on public.multiagg2 + Output: b, c + Optimizer: Pivotal Optimizer (GPORCA) + Settings: enable_groupagg = 'off', gp_motion_cost_per_row = '1', optimizer_force_multistage_agg = 'on' +(17 rows) select count(distinct b), sum(c) from multiagg2; -INFO: GPORCA failed to produce a plan, falling back to planner -DETAIL: GPDB Expression type: GPDB_96_MERGE_FIXME: Intermediate aggregate stage not implemented not supported in DXL count | sum -------+----------- 10 | 5555.5000 diff --git a/src/test/regress/sql/gp_dqa.sql b/src/test/regress/sql/gp_dqa.sql index b5180e49e3..8b4cda3a8c 100644 --- a/src/test/regress/sql/gp_dqa.sql +++ b/src/test/regress/sql/gp_dqa.sql @@ -431,6 +431,8 @@ select count(distinct a) from 
t_issue_659; select count(distinct a) from t_issue_659; reset gp_eager_distinct_dedup; reset optimizer_force_three_stage_scalar_dqa; +reset optimizer_enable_use_distribution_in_dqa; +drop table t_issue_659; -- fix dqa bug when optimizer_force_multistage_agg is on --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
