Hi Robert,
On Mon, Feb 26, 2018 at 8:03 PM, Robert Haas <robertmh...@gmail.com> wrote: > > Committed after incorporating your other fixes and updating the > optimizer README. > Thanks Robert. Off-list Rajkumar has reported an issue. When we have enable_hashagg set to false, and Gather Merge path is chosen, it ended-up in an error saying "ERROR: Aggref found in non-Agg plan node". I had a look over his provided testcase and observed that when we create a Gather Merge path over a cheapest partial path by sorting it explicitly as generate_gather_paths won't consider it, we accidentally used cheapest partial path from the input_rel to create a Gather Merge; instead we need a cheapest partial path from the partially_grouped_rel. Attached fix_aggref_in_non-agg_error.patch fixing this. test_for_aggref_in_non-agg_error.patch has a testcase reported by Rajkumar which I have added in a aggregates.sql. While doing so, I have observed few cleanup changes, added those in misc_cleanup.patch. --- While re-basing my partitionwise aggregate changes, I observed that when we want to create partial aggregation paths for a child partition, we don't need to add Gather or Gather Merge on top of it as we first want to append them all and then want to stick a gather on it. So it will be better to have that code part in a separate function so that we can call it from required places. I have attached patch (create_non_partial_paths.patch) for it including all above fix. Thanks > -- > Robert Haas > EnterpriseDB: http://www.enterprisedb.com > The Enterprise PostgreSQL Company > -- Jeevan Chalke Technical Architect, Product Development EnterpriseDB Corporation The Enterprise PostgreSQL Company
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index e8f6cc5..8190675 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -6339,6 +6339,9 @@ add_paths_to_partial_grouping_rel(PlannerInfo *root, */ generate_gather_paths(root, partially_grouped_rel, true); + /* Get cheapest partial path from partially_grouped_rel */ + cheapest_partial_path = linitial(partially_grouped_rel->partial_pathlist); + /* * generate_gather_paths won't consider sorting the cheapest path to match * the group keys and then applying a Gather Merge node to the result;
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index f85e913..fa56f6b 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -2065,3 +2065,48 @@ SELECT balk(hundred) FROM tenk1; (1 row) ROLLBACK; +-- Test for partially_grouped_rel +SET parallel_setup_cost=0; +SET parallel_tuple_cost=0; +SET enable_hashagg TO false; +CREATE TABLE pagg_tab1(x int, y int) PARTITION BY RANGE(x); +CREATE TABLE pagg_tab1_p1 PARTITION OF pagg_tab1 FOR VALUES FROM (0) TO (10); +CREATE TABLE pagg_tab1_p2 PARTITION OF pagg_tab1 FOR VALUES FROM (10) TO (20); +CREATE TABLE pagg_tab1_p3 PARTITION OF pagg_tab1 FOR VALUES FROM (20) TO (30); +CREATE TABLE pagg_tab2(x int, y int) PARTITION BY RANGE(y); +CREATE TABLE pagg_tab2_p1 PARTITION OF pagg_tab2 FOR VALUES FROM (0) TO (10); +CREATE TABLE pagg_tab2_p2 PARTITION OF pagg_tab2 FOR VALUES FROM (10) TO (20); +CREATE TABLE pagg_tab2_p3 PARTITION OF pagg_tab2 FOR VALUES FROM (20) TO (30); +INSERT INTO pagg_tab1 SELECT i%30, i FROM generate_series(0, 29) i; +INSERT INTO pagg_tab2 SELECT i, i%30 FROM generate_series(0, 29) i; +ANALYZE pagg_tab1; +ANALYZE pagg_tab2; +EXPLAIN (COSTS OFF) +SELECT t1.y, sum(t1.x), COUNT(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.y ORDER BY 1, 2, 3; + QUERY PLAN +---------------------------------------------------------------------------------------- + Sort + Sort Key: t1.y, (sum(t1.x)), (count(*)) + -> Finalize GroupAggregate + Group Key: t1.y + -> Gather Merge + Workers Planned: 2 + -> Partial GroupAggregate + Group Key: t1.y + -> Sort + Sort Key: t1.y + -> Parallel Hash Join + Hash Cond: (t1.x = t2.y) + -> Parallel Append + -> Parallel Seq Scan on pagg_tab1_p1 t1 + -> Parallel Seq Scan on pagg_tab1_p2 t1_1 + -> Parallel Seq Scan on pagg_tab1_p3 t1_2 + -> Parallel Hash + -> Parallel Append + -> Parallel Seq Scan on pagg_tab2_p1 t2 + -> Parallel Seq Scan on pagg_tab2_p2 t2_1 + -> Parallel Seq Scan on pagg_tab2_p3 t2_2 +(21 rows) + +DROP TABLE pagg_tab2; +DROP TABLE pagg_tab1; diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index 506d044..17c25e5 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -907,3 +907,30 @@ EXPLAIN (COSTS OFF) SELECT balk(hundred) FROM tenk1; SELECT balk(hundred) FROM tenk1; ROLLBACK; + +-- Test for partially_grouped_rel +SET parallel_setup_cost=0; +SET parallel_tuple_cost=0; +SET enable_hashagg TO false; + +CREATE TABLE pagg_tab1(x int, y int) PARTITION BY RANGE(x); +CREATE TABLE pagg_tab1_p1 PARTITION OF pagg_tab1 FOR VALUES FROM (0) TO (10); +CREATE TABLE pagg_tab1_p2 PARTITION OF pagg_tab1 FOR VALUES FROM (10) TO (20); +CREATE TABLE pagg_tab1_p3 PARTITION OF pagg_tab1 FOR VALUES FROM (20) TO (30); + +CREATE TABLE pagg_tab2(x int, y int) PARTITION BY RANGE(y); +CREATE TABLE pagg_tab2_p1 PARTITION OF pagg_tab2 FOR VALUES FROM (0) TO (10); +CREATE TABLE pagg_tab2_p2 PARTITION OF pagg_tab2 FOR VALUES FROM (10) TO (20); +CREATE TABLE pagg_tab2_p3 PARTITION OF pagg_tab2 FOR VALUES FROM (20) TO (30); + +INSERT INTO pagg_tab1 SELECT i%30, i FROM generate_series(0, 29) i; +INSERT INTO pagg_tab2 SELECT i, i%30 FROM generate_series(0, 29) i; + +ANALYZE pagg_tab1; +ANALYZE pagg_tab2; + +EXPLAIN (COSTS OFF) +SELECT t1.y, sum(t1.x), COUNT(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.y ORDER BY 1, 2, 3; + +DROP TABLE pagg_tab2; +DROP TABLE pagg_tab1;
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index e8f6cc5..107d5f3 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -195,12 +195,11 @@ static void add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, double dNumGroups, List *havingQual); static void add_paths_to_partial_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, - RelOptInfo *partial_grouped_rel, + RelOptInfo *partially_grouped_rel, AggClauseCosts *agg_partial_costs, grouping_sets_data *gd, bool can_sort, - bool can_hash, - List *havingQual); + bool can_hash); static bool can_parallel_agg(PlannerInfo *root, RelOptInfo *input_rel, RelOptInfo *grouped_rel, const AggClauseCosts *agg_costs); @@ -3838,8 +3837,7 @@ create_grouping_paths(PlannerInfo *root, add_paths_to_partial_grouping_rel(root, input_rel, partially_grouped_rel, &agg_partial_costs, - gd, can_sort, can_hash, - (List *) parse->havingQual); + gd, can_sort, can_hash); } /* Build final grouping paths */ @@ -6224,8 +6222,7 @@ add_paths_to_partial_grouping_rel(PlannerInfo *root, AggClauseCosts *agg_partial_costs, grouping_sets_data *gd, bool can_sort, - bool can_hash, - List *havingQual) + bool can_hash) { Query *parse = root->parse; Path *cheapest_partial_path = linitial(input_rel->partial_pathlist);
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index e8f6cc5..8ceef22 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -195,12 +195,13 @@ static void add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, double dNumGroups, List *havingQual); static void add_paths_to_partial_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, - RelOptInfo *partial_grouped_rel, + RelOptInfo *partially_grouped_rel, AggClauseCosts *agg_partial_costs, grouping_sets_data *gd, bool can_sort, - bool can_hash, - List *havingQual); + bool can_hash); +static void create_non_partial_paths(PlannerInfo *root, + RelOptInfo *partially_grouped_rel); static bool can_parallel_agg(PlannerInfo *root, RelOptInfo *input_rel, RelOptInfo *grouped_rel, const AggClauseCosts *agg_costs); @@ -3838,8 +3839,10 @@ create_grouping_paths(PlannerInfo *root, add_paths_to_partial_grouping_rel(root, input_rel, partially_grouped_rel, &agg_partial_costs, - gd, can_sort, can_hash, - (List *) parse->havingQual); + gd, can_sort, can_hash); + + /* Add Gather or Gather Merge atop cheapest partial path. */ + create_non_partial_paths(root, partially_grouped_rel); } /* Build final grouping paths */ @@ -6224,8 +6227,7 @@ add_paths_to_partial_grouping_rel(PlannerInfo *root, AggClauseCosts *agg_partial_costs, grouping_sets_data *gd, bool can_sort, - bool can_hash, - List *havingQual) + bool can_hash) { Query *parse = root->parse; Path *cheapest_partial_path = linitial(input_rel->partial_pathlist); @@ -6332,13 +6334,26 @@ add_paths_to_partial_grouping_rel(PlannerInfo *root, UPPERREL_PARTIAL_GROUP_AGG, input_rel, partially_grouped_rel); } +} - /* - * Try adding Gather or Gather Merge to partial paths to produce - * non-partial paths. - */ +/* + * create_non_partial_paths + * + * Try adding Gather or Gather Merge to partial paths to produce non-partial + * paths. + */ +static void +create_non_partial_paths(PlannerInfo *root, + RelOptInfo *partially_grouped_rel) +{ + Path *cheapest_partial_path; + + /* Gather all partial paths */ generate_gather_paths(root, partially_grouped_rel, true); + /* Get cheapest partial path from partially_grouped_rel */ + cheapest_partial_path = linitial(partially_grouped_rel->partial_pathlist); + /* * generate_gather_paths won't consider sorting the cheapest path to match * the group keys and then applying a Gather Merge node to the result;