Hi Robert,
On Mon, Feb 26, 2018 at 8:03 PM, Robert Haas <[email protected]> wrote:
>
> Committed after incorporating your other fixes and updating the
> optimizer README.
>
Thanks Robert.
Off-list Rajkumar has reported an issue. When we have enable_hashagg set to
false, and Gather Merge path is chosen, it ended-up in an error saying
"ERROR: Aggref found in non-Agg plan node".
I had a look over his provided testcase and observed that when we create a
Gather Merge path over a cheapest partial path by sorting it explicitly as
generate_gather_paths won't consider it, we accidentally used cheapest
partial path from the input_rel to create a Gather Merge; instead we need a
cheapest partial path from the partially_grouped_rel.
Attached fix_aggref_in_non-agg_error.patch fixing this.
test_for_aggref_in_non-agg_error.patch has a testcase reported by Rajkumar
which I have added in a aggregates.sql.
While doing so, I have observed few cleanup changes, added those in
misc_cleanup.patch.
---
While re-basing my partitionwise aggregate changes, I observed that when we
want to create partial aggregation paths for a child partition, we don't
need to add Gather or Gather Merge on top of it as we first want to append
them all and then want to stick a gather on it. So it will be better to
have that code part in a separate function so that we can call it from
required places.
I have attached patch (create_non_partial_paths.patch) for it including all
above fix.
Thanks
> --
> Robert Haas
> EnterpriseDB: http://www.enterprisedb.com
> The Enterprise PostgreSQL Company
>
--
Jeevan Chalke
Technical Architect, Product Development
EnterpriseDB Corporation
The Enterprise PostgreSQL Company
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index e8f6cc5..8190675 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6339,6 +6339,9 @@ add_paths_to_partial_grouping_rel(PlannerInfo *root,
*/
generate_gather_paths(root, partially_grouped_rel, true);
+ /* Get cheapest partial path from partially_grouped_rel */
+ cheapest_partial_path = linitial(partially_grouped_rel->partial_pathlist);
+
/*
* generate_gather_paths won't consider sorting the cheapest path to match
* the group keys and then applying a Gather Merge node to the result;
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index f85e913..fa56f6b 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -2065,3 +2065,48 @@ SELECT balk(hundred) FROM tenk1;
(1 row)
ROLLBACK;
+-- Test for partially_grouped_rel
+SET parallel_setup_cost=0;
+SET parallel_tuple_cost=0;
+SET enable_hashagg TO false;
+CREATE TABLE pagg_tab1(x int, y int) PARTITION BY RANGE(x);
+CREATE TABLE pagg_tab1_p1 PARTITION OF pagg_tab1 FOR VALUES FROM (0) TO (10);
+CREATE TABLE pagg_tab1_p2 PARTITION OF pagg_tab1 FOR VALUES FROM (10) TO (20);
+CREATE TABLE pagg_tab1_p3 PARTITION OF pagg_tab1 FOR VALUES FROM (20) TO (30);
+CREATE TABLE pagg_tab2(x int, y int) PARTITION BY RANGE(y);
+CREATE TABLE pagg_tab2_p1 PARTITION OF pagg_tab2 FOR VALUES FROM (0) TO (10);
+CREATE TABLE pagg_tab2_p2 PARTITION OF pagg_tab2 FOR VALUES FROM (10) TO (20);
+CREATE TABLE pagg_tab2_p3 PARTITION OF pagg_tab2 FOR VALUES FROM (20) TO (30);
+INSERT INTO pagg_tab1 SELECT i%30, i FROM generate_series(0, 29) i;
+INSERT INTO pagg_tab2 SELECT i, i%30 FROM generate_series(0, 29) i;
+ANALYZE pagg_tab1;
+ANALYZE pagg_tab2;
+EXPLAIN (COSTS OFF)
+SELECT t1.y, sum(t1.x), COUNT(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.y ORDER BY 1, 2, 3;
+ QUERY PLAN
+----------------------------------------------------------------------------------------
+ Sort
+ Sort Key: t1.y, (sum(t1.x)), (count(*))
+ -> Finalize GroupAggregate
+ Group Key: t1.y
+ -> Gather Merge
+ Workers Planned: 2
+ -> Partial GroupAggregate
+ Group Key: t1.y
+ -> Sort
+ Sort Key: t1.y
+ -> Parallel Hash Join
+ Hash Cond: (t1.x = t2.y)
+ -> Parallel Append
+ -> Parallel Seq Scan on pagg_tab1_p1 t1
+ -> Parallel Seq Scan on pagg_tab1_p2 t1_1
+ -> Parallel Seq Scan on pagg_tab1_p3 t1_2
+ -> Parallel Hash
+ -> Parallel Append
+ -> Parallel Seq Scan on pagg_tab2_p1 t2
+ -> Parallel Seq Scan on pagg_tab2_p2 t2_1
+ -> Parallel Seq Scan on pagg_tab2_p3 t2_2
+(21 rows)
+
+DROP TABLE pagg_tab2;
+DROP TABLE pagg_tab1;
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 506d044..17c25e5 100644
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -907,3 +907,30 @@ EXPLAIN (COSTS OFF) SELECT balk(hundred) FROM tenk1;
SELECT balk(hundred) FROM tenk1;
ROLLBACK;
+
+-- Test for partially_grouped_rel
+SET parallel_setup_cost=0;
+SET parallel_tuple_cost=0;
+SET enable_hashagg TO false;
+
+CREATE TABLE pagg_tab1(x int, y int) PARTITION BY RANGE(x);
+CREATE TABLE pagg_tab1_p1 PARTITION OF pagg_tab1 FOR VALUES FROM (0) TO (10);
+CREATE TABLE pagg_tab1_p2 PARTITION OF pagg_tab1 FOR VALUES FROM (10) TO (20);
+CREATE TABLE pagg_tab1_p3 PARTITION OF pagg_tab1 FOR VALUES FROM (20) TO (30);
+
+CREATE TABLE pagg_tab2(x int, y int) PARTITION BY RANGE(y);
+CREATE TABLE pagg_tab2_p1 PARTITION OF pagg_tab2 FOR VALUES FROM (0) TO (10);
+CREATE TABLE pagg_tab2_p2 PARTITION OF pagg_tab2 FOR VALUES FROM (10) TO (20);
+CREATE TABLE pagg_tab2_p3 PARTITION OF pagg_tab2 FOR VALUES FROM (20) TO (30);
+
+INSERT INTO pagg_tab1 SELECT i%30, i FROM generate_series(0, 29) i;
+INSERT INTO pagg_tab2 SELECT i, i%30 FROM generate_series(0, 29) i;
+
+ANALYZE pagg_tab1;
+ANALYZE pagg_tab2;
+
+EXPLAIN (COSTS OFF)
+SELECT t1.y, sum(t1.x), COUNT(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.y ORDER BY 1, 2, 3;
+
+DROP TABLE pagg_tab2;
+DROP TABLE pagg_tab1;
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index e8f6cc5..107d5f3 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -195,12 +195,11 @@ static void add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
double dNumGroups, List *havingQual);
static void add_paths_to_partial_grouping_rel(PlannerInfo *root,
RelOptInfo *input_rel,
- RelOptInfo *partial_grouped_rel,
+ RelOptInfo *partially_grouped_rel,
AggClauseCosts *agg_partial_costs,
grouping_sets_data *gd,
bool can_sort,
- bool can_hash,
- List *havingQual);
+ bool can_hash);
static bool can_parallel_agg(PlannerInfo *root, RelOptInfo *input_rel,
RelOptInfo *grouped_rel, const AggClauseCosts *agg_costs);
@@ -3838,8 +3837,7 @@ create_grouping_paths(PlannerInfo *root,
add_paths_to_partial_grouping_rel(root, input_rel,
partially_grouped_rel,
&agg_partial_costs,
- gd, can_sort, can_hash,
- (List *) parse->havingQual);
+ gd, can_sort, can_hash);
}
/* Build final grouping paths */
@@ -6224,8 +6222,7 @@ add_paths_to_partial_grouping_rel(PlannerInfo *root,
AggClauseCosts *agg_partial_costs,
grouping_sets_data *gd,
bool can_sort,
- bool can_hash,
- List *havingQual)
+ bool can_hash)
{
Query *parse = root->parse;
Path *cheapest_partial_path = linitial(input_rel->partial_pathlist);
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index e8f6cc5..8ceef22 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -195,12 +195,13 @@ static void add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
double dNumGroups, List *havingQual);
static void add_paths_to_partial_grouping_rel(PlannerInfo *root,
RelOptInfo *input_rel,
- RelOptInfo *partial_grouped_rel,
+ RelOptInfo *partially_grouped_rel,
AggClauseCosts *agg_partial_costs,
grouping_sets_data *gd,
bool can_sort,
- bool can_hash,
- List *havingQual);
+ bool can_hash);
+static void create_non_partial_paths(PlannerInfo *root,
+ RelOptInfo *partially_grouped_rel);
static bool can_parallel_agg(PlannerInfo *root, RelOptInfo *input_rel,
RelOptInfo *grouped_rel, const AggClauseCosts *agg_costs);
@@ -3838,8 +3839,10 @@ create_grouping_paths(PlannerInfo *root,
add_paths_to_partial_grouping_rel(root, input_rel,
partially_grouped_rel,
&agg_partial_costs,
- gd, can_sort, can_hash,
- (List *) parse->havingQual);
+ gd, can_sort, can_hash);
+
+ /* Add Gather or Gather Merge atop cheapest partial path. */
+ create_non_partial_paths(root, partially_grouped_rel);
}
/* Build final grouping paths */
@@ -6224,8 +6227,7 @@ add_paths_to_partial_grouping_rel(PlannerInfo *root,
AggClauseCosts *agg_partial_costs,
grouping_sets_data *gd,
bool can_sort,
- bool can_hash,
- List *havingQual)
+ bool can_hash)
{
Query *parse = root->parse;
Path *cheapest_partial_path = linitial(input_rel->partial_pathlist);
@@ -6332,13 +6334,26 @@ add_paths_to_partial_grouping_rel(PlannerInfo *root,
UPPERREL_PARTIAL_GROUP_AGG,
input_rel, partially_grouped_rel);
}
+}
- /*
- * Try adding Gather or Gather Merge to partial paths to produce
- * non-partial paths.
- */
+/*
+ * create_non_partial_paths
+ *
+ * Try adding Gather or Gather Merge to partial paths to produce non-partial
+ * paths.
+ */
+static void
+create_non_partial_paths(PlannerInfo *root,
+ RelOptInfo *partially_grouped_rel)
+{
+ Path *cheapest_partial_path;
+
+ /* Gather all partial paths */
generate_gather_paths(root, partially_grouped_rel, true);
+ /* Get cheapest partial path from partially_grouped_rel */
+ cheapest_partial_path = linitial(partially_grouped_rel->partial_pathlist);
+
/*
* generate_gather_paths won't consider sorting the cheapest path to match
* the group keys and then applying a Gather Merge node to the result;