This is an automated email from the ASF dual-hosted git repository. avamingli pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit 36e16d1ec4514d22f1e2e04ffc2832d5dda9b35a Author: QingMa <[email protected]> AuthorDate: Tue Sep 13 09:40:06 2022 +0800 Support multi stage hashagg for groupingsets with unsortable refs (#14056) We already support multi stage hashagg for groupingset by this commit 9a790f8a4ab443ffb2efb6fac8cb96c5ab36f5b7. There is no need to bail out if there are any unsortable refs. --- src/backend/cdb/cdbgroupingpaths.c | 13 ---- src/backend/optimizer/plan/planner.c | 20 +++-- src/test/regress/expected/groupingsets.out | 81 +++++++++++++++++++ .../regress/expected/groupingsets_optimizer.out | 91 ++++++++++++++++++++++ src/test/regress/sql/groupingsets.sql | 32 ++++++++ 5 files changed, 218 insertions(+), 19 deletions(-) diff --git a/src/backend/cdb/cdbgroupingpaths.c b/src/backend/cdb/cdbgroupingpaths.c index 65c53390b0..c987044433 100644 --- a/src/backend/cdb/cdbgroupingpaths.c +++ b/src/backend/cdb/cdbgroupingpaths.c @@ -413,19 +413,6 @@ cdb_create_multistage_grouping_paths(PlannerInfo *root, List *gcls; List *tlist; - /* GPDB_12_MERGE_FIXME: For now, bail out if there are any unsortable - * refs. PostgreSQL supports hashing with grouping sets nowadays, but - * the code in this file hasn't been updated to deal with it yet. - */ - ListCell *lc; - foreach(lc, parse->groupClause) - { - SortGroupClause *gc = lfirst_node(SortGroupClause, lc); - - if (!OidIsValid(gc->sortop)) - return; - } - gsetid = makeNode(GroupingSetId); grouping_sets_tlist = copyObject(root->processed_tlist); ctx.gsetid_sortref = add_gsetid_tlist(grouping_sets_tlist); diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 4377f04f34..e9a55afad8 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -8935,21 +8935,29 @@ make_new_rollups_for_hash_grouping_set(PlannerInfo *root, { RollupData *rollup = lfirst_node(RollupData, lc); + /* + * If there are any empty grouping sets and all non-empty grouping + * sets are unsortable, there will be a rollup containing only + * empty groups. We handle those specially below. + * Note: This case only holds when path is equal to null. + */ + if (rollup->groupClause == NIL) + { + unhashed_rollup = rollup; + break; + } + /* * If we find an unhashable rollup that's not been skipped by the * "actually sorted" check above, we can't cope; we'd need sorted * input (with a different sort order) but we can't get that here. * So bail out; we'll get a valid path from the is_sorted case * instead. - * - * The mere presence of empty grouping sets doesn't make a rollup - * unhashable (see preprocess_grouping_sets), we handle those - * specially below. */ if (!rollup->hashable) return NULL; - else - sets_data = list_concat(sets_data, list_copy(rollup->gsets_data)); + + sets_data = list_concat(sets_data, list_copy(rollup->gsets_data)); } foreach(lc, sets_data) { diff --git a/src/test/regress/expected/groupingsets.out b/src/test/regress/expected/groupingsets.out index cfa38e8011..25b9ecbd58 100644 --- a/src/test/regress/expected/groupingsets.out +++ b/src/test/regress/expected/groupingsets.out @@ -82,6 +82,13 @@ values (1,1,b'0000','1'), (2,2,b'0001','1'), (3,4,b'0010','2'), (4,8,b'0011','2'), (5,16,b'0000','2'), (6,32,b'0001','2'), (7,64,b'0010','1'), (8,128,b'0011','1'); +create temp table gstest5(id integer, v integer, + unsortable_col1 xid, unsortable_col2 xid); +insert into gstest5 +values (1,1,'3','1'), (2,2,'3','1'), + (3,4,'4','2'), (4,8,'4','2'), + (5,16,'4','2'), (6,32,'4','2'), + (7,64,'3','1'), (8,128,'3','1'); create temp table gstest_empty (a integer, b integer, v integer); create function gstest_data(v integer, out a integer, out b integer) returns setof record @@ -1324,6 +1331,80 @@ explain (costs off) Optimizer: Postgres query optimizer (13 rows) +select unsortable_col1, unsortable_col2, + grouping(unsortable_col1, unsortable_col2), + count(*), sum(v) + from gstest5 group by grouping sets ((unsortable_col1),(unsortable_col2)) + order by 3,5; + unsortable_col1 | unsortable_col2 | grouping | count | sum +-----------------+-----------------+----------+-------+----- + 4 | | 1 | 4 | 60 + 3 | | 1 | 4 | 195 + | 2 | 2 | 4 | 60 + | 1 | 2 | 4 | 195 +(4 rows) + +explain (costs off) + select unsortable_col1, unsortable_col2, + grouping(unsortable_col1, unsortable_col2), + count(*), sum(v) + from gstest5 group by grouping sets ((unsortable_col1),(unsortable_col2)) + order by 3,5; + QUERY PLAN +------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: (GROUPING(unsortable_col1, unsortable_col2)), (sum(v)) + -> Sort + Sort Key: (GROUPING(unsortable_col1, unsortable_col2)), (sum(v)) + -> Finalize HashAggregate + Group Key: unsortable_col1, unsortable_col2, (GROUPINGSET_ID()) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: (GROUPINGSET_ID()) + -> Partial HashAggregate + Hash Key: unsortable_col1 + Hash Key: unsortable_col2 + -> Seq Scan on gstest5 + Optimizer: Postgres query optimizer +(13 rows) + +select unsortable_col1, unsortable_col2, + grouping(unsortable_col1, unsortable_col2), + count(*), sum(v) + from gstest5 group by grouping sets ((unsortable_col1),(unsortable_col2),()) + order by 3,5; + unsortable_col1 | unsortable_col2 | grouping | count | sum +-----------------+-----------------+----------+-------+----- + 4 | | 1 | 4 | 60 + 3 | | 1 | 4 | 195 + | 2 | 2 | 4 | 60 + | 1 | 2 | 4 | 195 + | | 3 | 8 | 255 +(5 rows) + +explain (costs off) + select unsortable_col1, unsortable_col2, + grouping(unsortable_col1, unsortable_col2), + count(*), sum(v) + from gstest5 group by grouping sets ((unsortable_col1),(unsortable_col2),()) + order by 3,5; + QUERY PLAN +------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: (GROUPING(unsortable_col1, unsortable_col2)), (sum(v)) + -> Sort + Sort Key: (GROUPING(unsortable_col1, unsortable_col2)), (sum(v)) + -> Finalize HashAggregate + Group Key: unsortable_col1, unsortable_col2, (GROUPINGSET_ID()) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: (GROUPINGSET_ID()) + -> Partial MixedAggregate + Hash Key: unsortable_col1 + Hash Key: unsortable_col2 + Group Key: () + -> Seq Scan on gstest5 + Optimizer: Postgres query optimizer +(14 rows) + -- empty input: first is 0 rows, second 1, third 3 etc. select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a); a | b | sum | count diff --git a/src/test/regress/expected/groupingsets_optimizer.out b/src/test/regress/expected/groupingsets_optimizer.out index bd72cd0b09..b0ea867c9a 100644 --- a/src/test/regress/expected/groupingsets_optimizer.out +++ b/src/test/regress/expected/groupingsets_optimizer.out @@ -88,6 +88,15 @@ values (1,1,b'0000','1'), (2,2,b'0001','1'), (3,4,b'0010','2'), (4,8,b'0011','2'), (5,16,b'0000','2'), (6,32,b'0001','2'), (7,64,b'0010','1'), (8,128,b'0011','1'); +create temp table gstest5(id integer, v integer, + unsortable_col1 xid, unsortable_col2 xid); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +insert into gstest5 +values (1,1,'3','1'), (2,2,'3','1'), + (3,4,'4','2'), (4,8,'4','2'), + (5,16,'4','2'), (6,32,'4','2'), + (7,64,'3','1'), (8,128,'3','1'); create temp table gstest_empty (a integer, b integer, v integer); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Cloudberry Database data distribution key for this table. HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. @@ -1340,6 +1349,88 @@ explain (costs off) Optimizer: Postgres query optimizer (13 rows) +select unsortable_col1, unsortable_col2, + grouping(unsortable_col1, unsortable_col2), + count(*), sum(v) + from gstest5 group by grouping sets ((unsortable_col1),(unsortable_col2)) + order by 3,5; +INFO: GPORCA failed to produce a plan, falling back to planner +DETAIL: Feature not supported: Grouping function with multiple arguments + unsortable_col1 | unsortable_col2 | grouping | count | sum +-----------------+-----------------+----------+-------+----- + 4 | | 1 | 4 | 60 + 3 | | 1 | 4 | 195 + | 2 | 2 | 4 | 60 + | 1 | 2 | 4 | 195 +(4 rows) + +explain (costs off) + select unsortable_col1, unsortable_col2, + grouping(unsortable_col1, unsortable_col2), + count(*), sum(v) + from gstest5 group by grouping sets ((unsortable_col1),(unsortable_col2)) + order by 3,5; +INFO: GPORCA failed to produce a plan, falling back to planner +DETAIL: Feature not supported: Grouping function with multiple arguments + QUERY PLAN +------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: (GROUPING(unsortable_col1, unsortable_col2)), (sum(v)) + -> Sort + Sort Key: (GROUPING(unsortable_col1, unsortable_col2)), (sum(v)) + -> Finalize HashAggregate + Group Key: unsortable_col1, unsortable_col2, (GROUPINGSET_ID()) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: (GROUPINGSET_ID()) + -> Partial HashAggregate + Hash Key: unsortable_col1 + Hash Key: unsortable_col2 + -> Seq Scan on gstest5 + Optimizer: Postgres query optimizer +(13 rows) + +select unsortable_col1, unsortable_col2, + grouping(unsortable_col1, unsortable_col2), + count(*), sum(v) + from gstest5 group by grouping sets ((unsortable_col1),(unsortable_col2),()) + order by 3,5; +INFO: GPORCA failed to produce a plan, falling back to planner +DETAIL: Feature not supported: Grouping function with multiple arguments + unsortable_col1 | unsortable_col2 | grouping | count | sum +-----------------+-----------------+----------+-------+----- + 4 | | 1 | 4 | 60 + 3 | | 1 | 4 | 195 + | 2 | 2 | 4 | 60 + | 1 | 2 | 4 | 195 + | | 3 | 8 | 255 +(5 rows) + +explain (costs off) + select unsortable_col1, unsortable_col2, + grouping(unsortable_col1, unsortable_col2), + count(*), sum(v) + from gstest5 group by grouping sets ((unsortable_col1),(unsortable_col2),()) + order by 3,5; +INFO: GPORCA failed to produce a plan, falling back to planner +DETAIL: Feature not supported: Grouping function with multiple arguments + QUERY PLAN +------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: (GROUPING(unsortable_col1, unsortable_col2)), (sum(v)) + -> Sort + Sort Key: (GROUPING(unsortable_col1, unsortable_col2)), (sum(v)) + -> Finalize HashAggregate + Group Key: unsortable_col1, unsortable_col2, (GROUPINGSET_ID()) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: (GROUPINGSET_ID()) + -> Partial MixedAggregate + Hash Key: unsortable_col1 + Hash Key: unsortable_col2 + Group Key: () + -> Seq Scan on gstest5 + Optimizer: Postgres query optimizer +(14 rows) + -- empty input: first is 0 rows, second 1, third 3 etc. select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a); a | b | sum | count diff --git a/src/test/regress/sql/groupingsets.sql b/src/test/regress/sql/groupingsets.sql index d79815dd52..571f108b5b 100644 --- a/src/test/regress/sql/groupingsets.sql +++ b/src/test/regress/sql/groupingsets.sql @@ -105,6 +105,14 @@ values (1,1,b'0000','1'), (2,2,b'0001','1'), (5,16,b'0000','2'), (6,32,b'0001','2'), (7,64,b'0010','1'), (8,128,b'0011','1'); +create temp table gstest5(id integer, v integer, + unsortable_col1 xid, unsortable_col2 xid); +insert into gstest5 +values (1,1,'3','1'), (2,2,'3','1'), + (3,4,'4','2'), (4,8,'4','2'), + (5,16,'4','2'), (6,32,'4','2'), + (7,64,'3','1'), (8,128,'3','1'); + create temp table gstest_empty (a integer, b integer, v integer); create function gstest_data(v integer, out a integer, out b integer) @@ -433,6 +441,30 @@ explain (costs off) from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col)) order by 3,5; +select unsortable_col1, unsortable_col2, + grouping(unsortable_col1, unsortable_col2), + count(*), sum(v) + from gstest5 group by grouping sets ((unsortable_col1),(unsortable_col2)) + order by 3,5; +explain (costs off) + select unsortable_col1, unsortable_col2, + grouping(unsortable_col1, unsortable_col2), + count(*), sum(v) + from gstest5 group by grouping sets ((unsortable_col1),(unsortable_col2)) + order by 3,5; + +select unsortable_col1, unsortable_col2, + grouping(unsortable_col1, unsortable_col2), + count(*), sum(v) + from gstest5 group by grouping sets ((unsortable_col1),(unsortable_col2),()) + order by 3,5; +explain (costs off) + select unsortable_col1, unsortable_col2, + grouping(unsortable_col1, unsortable_col2), + count(*), sum(v) + from gstest5 group by grouping sets ((unsortable_col1),(unsortable_col2),()) + order by 3,5; + -- empty input: first is 0 rows, second 1, third 3 etc. select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a); explain (costs off) --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
