On Tue, 8 Nov 2022 at 19:51, Richard Guo <guofengli...@gmail.com> wrote:
> For unsorted paths, the original logic here is to explicitly add a Sort
> path only for the cheapest-total path.  This patch changes that and may
> add a Sort path for other paths besides the cheapest-total path.  I
> think this may introduce in some unnecessary path candidates.

Yeah, you're right. The patch shouldn't change that.  I've adjusted
the attached patch so that part works more like it did before.

v2 attached.

Thanks

David
diff --git a/src/backend/optimizer/path/costsize.c 
b/src/backend/optimizer/path/costsize.c
index 4c6b1d1f55..fe8573d92c 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1959,8 +1959,8 @@ cost_incremental_sort(Path *path,
                                          double input_tuples, int width, Cost 
comparison_cost, int sort_mem,
                                          double limit_tuples)
 {
-       Cost            startup_cost = 0,
-                               run_cost = 0,
+       Cost            startup_cost,
+                               run_cost,
                                input_run_cost = input_total_cost - 
input_startup_cost;
        double          group_tuples,
                                input_groups;
@@ -1969,10 +1969,9 @@ cost_incremental_sort(Path *path,
                                group_input_run_cost;
        List       *presortedExprs = NIL;
        ListCell   *l;
-       int                     i = 0;
        bool            unknown_varno = false;
 
-       Assert(presorted_keys != 0);
+       Assert(presorted_keys > 0 && presorted_keys < list_length(pathkeys));
 
        /*
         * We want to be sure the cost of a sort is never estimated as zero, 
even
@@ -2025,12 +2024,11 @@ cost_incremental_sort(Path *path,
                /* expression not containing any Vars with "varno 0" */
                presortedExprs = lappend(presortedExprs, member->em_expr);
 
-               i++;
-               if (i >= presorted_keys)
+               if (foreach_current_index(l) + 1 >= presorted_keys)
                        break;
        }
 
-       /* Estimate number of groups with equal presorted keys. */
+       /* Estimate the number of groups with equal presorted keys. */
        if (!unknown_varno)
                input_groups = estimate_num_groups(root, presortedExprs, 
input_tuples,
                                                                                
   NULL, NULL);
@@ -2039,22 +2037,19 @@ cost_incremental_sort(Path *path,
        group_input_run_cost = input_run_cost / input_groups;
 
        /*
-        * Estimate average cost of sorting of one group where presorted keys 
are
-        * equal.  Incremental sort is sensitive to distribution of tuples to 
the
-        * groups, where we're relying on quite rough assumptions.  Thus, we're
-        * pessimistic about incremental sort performance and increase its 
average
-        * group size by half.
+        * Estimate the average cost of sorting of one group where presorted 
keys
+        * are equal.
         */
        cost_tuplesort(&group_startup_cost, &group_run_cost,
-                                  1.5 * group_tuples, width, comparison_cost, 
sort_mem,
+                                  group_tuples, width, comparison_cost, 
sort_mem,
                                   limit_tuples);
 
        /*
         * Startup cost of incremental sort is the startup cost of its first 
group
         * plus the cost of its input.
         */
-       startup_cost += group_startup_cost
-               + input_startup_cost + group_input_run_cost;
+       startup_cost = group_startup_cost + input_startup_cost +
+                                  group_input_run_cost;
 
        /*
         * After we started producing tuples from the first group, the cost of
@@ -2062,17 +2057,20 @@ cost_incremental_sort(Path *path,
         * group, plus the total cost to process the remaining groups, plus the
         * remaining cost of input.
         */
-       run_cost += group_run_cost
-               + (group_run_cost + group_startup_cost) * (input_groups - 1)
-               + group_input_run_cost * (input_groups - 1);
+       run_cost = group_run_cost + (group_run_cost + group_startup_cost) *
+                          (input_groups - 1) + group_input_run_cost * 
(input_groups - 1);
 
        /*
         * Incremental sort adds some overhead by itself. Firstly, it has to
         * detect the sort groups. This is roughly equal to one extra copy and
-        * comparison per tuple. Secondly, it has to reset the tuplesort context
-        * for every group.
+        * comparison per tuple.
         */
        run_cost += (cpu_tuple_cost + comparison_cost) * input_tuples;
+
+       /*
+        * Additionally, we charge double cpu_tuple_cost for each input group to
+        * account for the tuplesort_reset that's performed after each group.
+        */
        run_cost += 2.0 * cpu_tuple_cost * input_groups;
 
        path->rows = input_tuples;
diff --git a/src/backend/optimizer/plan/planner.c 
b/src/backend/optimizer/plan/planner.c
index 493a3af0fa..67c87e2444 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6491,12 +6491,12 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo 
*input_rel,
        {
                /*
                 * Use any available suitably-sorted path as input, and also 
consider
-                * sorting the cheapest-total path.
+                * sorting the cheapest-total path and incremental sort on any 
paths
+                * with pre-sorted keys.
                 */
                foreach(lc, input_rel->pathlist)
                {
                        Path       *path = (Path *) lfirst(lc);
-                       Path       *path_original = path;
                        bool            is_sorted;
                        int                     presorted_keys;
 
@@ -6504,90 +6504,40 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo 
*input_rel,
                                                                                
                        path->pathkeys,
                                                                                
                        &presorted_keys);
 
-                       if (path == cheapest_path || is_sorted)
+                       if (!is_sorted)
                        {
-                               /* Sort the cheapest-total path if it isn't 
already sorted */
-                               if (!is_sorted)
+                               /*
+                                * Try at least sorting the cheapest path and 
also try
+                                * incrementally sorting any path which is 
partially sorted
+                                * already (no need to worry about pre-sorted 
keys when
+                                * incremental sort is disabled).
+                                */
+                               if (path != cheapest_path &&
+                                       (presorted_keys == 0 || 
!enable_incremental_sort))
+                                       continue;
+
+                               /*
+                                * We've no need to consider both a sort and 
incremental sort.
+                                * We'll just do a sort if there are no 
pre-sorted keys and
+                                * an incremental sort when there are 
pre-sorted keys.
+                                */
+                               if (presorted_keys == 0 || 
!enable_incremental_sort)
+                               {
                                        path = (Path *) create_sort_path(root,
                                                                                
                         grouped_rel,
                                                                                
                         path,
                                                                                
                         root->group_pathkeys,
                                                                                
                         -1.0);
-
-                               /* Now decide what to stick atop it */
-                               if (parse->groupingSets)
-                               {
-                                       consider_groupingsets_paths(root, 
grouped_rel,
-                                                                               
                path, true, can_hash,
-                                                                               
                gd, agg_costs, dNumGroups);
-                               }
-                               else if (parse->hasAggs)
-                               {
-                                       /*
-                                        * We have aggregation, possibly with 
plain GROUP BY. Make
-                                        * an AggPath.
-                                        */
-                                       add_path(grouped_rel, (Path *)
-                                                        create_agg_path(root,
-                                                                               
         grouped_rel,
-                                                                               
         path,
-                                                                               
         grouped_rel->reltarget,
-                                                                               
         parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-                                                                               
         AGGSPLIT_SIMPLE,
-                                                                               
         parse->groupClause,
-                                                                               
         havingQual,
-                                                                               
         agg_costs,
-                                                                               
         dNumGroups));
-                               }
-                               else if (parse->groupClause)
-                               {
-                                       /*
-                                        * We have GROUP BY without aggregation 
or grouping sets.
-                                        * Make a GroupPath.
-                                        */
-                                       add_path(grouped_rel, (Path *)
-                                                        create_group_path(root,
-                                                                               
           grouped_rel,
-                                                                               
           path,
-                                                                               
           parse->groupClause,
-                                                                               
           havingQual,
-                                                                               
           dNumGroups));
                                }
                                else
-                               {
-                                       /* Other cases should have been handled 
above */
-                                       Assert(false);
-                               }
+                                       path = (Path *) 
create_incremental_sort_path(root,
+                                                                               
                                                 grouped_rel,
+                                                                               
                                                 path,
+                                                                               
                                                 root->group_pathkeys,
+                                                                               
                                                 presorted_keys,
+                                                                               
                                                 -1.0);
                        }
 
-                       /*
-                        * Now we may consider incremental sort on this path, 
but only
-                        * when the path is not already sorted and when 
incremental sort
-                        * is enabled.
-                        */
-                       if (is_sorted || !enable_incremental_sort)
-                               continue;
-
-                       /* Restore the input path (we might have added Sort on 
top). */
-                       path = path_original;
-
-                       /* no shared prefix, no point in building incremental 
sort */
-                       if (presorted_keys == 0)
-                               continue;
-
-                       /*
-                        * We should have already excluded pathkeys of length 1 
because
-                        * then presorted_keys > 0 would imply is_sorted was 
true.
-                        */
-                       Assert(list_length(root->group_pathkeys) != 1);
-
-                       path = (Path *) create_incremental_sort_path(root,
-                                                                               
                                 grouped_rel,
-                                                                               
                                 path,
-                                                                               
                                 root->group_pathkeys,
-                                                                               
                                 presorted_keys,
-                                                                               
                                 -1.0);
-
                        /* Now decide what to stick atop it */
                        if (parse->groupingSets)
                        {
@@ -6602,16 +6552,16 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo 
*input_rel,
                                 * AggPath.
                                 */
                                add_path(grouped_rel, (Path *)
-                                                create_agg_path(root,
-                                                                               
 grouped_rel,
-                                                                               
 path,
-                                                                               
 grouped_rel->reltarget,
-                                                                               
 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
-                                                                               
 AGGSPLIT_SIMPLE,
-                                                                               
 parse->groupClause,
-                                                                               
 havingQual,
-                                                                               
 agg_costs,
-                                                                               
 dNumGroups));
+                                                       create_agg_path(root,
+                                                                               
        grouped_rel,
+                                                                               
        path,
+                                                                               
        grouped_rel->reltarget,
+                                                                               
        parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+                                                                               
        AGGSPLIT_SIMPLE,
+                                                                               
        parse->groupClause,
+                                                                               
        havingQual,
+                                                                               
        agg_costs,
+                                                                               
        dNumGroups));
                        }
                        else if (parse->groupClause)
                        {
@@ -6620,12 +6570,12 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo 
*input_rel,
                                 * a GroupPath.
                                 */
                                add_path(grouped_rel, (Path *)
-                                                create_group_path(root,
-                                                                               
   grouped_rel,
-                                                                               
   path,
-                                                                               
   parse->groupClause,
-                                                                               
   havingQual,
-                                                                               
   dNumGroups));
+                                                       create_group_path(root,
+                                                                               
        grouped_rel,
+                                                                               
        path,
+                                                                               
        parse->groupClause,
+                                                                               
        havingQual,
+                                                                               
        dNumGroups));
                        }
                        else
                        {
diff --git a/src/test/regress/expected/incremental_sort.out 
b/src/test/regress/expected/incremental_sort.out
index 0a631124c2..1a1e8b2365 100644
--- a/src/test/regress/expected/incremental_sort.out
+++ b/src/test/regress/expected/incremental_sort.out
@@ -1,16 +1,3 @@
--- When we have to sort the entire table, incremental sort will
--- be slower than plain sort, so it should not be used.
-explain (costs off)
-select * from (select * from tenk1 order by four) t order by four, ten;
-            QUERY PLAN             
------------------------------------
- Sort
-   Sort Key: tenk1.four, tenk1.ten
-   ->  Sort
-         Sort Key: tenk1.four
-         ->  Seq Scan on tenk1
-(5 rows)
-
 -- When there is a LIMIT clause, incremental sort is beneficial because
 -- it only has to sort some of the groups, and not the entire table.
 explain (costs off)
diff --git a/src/test/regress/sql/incremental_sort.sql 
b/src/test/regress/sql/incremental_sort.sql
index 284a354dbb..071f8a5268 100644
--- a/src/test/regress/sql/incremental_sort.sql
+++ b/src/test/regress/sql/incremental_sort.sql
@@ -1,8 +1,3 @@
--- When we have to sort the entire table, incremental sort will
--- be slower than plain sort, so it should not be used.
-explain (costs off)
-select * from (select * from tenk1 order by four) t order by four, ten;
-
 -- When there is a LIMIT clause, incremental sort is beneficial because
 -- it only has to sort some of the groups, and not the entire table.
 explain (costs off)

Reply via email to