From bc80752c2f29e9c13828663b0e179ba86f3d74e2 Mon Sep 17 00:00:00 2001
From: David Rowley <dgrowley@gmail.com>
Date: Tue, 14 Oct 2025 18:24:54 +1300
Subject: [PATCH v1] Tidyup truncate_useless_pathkeys() function

This removes a few static functions and replaces them with 2 functions
which aim to be more reusable.  The upper planner's pathkey requirements
can be simplified down to operations which require pathkeys in the same
order as the pathkeys for the given operation, and operations which can
make use of a Path's useful pathkeys in any order.

Here we also add some short-circuiting paths to
truncate_useless_pathkeys().  Since the function will only return a
trimmed down pathkey list if *no* operation finds some of the pathkeys
useful, we might as well exit early as soon as we discover that some
operation finds *all* of the pathkeys will be useful.  I've also tried
to place the pathkey checks roughly in order of cheapest-first in the
hopes that the more expensive checks might get skipped once in a while
when the function discovers all pathkeys are useful.

In passing fixup has_useful_pathkeys() as it seems to have grown a
redundant check for group_pathkeys.  This isn't needed as
standard_qp_callback will set query_pathkeys if there's any requirement
to have group_pathkeys.
---
 src/backend/optimizer/path/pathkeys.c | 213 ++++++++++----------------
 1 file changed, 82 insertions(+), 131 deletions(-)

diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 879dcb4608e..d002f23a62c 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -2147,174 +2147,126 @@ right_merge_direction(PlannerInfo *root, PathKey *pathkey)
 }
 
 /*
- * pathkeys_useful_for_ordering
- *		Count the number of pathkeys that are useful for meeting the
- *		query's requested output ordering.
- *
- * Because we the have the possibility of incremental sort, a prefix list of
- * keys is potentially useful for improving the performance of the requested
- * ordering. Thus we return 0, if no valuable keys are found, or the number
- * of leading keys shared by the list and the requested ordering.
+ * count_common_leading_pathkeys_ordered
+ *		Returns the number of leading pathkeys which both lists have in common
  */
 static int
-pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
+count_common_leading_pathkeys_ordered(List *keys1, List *keys2)
 {
-	int			n_common_pathkeys;
+	int			ncommon;
 
-	(void) pathkeys_count_contained_in(root->sort_pathkeys, pathkeys,
-									   &n_common_pathkeys);
+	(void) pathkeys_count_contained_in(keys1, keys2, &ncommon);
 
-	return n_common_pathkeys;
+	return ncommon;
 }
 
 /*
- * pathkeys_useful_for_windowing
- *		Count the number of pathkeys that are useful for meeting the
- *		query's desired sort order for window function evaluation.
+ * count_common_leading_pathkeys_unordered
+ *		Returns the number of leading PathKeys in 'keys2' which exist in
+ *		'keys1'.
  */
 static int
-pathkeys_useful_for_windowing(PlannerInfo *root, List *pathkeys)
+count_common_leading_pathkeys_unordered(List *keys1, List *keys2)
 {
-	int			n_common_pathkeys;
+	int			ncommon = 0;
 
-	(void) pathkeys_count_contained_in(root->window_pathkeys,
-									   pathkeys,
-									   &n_common_pathkeys);
-
-	return n_common_pathkeys;
-}
-
-/*
- * pathkeys_useful_for_grouping
- *		Count the number of pathkeys that are useful for grouping (instead of
- *		explicit sort)
- *
- * Group pathkeys could be reordered to benefit from the ordering. The
- * ordering may not be "complete" and may require incremental sort, but that's
- * fine. So we simply count prefix pathkeys with a matching group key, and
- * stop once we find the first pathkey without a match.
- *
- * So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines (a,b)
- * pathkeys are useful for grouping, and we might do incremental sort to get
- * path ordered by (a,b,e).
- *
- * This logic is necessary to retain paths with ordering not matching grouping
- * keys directly, without the reordering.
- *
- * Returns the length of pathkey prefix with matching group keys.
- */
-static int
-pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
-{
-	ListCell   *key;
-	int			n = 0;
-
-	/* no special ordering requested for grouping */
-	if (root->group_pathkeys == NIL)
+	/* No pointing in searching keys2 when keys1 is empty */
+	if (keys1 == NIL)
 		return 0;
 
-	/* walk the pathkeys and search for matching group key */
-	foreach(key, pathkeys)
+	/* walk keys2 and search for matching PathKeys in keys1 */
+	foreach_node(PathKey, pathkey, keys2)
 	{
-		PathKey    *pathkey = (PathKey *) lfirst(key);
-
-		/* no matching group key, we're done */
-		if (!list_member_ptr(root->group_pathkeys, pathkey))
+		/*
+		 * return the number of matches so far as soon as keys1 doesn't
+		 * contain the given keys2 key.
+		 */
+		if (!list_member_ptr(keys1, pathkey))
 			break;
 
-		n++;
+		ncommon++;
 	}
 
-	return n;
+	return ncommon;
 }
 
 /*
- * pathkeys_useful_for_distinct
- *		Count the number of pathkeys that are useful for DISTINCT or DISTINCT
- *		ON clause.
- *
- * DISTINCT keys could be reordered to benefit from the given pathkey list.  As
- * with pathkeys_useful_for_grouping, we return the number of leading keys in
- * the list that are shared by the distinctClause pathkeys.
+ * truncate_useless_pathkeys
+ *		Shorten the given PathKey List to just the useful PathKeys.  If all
+ *		PathKeys are useful, return the input List, otherwise return a new
+ *		List containing only the useful PathKeys.
  */
-static int
-pathkeys_useful_for_distinct(PlannerInfo *root, List *pathkeys)
+List *
+truncate_useless_pathkeys(PlannerInfo *root,
+						  RelOptInfo *rel,
+						  List *pathkeys)
 {
-	int			n_common_pathkeys;
+	int			nuseful;
+	int			nuseful2;
+	int			ntotal = list_length(pathkeys);
 
 	/*
-	 * distinct_pathkeys may have become empty if all of the pathkeys were
-	 * determined to be redundant.  Return 0 in this case.
+	 * Here we determine how many items in 'pathkeys' might be useful for
+	 * various Path sort ordering requirements the planner has.  Operations
+	 * such as ORDER BY require a Path's pathkeys to match the PathKeys of the
+	 * ORDER BY in the same order, however operations such as GROUP BY and
+	 * DISTINCT are less critical as a Unique atop of a GroupAggregate only
+	 * needs to care that all PathKeys exist, and doesn't need to care if
+	 * they're in the same order as the clause in the query.
 	 */
-	if (root->distinct_pathkeys == NIL)
-		return 0;
+	nuseful = count_common_leading_pathkeys_ordered(root->sort_pathkeys,
+													pathkeys);
 
-	/* walk the pathkeys and search for matching DISTINCT key */
-	n_common_pathkeys = 0;
-	foreach_node(PathKey, pathkey, pathkeys)
-	{
-		/* no matching DISTINCT key, we're done */
-		if (!list_member_ptr(root->distinct_pathkeys, pathkey))
-			break;
+	/* Short-circuit at any point we discover *all* pathkeys are useful */
+	if (nuseful == ntotal)
+		return pathkeys;
 
-		n_common_pathkeys++;
-	}
+	nuseful2 = count_common_leading_pathkeys_ordered(root->window_pathkeys,
+													 pathkeys);
+	if (nuseful2 == ntotal)
+		return pathkeys;
 
-	return n_common_pathkeys;
-}
+	nuseful = Max(nuseful, nuseful2);
+	nuseful2 = count_common_leading_pathkeys_ordered(root->setop_pathkeys,
+													 pathkeys);
+	if (nuseful2 == ntotal)
+		return pathkeys;
 
-/*
- * pathkeys_useful_for_setop
- *		Count the number of leading common pathkeys root's 'setop_pathkeys' in
- *		'pathkeys'.
- */
-static int
-pathkeys_useful_for_setop(PlannerInfo *root, List *pathkeys)
-{
-	int			n_common_pathkeys;
+	nuseful = Max(nuseful, nuseful2);
 
-	(void) pathkeys_count_contained_in(root->setop_pathkeys, pathkeys,
-									   &n_common_pathkeys);
+	/*
+	 * Check if these pathkeys are useful for GROUP BY or DISTINCT.  The order
+	 * of the pathkeys does not matter here as GroupAggregate for these
+	 * operations can take advantage of Paths presorted by any of the GROUP
+	 * BY/DISTINCT pathkeys.
+	 */
+	nuseful2 = count_common_leading_pathkeys_unordered(root->group_pathkeys,
+													   pathkeys);
+	if (nuseful2 == ntotal)
+		return pathkeys;
 
-	return n_common_pathkeys;
-}
+	nuseful = Max(nuseful, nuseful2);
+	nuseful2 = count_common_leading_pathkeys_unordered(root->distinct_pathkeys,
+													   pathkeys);
 
-/*
- * truncate_useless_pathkeys
- *		Shorten the given pathkey list to just the useful pathkeys.
- */
-List *
-truncate_useless_pathkeys(PlannerInfo *root,
-						  RelOptInfo *rel,
-						  List *pathkeys)
-{
-	int			nuseful;
-	int			nuseful2;
+	if (nuseful2 == ntotal)
+		return pathkeys;
 
-	nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
-	nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
-	if (nuseful2 > nuseful)
-		nuseful = nuseful2;
-	nuseful2 = pathkeys_useful_for_windowing(root, pathkeys);
-	if (nuseful2 > nuseful)
-		nuseful = nuseful2;
-	nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
-	if (nuseful2 > nuseful)
-		nuseful = nuseful2;
-	nuseful2 = pathkeys_useful_for_distinct(root, pathkeys);
-	if (nuseful2 > nuseful)
-		nuseful = nuseful2;
-	nuseful2 = pathkeys_useful_for_setop(root, pathkeys);
-	if (nuseful2 > nuseful)
-		nuseful = nuseful2;
+	nuseful = Max(nuseful, nuseful2);
+
+	/*
+	 * Finally, check how many PathKeys might be useful for Merge Joins.  This
+	 * is a bit more expensive, so do it last and only if we've not figured
+	 * out that all the pathkeys are useful already.
+	 */
+	nuseful2 = pathkeys_useful_for_merging(root, rel, pathkeys);
+	nuseful = Max(nuseful, nuseful2);
 
 	/*
 	 * Note: not safe to modify input list destructively, but we can avoid
 	 * copying the list if we're not actually going to change it
 	 */
-	if (nuseful == 0)
-		return NIL;
-	else if (nuseful == list_length(pathkeys))
+	if (nuseful == ntotal)
 		return pathkeys;
 	else
 		return list_copy_head(pathkeys, nuseful);
@@ -2340,9 +2292,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
 {
 	if (rel->joininfo != NIL || rel->has_eclass_joins)
 		return true;			/* might be able to use pathkeys for merging */
-	if (root->group_pathkeys != NIL)
-		return true;			/* might be able to use pathkeys for grouping */
 	if (root->query_pathkeys != NIL)
-		return true;			/* might be able to use them for ordering */
+		return true;			/* the upper planner might need them */
+
 	return false;				/* definitely useless */
 }
-- 
2.43.0

