From 4ba8c1d67b323c9c2d9e41a2aad06d95df611bcb Mon Sep 17 00:00:00 2001
From: Alexandra Wang <alexandra.wang.oss@gmail.com>
Date: Wed, 23 Oct 2024 14:26:40 -0500
Subject: [PATCH v1] Remove unnecessary Sort node above Index Scan of
 btree-compatible AM

When determining whether two sort keys are equivalent, they may not
belong to the same operator family but could still use the same
underlying sort operator.  In such cases, a simple pointer comparison
of two pathkeys marks them as different, leading to the addition of an
unnecessary Sort node, even though the path is already sorted as
desired.

This patch improves pathkeys_contained_in() and
pathkeys_count_contained_in() by comparing the sort operators
directly, preventing redundant sorting.
---
 src/backend/optimizer/path/costsize.c   |  4 +--
 src/backend/optimizer/path/pathkeys.c   | 43 +++++++++++++++++++++++--
 src/backend/optimizer/plan/createplan.c |  6 ++--
 3 files changed, 43 insertions(+), 10 deletions(-)

diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index b7f6ab40dec..e95762ac9a4 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -3607,9 +3607,7 @@ initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace,
 		opathkey = (PathKey *) linitial(opathkeys);
 		ipathkey = (PathKey *) linitial(ipathkeys);
 		/* debugging check */
-		if (opathkey->pk_opfamily != ipathkey->pk_opfamily ||
-			opathkey->pk_eclass->ec_collation != ipathkey->pk_eclass->ec_collation ||
-			opathkey->pk_strategy != ipathkey->pk_strategy ||
+		if (opathkey->pk_eclass->ec_collation != ipathkey->pk_eclass->ec_collation ||
 			opathkey->pk_cmptype != ipathkey->pk_cmptype ||
 			opathkey->pk_nulls_first != ipathkey->pk_nulls_first)
 			elog(ERROR, "left and right pathkeys do not match in mergejoin");
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index a0826ed5008..a709148ea9f 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -39,7 +39,7 @@ static bool matches_boolean_partition_clause(RestrictInfo *rinfo,
 											 int partkeycol);
 static Var *find_var_for_subquery_tle(RelOptInfo *rel, TargetEntry *tle);
 static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey);
-
+static bool pathkeys_have_same_sortop(PathKey *pk1, PathKey *pk2);
 
 /****************************************************************************
  *		PATHKEY CONSTRUCTION AND REDUNDANCY TESTING
@@ -355,7 +355,7 @@ compare_pathkeys(List *keys1, List *keys2)
 		PathKey    *pathkey1 = (PathKey *) lfirst(key1);
 		PathKey    *pathkey2 = (PathKey *) lfirst(key2);
 
-		if (pathkey1 != pathkey2)
+		if (!pathkeys_have_same_sortop(pathkey1, pathkey2))
 			return PATHKEYS_DIFFERENT;	/* no need to keep looking */
 	}
 
@@ -585,6 +585,43 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
 	return infos;
 }
 
+/*
+ * pathkeys_have_same_sortop
+ *
+ * Determines whether two PathKey objects should be considered equivalent
+ * for sorting purposes. Two PathKeys are treated as the same if:
+ *  - They are identical pointers OR
+ *  - They belong to the same equivalence class. AND
+ *  - They have the same comparison type and null ordering. AND
+ *  - They have the same sorting operator, even if they belong to different
+ *    operator families.
+ *
+ * The function retrieves the sort operator (sortop) for both PathKeys using
+ * their operator family, datatype, and strategy. If both PathKeys resolve to
+ * the same sort operator, they are considered equivalent.
+ */
+static bool
+pathkeys_have_same_sortop(PathKey *pk1, PathKey *pk2)
+{
+	Oid			pk_op1;
+	Oid			pk_op2;
+	Oid			pk_datatype;
+
+	if (pk1 == pk2)
+		return true;
+
+	if (pk1->pk_eclass != pk2->pk_eclass ||
+		pk1->pk_cmptype != pk2->pk_cmptype ||
+		pk1->pk_nulls_first != pk2->pk_nulls_first)
+		return false;
+
+	pk_datatype = ((EquivalenceMember *) linitial(pk1->pk_eclass->ec_members))->em_datatype;
+	pk_op1 = get_opfamily_member(pk1->pk_opfamily, pk_datatype, pk_datatype, pk1->pk_strategy);
+	pk_op2 = get_opfamily_member(pk2->pk_opfamily, pk_datatype, pk_datatype, pk2->pk_strategy);
+
+	return pk_op1 == pk_op2;
+}
+
 /*
  * pathkeys_count_contained_in
  *    Same as pathkeys_contained_in, but also sets length of longest
@@ -626,7 +663,7 @@ pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common)
 		PathKey    *pathkey1 = (PathKey *) lfirst(key1);
 		PathKey    *pathkey2 = (PathKey *) lfirst(key2);
 
-		if (pathkey1 != pathkey2)
+		if (!pathkeys_have_same_sortop(pathkey1, pathkey2))
 		{
 			*n_common = n;
 			return false;
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 6a7a8d68db5..015f7f05801 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -4747,12 +4747,10 @@ create_mergejoin_plan(PlannerInfo *root,
 		 * matter which way we imagine this column to be ordered.)  But a
 		 * non-redundant inner pathkey had better match outer's ordering too.
 		 */
-		if (opathkey->pk_opfamily != ipathkey->pk_opfamily ||
-			opathkey->pk_eclass->ec_collation != ipathkey->pk_eclass->ec_collation)
+		if (opathkey->pk_eclass->ec_collation != ipathkey->pk_eclass->ec_collation)
 			elog(ERROR, "left and right pathkeys do not match in mergejoin");
 		if (first_inner_match &&
-			(opathkey->pk_strategy != ipathkey->pk_strategy ||
-			 opathkey->pk_cmptype != ipathkey->pk_cmptype ||
+			(opathkey->pk_cmptype != ipathkey->pk_cmptype ||
 			 opathkey->pk_nulls_first != ipathkey->pk_nulls_first))
 			elog(ERROR, "left and right pathkeys do not match in mergejoin");
 
-- 
2.39.5 (Apple Git-154)

