From b16759153bf73bb75a22c31785b045564c7fba20 Mon Sep 17 00:00:00 2001
From: Richard Guo <guofenglinux@gmail.com>
Date: Mon, 20 Nov 2023 10:08:28 +0800
Subject: [PATCH v1] Propagate pathkeys from CTEs up to the outer query

---
 src/backend/optimizer/path/allpaths.c | 20 +++++++++++++++++++-
 src/backend/optimizer/util/pathnode.c |  5 +++--
 src/include/optimizer/pathnode.h      |  2 +-
 src/test/regress/expected/with.out    | 17 +++++++++++++++++
 src/test/regress/sql/with.sql         |  7 +++++++
 5 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 67921a0826..8ba3a90b5e 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -2876,12 +2876,16 @@ static void
 set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
 {
 	Plan	   *cteplan;
+	PlannerInfo *ctesubroot;
 	PlannerInfo *cteroot;
 	Index		levelsup;
 	int			ndx;
 	ListCell   *lc;
 	int			plan_id;
 	Relids		required_outer;
+	RelOptInfo *cte_final_rel;
+	Path	   *cte_best_path;
+	List	   *pathkeys;
 
 	/*
 	 * Find the referenced CTE, and locate the plan previously made for it.
@@ -2921,6 +2925,20 @@ set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
 	/* Mark rel with estimated output rows, width, etc */
 	set_cte_size_estimates(root, rel, cteplan->plan_rows);
 
+	/*
+	 * Locate the best path previously made for the CTE.  We need to know its
+	 * pathkeys.
+	 */
+	ctesubroot = (PlannerInfo *) list_nth(root->glob->subroots, plan_id - 1);
+	cte_final_rel = fetch_upper_rel(ctesubroot, UPPERREL_FINAL, NULL);
+	cte_best_path = cte_final_rel->cheapest_total_path;
+
+	/* Convert the pathkeys to outer representation */
+	pathkeys = convert_subquery_pathkeys(root,
+										 rel,
+										 cte_best_path->pathkeys,
+										 make_tlist_from_pathtarget(cte_best_path->pathtarget));
+
 	/*
 	 * We don't support pushing join clauses into the quals of a CTE scan, but
 	 * it could still have required parameterization due to LATERAL refs in
@@ -2929,7 +2947,7 @@ set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
 	required_outer = rel->lateral_relids;
 
 	/* Generate appropriate path */
-	add_path(rel, create_ctescan_path(root, rel, required_outer));
+	add_path(rel, create_ctescan_path(root, rel, pathkeys, required_outer));
 }
 
 /*
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 0b1d17b9d3..05b85305fe 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -2113,7 +2113,8 @@ create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel,
  *	  returning the pathnode.
  */
 Path *
-create_ctescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer)
+create_ctescan_path(PlannerInfo *root, RelOptInfo *rel,
+					List *pathkeys, Relids required_outer)
 {
 	Path	   *pathnode = makeNode(Path);
 
@@ -2125,7 +2126,7 @@ create_ctescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer)
 	pathnode->parallel_aware = false;
 	pathnode->parallel_safe = rel->consider_parallel;
 	pathnode->parallel_workers = 0;
-	pathnode->pathkeys = NIL;	/* XXX for now, result is always unordered */
+	pathnode->pathkeys = pathkeys;
 
 	cost_ctescan(pathnode, root, rel, pathnode->param_info);
 
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 6e557bebc4..2d76471814 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -115,7 +115,7 @@ extern Path *create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel,
 extern Path *create_tablefuncscan_path(PlannerInfo *root, RelOptInfo *rel,
 									   Relids required_outer);
 extern Path *create_ctescan_path(PlannerInfo *root, RelOptInfo *rel,
-								 Relids required_outer);
+								 List *pathkeys, Relids required_outer);
 extern Path *create_namedtuplestorescan_path(PlannerInfo *root, RelOptInfo *rel,
 											 Relids required_outer);
 extern Path *create_resultscan_path(PlannerInfo *root, RelOptInfo *rel,
diff --git a/src/test/regress/expected/with.out b/src/test/regress/expected/with.out
index 69c56ce207..9549cdc3d1 100644
--- a/src/test/regress/expected/with.out
+++ b/src/test/regress/expected/with.out
@@ -654,6 +654,23 @@ select count(*) from tenk1 a
                ->  CTE Scan on x
 (8 rows)
 
+-- test that pathkeys from a materialized CTE are propagated up to the
+-- outer query
+explain (costs off)
+with x as materialized (select unique1 from tenk1 b order by unique1)
+select count(*) from tenk1 a
+  where unique1 in (select * from x);
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Aggregate
+   CTE x
+     ->  Index Only Scan using tenk1_unique1 on tenk1 b
+   ->  Merge Semi Join
+         Merge Cond: (a.unique1 = x.unique1)
+         ->  Index Only Scan using tenk1_unique1 on tenk1 a
+         ->  CTE Scan on x
+(7 rows)
+
 -- SEARCH clause
 create temp table graph0( f int, t int, label text );
 insert into graph0 values
diff --git a/src/test/regress/sql/with.sql b/src/test/regress/sql/with.sql
index 3ef9898866..54da8b2d21 100644
--- a/src/test/regress/sql/with.sql
+++ b/src/test/regress/sql/with.sql
@@ -354,6 +354,13 @@ with x as materialized (select unique1 from tenk1 b)
 select count(*) from tenk1 a
   where unique1 in (select * from x);
 
+-- test that pathkeys from a materialized CTE are propagated up to the
+-- outer query
+explain (costs off)
+with x as materialized (select unique1 from tenk1 b order by unique1)
+select count(*) from tenk1 a
+  where unique1 in (select * from x);
+
 -- SEARCH clause
 
 create temp table graph0( f int, t int, label text );
-- 
2.31.0

