From 4e21f4fb467e9637af7c54f478af70ce873c208b Mon Sep 17 00:00:00 2001
From: "dgrowley@gmail.com" <dgrowley@gmail.com>
Date: Mon, 18 Jun 2018 22:21:28 +1200
Subject: [PATCH v24] Expand run-time partition pruning to work with
 MergeAppend

This expands the support for the run-time partition pruning which was added
for Append in 499be013de to also allow unneeded subnodes of a MergeAppend
to be removed.
---
 doc/src/sgml/perform.sgml                     |  12 +--
 src/backend/executor/nodeMergeAppend.c        | 139 ++++++++++++++++++++++----
 src/backend/nodes/copyfuncs.c                 |   1 +
 src/backend/nodes/outfuncs.c                  |   2 +
 src/backend/nodes/readfuncs.c                 |   1 +
 src/backend/optimizer/plan/createplan.c       |  34 +++++++
 src/include/nodes/execnodes.h                 |   9 ++
 src/include/nodes/plannodes.h                 |   3 +
 src/test/regress/expected/partition_prune.out | 137 +++++++++++++++++++++++++
 src/test/regress/sql/partition_prune.sql      |  41 ++++++++
 10 files changed, 352 insertions(+), 27 deletions(-)

diff --git a/doc/src/sgml/perform.sgml b/doc/src/sgml/perform.sgml
index b50357c3b4..262f30fd4c 100644
--- a/doc/src/sgml/perform.sgml
+++ b/doc/src/sgml/perform.sgml
@@ -899,12 +899,12 @@ EXPLAIN ANALYZE SELECT * FROM tenk1 WHERE unique1 &lt; 100 AND unique2 &gt; 9000
     Generally, the <command>EXPLAIN</command> output will display details for
     every plan node which was generated by the query planner.  However, there
     are cases where the executor is able to determine that certain nodes are
-    not required; currently, the only node type to support this is the
-    <literal>Append</literal> node.  This node type has the ability to discard
-    subnodes which it is able to determine won't contain any records required
-    by the query.  It is possible to determine that nodes have been removed in
-    this way by the presence of a "Subplans Removed" property in the
-    <command>EXPLAIN</command> output.
+    not required; currently, the only node types to support this are the
+    <literal>Append</literal> and <literal>MergeAppend</literal> nodes.  These
+    node types have the ability to discard subnodes which they are able to
+    determine won't contain any records required by the query.  It is possible
+    to determine that nodes have been removed in this way by the presence of a
+    "Subplans Removed" property in the <command>EXPLAIN</command> output.
    </para>
   </sect2>
 
diff --git a/src/backend/executor/nodeMergeAppend.c b/src/backend/executor/nodeMergeAppend.c
index 118f4ef07d..81122c28c7 100644
--- a/src/backend/executor/nodeMergeAppend.c
+++ b/src/backend/executor/nodeMergeAppend.c
@@ -39,6 +39,7 @@
 #include "postgres.h"
 
 #include "executor/execdebug.h"
+#include "executor/execPartition.h"
 #include "executor/nodeMergeAppend.h"
 #include "lib/binaryheap.h"
 #include "miscadmin.h"
@@ -65,8 +66,10 @@ ExecInitMergeAppend(MergeAppend *node, EState *estate, int eflags)
 {
 	MergeAppendState *mergestate = makeNode(MergeAppendState);
 	PlanState **mergeplanstates;
+	Bitmapset  *validsubplans;
 	int			nplans;
-	int			i;
+	int			i,
+				j;
 	ListCell   *lc;
 
 	/* check for unsupported flags */
@@ -78,19 +81,82 @@ ExecInitMergeAppend(MergeAppend *node, EState *estate, int eflags)
 	 */
 	ExecLockNonLeafAppendTables(node->partitioned_rels, estate);
 
-	/*
-	 * Set up empty vector of subplan states
-	 */
-	nplans = list_length(node->mergeplans);
-
-	mergeplanstates = (PlanState **) palloc0(nplans * sizeof(PlanState *));
-
 	/*
 	 * create new MergeAppendState for our node
 	 */
 	mergestate->ps.plan = (Plan *) node;
 	mergestate->ps.state = estate;
 	mergestate->ps.ExecProcNode = ExecMergeAppend;
+	mergestate->ms_noopscan = false;
+
+	/* If run-time partition pruning is enabled, then setup that up now */
+	if (node->part_prune_infos != NIL)
+	{
+		PartitionPruneState *prunestate;
+
+		/* We may need an expression context to evaluate partition exprs */
+		ExecAssignExprContext(estate, &mergestate->ps);
+
+		prunestate = ExecCreatePartitionPruneState(&mergestate->ps,
+												   node->part_prune_infos);
+		mergestate->ms_prune_state = prunestate;
+
+		/* Perform an initial partition prune, if required. */
+		if (prunestate->do_initial_prune)
+		{
+			/* Determine which subplans survive initial pruning */
+			validsubplans = ExecFindInitialMatchingSubPlans(prunestate,
+															list_length(node->mergeplans));
+
+			/*
+			 * The case where no subplans survive pruning must be handled
+			 * specially.  The problem here is that code in explain.c requires
+			 * an Append to have at least one subplan in order for it to
+			 * properly determine the Vars in that subplan's targetlist.  We
+			 * sidestep this issue by just initializing the first subplan and
+			 * setting ms_noopscan to true to indicate that we don't really
+			 * need to scan any subnodes.
+			 */
+			if (bms_is_empty(validsubplans))
+			{
+				mergestate->ms_noopscan = true;
+
+				/* Mark the first as valid so that it's initialized below */
+				validsubplans = bms_make_singleton(0);
+			}
+
+			nplans = bms_num_members(validsubplans);
+		}
+		else
+		{
+			/* We'll need to initialize all subplans */
+			nplans = list_length(node->mergeplans);
+			validsubplans = bms_add_range(NULL, 0, nplans - 1);
+		}
+
+		/*
+		 * If no runtime pruning is required, we can fill ms_valid_subplans
+		 * immediately, preventing later calls to ExecFindMatchingSubPlans.
+		 */
+		if (!prunestate->do_exec_prune)
+			mergestate->ms_valid_subplans = bms_add_range(NULL, 0, nplans - 1);
+		else
+			mergestate->ms_valid_subplans = NULL;
+	}
+	else
+	{
+		nplans = list_length(node->mergeplans);
+
+		/*
+		 * When run-time partition pruning is not enabled we can just mark all
+		 * subplans as valid; they must also all be initialized.
+		 */
+		mergestate->ms_valid_subplans = validsubplans =
+			bms_add_range(NULL, 0, nplans - 1);
+		mergestate->ms_prune_state = NULL;
+	}
+
+	mergeplanstates = (PlanState **) palloc(nplans * sizeof(PlanState *));
 	mergestate->mergeplans = mergeplanstates;
 	mergestate->ms_nplans = nplans;
 
@@ -101,26 +167,24 @@ ExecInitMergeAppend(MergeAppend *node, EState *estate, int eflags)
 	/*
 	 * Miscellaneous initialization
 	 *
-	 * MergeAppend plans don't have expression contexts because they never
-	 * call ExecQual or ExecProject.
-	 */
-
-	/*
 	 * MergeAppend nodes do have Result slots, which hold pointers to tuples,
 	 * so we have to initialize them.
 	 */
 	ExecInitResultTupleSlotTL(estate, &mergestate->ps);
 
 	/*
-	 * call ExecInitNode on each of the plans to be executed and save the
-	 * results into the array "mergeplans".
+	 * call ExecInitNode on each of the valid plans to be executed and save
+	 * the results into the mergeplanstates array.
 	 */
-	i = 0;
+	j = i = 0;
 	foreach(lc, node->mergeplans)
 	{
-		Plan	   *initNode = (Plan *) lfirst(lc);
+		if (bms_is_member(i, validsubplans))
+		{
+			Plan	   *initNode = (Plan *) lfirst(lc);
 
-		mergeplanstates[i] = ExecInitNode(initNode, estate, eflags);
+			mergeplanstates[j++] = ExecInitNode(initNode, estate, eflags);
+		}
 		i++;
 	}
 
@@ -178,11 +242,25 @@ ExecMergeAppend(PlanState *pstate)
 
 	if (!node->ms_initialized)
 	{
+		/* Handle the case for when all subplans were pruned */
+		if (node->ms_noopscan)
+			return ExecClearTuple(node->ps.ps_ResultTupleSlot);
+
 		/*
-		 * First time through: pull the first tuple from each subplan, and set
-		 * up the heap.
+		 * If we've yet to determine the valid subplans then do so now.  If
+		 * run-time pruning is disabled then the valid subplans will always be
+		 * set to all subplans.
 		 */
-		for (i = 0; i < node->ms_nplans; i++)
+		if (node->ms_valid_subplans == NULL)
+			node->ms_valid_subplans =
+				ExecFindMatchingSubPlans(node->ms_prune_state);
+
+		/*
+		 * First time through: pull the first tuple from each valid subplan,
+		 * and set up the heap.
+		 */
+		i = -1;
+		while ((i = bms_next_member(node->ms_valid_subplans, i)) >= 0)
 		{
 			node->ms_slots[i] = ExecProcNode(node->mergeplans[i]);
 			if (!TupIsNull(node->ms_slots[i]))
@@ -288,6 +366,12 @@ ExecEndMergeAppend(MergeAppendState *node)
 	 */
 	for (i = 0; i < nplans; i++)
 		ExecEndNode(mergeplans[i]);
+
+	/*
+	 * release any resources associated with run-time pruning
+	 */
+	if (node->ms_prune_state)
+		ExecDestroyPartitionPruneState(node->ms_prune_state);
 }
 
 void
@@ -295,6 +379,19 @@ ExecReScanMergeAppend(MergeAppendState *node)
 {
 	int			i;
 
+	/*
+	 * If any PARAM_EXEC Params used in pruning expressions have changed, then
+	 * we'd better unset the valid subplans so that they are reselected for
+	 * the new parameter values.
+	 */
+	if (node->ms_prune_state &&
+		bms_overlap(node->ps.chgParam,
+					node->ms_prune_state->execparamids))
+	{
+		bms_free(node->ms_valid_subplans);
+		node->ms_valid_subplans = NULL;
+	}
+
 	for (i = 0; i < node->ms_nplans; i++)
 	{
 		PlanState  *subnode = node->mergeplans[i];
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 1c12075b01..96836ef19c 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -273,6 +273,7 @@ _copyMergeAppend(const MergeAppend *from)
 	COPY_POINTER_FIELD(sortOperators, from->numCols * sizeof(Oid));
 	COPY_POINTER_FIELD(collations, from->numCols * sizeof(Oid));
 	COPY_POINTER_FIELD(nullsFirst, from->numCols * sizeof(bool));
+	COPY_NODE_FIELD(part_prune_infos);
 
 	return newnode;
 }
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 979d523e00..c4ec7cc7b5 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -434,6 +434,8 @@ _outMergeAppend(StringInfo str, const MergeAppend *node)
 	appendStringInfoString(str, " :nullsFirst");
 	for (i = 0; i < node->numCols; i++)
 		appendStringInfo(str, " %s", booltostr(node->nullsFirst[i]));
+
+	WRITE_NODE_FIELD(part_prune_infos);
 }
 
 static void
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index 42aff7f57a..9a01eb6b63 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -1634,6 +1634,7 @@ _readMergeAppend(void)
 	READ_OID_ARRAY(sortOperators, local_node->numCols);
 	READ_OID_ARRAY(collations, local_node->numCols);
 	READ_BOOL_ARRAY(nullsFirst, local_node->numCols);
+	READ_NODE_FIELD(part_prune_infos);
 
 	READ_DONE();
 }
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index cf82b7052d..3f5d8aa7b8 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -1142,6 +1142,8 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path)
 	List	   *pathkeys = best_path->path.pathkeys;
 	List	   *subplans = NIL;
 	ListCell   *subpaths;
+	RelOptInfo *rel = best_path->path.parent;
+	List	   *partpruneinfos = NIL;
 
 	/*
 	 * We don't have the actual creation of the MergeAppend node split out
@@ -1227,8 +1229,40 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path)
 		subplans = lappend(subplans, subplan);
 	}
 
+	if (rel->reloptkind == RELOPT_BASEREL &&
+		best_path->partitioned_rels != NIL)
+	{
+		List	   *prunequal;
+
+		prunequal = extract_actual_clauses(rel->baserestrictinfo, false);
+
+		if (best_path->path.param_info)
+		{
+
+			List	   *prmquals = best_path->path.param_info->ppi_clauses;
+
+			prmquals = extract_actual_clauses(prmquals, false);
+			prmquals = (List *) replace_nestloop_params(root,
+														(Node *) prmquals);
+
+			prunequal = list_concat(prunequal, prmquals);
+		}
+
+		/*
+		 * If any quals exist, then these may be useful to allow us to perform
+		 * further partition pruning during execution.  We'll generate a
+		 * PartitionPruneInfo for each partitioned rel to store these quals
+		 * and allow translation of partition indexes into subpath indexes.
+		 */
+		if (prunequal != NIL)
+			partpruneinfos = make_partition_pruneinfo(root,
+													  best_path->partitioned_rels,
+													  best_path->subpaths, prunequal);
+	}
+
 	node->partitioned_rels = best_path->partitioned_rels;
 	node->mergeplans = subplans;
+	node->part_prune_infos = partpruneinfos;
 
 	return (Plan *) node;
 }
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index da7f52cab0..d91babe978 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1106,6 +1106,12 @@ struct AppendState
  *		slots			current output tuple of each subplan
  *		heap			heap of active tuples
  *		initialized		true if we have fetched first tuple from each subplan
+ *		noopscan		true if partition pruning proved that none of the
+ *						mergeplans can contain a record to satisfy this query.
+ *		prune_state		details required to allow partitions to be
+ *						eliminated from the scan, or NULL if not possible.
+ *		valid_subplans	for runtime pruning, valid mergeplans indexes to
+ *						scan.
  * ----------------
  */
 typedef struct MergeAppendState
@@ -1118,6 +1124,9 @@ typedef struct MergeAppendState
 	TupleTableSlot **ms_slots;	/* array of length ms_nplans */
 	struct binaryheap *ms_heap; /* binary heap of slot indices */
 	bool		ms_initialized; /* are subplans started? */
+	bool		ms_noopscan;
+	struct PartitionPruneState *ms_prune_state;
+	Bitmapset  *ms_valid_subplans;
 } MergeAppendState;
 
 /* ----------------
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 5201c6d4bc..b80df601cd 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -281,6 +281,9 @@ typedef struct MergeAppend
 	Oid		   *sortOperators;	/* OIDs of operators to sort them by */
 	Oid		   *collations;		/* OIDs of collations */
 	bool	   *nullsFirst;		/* NULLS FIRST/LAST directions */
+
+	/* Info for run-time subplan pruning, one entry per partitioned_rels */
+	List	   *part_prune_infos;	/* List of PartitionPruneInfo */
 } MergeAppend;
 
 /* ----------------
diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out
index 854b553d0a..306fa27e2e 100644
--- a/src/test/regress/expected/partition_prune.out
+++ b/src/test/regress/expected/partition_prune.out
@@ -2783,6 +2783,143 @@ select * from boolp where a = (select value from boolvalues where not value);
 (9 rows)
 
 drop table boolp;
+--
+-- Test run-time pruning of MergeAppend subnodes
+--
+set enable_seqscan = off;
+set enable_sort = off;
+create table ma_test (a int) partition by range (a);
+create table ma_test_p1 partition of ma_test for values from (0) to (10);
+create table ma_test_p2 partition of ma_test for values from (10) to (20);
+create table ma_test_p3 partition of ma_test for values from (20) to (30);
+insert into ma_test select x from generate_series(0,29) t(x);
+create index on ma_test (a);
+analyze ma_test;
+prepare mt_q1 (int) as select * from ma_test where a >= $1 and a % 10 = 5 order by a;
+-- Execute query 5 times to allow choose_custom_plan
+-- to start considering a generic plan.
+execute mt_q1(0);
+ a  
+----
+  5
+ 15
+ 25
+(3 rows)
+
+execute mt_q1(0);
+ a  
+----
+  5
+ 15
+ 25
+(3 rows)
+
+execute mt_q1(0);
+ a  
+----
+  5
+ 15
+ 25
+(3 rows)
+
+execute mt_q1(0);
+ a  
+----
+  5
+ 15
+ 25
+(3 rows)
+
+execute mt_q1(0);
+ a  
+----
+  5
+ 15
+ 25
+(3 rows)
+
+explain (analyze, costs off, summary off, timing off) execute mt_q1(15);
+                                  QUERY PLAN                                   
+-------------------------------------------------------------------------------
+ Merge Append (actual rows=2 loops=1)
+   Sort Key: ma_test_p2.a
+   Subplans Removed: 1
+   ->  Index Scan using ma_test_p2_a_idx on ma_test_p2 (actual rows=1 loops=1)
+         Index Cond: (a >= $1)
+         Filter: ((a % 10) = 5)
+         Rows Removed by Filter: 4
+   ->  Index Scan using ma_test_p3_a_idx on ma_test_p3 (actual rows=1 loops=1)
+         Index Cond: (a >= $1)
+         Filter: ((a % 10) = 5)
+         Rows Removed by Filter: 9
+(11 rows)
+
+execute mt_q1(15);
+ a  
+----
+ 15
+ 25
+(2 rows)
+
+explain (analyze, costs off, summary off, timing off) execute mt_q1(25);
+                                  QUERY PLAN                                   
+-------------------------------------------------------------------------------
+ Merge Append (actual rows=1 loops=1)
+   Sort Key: ma_test_p3.a
+   Subplans Removed: 2
+   ->  Index Scan using ma_test_p3_a_idx on ma_test_p3 (actual rows=1 loops=1)
+         Index Cond: (a >= $1)
+         Filter: ((a % 10) = 5)
+         Rows Removed by Filter: 4
+(7 rows)
+
+execute mt_q1(25);
+ a  
+----
+ 25
+(1 row)
+
+-- Ensure MergeAppend behaves correctly when no subplans match
+explain (analyze, costs off, summary off, timing off) execute mt_q1(35);
+                               QUERY PLAN                               
+------------------------------------------------------------------------
+ Merge Append (actual rows=0 loops=1)
+   Sort Key: ma_test_p1.a
+   Subplans Removed: 2
+   ->  Index Scan using ma_test_p1_a_idx on ma_test_p1 (never executed)
+         Index Cond: (a >= $1)
+         Filter: ((a % 10) = 5)
+(6 rows)
+
+execute mt_q1(35);
+ a 
+---
+(0 rows)
+
+deallocate mt_q1;
+-- ensure initplan params properly prune partitions
+explain (analyze, costs off, summary off, timing off) select * from ma_test where a >= (select min(a) from ma_test_p2) order by a;
+                                                 QUERY PLAN                                                 
+------------------------------------------------------------------------------------------------------------
+ Merge Append (actual rows=20 loops=1)
+   Sort Key: ma_test_p1.a
+   InitPlan 2 (returns $1)
+     ->  Result (actual rows=1 loops=1)
+           InitPlan 1 (returns $0)
+             ->  Limit (actual rows=1 loops=1)
+                   ->  Index Scan using ma_test_p2_a_idx on ma_test_p2 ma_test_p2_1 (actual rows=1 loops=1)
+                         Index Cond: (a IS NOT NULL)
+   ->  Index Scan using ma_test_p1_a_idx on ma_test_p1 (never executed)
+         Index Cond: (a >= $1)
+   ->  Index Scan using ma_test_p2_a_idx on ma_test_p2 (actual rows=10 loops=1)
+         Index Cond: (a >= $1)
+   ->  Index Scan using ma_test_p3_a_idx on ma_test_p3 (actual rows=10 loops=1)
+         Index Cond: (a >= $1)
+(14 rows)
+
+reset enable_seqscan;
+reset enable_sort;
+drop table ma_test;
 reset enable_indexonlyscan;
 --
 -- check that pruning works properly when the partition key is of a
diff --git a/src/test/regress/sql/partition_prune.sql b/src/test/regress/sql/partition_prune.sql
index ae361b52f9..506a46e3f5 100644
--- a/src/test/regress/sql/partition_prune.sql
+++ b/src/test/regress/sql/partition_prune.sql
@@ -707,6 +707,47 @@ select * from boolp where a = (select value from boolvalues where not value);
 
 drop table boolp;
 
+--
+-- Test run-time pruning of MergeAppend subnodes
+--
+set enable_seqscan = off;
+set enable_sort = off;
+create table ma_test (a int) partition by range (a);
+create table ma_test_p1 partition of ma_test for values from (0) to (10);
+create table ma_test_p2 partition of ma_test for values from (10) to (20);
+create table ma_test_p3 partition of ma_test for values from (20) to (30);
+insert into ma_test select x from generate_series(0,29) t(x);
+create index on ma_test (a);
+
+analyze ma_test;
+prepare mt_q1 (int) as select * from ma_test where a >= $1 and a % 10 = 5 order by a;
+
+-- Execute query 5 times to allow choose_custom_plan
+-- to start considering a generic plan.
+execute mt_q1(0);
+execute mt_q1(0);
+execute mt_q1(0);
+execute mt_q1(0);
+execute mt_q1(0);
+
+explain (analyze, costs off, summary off, timing off) execute mt_q1(15);
+execute mt_q1(15);
+explain (analyze, costs off, summary off, timing off) execute mt_q1(25);
+execute mt_q1(25);
+-- Ensure MergeAppend behaves correctly when no subplans match
+explain (analyze, costs off, summary off, timing off) execute mt_q1(35);
+execute mt_q1(35);
+
+deallocate mt_q1;
+
+-- ensure initplan params properly prune partitions
+explain (analyze, costs off, summary off, timing off) select * from ma_test where a >= (select min(a) from ma_test_p2) order by a;
+
+reset enable_seqscan;
+reset enable_sort;
+
+drop table ma_test;
+
 reset enable_indexonlyscan;
 
 --
-- 
2.16.2.windows.1

