On 2018-Nov-16, Alvaro Herrera wrote:

> One thing I don't quite like is the inconsistency in handling memory
> context switches in the various functions allocating stuff.  It seems
> rather haphazard.  I'd rather have a memcxt member in
> PartitionTupleRouting, which is set when the struct is created, and then
> have all the other functions allocating stuff use that one.

So while researching this I finally realized that there was a "lexical
disconnect" between setting a ResultRelInfo's ri_PartitionInfo
struct/pointer and adding it to the PartitionTupleRouting arrays.
However, if you think about it, these things are one and the same, so we
don't need to do them separately; just merge the new function I wrote
into the existing ExecInitRoutingInfo().  Patch attached.

(This version also rebases across Andres' recent conflicting
TupleTableSlot changes.)

I'll now see about the commit message and push shortly.

-- 
Álvaro Herrera                https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index e62e3d8fba..6588ebd6dc 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -2316,6 +2316,7 @@ CopyFrom(CopyState cstate)
 	bool	   *nulls;
 	ResultRelInfo *resultRelInfo;
 	ResultRelInfo *target_resultRelInfo;
+	ResultRelInfo *prevResultRelInfo = NULL;
 	EState	   *estate = CreateExecutorState(); /* for ExecConstraints() */
 	ModifyTableState *mtstate;
 	ExprContext *econtext;
@@ -2331,7 +2332,6 @@ CopyFrom(CopyState cstate)
 	CopyInsertMethod insertMethod;
 	uint64		processed = 0;
 	int			nBufferedTuples = 0;
-	int			prev_leaf_part_index = -1;
 	bool		has_before_insert_row_trig;
 	bool		has_instead_insert_row_trig;
 	bool		leafpart_use_multi_insert = false;
@@ -2515,8 +2515,12 @@ CopyFrom(CopyState cstate)
 	/*
 	 * If there are any triggers with transition tables on the named relation,
 	 * we need to be prepared to capture transition tuples.
+	 *
+	 * Because partition tuple routing needs to know whether
+	 * transition capture is active, we also set it in mtstate, which is
+	 * passed to ExecFindPartition() below.
 	 */
-	cstate->transition_capture =
+	cstate->transition_capture = mtstate->mt_transition_capture =
 		MakeTransitionCaptureState(cstate->rel->trigdesc,
 								   RelationGetRelid(cstate->rel),
 								   CMD_INSERT);
@@ -2526,19 +2530,8 @@ CopyFrom(CopyState cstate)
 	 * CopyFrom tuple routing.
 	 */
 	if (cstate->rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
-	{
 		proute = ExecSetupPartitionTupleRouting(NULL, cstate->rel);
 
-		/*
-		 * If we are capturing transition tuples, they may need to be
-		 * converted from partition format back to partitioned table format
-		 * (this is only ever necessary if a BEFORE trigger modifies the
-		 * tuple).
-		 */
-		if (cstate->transition_capture != NULL)
-			ExecSetupChildParentMapForLeaf(proute);
-	}
-
 	/*
 	 * It's more efficient to prepare a bunch of tuples for insertion, and
 	 * insert them in one heap_multi_insert() call, than call heap_insert()
@@ -2694,25 +2687,17 @@ CopyFrom(CopyState cstate)
 		/* Determine the partition to heap_insert the tuple into */
 		if (proute)
 		{
-			int			leaf_part_index;
 			TupleConversionMap *map;
 
 			/*
-			 * Away we go ... If we end up not finding a partition after all,
-			 * ExecFindPartition() does not return and errors out instead.
-			 * Otherwise, the returned value is to be used as an index into
-			 * arrays mt_partitions[] and mt_partition_tupconv_maps[] that
-			 * will get us the ResultRelInfo and TupleConversionMap for the
-			 * partition, respectively.
+			 * Attempt to find a partition suitable for this tuple.
+			 * ExecFindPartition() will raise an error if none can be found or
+			 * if the found partition is not suitable for INSERTs.
 			 */
-			leaf_part_index = ExecFindPartition(target_resultRelInfo,
-												proute->partition_dispatch_info,
-												slot,
-												estate);
-			Assert(leaf_part_index >= 0 &&
-				   leaf_part_index < proute->num_partitions);
+			resultRelInfo = ExecFindPartition(mtstate, target_resultRelInfo,
+											  proute, slot, estate);
 
-			if (prev_leaf_part_index != leaf_part_index)
+			if (prevResultRelInfo != resultRelInfo)
 			{
 				/* Check if we can multi-insert into this partition */
 				if (insertMethod == CIM_MULTI_CONDITIONAL)
@@ -2725,12 +2710,9 @@ CopyFrom(CopyState cstate)
 					if (nBufferedTuples > 0)
 					{
 						ExprContext *swapcontext;
-						ResultRelInfo *presultRelInfo;
-
-						presultRelInfo = proute->partitions[prev_leaf_part_index];
 
 						CopyFromInsertBatch(cstate, estate, mycid, hi_options,
-											presultRelInfo, myslot, bistate,
+											prevResultRelInfo, myslot, bistate,
 											nBufferedTuples, bufferedTuples,
 											firstBufferedLineNo);
 						nBufferedTuples = 0;
@@ -2787,21 +2769,6 @@ CopyFrom(CopyState cstate)
 					}
 				}
 
-				/*
-				 * Overwrite resultRelInfo with the corresponding partition's
-				 * one.
-				 */
-				resultRelInfo = proute->partitions[leaf_part_index];
-				if (unlikely(resultRelInfo == NULL))
-				{
-					resultRelInfo = ExecInitPartitionInfo(mtstate,
-														  target_resultRelInfo,
-														  proute, estate,
-														  leaf_part_index);
-					proute->partitions[leaf_part_index] = resultRelInfo;
-					Assert(resultRelInfo != NULL);
-				}
-
 				/* Determine which triggers exist on this partition */
 				has_before_insert_row_trig = (resultRelInfo->ri_TrigDesc &&
 											  resultRelInfo->ri_TrigDesc->trig_insert_before_row);
@@ -2827,7 +2794,7 @@ CopyFrom(CopyState cstate)
 				 * buffer when the partition being inserted into changes.
 				 */
 				ReleaseBulkInsertStatePin(bistate);
-				prev_leaf_part_index = leaf_part_index;
+				prevResultRelInfo = resultRelInfo;
 			}
 
 			/*
@@ -2837,7 +2804,7 @@ CopyFrom(CopyState cstate)
 
 			/*
 			 * If we're capturing transition tuples, we might need to convert
-			 * from the partition rowtype to parent rowtype.
+			 * from the partition rowtype to root rowtype.
 			 */
 			if (cstate->transition_capture != NULL)
 			{
@@ -2850,8 +2817,7 @@ CopyFrom(CopyState cstate)
 					 */
 					cstate->transition_capture->tcs_original_insert_tuple = NULL;
 					cstate->transition_capture->tcs_map =
-						TupConvMapForLeaf(proute, target_resultRelInfo,
-										  leaf_part_index);
+						resultRelInfo->ri_PartitionInfo->pi_PartitionToRootMap;
 				}
 				else
 				{
@@ -2865,18 +2831,18 @@ CopyFrom(CopyState cstate)
 			}
 
 			/*
-			 * We might need to convert from the parent rowtype to the
-			 * partition rowtype.
+			 * We might need to convert from the root rowtype to the partition
+			 * rowtype.
 			 */
-			map = proute->parent_child_tupconv_maps[leaf_part_index];
+			map = resultRelInfo->ri_PartitionInfo->pi_RootToPartitionMap;
 			if (map != NULL)
 			{
 				TupleTableSlot *new_slot;
 				MemoryContext oldcontext;
 
-				Assert(proute->partition_tuple_slots != NULL &&
-					   proute->partition_tuple_slots[leaf_part_index] != NULL);
-				new_slot = proute->partition_tuple_slots[leaf_part_index];
+				new_slot = resultRelInfo->ri_PartitionInfo->pi_PartitionTupleSlot;
+				Assert(new_slot != NULL);
+
 				slot = execute_attr_map_slot(map->attrMap, slot, new_slot);
 
 				/*
@@ -3021,12 +2987,8 @@ CopyFrom(CopyState cstate)
 	{
 		if (insertMethod == CIM_MULTI_CONDITIONAL)
 		{
-			ResultRelInfo *presultRelInfo;
-
-			presultRelInfo = proute->partitions[prev_leaf_part_index];
-
 			CopyFromInsertBatch(cstate, estate, mycid, hi_options,
-								presultRelInfo, myslot, bistate,
+								prevResultRelInfo, myslot, bistate,
 								nBufferedTuples, bufferedTuples,
 								firstBufferedLineNo);
 		}
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index 74398eb464..757df0705d 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -1345,7 +1345,7 @@ InitResultRelInfo(ResultRelInfo *resultRelInfo,
 
 	resultRelInfo->ri_PartitionCheck = partition_check;
 	resultRelInfo->ri_PartitionRoot = partition_root;
-	resultRelInfo->ri_PartitionReadyForRouting = false;
+	resultRelInfo->ri_PartitionInfo = NULL; /* may be set later */
 }
 
 /*
diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c
index e11fe68712..5216d0f93b 100644
--- a/src/backend/executor/execPartition.c
+++ b/src/backend/executor/execPartition.c
@@ -33,21 +33,98 @@
 
 
 /*-----------------------
- * PartitionDispatch - information about one partitioned table in a partition
- * hierarchy required to route a tuple to one of its partitions
+ * PartitionTupleRouting - Encapsulates all information required to
+ * route a tuple inserted into a partitioned table to one of its leaf
+ * partitions.
  *
- *	reldesc		Relation descriptor of the table
- *	key			Partition key information of the table
- *	keystate	Execution state required for expressions in the partition key
- *	partdesc	Partition descriptor of the table
- *	tupslot		A standalone TupleTableSlot initialized with this table's tuple
- *				descriptor
- *	tupmap		TupleConversionMap to convert from the parent's rowtype to
- *				this table's rowtype (when extracting the partition key of a
- *				tuple just before routing it through this table)
- *	indexes		Array with partdesc->nparts members (for details on what
- *				individual members represent, see how they are set in
- *				get_partition_dispatch_recurse())
+ * partition_root
+ *		The partitioned table that's the target of the command.
+ *
+ * partition_dispatch_info
+ *		Array of 'max_dispatch' elements containing a pointer to a
+ *		PartitionDispatch object for every partitioned table touched by tuple
+ *		routing.  The entry for the target partitioned table is *always*
+ *		present in the 0th element of this array.  See comment for
+ *		PartitionDispatchData->indexes for details on how this array is
+ *		indexed.
+ *
+ * num_dispatch
+ *		The current number of items stored in the 'partition_dispatch_info'
+ *		array.  Also serves as the index of the next free array element for
+ *		new PartitionDispatch objects that need to be stored.
+ *
+ * max_dispatch
+ *		The current allocated size of the 'partition_dispatch_info' array.
+ *
+ * partitions
+ *		Array of 'max_partitions' elements containing a pointer to a
+ *		ResultRelInfo for every leaf partition touched by tuple routing.
+ *		Some of these are pointers to ResultRelInfos which are borrowed out of
+ *		'subplan_resultrel_hash'.  The remainder have been built especially
+ *		for tuple routing.  See comment for PartitionDispatchData->indexes for
+ *		details on how this array is indexed.
+ *
+ * num_partitions
+ *		The current number of items stored in the 'partitions' array.  Also
+ *		serves as the index of the next free array element for new
+ *		ResultRelInfo objects that need to be stored.
+ *
+ * max_partitions
+ *		The current allocated size of the 'partitions' array.
+ *
+ * subplan_resultrel_hash
+ *		Hash table to store subplan ResultRelInfos by Oid.  This is used to
+ *		cache ResultRelInfos from subplans of an UPDATE ModifyTable node;
+ *		NULL in other cases.  Some of these may be useful for tuple routing
+ *		to save having to build duplicates.
+ *
+ * memcxt
+ *		Memory context used to allocate subsidiary structs.
+ *-----------------------
+ */
+typedef struct PartitionTupleRouting
+{
+	Relation	partition_root;
+	PartitionDispatch *partition_dispatch_info;
+	int			num_dispatch;
+	int			max_dispatch;
+	ResultRelInfo **partitions;
+	int			num_partitions;
+	int			max_partitions;
+	HTAB	   *subplan_resultrel_hash;
+	MemoryContext memcxt;
+} PartitionTupleRouting;
+
+/*-----------------------
+ * PartitionDispatch - information about one partitioned table in a partition
+ * hierarchy required to route a tuple to any of its partitions.  A
+ * PartitionDispatch is always encapsulated inside a PartitionTupleRouting
+ * struct and stored inside its 'partition_dispatch_info' array.
+ *
+ * reldesc
+ *		Relation descriptor of the table
+ * key
+ *		Partition key information of the table
+ * keystate
+ *		Execution state required for expressions in the partition key
+ * partdesc
+ *		Partition descriptor of the table
+ * tupslot
+ *		A standalone TupleTableSlot initialized with this table's tuple
+ *		descriptor, or NULL if no tuple conversion from the parent's
+ *		rowtype is required.
+ * tupmap
+ *		TupleConversionMap to convert from the parent's rowtype to this table's
+ *		rowtype (when extracting the partition key of a tuple just before
+ *		routing it through this table). A NULL value is stored if no tuple
+ *		conversion is required.
+ * indexes
+ *		Array of partdesc->nparts elements.  For leaf partitions the index
+ *		corresponds to the partition's ResultRelInfo in the encapsulating
+ *		PartitionTupleRouting's partitions array.  For partitioned partitions,
+ *		the index corresponds to the PartitionDispatch for it in its
+ *		partition_dispatch_info array.  -1 indicates we've not yet allocated
+ *		anything in PartitionTupleRouting for the partition.
  *-----------------------
  */
 typedef struct PartitionDispatchData
@@ -58,14 +135,32 @@ typedef struct PartitionDispatchData
 	PartitionDesc partdesc;
 	TupleTableSlot *tupslot;
 	AttrNumber *tupmap;
-	int		   *indexes;
+	int			indexes[FLEXIBLE_ARRAY_MEMBER];
 } PartitionDispatchData;
 
+/* struct to hold result relations coming from UPDATE subplans */
+typedef struct SubplanResultRelHashElem
+{
+	Oid		relid;		/* hash key -- must be first */
+	ResultRelInfo *rri;
+} SubplanResultRelHashElem;
 
-static PartitionDispatch *RelationGetPartitionDispatchInfo(Relation rel,
-								 int *num_parted, List **leaf_part_oids);
-static void get_partition_dispatch_recurse(Relation rel, Relation parent,
-							   List **pds, List **leaf_part_oids);
+
+static void ExecHashSubPlanResultRelsByOid(ModifyTableState *mtstate,
+							   PartitionTupleRouting *proute);
+static ResultRelInfo *ExecInitPartitionInfo(ModifyTableState *mtstate,
+					  ResultRelInfo *rootResultRelInfo,
+					  PartitionTupleRouting *proute,
+					  EState *estate,
+					  PartitionDispatch dispatch, int partidx);
+static void ExecInitRoutingInfo(PartitionTupleRouting *proute,
+					PartitionDispatch dispatch,
+					ModifyTableState *mtstate,
+					EState *estate,
+					ResultRelInfo *partRelInfo,
+					int partidx);
+static PartitionDispatch ExecInitPartitionDispatchInfo(PartitionTupleRouting *proute,
+							  Oid partoid, PartitionDispatch parent_pd, int partidx);
 static void FormPartitionKeyDatum(PartitionDispatch pd,
 					  TupleTableSlot *slot,
 					  EState *estate,
@@ -92,131 +187,84 @@ static void find_matching_subplans_recurse(PartitionPruningData *prunedata,
  * Note that all the relations in the partition tree are locked using the
  * RowExclusiveLock mode upon return from this function.
  *
- * While we allocate the arrays of pointers of ResultRelInfo and
- * TupleConversionMap for all partitions here, actual objects themselves are
- * lazily allocated for a given partition if a tuple is actually routed to it;
- * see ExecInitPartitionInfo.  However, if the function is invoked for update
- * tuple routing, caller would already have initialized ResultRelInfo's for
- * some of the partitions, which are reused and assigned to their respective
- * slot in the aforementioned array.  For such partitions, we delay setting
- * up objects such as TupleConversionMap until those are actually chosen as
- * the partitions to route tuples to.  See ExecPrepareTupleRouting.
+ * Callers must use the returned PartitionTupleRouting during calls to
+ * ExecFindPartition().  The actual ResultRelInfo for a partition is only
+ * allocated when the partition is found for the first time.
+ *
+ * The current memory context is used to allocate this struct and all
+ * subsidiary structs that will be allocated from it later on.  Typically
+ * it should be estate->es_query_cxt.
  */
 PartitionTupleRouting *
 ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, Relation rel)
 {
-	List	   *leaf_parts;
-	ListCell   *cell;
-	int			i;
-	ResultRelInfo *update_rri = NULL;
-	int			num_update_rri = 0,
-				update_rri_index = 0;
 	PartitionTupleRouting *proute;
-	int			nparts;
 	ModifyTable *node = mtstate ? (ModifyTable *) mtstate->ps.plan : NULL;
 
-	/*
-	 * Get the information about the partition tree after locking all the
-	 * partitions.
-	 */
+	/* Lock all the partitions. */
 	(void) find_all_inheritors(RelationGetRelid(rel), RowExclusiveLock, NULL);
-	proute = (PartitionTupleRouting *) palloc0(sizeof(PartitionTupleRouting));
-	proute->partition_dispatch_info =
-		RelationGetPartitionDispatchInfo(rel, &proute->num_dispatch,
-										 &leaf_parts);
-	proute->num_partitions = nparts = list_length(leaf_parts);
-	proute->partitions =
-		(ResultRelInfo **) palloc(nparts * sizeof(ResultRelInfo *));
-	proute->parent_child_tupconv_maps =
-		(TupleConversionMap **) palloc0(nparts * sizeof(TupleConversionMap *));
-	proute->partition_oids = (Oid *) palloc(nparts * sizeof(Oid));
-
-	/* Set up details specific to the type of tuple routing we are doing. */
-	if (node && node->operation == CMD_UPDATE)
-	{
-		update_rri = mtstate->resultRelInfo;
-		num_update_rri = list_length(node->plans);
-		proute->subplan_partition_offsets =
-			palloc(num_update_rri * sizeof(int));
-		proute->num_subplan_partition_offsets = num_update_rri;
-
-		/*
-		 * We need an additional tuple slot for storing transient tuples that
-		 * are converted to the root table descriptor.
-		 */
-		proute->root_tuple_slot = MakeTupleTableSlot(RelationGetDescr(rel),
-													 &TTSOpsHeapTuple);
-	}
-
-	i = 0;
-	foreach(cell, leaf_parts)
-	{
-		ResultRelInfo *leaf_part_rri = NULL;
-		Oid			leaf_oid = lfirst_oid(cell);
-
-		proute->partition_oids[i] = leaf_oid;
-
-		/*
-		 * If the leaf partition is already present in the per-subplan result
-		 * rels, we re-use that rather than initialize a new result rel. The
-		 * per-subplan resultrels and the resultrels of the leaf partitions
-		 * are both in the same canonical order. So while going through the
-		 * leaf partition oids, we need to keep track of the next per-subplan
-		 * result rel to be looked for in the leaf partition resultrels.
-		 */
-		if (update_rri_index < num_update_rri &&
-			RelationGetRelid(update_rri[update_rri_index].ri_RelationDesc) == leaf_oid)
-		{
-			leaf_part_rri = &update_rri[update_rri_index];
-
-			/*
-			 * This is required in order to convert the partition's tuple to
-			 * be compatible with the root partitioned table's tuple
-			 * descriptor.  When generating the per-subplan result rels, this
-			 * was not set.
-			 */
-			leaf_part_rri->ri_PartitionRoot = rel;
-
-			/* Remember the subplan offset for this ResultRelInfo */
-			proute->subplan_partition_offsets[update_rri_index] = i;
-
-			update_rri_index++;
-		}
-
-		proute->partitions[i] = leaf_part_rri;
-		i++;
-	}
 
 	/*
-	 * For UPDATE, we should have found all the per-subplan resultrels in the
-	 * leaf partitions.  (If this is an INSERT, both values will be zero.)
+	 * Here we attempt to expend as little effort as possible in setting up
+	 * the PartitionTupleRouting.  Each partition's ResultRelInfo is built on
+	 * demand, only when we actually need to route a tuple to that partition.
+	 * The reason for this is that a common case is for INSERT to insert a
+	 * single tuple into a partitioned table and this must be fast.
 	 */
-	Assert(update_rri_index == num_update_rri);
+	proute = (PartitionTupleRouting *) palloc0(sizeof(PartitionTupleRouting));
+	proute->partition_root = rel;
+	proute->memcxt = CurrentMemoryContext;
+	/* Rest of members initialized by zeroing */
+
+	/*
+	 * Initialize this table's PartitionDispatch object.  Here we pass in the
+	 * parent as NULL as we don't need to care about any parent of the target
+	 * partitioned table.
+	 */
+	ExecInitPartitionDispatchInfo(proute, RelationGetRelid(rel), NULL, 0);
+
+	/*
+	 * If performing an UPDATE with tuple routing, we can reuse partition
+	 * sub-plan result rels.  We build a hash table to map the OIDs of
+	 * partitions present in mtstate->resultRelInfo to their ResultRelInfos.
+	 * Every time a tuple is routed to a partition that we've yet to set the
+	 * ResultRelInfo for, before we go to the trouble of making one, we check
+	 * for a pre-made one in the hash table.
+	 */
+	if (node && node->operation == CMD_UPDATE)
+		ExecHashSubPlanResultRelsByOid(mtstate, proute);
 
 	return proute;
 }
 
 /*
- * ExecFindPartition -- Find a leaf partition in the partition tree rooted
- * at parent, for the heap tuple contained in *slot
+ * ExecFindPartition -- Find and return, or build and return the ResultRelInfo
+ * for the leaf partition that the tuple contained in *slot should belong to.
+ *
+ * If the partition's ResultRelInfo does not yet exist in 'proute' then we set
+ * one up or reuse one from mtstate's resultRelInfo array.  When reusing a
+ * ResultRelInfo from the mtstate we verify that the relation is a valid
+ * target for INSERTs and then set up a PartitionRoutingInfo for it.
  *
  * estate must be non-NULL; we'll need it to compute any expressions in the
- * partition key(s)
+ * partition keys.  Also, its per-tuple context is used.
  *
  * If no leaf partition is found, this routine errors out with the appropriate
- * error message, else it returns the leaf partition sequence number
- * as an index into the array of (ResultRelInfos of) all leaf partitions in
- * the partition tree.
+ * error message.  An error may also be raised if the found target partition
+ * is not a valid target for an INSERT.
  */
-int
-ExecFindPartition(ResultRelInfo *resultRelInfo, PartitionDispatch *pd,
+ResultRelInfo *
+ExecFindPartition(ModifyTableState *mtstate,
+				  ResultRelInfo *rootResultRelInfo,
+				  PartitionTupleRouting *proute,
 				  TupleTableSlot *slot, EState *estate)
 {
-	int			result;
+	PartitionDispatch *pd = proute->partition_dispatch_info;
 	Datum		values[PARTITION_MAX_KEYS];
 	bool		isnull[PARTITION_MAX_KEYS];
 	Relation	rel;
 	PartitionDispatch dispatch;
+	PartitionDesc partdesc;
 	ExprContext *ecxt = GetPerTupleExprContext(estate);
 	TupleTableSlot *ecxt_scantuple_old = ecxt->ecxt_scantuple;
 	TupleTableSlot *myslot = NULL;
@@ -229,25 +277,31 @@ ExecFindPartition(ResultRelInfo *resultRelInfo, PartitionDispatch *pd,
 	 * First check the root table's partition constraint, if any.  No point in
 	 * routing the tuple if it doesn't belong in the root table itself.
 	 */
-	if (resultRelInfo->ri_PartitionCheck)
-		ExecPartitionCheck(resultRelInfo, slot, estate, true);
+	if (rootResultRelInfo->ri_PartitionCheck)
+		ExecPartitionCheck(rootResultRelInfo, slot, estate, true);
 
 	/* start with the root partitioned table */
 	dispatch = pd[0];
 	while (true)
 	{
 		AttrNumber *map = dispatch->tupmap;
-		int			cur_index = -1;
+		int			partidx = -1;
+
+		CHECK_FOR_INTERRUPTS();
 
 		rel = dispatch->reldesc;
+		partdesc = dispatch->partdesc;
 
 		/*
 		 * Convert the tuple to this parent's layout, if different from the
 		 * current relation.
 		 */
 		myslot = dispatch->tupslot;
-		if (myslot != NULL && map != NULL)
+		if (myslot != NULL)
+		{
+			Assert(map != NULL);
 			slot = execute_attr_map_slot(map, slot, myslot);
+		}
 
 		/*
 		 * Extract partition key from tuple. Expression evaluation machinery
@@ -261,97 +315,196 @@ ExecFindPartition(ResultRelInfo *resultRelInfo, PartitionDispatch *pd,
 		FormPartitionKeyDatum(dispatch, slot, estate, values, isnull);
 
 		/*
-		 * Nothing for get_partition_for_tuple() to do if there are no
-		 * partitions to begin with.
+		 * If this partitioned table has no partitions or no partition for
+		 * these values, error out.
 		 */
-		if (dispatch->partdesc->nparts == 0)
+		if (partdesc->nparts == 0 ||
+			(partidx = get_partition_for_tuple(dispatch, values, isnull)) < 0)
 		{
-			result = -1;
-			break;
+			char	   *val_desc;
+
+			val_desc = ExecBuildSlotPartitionKeyDescription(rel,
+															values, isnull, 64);
+			Assert(OidIsValid(RelationGetRelid(rel)));
+			ereport(ERROR,
+					(errcode(ERRCODE_CHECK_VIOLATION),
+					 errmsg("no partition of relation \"%s\" found for row",
+							RelationGetRelationName(rel)),
+					 val_desc ?
+					 errdetail("Partition key of the failing row contains %s.",
+							   val_desc) : 0));
 		}
 
-		cur_index = get_partition_for_tuple(dispatch, values, isnull);
+		if (partdesc->is_leaf[partidx])
+		{
+			ResultRelInfo *rri;
 
-		/*
-		 * cur_index < 0 means we failed to find a partition of this parent.
-		 * cur_index >= 0 means we either found the leaf partition, or the
-		 * next parent to find a partition of.
-		 */
-		if (cur_index < 0)
-		{
-			result = -1;
-			break;
-		}
-		else if (dispatch->indexes[cur_index] >= 0)
-		{
-			result = dispatch->indexes[cur_index];
-			/* success! */
-			break;
+			/*
+			 * Look to see if we've already got a ResultRelInfo for this
+			 * partition.
+			 */
+			if (likely(dispatch->indexes[partidx] >= 0))
+			{
+				/* ResultRelInfo already built */
+				Assert(dispatch->indexes[partidx] < proute->num_partitions);
+				rri = proute->partitions[dispatch->indexes[partidx]];
+			}
+			else
+			{
+				bool		found = false;
+
+				/*
+				 * We have not yet set up a ResultRelInfo for this partition,
+				 * but if we have a subplan hash table, we might have one
+				 * there.  If not, we'll have to create one.
+				 */
+				if (proute->subplan_resultrel_hash)
+				{
+					Oid			partoid = partdesc->oids[partidx];
+					SubplanResultRelHashElem   *elem;
+
+					elem = hash_search(proute->subplan_resultrel_hash,
+									   &partoid, HASH_FIND, NULL);
+					if (elem)
+					{
+						found = true;
+						rri = elem->rri;
+
+						/* Verify this ResultRelInfo allows INSERTs */
+						CheckValidResultRel(rri, CMD_INSERT);
+
+						/* Set up the PartitionRoutingInfo for it */
+						ExecInitRoutingInfo(proute, dispatch, mtstate, estate,
+											rri, partidx);
+					}
+				}
+
+				/* We need to create a new one. */
+				if (!found)
+					rri = ExecInitPartitionInfo(mtstate, rootResultRelInfo,
+												proute, estate,
+												dispatch, partidx);
+			}
+
+			/* Release the tuple in the lowest parent's dedicated slot. */
+			if (slot == myslot)
+				ExecClearTuple(myslot);
+
+			MemoryContextSwitchTo(oldcxt);
+			ecxt->ecxt_scantuple = ecxt_scantuple_old;
+			return rri;
 		}
 		else
 		{
-			/* move down one level */
-			dispatch = pd[-dispatch->indexes[cur_index]];
+			/*
+			 * Partition is a sub-partitioned table; get the PartitionDispatch
+			 */
+			if (likely(dispatch->indexes[partidx] >= 0))
+			{
+				/* Already built. */
+				Assert(dispatch->indexes[partidx] < proute->num_dispatch);
+
+				/*
+				 * Move down to the next partition level and search again
+				 * until we find a leaf partition that matches this tuple
+				 */
+				dispatch = pd[dispatch->indexes[partidx]];
+			}
+			else
+			{
+				/* Not yet built. Do that now. */
+				PartitionDispatch subdispatch;
+
+				/*
+				 * Create the new PartitionDispatch.  We pass the current one
+				 * in as the parent PartitionDispatch
+				 */
+				subdispatch = ExecInitPartitionDispatchInfo(proute,
+															partdesc->oids[partidx],
+															dispatch, partidx);
+				Assert(dispatch->indexes[partidx] >= 0 &&
+					   dispatch->indexes[partidx] < proute->num_dispatch);
+				dispatch = subdispatch;
+			}
 		}
 	}
+}
 
-	/* Release the tuple in the lowest parent's dedicated slot. */
-	if (slot == myslot)
-		ExecClearTuple(myslot);
+/*
+ * ExecHashSubPlanResultRelsByOid
+ *		Build a hash table to allow fast lookups of subplan ResultRelInfos by
+ *		partition Oid.  We also populate the subplan ResultRelInfo with an
+ *		ri_PartitionRoot.
+ */
+static void
+ExecHashSubPlanResultRelsByOid(ModifyTableState *mtstate,
+							   PartitionTupleRouting *proute)
+{
+	HASHCTL		ctl;
+	HTAB	   *htab;
+	int			i;
 
-	/* A partition was not found. */
-	if (result < 0)
+	memset(&ctl, 0, sizeof(ctl));
+	ctl.keysize = sizeof(Oid);
+	ctl.entrysize = sizeof(SubplanResultRelHashElem);
+	ctl.hcxt = CurrentMemoryContext;
+
+	htab = hash_create("PartitionTupleRouting table", mtstate->mt_nplans,
+					   &ctl, HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+	proute->subplan_resultrel_hash = htab;
+
+	/* Hash all subplans by their Oid */
+	for (i = 0; i < mtstate->mt_nplans; i++)
 	{
-		char	   *val_desc;
+		ResultRelInfo *rri = &mtstate->resultRelInfo[i];
+		bool		found;
+		Oid			partoid = RelationGetRelid(rri->ri_RelationDesc);
+		SubplanResultRelHashElem   *elem;
 
-		val_desc = ExecBuildSlotPartitionKeyDescription(rel,
-														values, isnull, 64);
-		Assert(OidIsValid(RelationGetRelid(rel)));
-		ereport(ERROR,
-				(errcode(ERRCODE_CHECK_VIOLATION),
-				 errmsg("no partition of relation \"%s\" found for row",
-						RelationGetRelationName(rel)),
-				 val_desc ? errdetail("Partition key of the failing row contains %s.", val_desc) : 0));
+		elem = (SubplanResultRelHashElem *)
+			hash_search(htab, &partoid, HASH_ENTER, &found);
+		Assert(!found);
+		elem->rri = rri;
+
+		/*
+		 * This is required in order to convert the partition's tuple to be
+		 * compatible with the root partitioned table's tuple descriptor. When
+		 * generating the per-subplan result rels, this was not set.
+		 */
+		rri->ri_PartitionRoot = proute->partition_root;
 	}
-
-	MemoryContextSwitchTo(oldcxt);
-	ecxt->ecxt_scantuple = ecxt_scantuple_old;
-
-	return result;
 }
 
 /*
  * ExecInitPartitionInfo
  *		Initialize ResultRelInfo and other information for a partition
+ *		and store it in the next empty slot in the proute->partitions array.
  *
  * Returns the ResultRelInfo
  */
-ResultRelInfo *
+static ResultRelInfo *
 ExecInitPartitionInfo(ModifyTableState *mtstate,
-					  ResultRelInfo *resultRelInfo,
+					  ResultRelInfo *rootResultRelInfo,
 					  PartitionTupleRouting *proute,
-					  EState *estate, int partidx)
+					  EState *estate,
+					  PartitionDispatch dispatch, int partidx)
 {
 	ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
-	Relation	rootrel = resultRelInfo->ri_RelationDesc,
+	Relation	rootrel = rootResultRelInfo->ri_RelationDesc,
 				partrel;
 	Relation	firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc;
 	ResultRelInfo *leaf_part_rri;
-	MemoryContext oldContext;
+	MemoryContext oldcxt;
 	AttrNumber *part_attnos = NULL;
 	bool		found_whole_row;
 
+	oldcxt = MemoryContextSwitchTo(proute->memcxt);
+
 	/*
 	 * We locked all the partitions in ExecSetupPartitionTupleRouting
 	 * including the leaf partitions.
 	 */
-	partrel = heap_open(proute->partition_oids[partidx], NoLock);
-
-	/*
-	 * Keep ResultRelInfo and other information for this partition in the
-	 * per-query memory context so they'll survive throughout the query.
-	 */
-	oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
+	partrel = heap_open(dispatch->partdesc->oids[partidx], NoLock);
 
 	leaf_part_rri = makeNode(ResultRelInfo);
 	InitResultRelInfo(leaf_part_rri,
@@ -368,18 +521,6 @@ ExecInitPartitionInfo(ModifyTableState *mtstate,
 	CheckValidResultRel(leaf_part_rri, CMD_INSERT);
 
 	/*
-	 * Since we've just initialized this ResultRelInfo, it's not in any list
-	 * attached to the estate as yet.  Add it, so that it can be found later.
-	 *
-	 * Note that the entries in this list appear in no predetermined order,
-	 * because partition result rels are initialized as and when they're
-	 * needed.
-	 */
-	estate->es_tuple_routing_result_relations =
-		lappend(estate->es_tuple_routing_result_relations,
-				leaf_part_rri);
-
-	/*
 	 * Open partition indices.  The user may have asked to check for conflicts
 	 * within this leaf partition and do "nothing" instead of throwing an
 	 * error.  Be prepared in that case by initializing the index information
@@ -522,14 +663,14 @@ ExecInitPartitionInfo(ModifyTableState *mtstate,
 	}
 
 	/* Set up information needed for routing tuples to the partition. */
-	ExecInitRoutingInfo(mtstate, estate, proute, leaf_part_rri, partidx);
+	ExecInitRoutingInfo(proute, dispatch, mtstate, estate,
+						leaf_part_rri, partidx);
 
 	/*
 	 * If there is an ON CONFLICT clause, initialize state for it.
 	 */
 	if (node && node->onConflictAction != ONCONFLICT_NONE)
 	{
-		TupleConversionMap *map = proute->parent_child_tupconv_maps[partidx];
 		int			firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex;
 		TupleDesc	partrelDesc = RelationGetDescr(partrel);
 		ExprContext *econtext = mtstate->ps.ps_ExprContext;
@@ -542,7 +683,7 @@ ExecInitPartitionInfo(ModifyTableState *mtstate,
 		 * list and searching for ancestry relationships to each index in the
 		 * ancestor table.
 		 */
-		if (list_length(resultRelInfo->ri_onConflictArbiterIndexes) > 0)
+		if (list_length(rootResultRelInfo->ri_onConflictArbiterIndexes) > 0)
 		{
 			List	   *childIdxs;
 
@@ -555,7 +696,7 @@ ExecInitPartitionInfo(ModifyTableState *mtstate,
 				ListCell   *lc2;
 
 				ancestors = get_partition_ancestors(childIdx);
-				foreach(lc2, resultRelInfo->ri_onConflictArbiterIndexes)
+				foreach(lc2, rootResultRelInfo->ri_onConflictArbiterIndexes)
 				{
 					if (list_member_oid(ancestors, lfirst_oid(lc2)))
 						arbiterIndexes = lappend_oid(arbiterIndexes, childIdx);
@@ -569,7 +710,7 @@ ExecInitPartitionInfo(ModifyTableState *mtstate,
 		 * (This shouldn't happen, since arbiter index selection should not
 		 * pick up an invalid index.)
 		 */
-		if (list_length(resultRelInfo->ri_onConflictArbiterIndexes) !=
+		if (list_length(rootResultRelInfo->ri_onConflictArbiterIndexes) !=
 			list_length(arbiterIndexes))
 			elog(ERROR, "invalid arbiter index list");
 		leaf_part_rri->ri_onConflictArbiterIndexes = arbiterIndexes;
@@ -579,8 +720,12 @@ ExecInitPartitionInfo(ModifyTableState *mtstate,
 		 */
 		if (node->onConflictAction == ONCONFLICT_UPDATE)
 		{
+			TupleConversionMap *map;
+
+			map = leaf_part_rri->ri_PartitionInfo->pi_RootToPartitionMap;
+
 			Assert(node->onConflictSet != NIL);
-			Assert(resultRelInfo->ri_onConflict != NULL);
+			Assert(rootResultRelInfo->ri_onConflict != NULL);
 
 			/*
 			 * If the partition's tuple descriptor matches exactly the root
@@ -589,7 +734,7 @@ ExecInitPartitionInfo(ModifyTableState *mtstate,
 			 * need to create state specific to this partition.
 			 */
 			if (map == NULL)
-				leaf_part_rri->ri_onConflict = resultRelInfo->ri_onConflict;
+				leaf_part_rri->ri_onConflict = rootResultRelInfo->ri_onConflict;
 			else
 			{
 				List	   *onconflset;
@@ -680,37 +825,51 @@ ExecInitPartitionInfo(ModifyTableState *mtstate,
 		}
 	}
 
-	Assert(proute->partitions[partidx] == NULL);
-	proute->partitions[partidx] = leaf_part_rri;
+	/*
+	 * Since we've just initialized this ResultRelInfo, it's not in any list
+	 * attached to the estate as yet.  Add it, so that it can be found later.
+	 *
+	 * Note that the entries in this list appear in no predetermined order,
+	 * because partition result rels are initialized as and when they're
+	 * needed.
+	 */
+	MemoryContextSwitchTo(estate->es_query_cxt);
+	estate->es_tuple_routing_result_relations =
+		lappend(estate->es_tuple_routing_result_relations,
+				leaf_part_rri);
 
-	MemoryContextSwitchTo(oldContext);
+	MemoryContextSwitchTo(oldcxt);
 
 	return leaf_part_rri;
 }
 
 /*
  * ExecInitRoutingInfo
- *		Set up information needed for routing tuples to a leaf partition
+ *		Set up information needed for translating tuples between root
+ *		partitioned table format and partition format, and keep track of it
+ *		in PartitionTupleRouting.
  */
-void
-ExecInitRoutingInfo(ModifyTableState *mtstate,
+static void
+ExecInitRoutingInfo(PartitionTupleRouting *proute,
+					PartitionDispatch dispatch,
+					ModifyTableState *mtstate,
 					EState *estate,
-					PartitionTupleRouting *proute,
 					ResultRelInfo *partRelInfo,
 					int partidx)
 {
-	MemoryContext oldContext;
+	MemoryContext oldcxt;
+	PartitionRoutingInfo *partrouteinfo;
+	int		rri_index;
 
-	/*
-	 * Switch into per-query memory context.
-	 */
-	oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
+	oldcxt = MemoryContextSwitchTo(proute->memcxt);
+
+	partrouteinfo = palloc(sizeof(PartitionRoutingInfo));
 
 	/*
 	 * Set up a tuple conversion map to convert a tuple routed to the
 	 * partition from the parent's type to the partition's.
 	 */
-	proute->parent_child_tupconv_maps[partidx] =
+	partrouteinfo->pi_RootToPartitionMap =
 		convert_tuples_by_name(RelationGetDescr(partRelInfo->ri_PartitionRoot),
 							   RelationGetDescr(partRelInfo->ri_RelationDesc),
 							   gettext_noop("could not convert row type"));
@@ -721,29 +880,36 @@ ExecInitRoutingInfo(ModifyTableState *mtstate,
 	 * for various operations that are applied to tuples after routing, such
 	 * as checking constraints.
 	 */
-	if (proute->parent_child_tupconv_maps[partidx] != NULL)
+	if (partrouteinfo->pi_RootToPartitionMap != NULL)
 	{
 		Relation	partrel = partRelInfo->ri_RelationDesc;
 
 		/*
-		 * Initialize the array in proute where these slots are stored, if not
-		 * already done.
-		 */
-		if (proute->partition_tuple_slots == NULL)
-			proute->partition_tuple_slots = (TupleTableSlot **)
-				palloc0(proute->num_partitions *
-						sizeof(TupleTableSlot *));
-
-		/*
 		 * Initialize the slot itself setting its descriptor to this
 		 * partition's TupleDesc; TupleDesc reference will be released at the
 		 * end of the command.
 		 */
-		proute->partition_tuple_slots[partidx] =
-			ExecInitExtraTupleSlot(estate,
-								   RelationGetDescr(partrel),
+		partrouteinfo->pi_PartitionTupleSlot =
+			ExecInitExtraTupleSlot(estate, RelationGetDescr(partrel),
 								   &TTSOpsHeapTuple);
 	}
+	else
+		partrouteinfo->pi_PartitionTupleSlot = NULL;
+
+	/*
+	 * Also, if transition capture is required, store a map to convert tuples
+	 * from the partition's rowtype to the root partitioned table's.
+	 */
+	if (mtstate &&
+		(mtstate->mt_transition_capture || mtstate->mt_oc_transition_capture))
+	{
+		partrouteinfo->pi_PartitionToRootMap =
+			convert_tuples_by_name(RelationGetDescr(partRelInfo->ri_RelationDesc),
+								   RelationGetDescr(partRelInfo->ri_PartitionRoot),
+								   gettext_noop("could not convert row type"));
+	}
+	else
+		partrouteinfo->pi_PartitionToRootMap = NULL;
 
 	/*
 	 * If the partition is a foreign table, let the FDW init itself for
@@ -753,73 +919,138 @@ ExecInitRoutingInfo(ModifyTableState *mtstate,
 		partRelInfo->ri_FdwRoutine->BeginForeignInsert != NULL)
 		partRelInfo->ri_FdwRoutine->BeginForeignInsert(mtstate, partRelInfo);
 
-	MemoryContextSwitchTo(oldContext);
-
-	partRelInfo->ri_PartitionReadyForRouting = true;
-}
-
-/*
- * ExecSetupChildParentMapForLeaf -- Initialize the per-leaf-partition
- * child-to-root tuple conversion map array.
- *
- * This map is required for capturing transition tuples when the target table
- * is a partitioned table. For a tuple that is routed by an INSERT or UPDATE,
- * we need to convert it from the leaf partition to the target table
- * descriptor.
- */
-void
-ExecSetupChildParentMapForLeaf(PartitionTupleRouting *proute)
-{
-	Assert(proute != NULL);
+	partRelInfo->ri_PartitionInfo = partrouteinfo;
 
 	/*
-	 * These array elements get filled up with maps on an on-demand basis.
-	 * Initially just set all of them to NULL.
+	 * Keep track of it in the PartitionTupleRouting->partitions array.
 	 */
-	proute->child_parent_tupconv_maps =
-		(TupleConversionMap **) palloc0(sizeof(TupleConversionMap *) *
-										proute->num_partitions);
+	Assert(dispatch->indexes[partidx] == -1);
 
-	/* Same is the case for this array. All the values are set to false */
-	proute->child_parent_map_not_required =
-		(bool *) palloc0(sizeof(bool) * proute->num_partitions);
+	rri_index = proute->num_partitions++;
+
+	/* Allocate or enlarge the array, as needed */
+	if (proute->num_partitions >= proute->max_partitions)
+	{
+		if (proute->max_partitions == 0)
+		{
+			proute->max_partitions = 8;
+			proute->partitions = (ResultRelInfo **)
+				palloc(sizeof(ResultRelInfo *) * proute->max_partitions);
+		}
+		else
+		{
+			proute->max_partitions *= 2;
+			proute->partitions = (ResultRelInfo **)
+				repalloc(proute->partitions, sizeof(ResultRelInfo *) *
+						 proute->max_partitions);
+		}
+	}
+
+	proute->partitions[rri_index] = partRelInfo;
+	dispatch->indexes[partidx] = rri_index;
+
+	MemoryContextSwitchTo(oldcxt);
 }
 
 /*
- * TupConvMapForLeaf -- Get the tuple conversion map for a given leaf partition
- * index.
+ * ExecInitPartitionDispatchInfo
+ *		Initialize PartitionDispatch for a partitioned table and store it in
+ *		the next available slot in the proute->partition_dispatch_info array.
+ *		Also, record the index into this array in the parent_pd->indexes[]
+ *		array in the partidx element so that we can properly retrieve the
+ *		newly created PartitionDispatch later.
  */
-TupleConversionMap *
-TupConvMapForLeaf(PartitionTupleRouting *proute,
-				  ResultRelInfo *rootRelInfo, int leaf_index)
+static PartitionDispatch
+ExecInitPartitionDispatchInfo(PartitionTupleRouting *proute, Oid partoid,
+							  PartitionDispatch parent_pd, int partidx)
 {
-	ResultRelInfo **resultRelInfos = proute->partitions;
-	TupleConversionMap **map;
-	TupleDesc	tupdesc;
+	Relation	rel;
+	PartitionDesc partdesc;
+	PartitionDispatch pd;
+	int			dispatchidx;
+	MemoryContext oldcxt;
 
-	/* Don't call this if we're not supposed to be using this type of map. */
-	Assert(proute->child_parent_tupconv_maps != NULL);
+	oldcxt = MemoryContextSwitchTo(proute->memcxt);
 
-	/* If it's already known that we don't need a map, return NULL. */
-	if (proute->child_parent_map_not_required[leaf_index])
-		return NULL;
+	if (partoid != RelationGetRelid(proute->partition_root))
+		rel = heap_open(partoid, NoLock);
+	else
+		rel = proute->partition_root;
+	partdesc = RelationGetPartitionDesc(rel);
 
-	/* If we've already got a map, return it. */
-	map = &proute->child_parent_tupconv_maps[leaf_index];
-	if (*map != NULL)
-		return *map;
+	pd = (PartitionDispatch) palloc(offsetof(PartitionDispatchData, indexes) +
+									partdesc->nparts * sizeof(int));
+	pd->reldesc = rel;
+	pd->key = RelationGetPartitionKey(rel);
+	pd->keystate = NIL;
+	pd->partdesc = partdesc;
+	if (parent_pd != NULL)
+	{
+		TupleDesc	tupdesc = RelationGetDescr(rel);
 
-	/* No map yet; try to create one. */
-	tupdesc = RelationGetDescr(resultRelInfos[leaf_index]->ri_RelationDesc);
-	*map =
-		convert_tuples_by_name(tupdesc,
-							   RelationGetDescr(rootRelInfo->ri_RelationDesc),
-							   gettext_noop("could not convert row type"));
+		/*
+		 * For a sub-partitioned table whose column order differs from its
+		 * direct parent partitioned table, we must store a tuple table slot
+		 * initialized with its tuple descriptor and a tuple conversion map to
+		 * convert a tuple from its parent's rowtype to its own.  This is to
+		 * make sure that we are looking at the correct row using the correct
+		 * tuple descriptor when computing its partition key for tuple
+		 * routing.
+		 */
+		pd->tupmap = convert_tuples_by_name_map_if_req(RelationGetDescr(parent_pd->reldesc),
+													   tupdesc,
+													   gettext_noop("could not convert row type"));
+		pd->tupslot = pd->tupmap ?
+			MakeSingleTupleTableSlot(tupdesc, &TTSOpsHeapTuple) : NULL;
+	}
+	else
+	{
+		/* Not required for the root partitioned table */
+		pd->tupmap = NULL;
+		pd->tupslot = NULL;
+	}
 
-	/* If it turns out no map is needed, remember for next time. */
-	proute->child_parent_map_not_required[leaf_index] = (*map == NULL);
+	/*
+	 * Initialize with -1 to signify that the corresponding partition's
+	 * ResultRelInfo or PartitionDispatch has not been created yet.
+	 */
+	memset(pd->indexes, -1, sizeof(int) * partdesc->nparts);
 
-	return *map;
+	/* Track in PartitionTupleRouting for later use */
+	dispatchidx = proute->num_dispatch++;
+
+	/* Allocate or enlarge the array, as needed */
+	if (proute->num_dispatch >= proute->max_dispatch)
+	{
+		if (proute->max_dispatch == 0)
+		{
+			proute->max_dispatch = 4;
+			proute->partition_dispatch_info = (PartitionDispatch *)
+				palloc(sizeof(PartitionDispatch) * proute->max_dispatch);
+		}
+		else
+		{
+			proute->max_dispatch *= 2;
+			proute->partition_dispatch_info = (PartitionDispatch *)
+				repalloc(proute->partition_dispatch_info,
+						 sizeof(PartitionDispatch) * proute->max_dispatch);
+		}
+	}
+	proute->partition_dispatch_info[dispatchidx] = pd;
+
+	/*
+	 * Finally, if setting up a PartitionDispatch for a sub-partitioned table,
+	 * install a downlink in the parent to allow quick descent.
+	 */
+	if (parent_pd)
+	{
+		Assert(parent_pd->indexes[partidx] == -1);
+		parent_pd->indexes[partidx] = dispatchidx;
+	}
+
+	MemoryContextSwitchTo(oldcxt);
+
+	return pd;
 }
 
 /*
@@ -832,8 +1063,8 @@ void
 ExecCleanupTupleRouting(ModifyTableState *mtstate,
 						PartitionTupleRouting *proute)
 {
+	HTAB	   *resultrel_hash = proute->subplan_resultrel_hash;
 	int			i;
-	int			subplan_index = 0;
 
 	/*
 	 * Remember, proute->partition_dispatch_info[0] corresponds to the root
@@ -847,187 +1078,40 @@ ExecCleanupTupleRouting(ModifyTableState *mtstate,
 		PartitionDispatch pd = proute->partition_dispatch_info[i];
 
 		heap_close(pd->reldesc, NoLock);
-		ExecDropSingleTupleTableSlot(pd->tupslot);
+
+		if (pd->tupslot)
+			ExecDropSingleTupleTableSlot(pd->tupslot);
 	}
 
 	for (i = 0; i < proute->num_partitions; i++)
 	{
 		ResultRelInfo *resultRelInfo = proute->partitions[i];
 
-		/* skip further processing for uninitialized partitions */
-		if (resultRelInfo == NULL)
-			continue;
+		/*
+		 * Check if this result rel is one belonging to the node's subplans;
+		 * if so, let ExecEndPlan() clean it up.
+		 */
+		if (resultrel_hash)
+		{
+			Oid			partoid;
+			bool		found;
+
+			partoid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
+
+			(void) hash_search(resultrel_hash, &partoid, HASH_FIND, &found);
+			if (found)
+				continue;
+		}
 
 		/* Allow any FDWs to shut down if they've been exercised */
-		if (resultRelInfo->ri_PartitionReadyForRouting &&
-			resultRelInfo->ri_FdwRoutine != NULL &&
+		if (resultRelInfo->ri_FdwRoutine != NULL &&
 			resultRelInfo->ri_FdwRoutine->EndForeignInsert != NULL)
 			resultRelInfo->ri_FdwRoutine->EndForeignInsert(mtstate->ps.state,
 														   resultRelInfo);
 
-		/*
-		 * If this result rel is one of the UPDATE subplan result rels, let
-		 * ExecEndPlan() close it. For INSERT or COPY,
-		 * proute->subplan_partition_offsets will always be NULL. Note that
-		 * the subplan_partition_offsets array and the partitions array have
-		 * the partitions in the same order. So, while we iterate over
-		 * partitions array, we also iterate over the
-		 * subplan_partition_offsets array in order to figure out which of the
-		 * result rels are present in the UPDATE subplans.
-		 */
-		if (proute->subplan_partition_offsets &&
-			subplan_index < proute->num_subplan_partition_offsets &&
-			proute->subplan_partition_offsets[subplan_index] == i)
-		{
-			subplan_index++;
-			continue;
-		}
-
 		ExecCloseIndices(resultRelInfo);
 		heap_close(resultRelInfo->ri_RelationDesc, NoLock);
 	}
-
-	/* Release the standalone partition tuple descriptors, if any */
-	if (proute->root_tuple_slot)
-		ExecDropSingleTupleTableSlot(proute->root_tuple_slot);
-}
-
-/*
- * RelationGetPartitionDispatchInfo
- *		Returns information necessary to route tuples down a partition tree
- *
- * The number of elements in the returned array (that is, the number of
- * PartitionDispatch objects for the partitioned tables in the partition tree)
- * is returned in *num_parted and a list of the OIDs of all the leaf
- * partitions of rel is returned in *leaf_part_oids.
- *
- * All the relations in the partition tree (including 'rel') must have been
- * locked (using at least the AccessShareLock) by the caller.
- */
-static PartitionDispatch *
-RelationGetPartitionDispatchInfo(Relation rel,
-								 int *num_parted, List **leaf_part_oids)
-{
-	List	   *pdlist = NIL;
-	PartitionDispatchData **pd;
-	ListCell   *lc;
-	int			i;
-
-	Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
-
-	*num_parted = 0;
-	*leaf_part_oids = NIL;
-
-	get_partition_dispatch_recurse(rel, NULL, &pdlist, leaf_part_oids);
-	*num_parted = list_length(pdlist);
-	pd = (PartitionDispatchData **) palloc(*num_parted *
-										   sizeof(PartitionDispatchData *));
-	i = 0;
-	foreach(lc, pdlist)
-	{
-		pd[i++] = lfirst(lc);
-	}
-
-	return pd;
-}
-
-/*
- * get_partition_dispatch_recurse
- *		Recursively expand partition tree rooted at rel
- *
- * As the partition tree is expanded in a depth-first manner, we maintain two
- * global lists: of PartitionDispatch objects corresponding to partitioned
- * tables in *pds and of the leaf partition OIDs in *leaf_part_oids.
- *
- * Note that the order of OIDs of leaf partitions in leaf_part_oids matches
- * the order in which the planner's expand_partitioned_rtentry() processes
- * them.  It's not necessarily the case that the offsets match up exactly,
- * because constraint exclusion might prune away some partitions on the
- * planner side, whereas we'll always have the complete list; but unpruned
- * partitions will appear in the same order in the plan as they are returned
- * here.
- */
-static void
-get_partition_dispatch_recurse(Relation rel, Relation parent,
-							   List **pds, List **leaf_part_oids)
-{
-	TupleDesc	tupdesc = RelationGetDescr(rel);
-	PartitionDesc partdesc = RelationGetPartitionDesc(rel);
-	PartitionKey partkey = RelationGetPartitionKey(rel);
-	PartitionDispatch pd;
-	int			i;
-
-	check_stack_depth();
-
-	/* Build a PartitionDispatch for this table and add it to *pds. */
-	pd = (PartitionDispatch) palloc(sizeof(PartitionDispatchData));
-	*pds = lappend(*pds, pd);
-	pd->reldesc = rel;
-	pd->key = partkey;
-	pd->keystate = NIL;
-	pd->partdesc = partdesc;
-	if (parent != NULL)
-	{
-		/*
-		 * For every partitioned table other than the root, we must store a
-		 * tuple table slot initialized with its tuple descriptor and a tuple
-		 * conversion map to convert a tuple from its parent's rowtype to its
-		 * own. That is to make sure that we are looking at the correct row
-		 * using the correct tuple descriptor when computing its partition key
-		 * for tuple routing.
-		 */
-		pd->tupslot = MakeSingleTupleTableSlot(tupdesc, &TTSOpsHeapTuple);
-		pd->tupmap = convert_tuples_by_name_map_if_req(RelationGetDescr(parent),
-													   tupdesc,
-													   gettext_noop("could not convert row type"));
-	}
-	else
-	{
-		/* Not required for the root partitioned table */
-		pd->tupslot = NULL;
-		pd->tupmap = NULL;
-	}
-
-	/*
-	 * Go look at each partition of this table.  If it's a leaf partition,
-	 * simply add its OID to *leaf_part_oids.  If it's a partitioned table,
-	 * recursively call get_partition_dispatch_recurse(), so that its
-	 * partitions are processed as well and a corresponding PartitionDispatch
-	 * object gets added to *pds.
-	 *
-	 * The 'indexes' array is used when searching for a partition matching a
-	 * given tuple.  The actual value we store here depends on whether the
-	 * array element belongs to a leaf partition or a subpartitioned table.
-	 * For leaf partitions we store the index into *leaf_part_oids, and for
-	 * sub-partitioned tables we store a negative version of the index into
-	 * the *pds list.  Both indexes are 0-based, but the first element of the
-	 * *pds list is the root partition, so 0 always means the first leaf. When
-	 * searching, if we see a negative value, the search must continue in the
-	 * corresponding sub-partition; otherwise, we've identified the correct
-	 * partition.
-	 */
-	pd->indexes = (int *) palloc(partdesc->nparts * sizeof(int));
-	for (i = 0; i < partdesc->nparts; i++)
-	{
-		Oid			partrelid = partdesc->oids[i];
-
-		if (get_rel_relkind(partrelid) != RELKIND_PARTITIONED_TABLE)
-		{
-			*leaf_part_oids = lappend_oid(*leaf_part_oids, partrelid);
-			pd->indexes[i] = list_length(*leaf_part_oids) - 1;
-		}
-		else
-		{
-			/*
-			 * We assume all tables in the partition tree were already locked
-			 * by the caller.
-			 */
-			Relation	partrel = heap_open(partrelid, NoLock);
-
-			pd->indexes[i] = -list_length(*pds);
-			get_partition_dispatch_recurse(partrel, rel, pds, leaf_part_oids);
-		}
-	}
 }
 
 /* ----------------
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index bb344a7070..65d46c8ea8 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -68,7 +68,6 @@ static TupleTableSlot *ExecPrepareTupleRouting(ModifyTableState *mtstate,
 						ResultRelInfo *targetRelInfo,
 						TupleTableSlot *slot);
 static ResultRelInfo *getTargetResultRelInfo(ModifyTableState *node);
-static void ExecSetupChildParentMapForTcs(ModifyTableState *mtstate);
 static void ExecSetupChildParentMapForSubplan(ModifyTableState *mtstate);
 static TupleConversionMap *tupconv_map_for_subplan(ModifyTableState *node,
 						int whichplan);
@@ -1157,7 +1156,8 @@ lreplace:;
 			tupconv_map = tupconv_map_for_subplan(mtstate, map_index);
 			if (tupconv_map != NULL)
 				slot = execute_attr_map_slot(tupconv_map->attrMap,
-											 slot, proute->root_tuple_slot);
+											 slot,
+											 mtstate->mt_root_tuple_slot);
 
 			/*
 			 * Prepare for tuple routing, making it look like we're inserting
@@ -1653,7 +1653,7 @@ ExecSetupTransitionCaptureState(ModifyTableState *mtstate, EState *estate)
 	if (mtstate->mt_transition_capture != NULL ||
 		mtstate->mt_oc_transition_capture != NULL)
 	{
-		ExecSetupChildParentMapForTcs(mtstate);
+		ExecSetupChildParentMapForSubplan(mtstate);
 
 		/*
 		 * Install the conversion map for the first plan for UPDATE and DELETE
@@ -1686,52 +1686,21 @@ ExecPrepareTupleRouting(ModifyTableState *mtstate,
 						TupleTableSlot *slot)
 {
 	ModifyTable *node;
-	int			partidx;
 	ResultRelInfo *partrel;
+	PartitionRoutingInfo *partrouteinfo;
 	HeapTuple	tuple;
 	TupleConversionMap *map;
 
 	/*
-	 * Determine the target partition.  If ExecFindPartition does not find a
-	 * partition after all, it doesn't return here; otherwise, the returned
-	 * value is to be used as an index into the arrays for the ResultRelInfo
-	 * and TupleConversionMap for the partition.
+	 * Look up the target partition's ResultRelInfo.  If ExecFindPartition does
+	 * not find a valid partition for the tuple in 'slot' then an error is
+	 * raised.  An error may also be raised if the found partition is not a
+	 * valid target for INSERTs.  This is required since a partitioned table
+	 * UPDATE to another partition becomes a DELETE+INSERT.
 	 */
-	partidx = ExecFindPartition(targetRelInfo,
-								proute->partition_dispatch_info,
-								slot,
-								estate);
-	Assert(partidx >= 0 && partidx < proute->num_partitions);
-
-	/*
-	 * Get the ResultRelInfo corresponding to the selected partition; if not
-	 * yet there, initialize it.
-	 */
-	partrel = proute->partitions[partidx];
-	if (partrel == NULL)
-		partrel = ExecInitPartitionInfo(mtstate, targetRelInfo,
-										proute, estate,
-										partidx);
-
-	/*
-	 * Check whether the partition is routable if we didn't yet
-	 *
-	 * Note: an UPDATE of a partition key invokes an INSERT that moves the
-	 * tuple to a new partition.  This check would be applied to a subplan
-	 * partition of such an UPDATE that is chosen as the partition to route
-	 * the tuple to.  The reason we do this check here rather than in
-	 * ExecSetupPartitionTupleRouting is to avoid aborting such an UPDATE
-	 * unnecessarily due to non-routable subplan partitions that may not be
-	 * chosen for update tuple movement after all.
-	 */
-	if (!partrel->ri_PartitionReadyForRouting)
-	{
-		/* Verify the partition is a valid target for INSERT. */
-		CheckValidResultRel(partrel, CMD_INSERT);
-
-		/* Set up information needed for routing tuples to the partition. */
-		ExecInitRoutingInfo(mtstate, estate, proute, partrel, partidx);
-	}
+	partrel = ExecFindPartition(mtstate, targetRelInfo, proute, slot, estate);
+	partrouteinfo = partrel->ri_PartitionInfo;
+	Assert(partrouteinfo != NULL);
 
 	/*
 	 * Make it look like we are inserting into the partition.
@@ -1743,7 +1712,7 @@ ExecPrepareTupleRouting(ModifyTableState *mtstate,
 
 	/*
 	 * If we're capturing transition tuples, we might need to convert from the
-	 * partition rowtype to parent rowtype.
+	 * partition rowtype to root partitioned table's rowtype.
 	 */
 	if (mtstate->mt_transition_capture != NULL)
 	{
@@ -1756,7 +1725,7 @@ ExecPrepareTupleRouting(ModifyTableState *mtstate,
 			 */
 			mtstate->mt_transition_capture->tcs_original_insert_tuple = NULL;
 			mtstate->mt_transition_capture->tcs_map =
-				TupConvMapForLeaf(proute, targetRelInfo, partidx);
+				partrouteinfo->pi_PartitionToRootMap;
 		}
 		else
 		{
@@ -1771,20 +1740,17 @@ ExecPrepareTupleRouting(ModifyTableState *mtstate,
 	if (mtstate->mt_oc_transition_capture != NULL)
 	{
 		mtstate->mt_oc_transition_capture->tcs_map =
-			TupConvMapForLeaf(proute, targetRelInfo, partidx);
+			partrouteinfo->pi_PartitionToRootMap;
 	}
 
 	/*
 	 * Convert the tuple, if necessary.
 	 */
-	map = proute->parent_child_tupconv_maps[partidx];
+	map = partrouteinfo->pi_RootToPartitionMap;
 	if (map != NULL)
 	{
-		TupleTableSlot *new_slot;
+		TupleTableSlot *new_slot = partrouteinfo->pi_PartitionTupleSlot;
 
-		Assert(proute->partition_tuple_slots != NULL &&
-			   proute->partition_tuple_slots[partidx] != NULL);
-		new_slot = proute->partition_tuple_slots[partidx];
 		slot = execute_attr_map_slot(map->attrMap, slot, new_slot);
 	}
 
@@ -1823,17 +1789,6 @@ ExecSetupChildParentMapForSubplan(ModifyTableState *mtstate)
 	int			i;
 
 	/*
-	 * First check if there is already a per-subplan array allocated. Even if
-	 * there is already a per-leaf map array, we won't require a per-subplan
-	 * one, since we will use the subplan offset array to convert the subplan
-	 * index to per-leaf index.
-	 */
-	if (mtstate->mt_per_subplan_tupconv_maps ||
-		(mtstate->mt_partition_tuple_routing &&
-		 mtstate->mt_partition_tuple_routing->child_parent_tupconv_maps))
-		return;
-
-	/*
 	 * Build array of conversion maps from each child's TupleDesc to the one
 	 * used in the target relation.  The map pointers may be NULL when no
 	 * conversion is necessary, which is hopefully a common case.
@@ -1855,78 +1810,17 @@ ExecSetupChildParentMapForSubplan(ModifyTableState *mtstate)
 }
 
 /*
- * Initialize the child-to-root tuple conversion map array required for
- * capturing transition tuples.
- *
- * The map array can be indexed either by subplan index or by leaf-partition
- * index.  For transition tables, we need a subplan-indexed access to the map,
- * and where tuple-routing is present, we also require a leaf-indexed access.
- */
-static void
-ExecSetupChildParentMapForTcs(ModifyTableState *mtstate)
-{
-	PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing;
-
-	/*
-	 * If partition tuple routing is set up, we will require partition-indexed
-	 * access. In that case, create the map array indexed by partition; we
-	 * will still be able to access the maps using a subplan index by
-	 * converting the subplan index to a partition index using
-	 * subplan_partition_offsets. If tuple routing is not set up, it means we
-	 * don't require partition-indexed access. In that case, create just a
-	 * subplan-indexed map.
-	 */
-	if (proute)
-	{
-		/*
-		 * If a partition-indexed map array is to be created, the subplan map
-		 * array has to be NULL.  If the subplan map array is already created,
-		 * we won't be able to access the map using a partition index.
-		 */
-		Assert(mtstate->mt_per_subplan_tupconv_maps == NULL);
-
-		ExecSetupChildParentMapForLeaf(proute);
-	}
-	else
-		ExecSetupChildParentMapForSubplan(mtstate);
-}
-
-/*
  * For a given subplan index, get the tuple conversion map.
  */
 static TupleConversionMap *
 tupconv_map_for_subplan(ModifyTableState *mtstate, int whichplan)
 {
-	/*
-	 * If a partition-index tuple conversion map array is allocated, we need
-	 * to first get the index into the partition array. Exactly *one* of the
-	 * two arrays is allocated. This is because if there is a partition array
-	 * required, we don't require subplan-indexed array since we can translate
-	 * subplan index into partition index. And, we create a subplan-indexed
-	 * array *only* if partition-indexed array is not required.
-	 */
+	/* If nobody else set the per-subplan array of maps, do so ourselves. */
 	if (mtstate->mt_per_subplan_tupconv_maps == NULL)
-	{
-		int			leaf_index;
-		PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing;
+		ExecSetupChildParentMapForSubplan(mtstate);
 
-		/*
-		 * If subplan-indexed array is NULL, things should have been arranged
-		 * to convert the subplan index to partition index.
-		 */
-		Assert(proute && proute->subplan_partition_offsets != NULL &&
-			   whichplan < proute->num_subplan_partition_offsets);
-
-		leaf_index = proute->subplan_partition_offsets[whichplan];
-
-		return TupConvMapForLeaf(proute, getTargetResultRelInfo(mtstate),
-								 leaf_index);
-	}
-	else
-	{
-		Assert(whichplan >= 0 && whichplan < mtstate->mt_nplans);
-		return mtstate->mt_per_subplan_tupconv_maps[whichplan];
-	}
+	Assert(whichplan >= 0 && whichplan < mtstate->mt_nplans);
+	return mtstate->mt_per_subplan_tupconv_maps[whichplan];
 }
 
 /* ----------------------------------------------------------------
@@ -2370,10 +2264,15 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
 	 * descriptor of a source partition does not match the root partitioned
 	 * table descriptor.  In such a case we need to convert tuples to the root
 	 * tuple descriptor, because the search for destination partition starts
-	 * from the root.  Skip this setup if it's not a partition key update.
+	 * from the root.  We'll also need a slot to store these converted tuples.
+	 * We can skip this setup if it's not a partition key update.
 	 */
 	if (update_tuple_routing_needed)
+	{
 		ExecSetupChildParentMapForSubplan(mtstate);
+		mtstate->mt_root_tuple_slot = MakeTupleTableSlot(RelationGetDescr(rel),
+														 &TTSOpsHeapTuple);
+	}
 
 	/*
 	 * Initialize any WITH CHECK OPTION constraints if needed.
@@ -2716,10 +2615,18 @@ ExecEndModifyTable(ModifyTableState *node)
 														   resultRelInfo);
 	}
 
-	/* Close all the partitioned tables, leaf partitions, and their indices */
+	/*
+	 * Close all the partitioned tables, leaf partitions, and their indices
+	 * and release the slot used for tuple routing, if set.
+	 */
 	if (node->mt_partition_tuple_routing)
+	{
 		ExecCleanupTupleRouting(node, node->mt_partition_tuple_routing);
 
+		if (node->mt_root_tuple_slot)
+			ExecDropSingleTupleTableSlot(node->mt_root_tuple_slot);
+	}
+
 	/*
 	 * Free the exprcontext
 	 */
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index d5720518a8..2a1c1cb2e1 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -1657,9 +1657,6 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
 /*
  * expand_partitioned_rtentry
  *		Recursively expand an RTE for a partitioned table.
- *
- * Note that RelationGetPartitionDispatchInfo will expand partitions in the
- * same order as this code.
  */
 static void
 expand_partitioned_rtentry(PlannerInfo *root, RangeTblEntry *parentrte,
diff --git a/src/backend/utils/cache/partcache.c b/src/backend/utils/cache/partcache.c
index 07653f312b..7856b47cdd 100644
--- a/src/backend/utils/cache/partcache.c
+++ b/src/backend/utils/cache/partcache.c
@@ -340,15 +340,23 @@ RelationBuildPartitionDesc(Relation rel)
 	oldcxt = MemoryContextSwitchTo(rel->rd_pdcxt);
 	partdesc->boundinfo = partition_bounds_copy(boundinfo, key);
 	partdesc->oids = (Oid *) palloc(partdesc->nparts * sizeof(Oid));
+	partdesc->is_leaf = (bool *) palloc(partdesc->nparts * sizeof(bool));
 
 	/*
 	 * Now assign OIDs from the original array into mapped indexes of the
-	 * result array.  Order of OIDs in the former is defined by the catalog
-	 * scan that retrieved them, whereas that in the latter is defined by
-	 * canonicalized representation of the partition bounds.
+	 * result array.  The order of OIDs in the former is defined by the
+	 * catalog scan that retrieved them, whereas that in the latter is defined
+	 * by the canonicalized representation of the partition bounds.
 	 */
 	for (i = 0; i < partdesc->nparts; i++)
-		partdesc->oids[mapping[i]] = oids_orig[i];
+	{
+		int			index = mapping[i];
+
+		partdesc->oids[index] = oids_orig[i];
+		/* Record if the partition is a leaf partition */
+		partdesc->is_leaf[index] =
+				(get_rel_relkind(oids_orig[i]) != RELKIND_PARTITIONED_TABLE);
+	}
 	MemoryContextSwitchTo(oldcxt);
 
 	rel->rd_partdesc = partdesc;
diff --git a/src/include/catalog/partition.h b/src/include/catalog/partition.h
index a53de2372e..59c7a6ab69 100644
--- a/src/include/catalog/partition.h
+++ b/src/include/catalog/partition.h
@@ -25,7 +25,11 @@
 typedef struct PartitionDescData
 {
 	int			nparts;			/* Number of partitions */
-	Oid		   *oids;			/* OIDs of partitions */
+	Oid		   *oids;			/* Array of 'nparts' elements containing
+								 * partition OIDs in order of their bounds */
+	bool	   *is_leaf;		/* Array of 'nparts' elements storing whether
+								 * the corresponding 'oids' element belongs to
+								 * a leaf partition or not */
 	PartitionBoundInfo boundinfo;	/* collection of partition bounds */
 } PartitionDescData;
 
diff --git a/src/include/executor/execPartition.h b/src/include/executor/execPartition.h
index 3e08104ea4..d3cfb55f9f 100644
--- a/src/include/executor/execPartition.h
+++ b/src/include/executor/execPartition.h
@@ -18,74 +18,36 @@
 #include "nodes/plannodes.h"
 #include "partitioning/partprune.h"
 
-/* See execPartition.c for the definition. */
+/* See execPartition.c for the definitions. */
 typedef struct PartitionDispatchData *PartitionDispatch;
+typedef struct PartitionTupleRouting PartitionTupleRouting;
 
-/*-----------------------
- * PartitionTupleRouting - Encapsulates all information required to execute
- * tuple-routing between partitions.
+/*
+ * PartitionRoutingInfo
  *
- * partition_dispatch_info		Array of PartitionDispatch objects with one
- *								entry for every partitioned table in the
- *								partition tree.
- * num_dispatch					number of partitioned tables in the partition
- *								tree (= length of partition_dispatch_info[])
- * partition_oids				Array of leaf partitions OIDs with one entry
- *								for every leaf partition in the partition tree,
- *								initialized in full by
- *								ExecSetupPartitionTupleRouting.
- * partitions					Array of ResultRelInfo* objects with one entry
- *								for every leaf partition in the partition tree,
- *								initialized lazily by ExecInitPartitionInfo.
- * num_partitions				Number of leaf partitions in the partition tree
- *								(= 'partitions_oid'/'partitions' array length)
- * parent_child_tupconv_maps	Array of TupleConversionMap objects with one
- *								entry for every leaf partition (required to
- *								convert tuple from the root table's rowtype to
- *								a leaf partition's rowtype after tuple routing
- *								is done)
- * child_parent_tupconv_maps	Array of TupleConversionMap objects with one
- *								entry for every leaf partition (required to
- *								convert an updated tuple from the leaf
- *								partition's rowtype to the root table's rowtype
- *								so that tuple routing can be done)
- * child_parent_map_not_required  Array of bool. True value means that a map is
- *								determined to be not required for the given
- *								partition. False means either we haven't yet
- *								checked if a map is required, or it was
- *								determined to be required.
- * subplan_partition_offsets	Integer array ordered by UPDATE subplans. Each
- *								element of this array has the index into the
- *								corresponding partition in partitions array.
- * num_subplan_partition_offsets  Length of 'subplan_partition_offsets' array
- * partition_tuple_slots		Array of TupleTableSlot objects; if non-NULL,
- *								contains one entry for every leaf partition,
- *								of which only those of the leaf partitions
- *								whose attribute numbers differ from the root
- *								parent have a non-NULL value.  NULL if all of
- *								the partitions encountered by a given command
- *								happen to have same rowtype as the root parent
- * root_tuple_slot				TupleTableSlot to be used to transiently hold
- *								copy of a tuple that's being moved across
- *								partitions in the root partitioned table's
- *								rowtype
- *-----------------------
+ * Additional result relation information specific to routing tuples to a
+ * table partition.
  */
-typedef struct PartitionTupleRouting
+typedef struct PartitionRoutingInfo
 {
-	PartitionDispatch *partition_dispatch_info;
-	int			num_dispatch;
-	Oid		   *partition_oids;
-	ResultRelInfo **partitions;
-	int			num_partitions;
-	TupleConversionMap **parent_child_tupconv_maps;
-	TupleConversionMap **child_parent_tupconv_maps;
-	bool	   *child_parent_map_not_required;
-	int		   *subplan_partition_offsets;
-	int			num_subplan_partition_offsets;
-	TupleTableSlot **partition_tuple_slots;
-	TupleTableSlot *root_tuple_slot;
-} PartitionTupleRouting;
+	/*
+	 * Map for converting tuples in root partitioned table format into
+	 * partition format, or NULL if no conversion is required.
+	 */
+	TupleConversionMap *pi_RootToPartitionMap;
+
+	/*
+	 * Map for converting tuples in partition format into the root partitioned
+	 * table format, or NULL if no conversion is required.
+	 */
+	TupleConversionMap *pi_PartitionToRootMap;
+
+	/*
+	 * Slot to store tuples in partition format, or NULL when no translation
+	 * is required between root and partition.
+	 */
+	TupleTableSlot *pi_PartitionTupleSlot;
+} PartitionRoutingInfo;
 
 /*
  * PartitionedRelPruningData - Per-partitioned-table data for run-time pruning
@@ -175,22 +137,11 @@ typedef struct PartitionPruneState
 
 extern PartitionTupleRouting *ExecSetupPartitionTupleRouting(ModifyTableState *mtstate,
 							   Relation rel);
-extern int ExecFindPartition(ResultRelInfo *resultRelInfo,
-				  PartitionDispatch *pd,
+extern ResultRelInfo *ExecFindPartition(ModifyTableState *mtstate,
+				  ResultRelInfo *rootResultRelInfo,
+				  PartitionTupleRouting *proute,
 				  TupleTableSlot *slot,
 				  EState *estate);
-extern ResultRelInfo *ExecInitPartitionInfo(ModifyTableState *mtstate,
-					  ResultRelInfo *resultRelInfo,
-					  PartitionTupleRouting *proute,
-					  EState *estate, int partidx);
-extern void ExecInitRoutingInfo(ModifyTableState *mtstate,
-					EState *estate,
-					PartitionTupleRouting *proute,
-					ResultRelInfo *partRelInfo,
-					int partidx);
-extern void ExecSetupChildParentMapForLeaf(PartitionTupleRouting *proute);
-extern TupleConversionMap *TupConvMapForLeaf(PartitionTupleRouting *proute,
-				  ResultRelInfo *rootRelInfo, int leaf_index);
 extern void ExecCleanupTupleRouting(ModifyTableState *mtstate,
 						PartitionTupleRouting *proute);
 extern PartitionPruneState *ExecCreatePartitionPruneState(PlanState *planstate,
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 63c871e6d0..569cc7c476 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -33,6 +33,7 @@
 
 
 struct PlanState;				/* forward references in this file */
+struct PartitionRoutingInfo;
 struct ParallelHashJoinState;
 struct ExecRowMark;
 struct ExprState;
@@ -469,8 +470,8 @@ typedef struct ResultRelInfo
 	/* relation descriptor for root partitioned table */
 	Relation	ri_PartitionRoot;
 
-	/* true if ready for tuple routing */
-	bool		ri_PartitionReadyForRouting;
+	/* Additional information specific to partition tuple routing */
+	struct PartitionRoutingInfo *ri_PartitionInfo;
 } ResultRelInfo;
 
 /* ----------------
@@ -1112,6 +1113,12 @@ typedef struct ModifyTableState
 	List	   *mt_excludedtlist;	/* the excluded pseudo relation's tlist  */
 	TupleTableSlot *mt_conflproj;	/* CONFLICT ... SET ... projection target */
 
+	/*
+	 * Slot for storing tuples in the root partitioned table's rowtype during
+	 * an UPDATE of a partitioned table.
+	 */
+	TupleTableSlot *mt_root_tuple_slot;
+
 	/* Tuple-routing support info */
 	struct PartitionTupleRouting *mt_partition_tuple_routing;
 

Reply via email to