From cf659e0b221ddc04f5851b91518cc123be547f21 Mon Sep 17 00:00:00 2001
From: amitlan <amitlangote09@gmail.com>
Date: Tue, 25 May 2021 22:48:47 +0900
Subject: [PATCH 1/2] ExecFindPartition: cache last used partition v3

---
 src/backend/executor/execPartition.c | 198 ++++++++++++++++++++++-----
 1 file changed, 162 insertions(+), 36 deletions(-)

diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c
index 606c920b06..1d0d8e63f6 100644
--- a/src/backend/executor/execPartition.c
+++ b/src/backend/executor/execPartition.c
@@ -133,6 +133,16 @@ struct PartitionTupleRouting
  *		routing it through this table). A NULL value is stored if no tuple
  *		conversion is required.
  *
+ * savedPartInfo
+ *		If non-NULL, ResultRelInfo for the partition that was most recently
+ *		chosen as the routing target; ExecFindPartition() checks if the
+ *		same one can be used for the current row before applying the tuple-
+ *		routing algorithm to it.
+ *
+ * savedDispatchInfo
+ *		If non-NULL, PartitionDispatch for the sub-partitioned partition
+ *		that was most recently chosen as the routing target.
+ *
  * indexes
  *		Array of partdesc->nparts elements.  For leaf partitions the index
  *		corresponds to the partition's ResultRelInfo in the encapsulating
@@ -150,6 +160,8 @@ typedef struct PartitionDispatchData
 	PartitionDesc partdesc;
 	TupleTableSlot *tupslot;
 	AttrMap    *tupmap;
+	ResultRelInfo *savedPartInfo;
+	PartitionDispatch savedDispatchInfo;
 	int			indexes[FLEXIBLE_ARRAY_MEMBER];
 }			PartitionDispatchData;
 
@@ -234,6 +246,82 @@ ExecSetupPartitionTupleRouting(EState *estate, Relation rel)
 	return proute;
 }
 
+/*
+ * Remember this partition for the next tuple inserted into this parent; see
+ * CanUseSavedPartitionForTuple() for how it's decided whether a tuple can
+ * indeed reuse this partition.
+ *
+ * Do this only for range- and list-partitioned parents, because only
+ * with those strategies is it conceivable that consecutively inserted
+ * rows tend to go into the same partition.
+ */
+static inline void
+SavePartitionForNextTuple(PartitionDispatch dispatch,
+						  ResultRelInfo *partInfo,
+						  PartitionDispatch dispatchInfo)
+{
+	if ((dispatch->key->strategy == PARTITION_STRATEGY_RANGE ||
+		 dispatch->key->strategy == PARTITION_STRATEGY_LIST))
+	{
+		dispatch->savedPartInfo = partInfo;
+		dispatch->savedDispatchInfo = dispatchInfo;
+	}
+}
+
+/*
+ * Check if the saved partition accepts this tuple by evaluating its
+ * partition constraint against the tuple.  If it does, we save a trip
+ * to get_partition_for_tuple(), which can be a slightly more expensive
+ * way to get the same partition, especially if there are many
+ * partitions to search through.
+ */
+static inline bool
+CanUseSavedPartitionForTuple(PartitionDispatch dispatch,
+							 TupleTableSlot *rootslot,
+							 EState *estate)
+{
+	if (dispatch->savedPartInfo)
+	{
+		ResultRelInfo *rri;
+		TupleTableSlot *tmpslot;
+		TupleConversionMap *map;
+
+		rri = dispatch->savedPartInfo;
+		map = rri->ri_RootToPartitionMap;
+		if (map)
+			tmpslot = execute_attr_map_slot(map->attrMap, rootslot,
+											rri->ri_PartitionTupleSlot);
+		else
+			tmpslot = rootslot;
+		return ExecPartitionCheck(rri, tmpslot, estate, false);
+	}
+
+	return false;
+}
+
+/*
+ * Convert the tuple to a sub-partitioned partition's layout, if needed.
+ */
+static inline TupleTableSlot *
+ConvertTupleToPartition(PartitionDispatch dispatch,
+						TupleTableSlot *slot,
+						TupleTableSlot **parent_slot)
+{
+	if (dispatch->tupslot)
+	{
+		AttrMap    *map = dispatch->tupmap;
+		TupleTableSlot *tempslot = *parent_slot;
+
+		*parent_slot = dispatch->tupslot;
+		slot = execute_attr_map_slot(map, slot, *parent_slot);
+
+		if (tempslot != NULL)
+			ExecClearTuple(tempslot);
+	}
+
+	return slot;
+}
+
 /*
  * ExecFindPartition -- Return the ResultRelInfo for the leaf partition that
  * the tuple contained in *slot should belong to.
@@ -292,6 +380,34 @@ ExecFindPartition(ModifyTableState *mtstate,
 		CHECK_FOR_INTERRUPTS();
 
 		rel = dispatch->reldesc;
+
+		if (CanUseSavedPartitionForTuple(dispatch, rootslot, estate))
+		{
+			/* If the saved partition is a leaf partition, just return it. */
+			if (dispatch->savedDispatchInfo == NULL)
+			{
+				/* Restore ecxt's scantuple before returning. */
+				ecxt->ecxt_scantuple = ecxt_scantuple_saved;
+				MemoryContextSwitchTo(oldcxt);
+				return dispatch->savedPartInfo;
+			}
+			else
+			{
+				/*
+				 * Saved partition is sub-partitioned, so continue the loop to
+				 * find the next level partition.
+				 */
+				dispatch = dispatch->savedDispatchInfo;
+				slot = ConvertTupleToPartition(dispatch, slot, &myslot);
+				continue;
+			}
+		}
+		else
+		{
+			dispatch->savedPartInfo = rri = NULL;
+			dispatch->savedDispatchInfo = NULL;
+		}
+
 		partdesc = dispatch->partdesc;
 
 		/*
@@ -372,6 +488,8 @@ ExecFindPartition(ModifyTableState *mtstate,
 			}
 			Assert(rri != NULL);
 
+			SavePartitionForNextTuple(dispatch, rri, NULL);
+
 			/* Signal to terminate the loop */
 			dispatch = NULL;
 		}
@@ -382,6 +500,8 @@ ExecFindPartition(ModifyTableState *mtstate,
 			 */
 			if (likely(dispatch->indexes[partidx] >= 0))
 			{
+				PartitionDispatch subdispatch;
+
 				/* Already built. */
 				Assert(dispatch->indexes[partidx] < proute->num_dispatch);
 
@@ -391,7 +511,11 @@ ExecFindPartition(ModifyTableState *mtstate,
 				 * Move down to the next partition level and search again
 				 * until we find a leaf partition that matches this tuple
 				 */
-				dispatch = pd[dispatch->indexes[partidx]];
+				subdispatch = pd[dispatch->indexes[partidx]];
+
+				SavePartitionForNextTuple(dispatch, rri, subdispatch);
+
+				dispatch = subdispatch;
 			}
 			else
 			{
@@ -411,24 +535,13 @@ ExecFindPartition(ModifyTableState *mtstate,
 					   dispatch->indexes[partidx] < proute->num_dispatch);
 
 				rri = proute->nonleaf_partitions[dispatch->indexes[partidx]];
-				dispatch = subdispatch;
-			}
 
-			/*
-			 * Convert the tuple to the new parent's layout, if different from
-			 * the previous parent.
-			 */
-			if (dispatch->tupslot)
-			{
-				AttrMap    *map = dispatch->tupmap;
-				TupleTableSlot *tempslot = myslot;
-
-				myslot = dispatch->tupslot;
-				slot = execute_attr_map_slot(map, slot, myslot);
+				SavePartitionForNextTuple(dispatch, rri, subdispatch);
 
-				if (tempslot != NULL)
-					ExecClearTuple(tempslot);
+				dispatch = subdispatch;
 			}
+
+			slot = ConvertTupleToPartition(dispatch, slot, &myslot);
 		}
 
 		/*
@@ -858,27 +971,11 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate,
 	return leaf_part_rri;
 }
 
-/*
- * ExecInitRoutingInfo
- *		Set up information needed for translating tuples between root
- *		partitioned table format and partition format, and keep track of it
- *		in PartitionTupleRouting.
- */
-static void
-ExecInitRoutingInfo(ModifyTableState *mtstate,
-					EState *estate,
-					PartitionTupleRouting *proute,
-					PartitionDispatch dispatch,
-					ResultRelInfo *partRelInfo,
-					int partidx,
-					bool is_borrowed_rel)
+static inline void
+InitRootToPartitionMap(ResultRelInfo *partRelInfo,
+					   ResultRelInfo *rootRelInfo,
+					   EState *estate)
 {
-	ResultRelInfo *rootRelInfo = partRelInfo->ri_RootResultRelInfo;
-	MemoryContext oldcxt;
-	int			rri_index;
-
-	oldcxt = MemoryContextSwitchTo(proute->memcxt);
-
 	/*
 	 * Set up a tuple conversion map to convert a tuple routed to the
 	 * partition from the parent's type to the partition's.
@@ -907,6 +1004,30 @@ ExecInitRoutingInfo(ModifyTableState *mtstate,
 	}
 	else
 		partRelInfo->ri_PartitionTupleSlot = NULL;
+}
+
+/*
+ * ExecInitRoutingInfo
+ *		Set up information needed for translating tuples between root
+ *		partitioned table format and partition format, and keep track of it
+ *		in PartitionTupleRouting.
+ */
+static void
+ExecInitRoutingInfo(ModifyTableState *mtstate,
+					EState *estate,
+					PartitionTupleRouting *proute,
+					PartitionDispatch dispatch,
+					ResultRelInfo *partRelInfo,
+					int partidx,
+					bool is_borrowed_rel)
+{
+	ResultRelInfo *rootRelInfo = partRelInfo->ri_RootResultRelInfo;
+	MemoryContext oldcxt;
+	int			rri_index;
+
+	oldcxt = MemoryContextSwitchTo(proute->memcxt);
+
+	InitRootToPartitionMap(partRelInfo, rootRelInfo, estate);
 
 	/*
 	 * If the partition is a foreign table, let the FDW init itself for
@@ -1051,6 +1172,9 @@ ExecInitPartitionDispatchInfo(EState *estate,
 		pd->tupslot = NULL;
 	}
 
+	pd->savedPartInfo = NULL;
+	pd->savedDispatchInfo = NULL;
+
 	/*
 	 * Initialize with -1 to signify that the corresponding partition's
 	 * ResultRelInfo or PartitionDispatch has not been created yet.
@@ -1094,6 +1218,8 @@ ExecInitPartitionDispatchInfo(EState *estate,
 		ResultRelInfo *rri = makeNode(ResultRelInfo);
 
 		InitResultRelInfo(rri, rel, 0, rootResultRelInfo, 0);
+		/* The map is needed in CanUseSavedPartitionForTuple(). */
+		InitRootToPartitionMap(rri, rootResultRelInfo, estate);
 		proute->nonleaf_partitions[dispatchidx] = rri;
 	}
 	else
-- 
2.24.1

