From 96f4340ddaf77bf7a4171d2b36af3fa8014089c3 Mon Sep 17 00:00:00 2001
From: Amul Sul <sulamul@gmail.com>
Date: Wed, 24 Jan 2018 10:28:15 +0530
Subject: [PATCH 1/2] Invalidate ip_blkid v5-wip

v5-wip: Update w.r.t Amit Kapila's comments[6].
 - Reverted error message in nodeModifyTable.c from 'tuple to be locked'
   to 'tuple to be updated'.

 - TODO:
 1. Yet to made a decision of having LOG/ELOG/ASSERT in the
    RelationFindReplTupleByIndex() and RelationFindReplTupleSeq().

v4: Rebased on "UPDATE of partition key v35" patch[5].

v3: Update w.r.t Amit Kapila's[3] & Alvaro Herrera[4] comments
 - typo in all error message and comment : "to an another" -> "to another"
 - error message change : "tuple to be updated" -> "tuple to be locked"
 - In ExecOnConflictUpdate(), error report converted into assert &
  comments added.

v2: Updated w.r.t Robert review comments[2]
 - Updated couple of comment of heap_delete argument and ItemPointerData
 - Added same concurrent update error logic in ExecOnConflictUpdate,
   RelationFindReplTupleByIndex and RelationFindReplTupleSeq

v1: Initial version -- as per Amit Kapila's suggestions[1]
 - When tuple is being moved to another partition then ip_blkid in the
   tuple header mark to InvalidBlockNumber.

 -------------
  References:
 -------------
 1] https://postgr.es/m/CAA4eK1KEZQ%2BCyXbBzfn1jFHoEfa_OemDLhLyy7xfD1QUZLo1DQ%40mail.gmail.com
 2] https://postgr.es/m/CA%2BTgmoYY98AEjh7RDtuzaLC--_0smCozXRu6bFmZTaX5Ne%3DB5Q%40mail.gmail.com
 3] https://postgr.es/m/CAA4eK1LQS6TmsGaEwR9HgF-9TZTHxrdAELuX6wOZBDbbjOfDjQ@mail.gmail.com
 4] https://postgr.es/m/20171124160756.eyljpmpfzwd6jmnr@alvherre.pgsql
 5] https://postgr.es/m/CAJ3gD9dixkkMzNnnP1CaZ1H17-U17ok_sVbjZZo+wnB=rJH6yg@mail.gmail.com
 6] https://postgr.es/m/CAA4eK1LHVnNWYF53F1gUGx6CTxuvznozvU-Lr-dfE=Qeu1gEcg@mail.gmail.com
---
 src/backend/access/heap/heapam.c       | 13 +++++++++++--
 src/backend/commands/trigger.c         |  5 +++++
 src/backend/executor/execMain.c        |  4 ++++
 src/backend/executor/execReplication.c |  8 ++++++++
 src/backend/executor/nodeLockRows.c    |  5 +++++
 src/backend/executor/nodeModifyTable.c | 28 ++++++++++++++++++++++++----
 src/include/access/heapam.h            |  2 +-
 src/include/storage/itemptr.h          |  4 +++-
 8 files changed, 61 insertions(+), 8 deletions(-)

diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index be263850cd..f93d450416 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -3017,6 +3017,8 @@ xmax_infomask_changed(uint16 new_infomask, uint16 old_infomask)
  *	crosscheck - if not InvalidSnapshot, also check tuple against this
  *	wait - true if should wait for any conflicting update to commit/abort
  *	hufd - output parameter, filled in failure cases (see below)
+ *	row_moved - true iff the tuple is being moved to another partition
+ *				table due to an update of partition key. Otherwise, false.
  *
  * Normal, successful return value is HeapTupleMayBeUpdated, which
  * actually means we did delete it.  Failure return codes are
@@ -3032,7 +3034,7 @@ xmax_infomask_changed(uint16 new_infomask, uint16 old_infomask)
 HTSU_Result
 heap_delete(Relation relation, ItemPointer tid,
 			CommandId cid, Snapshot crosscheck, bool wait,
-			HeapUpdateFailureData *hufd)
+			HeapUpdateFailureData *hufd, bool row_moved)
 {
 	HTSU_Result result;
 	TransactionId xid = GetCurrentTransactionId();
@@ -3300,6 +3302,13 @@ l1:
 	/* Make sure there is no forward chain link in t_ctid */
 	tp.t_data->t_ctid = tp.t_self;
 
+	/*
+	 * Sets a block identifier to the InvalidBlockNumber to indicate such an
+	 * update being moved tuple to another partition.
+	 */
+	if (row_moved)
+		BlockIdSet(&((tp.t_data->t_ctid).ip_blkid), InvalidBlockNumber);
+
 	MarkBufferDirty(buffer);
 
 	/*
@@ -3425,7 +3434,7 @@ simple_heap_delete(Relation relation, ItemPointer tid)
 	result = heap_delete(relation, tid,
 						 GetCurrentCommandId(true), InvalidSnapshot,
 						 true /* wait for commit */ ,
-						 &hufd);
+						 &hufd, false);
 	switch (result)
 	{
 		case HeapTupleSelfUpdated:
diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
index 160d941c00..a770531e14 100644
--- a/src/backend/commands/trigger.c
+++ b/src/backend/commands/trigger.c
@@ -3071,6 +3071,11 @@ ltrmark:;
 					ereport(ERROR,
 							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
 							 errmsg("could not serialize access due to concurrent update")));
+				if (!BlockNumberIsValid(BlockIdGetBlockNumber(&((hufd.ctid).ip_blkid))))
+					ereport(ERROR,
+							(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+							 errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
+
 				if (!ItemPointerEquals(&hufd.ctid, &tuple.t_self))
 				{
 					/* it was updated, so look at the updated version */
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index 410921cc40..98e198f0b7 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -2709,6 +2709,10 @@ EvalPlanQualFetch(EState *estate, Relation relation, int lockmode,
 						ereport(ERROR,
 								(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
 								 errmsg("could not serialize access due to concurrent update")));
+					if (!BlockNumberIsValid(BlockIdGetBlockNumber(&((hufd.ctid).ip_blkid))))
+						ereport(ERROR,
+								(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+								 errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
 
 					/* Should not encounter speculative tuple on recheck */
 					Assert(!HeapTupleHeaderIsSpeculative(tuple.t_data));
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index 32891abbdf..9016d8fb11 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -194,6 +194,10 @@ retry:
 				ereport(LOG,
 						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
 						 errmsg("concurrent update, retrying")));
+				if (!BlockNumberIsValid(BlockIdGetBlockNumber(&((hufd.ctid).ip_blkid))))
+					ereport(LOG,
+							(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+							 errmsg("tuple to be locked was already moved to another partition due to concurrent update, retrying")));
 				goto retry;
 			case HeapTupleInvisible:
 				elog(ERROR, "attempted to lock invisible tuple");
@@ -352,6 +356,10 @@ retry:
 				ereport(LOG,
 						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
 						 errmsg("concurrent update, retrying")));
+				if (!BlockNumberIsValid(BlockIdGetBlockNumber(&((hufd.ctid).ip_blkid))))
+					ereport(LOG,
+							(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+							 errmsg("tuple to be locked was already moved to another partition due to concurrent update, retrying")));
 				goto retry;
 			case HeapTupleInvisible:
 				elog(ERROR, "attempted to lock invisible tuple");
diff --git a/src/backend/executor/nodeLockRows.c b/src/backend/executor/nodeLockRows.c
index 7961b4be6a..b07b7092de 100644
--- a/src/backend/executor/nodeLockRows.c
+++ b/src/backend/executor/nodeLockRows.c
@@ -218,6 +218,11 @@ lnext:
 					ereport(ERROR,
 							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
 							 errmsg("could not serialize access due to concurrent update")));
+				if (!BlockNumberIsValid(BlockIdGetBlockNumber(&((hufd.ctid).ip_blkid))))
+					ereport(ERROR,
+							(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+							 errmsg("tuple to be locked was already moved to another partition due to concurrent update")));
+
 				if (ItemPointerEquals(&hufd.ctid, &tuple.t_self))
 				{
 					/* Tuple was deleted, so don't return it */
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 6c2f8d4ec0..f45e0accb4 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -711,7 +711,8 @@ ExecDelete(ModifyTableState *mtstate,
 		   EState *estate,
 		   bool *tupleDeleted,
 		   bool processReturning,
-		   bool canSetTag)
+		   bool canSetTag,
+		   bool row_moved)
 {
 	ResultRelInfo *resultRelInfo;
 	Relation	resultRelationDesc;
@@ -802,7 +803,8 @@ ldelete:;
 							 estate->es_output_cid,
 							 estate->es_crosscheck_snapshot,
 							 true /* wait for commit */ ,
-							 &hufd);
+							 &hufd,
+							 row_moved);
 		switch (result)
 		{
 			case HeapTupleSelfUpdated:
@@ -848,6 +850,11 @@ ldelete:;
 					ereport(ERROR,
 							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
 							 errmsg("could not serialize access due to concurrent update")));
+				if (!BlockNumberIsValid(BlockIdGetBlockNumber(&((hufd.ctid).ip_blkid))))
+					ereport(ERROR,
+							(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+							 errmsg("tuple to be updated was already moved to another partition due to concurrent update")));
+
 				if (!ItemPointerEquals(tupleid, &hufd.ctid))
 				{
 					TupleTableSlot *epqslot;
@@ -1150,7 +1157,7 @@ lreplace:;
 			 * processing. We want to return rows from INSERT.
 			 */
 			ExecDelete(mtstate, tupleid, oldtuple, planSlot, epqstate, estate,
-					   &tuple_deleted, false, false);
+					   &tuple_deleted, false, false, true);
 
 			/*
 			 * For some reason if DELETE didn't happen (e.g. trigger prevented
@@ -1295,6 +1302,11 @@ lreplace:;
 					ereport(ERROR,
 							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
 							 errmsg("could not serialize access due to concurrent update")));
+				if (!BlockNumberIsValid(BlockIdGetBlockNumber(&((hufd.ctid).ip_blkid))))
+					ereport(ERROR,
+							(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+							 errmsg("tuple to be updated was already moved to another partition due to concurrent update")));
+
 				if (!ItemPointerEquals(tupleid, &hufd.ctid))
 				{
 					TupleTableSlot *epqslot;
@@ -1465,6 +1477,14 @@ ExecOnConflictUpdate(ModifyTableState *mtstate,
 						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
 						 errmsg("could not serialize access due to concurrent update")));
 
+			/*
+			 * As long as we don't support an UPDATE of INSERT ON CONFLICT for
+			 * a partitioned table we shouldn't reach to a case where tuple to
+			 * be lock is moved to another partition due to concurrent update
+			 * of partition key.
+			 */
+			Assert(BlockNumberIsValid(BlockIdGetBlockNumber(&((hufd.ctid).ip_blkid))));
+
 			/*
 			 * Tell caller to try again from the very start.
 			 *
@@ -2053,7 +2073,7 @@ ExecModifyTable(PlanState *pstate)
 			case CMD_DELETE:
 				slot = ExecDelete(node, tupleid, oldtuple, planSlot,
 								  &node->mt_epqstate, estate,
-								  NULL, true, node->canSetTag);
+								  NULL, true, node->canSetTag, false);
 				break;
 			default:
 				elog(ERROR, "unknown operation");
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 4c0256b18a..44a211a740 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -156,7 +156,7 @@ extern void heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
 				  CommandId cid, int options, BulkInsertState bistate);
 extern HTSU_Result heap_delete(Relation relation, ItemPointer tid,
 			CommandId cid, Snapshot crosscheck, bool wait,
-			HeapUpdateFailureData *hufd);
+			HeapUpdateFailureData *hufd, bool row_moved);
 extern void heap_finish_speculative(Relation relation, HeapTuple tuple);
 extern void heap_abort_speculative(Relation relation, HeapTuple tuple);
 extern HTSU_Result heap_update(Relation relation, ItemPointer otid,
diff --git a/src/include/storage/itemptr.h b/src/include/storage/itemptr.h
index 6c9ed3696b..79dceb414f 100644
--- a/src/include/storage/itemptr.h
+++ b/src/include/storage/itemptr.h
@@ -23,7 +23,9 @@
  * This is a pointer to an item within a disk page of a known file
  * (for example, a cross-link from an index to its parent table).
  * blkid tells us which block, posid tells us which entry in the linp
- * (ItemIdData) array we want.
+ * (ItemIdData) array we want.  blkid is marked InvalidBlockNumber when
+ * a tuple is moved to another partition relation due to an update of
+ * partition key.
  *
  * Note: because there is an item pointer in each tuple header and index
  * tuple header on disk, it's very important not to waste space with
-- 
2.14.1

