From 8efe8f8f94d8f3195ba65b964799ca2c75f971fd Mon Sep 17 00:00:00 2001
From: Peter Geoghegan <pg@bowt.ie>
Date: Wed, 11 Sep 2019 17:46:11 -0700
Subject: [PATCH] Save original new heap TID in insert WAL record.

---
 src/backend/access/nbtree/nbtinsert.c | 14 ++++++++++++++
 src/backend/access/nbtree/nbtxlog.c   |  3 +++
 src/include/access/nbtxlog.h          |  4 +++-
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c
index 8fb17d6784..119e3fe5a6 100644
--- a/src/backend/access/nbtree/nbtinsert.c
+++ b/src/backend/access/nbtree/nbtinsert.c
@@ -1037,6 +1037,7 @@ _bt_insertonpg(Relation rel,
 	Size		itemsz;
 	IndexTuple	nposting = NULL;
 	IndexTuple	oposting;
+	ItemPointerData orig;
 
 	page = BufferGetPage(buf);
 	lpageop = (BTPageOpaque) PageGetSpecialPointer(page);
@@ -1061,6 +1062,7 @@ _bt_insertonpg(Relation rel,
 	itemsz = IndexTupleSize(itup);
 	itemsz = MAXALIGN(itemsz);	/* be safe, PageAddItem will do this but we
 								 * need to be consistent */
+	memset(&orig, 0, sizeof(ItemPointerData));
 
 	/*
 	 * Do we need to split an existing posting list item?
@@ -1092,6 +1094,8 @@ _bt_insertonpg(Relation rel,
 		Assert(in_posting_offset > 0);
 		oposting = (IndexTuple) PageGetItem(page, itemid);
 
+		/* HACK: save new item's original heap TID for WAL logging */
+		ItemPointerCopy(&itup->t_tid, &orig);
 		nposting = _bt_form_newposting(itup, oposting, in_posting_offset);
 
 		/* Alter new item offset, since effective new item changed */
@@ -1264,6 +1268,7 @@ _bt_insertonpg(Relation rel,
 
 			xlrec.offnum = itup_off;
 			xlrec.in_posting_offset = in_posting_offset;
+			xlrec.orig = orig;
 
 			XLogBeginInsert();
 			XLogRegisterData((char *) &xlrec, SizeOfBtreeInsert);
@@ -1856,6 +1861,15 @@ _bt_split(Relation rel, BTScanInsert itup_key, Buffer buf, Buffer cbuf,
 		 * with all the other items on the right page.
 		 * Otherwise, save in_posting_offset and newitem to construct
 		 * replacing tuple.
+		 *
+		 * FIXME: The same "original new item TID vs. rewritten new item TID"
+		 * issue exists here, but I haven't done anything with that.
+		 *
+		 * FIXME: Be careful about splits where the new item is also the first
+		 * item on the right half -- that would make the posting list that we
+		 * have to update in-place the last item on the left.  This is hard to
+		 * test because nbtsplitloc.c will avoid choosing a split point
+		 * between these two.
 		 */
 		xlrec.in_posting_offset = InvalidOffsetNumber;
 		if (replacepostingoff < firstright)
diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c
index de9bc3b101..5bb38beda1 100644
--- a/src/backend/access/nbtree/nbtxlog.c
+++ b/src/backend/access/nbtree/nbtxlog.c
@@ -189,6 +189,9 @@ btree_xlog_insert(bool isleaf, bool ismeta, XLogReaderState *record)
 			IndexTuple newitem = (IndexTuple) datapos;
 			IndexTuple nposting;
 
+			/* Restore newitem's original heap TID, saved by _bt_insertonpg() */
+			newitem = CopyIndexTuple(newitem);
+			ItemPointerCopy(&xlrec->orig, &newitem->t_tid);
 			nposting = _bt_form_newposting(newitem, oposting,
 										   xlrec->in_posting_offset);
 			Assert(isleaf);
diff --git a/src/include/access/nbtxlog.h b/src/include/access/nbtxlog.h
index 075baaf6eb..2813e569dc 100644
--- a/src/include/access/nbtxlog.h
+++ b/src/include/access/nbtxlog.h
@@ -15,6 +15,7 @@
 
 #include "access/xlogreader.h"
 #include "lib/stringinfo.h"
+#include "storage/itemptr.h"
 #include "storage/off.h"
 
 /*
@@ -74,9 +75,10 @@ typedef struct xl_btree_insert
 {
 	OffsetNumber offnum;
 	OffsetNumber in_posting_offset;
+	ItemPointerData orig;
 } xl_btree_insert;
 
-#define SizeOfBtreeInsert	(offsetof(xl_btree_insert, in_posting_offset) + sizeof(OffsetNumber))
+#define SizeOfBtreeInsert	(offsetof(xl_btree_insert, orig) + sizeof(ItemPointerData))
 
 /*
  * On insert with split, we save all the items going into the right sibling
-- 
2.17.1

