From 3a6d6b107043b130faa44615e0ad67e1e87b5217 Mon Sep 17 00:00:00 2001
From: Andrey Borodin <amborodin@acm.org>
Date: Tue, 19 Dec 2017 15:38:32 +0500
Subject: [PATCH] Delete pages during GiST VACCUM

---
 src/backend/access/gist/gist.c       |   7 +++
 src/backend/access/gist/gistbuild.c  |   5 --
 src/backend/access/gist/gistutil.c   |   4 +-
 src/backend/access/gist/gistvacuum.c | 117 +++++++++++++++++++++++++++++++++--
 src/backend/access/gist/gistxlog.c   |  47 +++++++++++++-
 src/include/access/gist_private.h    |  23 ++++---
 src/include/access/gistxlog.h        |  16 ++++-
 src/test/regress/expected/gist.out   |   4 +-
 src/test/regress/sql/gist.sql        |   4 +-
 9 files changed, 201 insertions(+), 26 deletions(-)

diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c
index cf4b319b4e..2dc6bedf8f 100644
--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -693,6 +693,13 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate)
 			GISTInsertStack *item;
 			OffsetNumber downlinkoffnum;
 
+			if(GistPageIsDeleted(stack->page))
+			{
+				UnlockReleaseBuffer(stack->buffer);
+				xlocked = false;
+				state.stack = stack = stack->parent;
+				continue;
+			}
 			downlinkoffnum = gistchoose(state.r, stack->page, itup, giststate);
 			iid = PageGetItemId(stack->page, downlinkoffnum);
 			idxtuple = (IndexTuple) PageGetItem(stack->page, iid);
diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c
index 2415f00e06..fee533966f 100644
--- a/src/backend/access/gist/gistbuild.c
+++ b/src/backend/access/gist/gistbuild.c
@@ -1126,11 +1126,6 @@ gistGetMaxLevel(Relation index)
  * but will be added there the first time we visit them.
  */
 
-typedef struct
-{
-	BlockNumber childblkno;		/* hash key */
-	BlockNumber parentblkno;
-} ParentMapEntry;
 
 static void
 gistInitParentMap(GISTBuildState *buildstate)
diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c
index d8d1c0acfc..3f6fff011b 100644
--- a/src/backend/access/gist/gistutil.c
+++ b/src/backend/access/gist/gistutil.c
@@ -24,6 +24,7 @@
 #include "storage/lmgr.h"
 #include "utils/builtins.h"
 #include "utils/syscache.h"
+#include "utils/snapmgr.h"
 
 
 /*
@@ -801,13 +802,14 @@ gistNewBuffer(Relation r)
 		if (ConditionalLockBuffer(buffer))
 		{
 			Page		page = BufferGetPage(buffer);
+			PageHeader	p = (PageHeader) page;
 
 			if (PageIsNew(page))
 				return buffer;	/* OK to use, if never initialized */
 
 			gistcheckpage(r, buffer);
 
-			if (GistPageIsDeleted(page))
+			if (GistPageIsDeleted(page) && TransactionIdPrecedes(p->pd_prune_xid, RecentGlobalDataXmin))
 				return buffer;	/* OK to use */
 
 			LockBuffer(buffer, GIST_UNLOCK);
diff --git a/src/backend/access/gist/gistvacuum.c b/src/backend/access/gist/gistvacuum.c
index 77d9d12f0b..30f968ff31 100644
--- a/src/backend/access/gist/gistvacuum.c
+++ b/src/backend/access/gist/gistvacuum.c
@@ -20,6 +20,8 @@
 #include "miscadmin.h"
 #include "storage/indexfsm.h"
 #include "storage/lmgr.h"
+#include "utils/snapmgr.h"
+#include "access/xact.h"
 
 
 /*
@@ -126,7 +128,6 @@ pushStackIfSplited(Page page, GistBDItem *stack)
 	}
 }
 
-
 /*
  * Bulk deletion of all index entries pointing to a set of heap tuples and
  * check invalid tuples left after upgrade.
@@ -136,12 +137,14 @@ pushStackIfSplited(Page page, GistBDItem *stack)
  * Result: a palloc'd struct containing statistical info for VACUUM displays.
  */
 IndexBulkDeleteResult *
-gistbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
-			   IndexBulkDeleteCallback callback, void *callback_state)
+gistbulkdelete(IndexVacuumInfo * info, IndexBulkDeleteResult * stats, IndexBulkDeleteCallback callback, void* callback_state)
 {
 	Relation	rel = info->index;
 	GistBDItem *stack,
 			   *ptr;
+	BlockNumber recentParent = InvalidBlockNumber;
+	List	   *rescanList = NULL;
+	ListCell   *cell;
 
 	/* first time through? */
 	if (stats == NULL)
@@ -234,9 +237,16 @@ gistbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
 				END_CRIT_SECTION();
 			}
 
+			if (ntodelete == maxoff && recentParent!=InvalidBlockNumber &&
+				(rescanList == NULL || (BlockNumber)llast_int(rescanList) != recentParent))
+			{
+				/* This page is a candidate to be deleted. Remember it's parent to rescan it later with xlock */
+				rescanList = lappend_int(rescanList, recentParent);
+			}
 		}
 		else
 		{
+			recentParent = stack->blkno;
 			/* check for split proceeded after look at parent */
 			pushStackIfSplited(page, stack);
 
@@ -271,5 +281,104 @@ gistbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
 		vacuum_delay_point();
 	}
 
+	/* rescan inner pages that had empty child pages */
+	foreach(cell,rescanList)
+	{
+		Buffer		buffer;
+		Page		page;
+		OffsetNumber i,
+					maxoff;
+		IndexTuple	idxtuple;
+		ItemId		iid;
+		OffsetNumber todelete[MaxOffsetNumber];
+		Buffer		buftodelete[MaxOffsetNumber];
+		int			ntodelete = 0;
+
+		buffer = ReadBufferExtended(rel, MAIN_FORKNUM, (BlockNumber)lfirst_int(cell),
+									RBM_NORMAL, info->strategy);
+		LockBuffer(buffer, GIST_EXCLUSIVE);
+		gistcheckpage(rel, buffer);
+		page = (Page) BufferGetPage(buffer);
+
+		Assert(!GistPageIsLeaf(page));
+
+		maxoff = PageGetMaxOffsetNumber(page);
+
+		for (i = OffsetNumberNext(FirstOffsetNumber); i <= maxoff; i = OffsetNumberNext(i))
+			{
+				Buffer		leafBuffer;
+				Page		leafPage;
+
+				iid = PageGetItemId(page, i);
+				idxtuple = (IndexTuple) PageGetItem(page, iid);
+
+				leafBuffer = ReadBufferExtended(rel, MAIN_FORKNUM, ItemPointerGetBlockNumber(&(idxtuple->t_tid)),
+									RBM_NORMAL, info->strategy);
+				LockBuffer(leafBuffer, GIST_EXCLUSIVE);
+				gistcheckpage(rel, leafBuffer);
+				leafPage = (Page) BufferGetPage(leafBuffer);
+				Assert(GistPageIsLeaf(leafPage));
+
+				if (PageGetMaxOffsetNumber(leafPage) == InvalidOffsetNumber /* Nothing left to split */
+					&& !(GistFollowRight(leafPage) || GistPageGetNSN(page) < GistPageGetNSN(leafPage)) /* No follow-right */
+					&& ntodelete < maxoff-1) /* We must keep at leaf one leaf page per each */
+				{
+					buftodelete[ntodelete] = leafBuffer;
+					todelete[ntodelete++] = i;
+				}
+				else
+					UnlockReleaseBuffer(leafBuffer);
+			}
+
+
+		if (ntodelete)
+		{
+			/* Drop references from internal page */
+			TransactionId txid = GetCurrentTransactionId();
+			START_CRIT_SECTION();
+
+			MarkBufferDirty(buffer);
+				PageIndexMultiDelete(page, todelete, ntodelete);
+
+			if (RelationNeedsWAL(rel))
+			{
+				XLogRecPtr	recptr;
+
+				recptr = gistXLogUpdate(buffer, todelete, ntodelete, NULL, 0, InvalidBuffer);
+					PageSetLSN(page, recptr);
+			}
+			else
+				PageSetLSN(page, gistGetFakeLSN(rel));
+
+			/* Mark pages as deleted */
+			for (i = 0; i < ntodelete; i++)
+			{
+				Page		leafPage = (Page)BufferGetPage(buftodelete[i]);
+				PageHeader	header = (PageHeader)leafPage;
+
+				header->pd_prune_xid = txid;
+
+				GistPageSetDeleted(leafPage);
+				MarkBufferDirty(buftodelete[i]);
+				stats->pages_deleted++;
+
+				if (RelationNeedsWAL(rel))
+				{
+					XLogRecPtr recptr = gistXLogSetDeleted(rel->rd_node, buftodelete[i], header->pd_prune_xid);
+					PageSetLSN(leafPage, recptr);
+				}
+				else
+					PageSetLSN(leafPage, gistGetFakeLSN(rel));
+
+				UnlockReleaseBuffer(buftodelete[i]);
+			}
+			END_CRIT_SECTION();
+		}
+
+		UnlockReleaseBuffer(buffer);
+	}
+
+	list_free(rescanList);
+
 	return stats;
-}
+}
\ No newline at end of file
diff --git a/src/backend/access/gist/gistxlog.c b/src/backend/access/gist/gistxlog.c
index 7fd91ce640..0a008a4a77 100644
--- a/src/backend/access/gist/gistxlog.c
+++ b/src/backend/access/gist/gistxlog.c
@@ -60,6 +60,29 @@ gistRedoClearFollowRight(XLogReaderState *record, uint8 block_id)
 		UnlockReleaseBuffer(buffer);
 }
 
+static void
+gistRedoPageSetDeleted(XLogReaderState *record)
+{
+	XLogRecPtr	lsn = record->EndRecPtr;
+	gistxlogPageDelete *xldata = (gistxlogPageDelete *) XLogRecGetData(record);
+	Buffer		buffer;
+	Page		page;
+	PageHeader		header;
+
+	if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
+	{
+		page = (Page) BufferGetPage(buffer);
+		header = (PageHeader) page;
+
+		header->pd_prune_xid = xldata->id;
+		GistPageSetDeleted(page);
+
+		PageSetLSN(page, lsn);
+		MarkBufferDirty(buffer);
+
+		UnlockReleaseBuffer(buffer);
+	}
+}
 /*
  * redo any page update (except page split)
  */
@@ -112,8 +135,8 @@ gistRedoPageUpdateRecord(XLogReaderState *record)
 			data += sizeof(OffsetNumber) * xldata->ntodelete;
 
 			PageIndexMultiDelete(page, todelete, xldata->ntodelete);
-			if (GistPageIsLeaf(page))
-				GistMarkTuplesDeleted(page);
+
+			GistMarkTuplesDeleted(page);
 		}
 
 		/* Add new tuples if any */
@@ -324,6 +347,9 @@ gist_redo(XLogReaderState *record)
 		case XLOG_GIST_CREATE_INDEX:
 			gistRedoCreateIndex(record);
 			break;
+		case XLOG_GIST_PAGE_DELETE:
+			gistRedoPageSetDeleted(record);
+			break;
 		default:
 			elog(PANIC, "gist_redo: unknown op code %u", info);
 	}
@@ -442,6 +468,23 @@ gistXLogSplit(bool page_is_leaf,
 	return recptr;
 }
 
+
+XLogRecPtr
+gistXLogSetDeleted(RelFileNode node, Buffer buffer, TransactionId xid) {
+	gistxlogPageDelete xlrec;
+	XLogRecPtr	recptr;
+
+	xlrec.id = xid;
+
+	XLogBeginInsert();
+	XLogRegisterData((char *) &xlrec, sizeof(gistxlogPageDelete));
+
+	XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+	/* new tuples */
+	recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_DELETE);
+	return recptr;
+}
+
 /*
  * Write XLOG record describing a page update. The update can include any
  * number of deletions and/or insertions of tuples on a single index page.
diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h
index eb1c6728d4..db7ceaf56f 100644
--- a/src/include/access/gist_private.h
+++ b/src/include/access/gist_private.h
@@ -16,6 +16,7 @@
 
 #include "access/amapi.h"
 #include "access/gist.h"
+#include "access/gistxlog.h"
 #include "access/itup.h"
 #include "fmgr.h"
 #include "lib/pairingheap.h"
@@ -51,6 +52,11 @@ typedef struct
 	char		tupledata[FLEXIBLE_ARRAY_MEMBER];
 } GISTNodeBufferPage;
 
+typedef struct
+{
+	BlockNumber childblkno;		/* hash key */
+	BlockNumber parentblkno;
+} ParentMapEntry;
 #define BUFFER_PAGE_DATA_OFFSET MAXALIGN(offsetof(GISTNodeBufferPage, tupledata))
 /* Returns free space in node buffer page */
 #define PAGE_FREE_SPACE(nbp) (nbp->freespace)
@@ -176,13 +182,6 @@ typedef struct GISTScanOpaqueData
 
 typedef GISTScanOpaqueData *GISTScanOpaque;
 
-/* despite the name, gistxlogPage is not part of any xlog record */
-typedef struct gistxlogPage
-{
-	BlockNumber blkno;
-	int			num;			/* number of index tuples following */
-} gistxlogPage;
-
 /* SplitedPageLayout - gistSplit function result */
 typedef struct SplitedPageLayout
 {
@@ -409,6 +408,16 @@ extern bool gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
 extern SplitedPageLayout *gistSplit(Relation r, Page page, IndexTuple *itup,
 		  int len, GISTSTATE *giststate);
 
+/* gistxlog.c */
+extern void gist_redo(XLogReaderState *record);
+extern void gist_desc(StringInfo buf, XLogReaderState *record);
+extern const char *gist_identify(uint8 info);
+extern void gist_xlog_startup(void);
+extern void gist_xlog_cleanup(void);
+
+extern XLogRecPtr gistXLogSetDeleted(RelFileNode node, Buffer buffer,
+					TransactionId xid);
+
 extern XLogRecPtr gistXLogUpdate(Buffer buffer,
 			   OffsetNumber *todelete, int ntodelete,
 			   IndexTuple *itup, int ntup,
diff --git a/src/include/access/gistxlog.h b/src/include/access/gistxlog.h
index 3b126eca2a..52ab3a9308 100644
--- a/src/include/access/gistxlog.h
+++ b/src/include/access/gistxlog.h
@@ -17,12 +17,14 @@
 #include "access/xlogreader.h"
 #include "lib/stringinfo.h"
 
+/* XLog stuff */
+
 #define XLOG_GIST_PAGE_UPDATE		0x00
  /* #define XLOG_GIST_NEW_ROOT			 0x20 */	/* not used anymore */
 #define XLOG_GIST_PAGE_SPLIT		0x30
  /* #define XLOG_GIST_INSERT_COMPLETE	 0x40 */	/* not used anymore */
 #define XLOG_GIST_CREATE_INDEX		0x50
- /* #define XLOG_GIST_PAGE_DELETE		 0x60 */	/* not used anymore */
+#define XLOG_GIST_PAGE_DELETE		 0x60
 
 /*
  * Backup Blk 0: updated page.
@@ -59,6 +61,18 @@ typedef struct gistxlogPageSplit
 	 */
 } gistxlogPageSplit;
 
+typedef struct gistxlogPageDelete
+{
+   TransactionId id;
+} gistxlogPageDelete;
+
+/* despite the name, gistxlogPage is not part of any xlog record */
+typedef struct gistxlogPage
+{
+   BlockNumber blkno;
+   int			num;			/* number of index tuples following */
+} gistxlogPage;
+
 extern void gist_redo(XLogReaderState *record);
 extern void gist_desc(StringInfo buf, XLogReaderState *record);
 extern const char *gist_identify(uint8 info);
diff --git a/src/test/regress/expected/gist.out b/src/test/regress/expected/gist.out
index f5a2993aaf..5b92f08c74 100644
--- a/src/test/regress/expected/gist.out
+++ b/src/test/regress/expected/gist.out
@@ -27,9 +27,7 @@ insert into gist_point_tbl (id, p)
 select g+100000, point(g*10+1, g*10+1) from generate_series(1, 10000) g;
 -- To test vacuum, delete some entries from all over the index.
 delete from gist_point_tbl where id % 2 = 1;
--- And also delete some concentration of values. (GiST doesn't currently
--- attempt to delete pages even when they become empty, but if it did, this
--- would exercise it)
+-- And also delete some concentration of values.
 delete from gist_point_tbl where id < 10000;
 vacuum analyze gist_point_tbl;
 -- rebuild the index with a different fillfactor
diff --git a/src/test/regress/sql/gist.sql b/src/test/regress/sql/gist.sql
index bae722fe13..e66396e851 100644
--- a/src/test/regress/sql/gist.sql
+++ b/src/test/regress/sql/gist.sql
@@ -28,9 +28,7 @@ select g+100000, point(g*10+1, g*10+1) from generate_series(1, 10000) g;
 -- To test vacuum, delete some entries from all over the index.
 delete from gist_point_tbl where id % 2 = 1;
 
--- And also delete some concentration of values. (GiST doesn't currently
--- attempt to delete pages even when they become empty, but if it did, this
--- would exercise it)
+-- And also delete some concentration of values.
 delete from gist_point_tbl where id < 10000;
 
 vacuum analyze gist_point_tbl;
-- 
2.14.3 (Apple Git-98)

