Hello. I am student from gsoc programm.
My project is sequantial access in vacuum of gist.

New vacuum has 2 big step:
physical order scan pages and cleaning after 1 step.


1 scan - scan all pages and create information map(hashmap) and add information to rescan stack( stack of pages that needed to rescanning

second step is work only with page(from rescan stack) where there is a changes. In new version of vacuum besides increased speed also there is a deleting of pages. Only leaf pages can be deleted. The process of deleteing pages is (1. delete link to page. 2. change rightlinks (if needed) 3. set deleted). I added 2 action in wal (when i set delete flag and when i change rightlinks). When i delete links to leaf pages from inner page i always save 1 link to leaf(avoiding situations with empty inner pages).

I attach some speed benchmarks.

i compare old and new version on my laptop(without ssd). the test: table "point_tbl" from regression database. i insert about 200 millions rows. after that i delete 33 million and run vacuum.

size of index is about 18 gb.

old version:

INFO: vacuuming "public.point_tbl"
INFO: scanned index "gpointind" to remove 11184520 row versions
DETAIL: CPU 84.70s/72.26u sec elapsed 27007.14 sec.
INFO: "point_tbl": removed 11184520 row versions in 400715 pages
DETAIL: CPU 3.96s/3.10u sec elapsed 233.12 sec.
INFO: scanned index "gpointind" to remove 11184523 row versions
DETAIL: CPU 87.10s/69.05u sec elapsed 26410.44 sec.
INFO: "point_tbl": removed 11184523 row versions in 400715 pages
DETAIL: CPU 4.23s/3.36u sec elapsed 331.43 sec.
INFO: scanned index "gpointind" to remove 11184523 row versions
DETAIL: CPU 87.65s/65.73u sec elapsed 26230.35 sec.
INFO: "point_tbl": removed 11184523 row versions in 400715 pages
DETAIL: CPU 4.47s/3.41u sec elapsed 342.93 sec.
INFO: scanned index "gpointind" to remove 866 row versions
DETAIL: CPU 79.97s/39.64u sec elapsed 23341.88 sec.
INFO: "point_tbl": removed 866 row versions in 31 pages
DETAIL: CPU 0.00s/0.00u sec elapsed 0.00 sec.
INFO: index "gpointind" now contains 201326592 row versions in 2336441 pages
DETAIL: 33554432 index row versions were removed.
0 index pages have been deleted, 0 are currently reusable.

 

 

new vacuum is about

 

INFO: vacuuming "public.point_tbl"
INFO: scanned index "gpointind" to remove 11184520 row versions
DETAIL: CPU 13.00s/27.57u sec elapsed 1864.22 sec.
INFO: "point_tbl": removed 11184520 row versions in 400715 pages
DETAIL: CPU 3.46s/2.86u sec elapsed 214.04 sec.
INFO: scanned index "gpointind" to remove 11184523 row versions
DETAIL: CPU 14.17s/27.02u sec elapsed 2163.67 sec.
INFO: "point_tbl": removed 11184523 row versions in 400715 pages
DETAIL: CPU 3.33s/2.99u sec elapsed 222.60 sec.
INFO: scanned index "gpointind" to remove 11184523 row versions
DETAIL: CPU 11.84s/25.23u sec elapsed 1828.71 sec.
INFO: "point_tbl": removed 11184523 row versions in 400715 pages
DETAIL: CPU 3.44s/2.81u sec elapsed 215.06 sec.
INFO: scanned index "gpointind" to remove 866 row versions
DETAIL: CPU 5.62s/6.68u sec elapsed 176.67 sec.
INFO: "point_tbl": removed 866 row versions in 31 pages
DETAIL: CPU 0.00s/0.00u sec elapsed 0.01 sec.
INFO: index "gpointind" now contains 201326592 row versions in 2336360 pages
DETAIL: 33554432 index row versions were removed.
150833 index pages have been deleted, 150833 are currently reusable.
CPU 5.54s/2.08u sec elapsed 165.61 sec.
INFO: "point_tbl": found 33554432 removable, 201326592 nonremovable row versions in 1202176 out of 1202176 pages
DETAIL: 0 dead row versions cannot be removed yet.
There were 0 unused item pointers.
Skipped 0 pages due to buffer pins.
0 pages are entirely empty.
CPU 73.50s/116.82u sec elapsed 8300.73 sec.
INFO: analyzing "public.point_tbl"
INFO: "point_tbl": scanned 100 of 1202176 pages, containing 16756 live rows and 0 dead rows; 100 rows in sample, 201326601 estimated total rows
VACUUM

 

There is a big speed up + we can reuse some pages.

Thanks.

diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c
index 0e49959..229d3f4 100644
--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -619,6 +619,12 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate)
 			GISTInsertStack *item;
 			OffsetNumber downlinkoffnum;
 
+			if(GistPageIsDeleted(stack->page)) {
+				UnlockReleaseBuffer(stack->buffer);
+				xlocked = false;
+				state.stack = stack = stack->parent;
+				continue;
+			}
 			downlinkoffnum = gistchoose(state.r, stack->page, itup, giststate);
 			iid = PageGetItemId(stack->page, downlinkoffnum);
 			idxtuple = (IndexTuple) PageGetItem(stack->page, iid);
diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c
index ff888e2..c99ff7e 100644
--- a/src/backend/access/gist/gistbuild.c
+++ b/src/backend/access/gist/gistbuild.c
@@ -1128,11 +1128,6 @@ gistGetMaxLevel(Relation index)
  * but will be added there the first time we visit them.
  */
 
-typedef struct
-{
-	BlockNumber childblkno;		/* hash key */
-	BlockNumber parentblkno;
-} ParentMapEntry;
 
 static void
 gistInitParentMap(GISTBuildState *buildstate)
diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c
index 7d596a3..a5fc876 100644
--- a/src/backend/access/gist/gistutil.c
+++ b/src/backend/access/gist/gistutil.c
@@ -20,6 +20,7 @@
 #include "storage/indexfsm.h"
 #include "storage/lmgr.h"
 #include "utils/builtins.h"
+#include "utils/snapmgr.h"
 
 
 /*
@@ -777,15 +778,16 @@ gistNewBuffer(Relation r)
 		if (ConditionalLockBuffer(buffer))
 		{
 			Page		page = BufferGetPage(buffer);
+			PageHeader	p = (PageHeader) page;
 
 			if (PageIsNew(page))
 				return buffer;	/* OK to use, if never initialized */
 
 			gistcheckpage(r, buffer);
 
-			if (GistPageIsDeleted(page))
+			if (GistPageIsDeleted(page) && TransactionIdPrecedes(p->pd_prune_xid, RecentGlobalDataXmin)) {
 				return buffer;	/* OK to use */
-
+			}
 			LockBuffer(buffer, GIST_UNLOCK);
 		}
 
diff --git a/src/backend/access/gist/gistvacuum.c b/src/backend/access/gist/gistvacuum.c
index 2337dbd..12533a5 100644
--- a/src/backend/access/gist/gistvacuum.c
+++ b/src/backend/access/gist/gistvacuum.c
@@ -20,7 +20,8 @@
 #include "miscadmin.h"
 #include "storage/indexfsm.h"
 #include "storage/lmgr.h"
-
+#include "utils/snapmgr.h"
+#include "access/xact.h"
 
 /*
  * VACUUM cleanup: update FSM
@@ -108,6 +109,31 @@ typedef struct GistBDItem
 	struct GistBDItem *next;
 } GistBDItem;
 
+typedef struct GistBDSItem
+{
+	BlockNumber blkno;
+	bool isParent;
+	struct GistBDSItem *next;
+} GistBDSItem;
+
+typedef enum
+{
+	NOT_NEED_TO_PROCESS,	/* without action */
+	PROCESSED,				/* action is done */
+	NEED_TO_PROCESS,
+	NEED_TO_DELETE			/* */
+} GistBlockInfoFlag;
+
+typedef struct GistBlockInfo {
+	BlockNumber blkno;
+	BlockNumber parent;
+	BlockNumber leftblock;		/* block that has rightlink on blkno */
+	GistBlockInfoFlag flag;
+	//bool toDelete;				/* is need delete this block? */
+	//bool isDeleted;				/* this block was processed 	*/
+	bool hasRightLink;
+} GistBlockInfo;
+
 static void
 pushStackIfSplited(Page page, GistBDItem *stack)
 {
@@ -127,7 +153,150 @@ pushStackIfSplited(Page page, GistBDItem *stack)
 		stack->next = ptr;
 	}
 }
+static void
+gistFillBlockInfo(HTAB * map, BlockNumber blkno)
+{
+	GistBlockInfo *entry;
+	bool		found;
 
+	entry = (GistBlockInfo *) hash_search(map,
+										   (const void *) &blkno,
+										   HASH_ENTER,
+										   &found);
+	if(!found) {
+		entry->parent = InvalidBlockNumber;
+		entry->leftblock = InvalidBlockNumber;
+		entry->hasRightLink = false;
+		entry->flag = NOT_NEED_TO_PROCESS;
+		//entry->toDelete = false;
+		//entry->isDeleted = false;
+	}
+}
+
+static void
+gistMemorizeParentTab(HTAB * map, BlockNumber child, BlockNumber parent)
+{
+	GistBlockInfo *entry;
+	bool		found;
+
+	entry = (GistBlockInfo *) hash_search(map,
+										   (const void *) &child,
+										   HASH_ENTER,
+										   &found);
+	if(!found) {
+		entry->parent = InvalidBlockNumber;
+		entry->leftblock = InvalidBlockNumber;
+		entry->hasRightLink = false;
+		entry->flag = NOT_NEED_TO_PROCESS;
+	}
+	entry->parent = parent;
+}
+static BlockNumber
+gistGetParentTab(HTAB * map, BlockNumber child)
+{
+	GistBlockInfo *entry;
+	bool		found;
+
+	/* Find node buffer in hash table */
+	entry = (GistBlockInfo *) hash_search(map,
+										   (const void *) &child,
+										   HASH_FIND,
+										   &found);
+	if (!found)
+		elog(ERROR, "could not find parent of block %d in lookup table", child);
+
+	return entry->parent;
+}
+
+static BlockNumber
+gistGetLeftLink(HTAB * map, BlockNumber right)
+{
+	GistBlockInfo *entry;
+	bool		found;
+	entry = (GistBlockInfo *) hash_search(map,
+										   (const void *) &right,
+										   HASH_FIND,
+										   &found);
+	if (!found)
+		return InvalidBlockNumber;
+	if(entry->hasRightLink == false) {
+		return InvalidBlockNumber;
+	}
+	return entry->leftblock;
+}
+static void
+gistMemorizeLeftLink(HTAB * map, BlockNumber right, BlockNumber left, bool hasRightLink)
+{
+	GistBlockInfo *entry;
+	bool		found;
+	entry = (GistBlockInfo *) hash_search(map,
+										   (const void *) &right,
+										   HASH_ENTER,
+										   &found);
+	if (!found) {
+		entry->leftblock = InvalidBlockNumber;
+		entry->parent = InvalidBlockNumber;
+		entry->hasRightLink = false;
+		entry->flag = NOT_NEED_TO_PROCESS;
+	}
+
+	if(hasRightLink) {
+		entry->leftblock = left;
+		entry->hasRightLink = hasRightLink;
+	} else {
+		if(!found) {
+			entry->leftblock = left;
+			entry->hasRightLink = hasRightLink;
+		}
+	}
+
+}
+
+static bool
+gistGetDeleteLink(HTAB* map, BlockNumber blkno) {
+	GistBlockInfo *entry;
+	bool		found;
+
+	/* Find node buffer in hash table */
+	entry = (GistBlockInfo *) hash_search(map,
+										   (const void *) &blkno,
+										   HASH_FIND,
+										   &found);
+
+	if (!found)
+		return false;
+
+	return entry->flag == NEED_TO_DELETE;
+}
+static bool
+gistIsProcessed(HTAB* map, BlockNumber blkno) {
+	GistBlockInfo *entry;
+	bool		found;
+
+	/* Find node buffer in hash table */
+	entry = (GistBlockInfo *) hash_search(map,
+										   (const void *) &blkno,
+										   HASH_FIND,
+										   &found);
+
+	return entry ? entry->flag == PROCESSED: false;
+}
+static void
+gistMemorizeLinkToDelete(HTAB* map, BlockNumber blkno, GistBlockInfoFlag flag) {
+	GistBlockInfo *entry;
+	bool		found;
+	entry = (GistBlockInfo *) hash_search(map,
+										   (const void *) &blkno,
+										   HASH_ENTER,
+										   &found);
+	if (!found) {
+		entry->parent = InvalidBlockNumber;
+		entry->leftblock = InvalidBlockNumber;
+		entry->hasRightLink = false;
+		entry->flag = NOT_NEED_TO_PROCESS;
+	}
+	entry->flag = flag;
+}
 
 /*
  * Bulk deletion of all index entries pointing to a set of heap tuples and
@@ -137,21 +306,20 @@ pushStackIfSplited(Page page, GistBDItem *stack)
  *
  * Result: a palloc'd struct containing statistical info for VACUUM displays.
  */
-Datum
-gistbulkdelete(PG_FUNCTION_ARGS)
+static Datum
+gistbulkdeletelogical(IndexVacuumInfo * info, IndexBulkDeleteResult * stats, IndexBulkDeleteCallback callback, void* callback_state)
 {
+	/*
 	IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0);
 	IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
 	IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(2);
-	void	   *callback_state = (void *) PG_GETARG_POINTER(3);
+	void	   *callback_state = (void *) PG_GETARG_POINTER(3); */
 	Relation	rel = info->index;
 	GistBDItem *stack,
 			   *ptr;
 
-	/* first time through? */
 	if (stats == NULL)
 		stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
-	/* we'll re-count the tuples each time */
 	stats->estimated_count = false;
 	stats->num_index_tuples = 0;
 
@@ -184,21 +352,12 @@ gistbulkdelete(PG_FUNCTION_ARGS)
 			page = (Page) BufferGetPage(buffer);
 			if (stack->blkno == GIST_ROOT_BLKNO && !GistPageIsLeaf(page))
 			{
-				/* only the root can become non-leaf during relock */
 				UnlockReleaseBuffer(buffer);
-				/* one more check */
 				continue;
 			}
 
-			/*
-			 * check for split proceeded after look at parent, we should check
-			 * it after relock
-			 */
 			pushStackIfSplited(page, stack);
 
-			/*
-			 * Remove deletable tuples from page
-			 */
 
 			maxoff = PageGetMaxOffsetNumber(page);
 
@@ -245,7 +404,6 @@ gistbulkdelete(PG_FUNCTION_ARGS)
 		}
 		else
 		{
-			/* check for split proceeded after look at parent */
 			pushStackIfSplited(page, stack);
 
 			maxoff = PageGetMaxOffsetNumber(page);
@@ -281,3 +439,614 @@ gistbulkdelete(PG_FUNCTION_ARGS)
 
 	PG_RETURN_POINTER(stats);
 }
+
+static void
+gistvacuumcheckrightlink(Relation rel, IndexVacuumInfo * info,
+		HTAB* infomap, BlockNumber blkno, Page page)
+{
+	/*
+	 *
+	 * if there is right link on this page but not rightlink from this page. remove rightlink from left page.
+	 * if there is right link on this page and there is a right link . right link of left page must be rightlink to rightlink of this page.
+	 * */
+
+	BlockNumber leftblkno;
+	GISTPageOpaque childopaque;
+
+	leftblkno = gistGetLeftLink(infomap, blkno);
+	if (leftblkno != InvalidBlockNumber) {
+		/*
+		 * there is a right link to child page
+		 * */
+		BlockNumber newRight = InvalidBuffer;
+		GISTPageOpaque leftOpaque;
+		Page left;
+		Buffer leftbuffer;
+		leftbuffer = ReadBufferExtended(rel, MAIN_FORKNUM, leftblkno,
+				RBM_NORMAL, info->strategy);
+		left = (Page) BufferGetPage(leftbuffer);
+
+		LockBuffer(leftbuffer, GIST_EXCLUSIVE);
+		childopaque = GistPageGetOpaque(page);
+		leftOpaque = GistPageGetOpaque(left);
+
+		while (leftOpaque->rightlink != InvalidBlockNumber
+				&& leftOpaque->rightlink != blkno) {
+			leftblkno = leftOpaque->rightlink;
+			UnlockReleaseBuffer(leftbuffer);
+			leftbuffer = ReadBufferExtended(rel, MAIN_FORKNUM, leftblkno,
+					RBM_NORMAL, info->strategy);
+			left = (Page) BufferGetPage(leftbuffer);
+
+			LockBuffer(leftbuffer, GIST_EXCLUSIVE);
+			leftOpaque = GistPageGetOpaque(left);
+
+		}
+		if (leftOpaque->rightlink == InvalidBlockNumber) {
+			/*
+			 * error!! we dont find leftpage.
+			 * */
+
+			UnlockReleaseBuffer(leftbuffer);
+			return;
+		}
+		if (childopaque->rightlink != InvalidBlockNumber) {
+			newRight = childopaque->rightlink;
+		}
+		leftOpaque->rightlink = newRight;
+
+		if (RelationNeedsWAL(rel)) {
+			XLogRecPtr recptr;
+			recptr = gistXLogRightLinkChange(rel->rd_node, leftbuffer, newRight);
+			PageSetLSN(left, recptr);
+		} else
+			PageSetLSN(left, gistGetFakeLSN(rel));
+
+		UnlockReleaseBuffer(leftbuffer);
+	}
+}
+static void
+gistvacuumrepairpage(Relation rel, IndexVacuumInfo * info, IndexBulkDeleteResult * stats,
+		IndexBulkDeleteCallback callback, void* callback_state,
+
+		HTAB* infomap, BlockNumber blkno)
+{
+	Buffer buffer;
+	Page page;
+	PageHeader header;
+	OffsetNumber maxoff, i;
+	IndexTuple idxtuple;
+	ItemId iid;
+	OffsetNumber todelete[MaxOffsetNumber];
+	int ntodelete = 0;
+	bool isNew;
+
+	buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
+			RBM_NORMAL, info->strategy);
+	LockBuffer(buffer, GIST_EXCLUSIVE);
+
+	gistcheckpage(rel, buffer);
+	page = (Page) BufferGetPage(buffer);
+	/*
+	 * if page is inner do nothing.
+	 * */
+	if(GistPageIsLeaf(page)) {
+		maxoff = PageGetMaxOffsetNumber(page);
+		for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) {
+			iid = PageGetItemId(page, i);
+			idxtuple = (IndexTuple) PageGetItem(page, iid);
+
+			if (callback(&(idxtuple->t_tid), callback_state)) {
+				todelete[ntodelete] = i - ntodelete;
+				ntodelete++;
+			}
+		}
+		isNew = PageIsNew(page) || PageIsEmpty(page);
+		if (ntodelete || isNew) {
+			START_CRIT_SECTION();
+
+			MarkBufferDirty(buffer);
+
+			for (i = 0; i < ntodelete; i++)
+				PageIndexTupleDelete(page, todelete[i]);
+			GistMarkTuplesDeleted(page);
+
+			if (RelationNeedsWAL(rel)) {
+				XLogRecPtr recptr;
+
+				recptr = gistXLogUpdate(rel->rd_node, buffer, todelete,
+						ntodelete,
+						NULL, 0, InvalidBuffer);
+				PageSetLSN(page, recptr);
+			} else
+				PageSetLSN(page, gistGetFakeLSN(rel));
+			END_CRIT_SECTION();
+
+			/*
+			 * ok. links has been deleted. and this in wal! now we can set deleted and repair rightlinks
+			 * */
+
+			gistvacuumcheckrightlink(rel, info, infomap, blkno, page);
+
+			/*
+			 * ok. rightlinks has been repaired.
+			 * */
+			header = (PageHeader) page;
+
+			header->pd_prune_xid = GetCurrentTransactionId();
+
+			GistPageSetDeleted(page);
+			stats->pages_deleted++;
+
+			if (RelationNeedsWAL(rel)) {
+				XLogRecPtr recptr;
+
+				recptr = gistXLogSetDeleted(rel->rd_node, buffer, header->pd_prune_xid);
+				PageSetLSN(page, recptr);
+			} else
+				PageSetLSN(page, gistGetFakeLSN(rel));
+		}
+	}
+
+	UnlockReleaseBuffer(buffer);
+}
+static void
+gistphysicalvacuum(Relation rel, IndexVacuumInfo * info, IndexBulkDeleteResult * stats,
+		IndexBulkDeleteCallback callback, void* callback_state,
+		BlockNumber npages, HTAB* infomap,
+		GistBDSItem* rescanstack, GistBDSItem* tail)
+{
+	BlockNumber blkno = GIST_ROOT_BLKNO;
+	for (; blkno < npages; blkno++) {
+		Buffer buffer;
+		Page page;
+		OffsetNumber i, maxoff;
+		IndexTuple idxtuple;
+		ItemId iid;
+		OffsetNumber todelete[MaxOffsetNumber];
+		int ntodelete = 0;
+		GISTPageOpaque opaque;
+		BlockNumber child;
+		GistBDSItem *item;
+		bool isNew;
+
+		buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
+				info->strategy);
+		LockBuffer(buffer, GIST_SHARE);
+		gistcheckpage(rel, buffer);
+		page = (Page) BufferGetPage(buffer);
+
+		isNew = PageIsNew(page) || PageIsEmpty(page);
+		opaque = GistPageGetOpaque(page);
+
+		gistFillBlockInfo(infomap, blkno);
+
+		gistMemorizeLeftLink(infomap, blkno, InvalidBlockNumber, false);
+
+		if(opaque->rightlink != InvalidBlockNumber) {
+			gistMemorizeLeftLink(infomap, opaque->rightlink, blkno, true);
+		}
+		if (GistPageIsLeaf(page)) {
+
+			LockBuffer(buffer, GIST_UNLOCK);
+			LockBuffer(buffer, GIST_EXCLUSIVE);
+
+			maxoff = PageGetMaxOffsetNumber(page);
+			for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) {
+				iid = PageGetItemId(page, i);
+				idxtuple = (IndexTuple) PageGetItem(page, iid);
+
+				if (callback(&(idxtuple->t_tid), callback_state)) {
+					todelete[ntodelete] = i - ntodelete;
+					ntodelete++;
+					stats->tuples_removed += 1;
+				} else
+					stats->num_index_tuples += 1;
+			}
+		} else {
+			/*
+			 * first scan
+			 * */
+
+			maxoff = PageGetMaxOffsetNumber(page);
+			if (blkno != GIST_ROOT_BLKNO
+					/*&& (GistFollowRight(page) || lastNSN < GistPageGetNSN(page)) */
+					&& opaque->rightlink != InvalidBlockNumber) {
+				/*
+				 * build left link map. add to rescan later.
+				 * */
+				item = (GistBDSItem *) palloc(sizeof(GistBDSItem));
+
+				item->isParent = false;
+				item->blkno = opaque->rightlink;
+				item->next = NULL;
+
+				tail->next = item;
+				tail = item;
+
+			}
+			for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) {
+				iid = PageGetItemId(page, i);
+				idxtuple = (IndexTuple) PageGetItem(page, iid);
+				child = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
+
+				gistMemorizeParentTab(infomap, child, blkno);
+
+				if (GistTupleIsInvalid(idxtuple))
+					ereport(LOG,
+							(errmsg("index \"%s\" contains an inner tuple marked as invalid", RelationGetRelationName(rel)), errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."), errhint("Please REINDEX it.")));
+			}
+
+		}
+		if (ntodelete || isNew) {
+			if ((maxoff == ntodelete) || isNew) {
+
+				item = (GistBDSItem *) palloc(sizeof(GistBDSItem));
+				item->isParent = true;
+				item->blkno = blkno;
+				item->next = NULL;
+
+				tail->next = item;
+				tail = item;
+
+
+				gistMemorizeLinkToDelete(infomap, blkno, NEED_TO_DELETE);
+			} else {
+				START_CRIT_SECTION();
+
+				MarkBufferDirty(buffer);
+
+				for (i = 0; i < ntodelete; i++)
+					PageIndexTupleDelete(page, todelete[i]);
+				GistMarkTuplesDeleted(page);
+
+				if (RelationNeedsWAL(rel)) {
+					XLogRecPtr recptr;
+
+					recptr = gistXLogUpdate(rel->rd_node, buffer, todelete,
+							ntodelete,
+							NULL, 0, InvalidBuffer);
+					PageSetLSN(page, recptr);
+				} else
+					PageSetLSN(page, gistGetFakeLSN(rel));
+
+				END_CRIT_SECTION();
+			}
+		}
+
+		UnlockReleaseBuffer(buffer);
+		vacuum_delay_point();
+	}
+}
+static void
+gistrescanvacuum(Relation rel, IndexVacuumInfo * info, IndexBulkDeleteResult * stats,
+		IndexBulkDeleteCallback callback, void* callback_state,
+		HTAB* infomap,
+		GistBDSItem* rescanstack, GistBDSItem* tail)
+{
+	GistBDSItem * ptr;
+	while (rescanstack != NULL) {
+		Buffer buffer;
+		Page page;
+		OffsetNumber i, maxoff;
+		IndexTuple idxtuple;
+		ItemId iid;
+		OffsetNumber todelete[MaxOffsetNumber];
+		int ntodelete = 0;
+		GISTPageOpaque opaque;
+		BlockNumber blkno, child;
+		Buffer childBuffer;
+		GistBDSItem *item;
+		bool isNew;
+		bool isProcessed;
+
+		BlockNumber setdeletedblkno[MaxOffsetNumber];
+
+		blkno = rescanstack->blkno;
+		if (gistGetParentTab(infomap, blkno) == InvalidBlockNumber && blkno != GIST_ROOT_BLKNO) {
+			/*
+			 * strange pages. it's maybe(pages without parent but is not root).
+			 * for example when last vacuum shut down and we can delete link to this page but dont set deleted
+			 * repair that pages.
+			 * how repaire: remove data if exists. rightlink repair. set-deleted
+			 */
+			gistvacuumrepairpage(rel, info, stats, callback, callback_state, infomap, blkno);
+
+			ptr = rescanstack->next;
+			pfree(rescanstack);
+			rescanstack = ptr;
+
+			vacuum_delay_point();
+			continue;
+		}
+		if (rescanstack->isParent == true) {
+			blkno = gistGetParentTab(infomap, blkno);
+		}
+
+		isProcessed = gistIsProcessed(infomap, blkno);
+
+		if(isProcessed == true || blkno == InvalidBlockNumber) {
+
+			ptr = rescanstack->next;
+			pfree(rescanstack);
+			rescanstack = ptr;
+
+			vacuum_delay_point();
+			continue;
+		}
+		buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
+				RBM_NORMAL, info->strategy);
+		LockBuffer(buffer, GIST_SHARE);
+
+		gistcheckpage(rel, buffer);
+		page = (Page) BufferGetPage(buffer);
+
+		opaque = GistPageGetOpaque(page);
+
+		if (blkno != GIST_ROOT_BLKNO
+				&& opaque->rightlink != InvalidBlockNumber) {
+			item = (GistBDSItem *) palloc(sizeof(GistBDSItem));
+
+			item->isParent = false;
+			item->blkno = opaque->rightlink;
+			item->next = rescanstack->next;
+
+			rescanstack->next = item;
+		}
+
+		if (GistPageIsLeaf(page)) {
+			/* usual procedure with leafs pages*/
+			LockBuffer(buffer, GIST_UNLOCK);
+			LockBuffer(buffer, GIST_EXCLUSIVE);
+
+			maxoff = PageGetMaxOffsetNumber(page);
+			for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) {
+				iid = PageGetItemId(page, i);
+				idxtuple = (IndexTuple) PageGetItem(page, iid);
+
+				if (callback(&(idxtuple->t_tid), callback_state)) {
+					todelete[ntodelete] = i - ntodelete;
+					ntodelete++;
+				}
+			}
+		} else {
+			LockBuffer(buffer, GIST_UNLOCK);
+			LockBuffer(buffer, GIST_EXCLUSIVE);
+			maxoff = PageGetMaxOffsetNumber(page);
+			for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) {
+				bool delete;
+				iid = PageGetItemId(page, i);
+				idxtuple = (IndexTuple) PageGetItem(page, iid);
+
+				child = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
+
+				delete = gistGetDeleteLink(infomap, child);
+				/*
+				 * leaf is needed to delete????
+				 * */
+				if (delete) {
+					IndexTuple idxtuplechild;
+					ItemId iidchild;
+					OffsetNumber todeletechild[MaxOffsetNumber];
+					int ntodeletechild = 0;
+					OffsetNumber j, maxoffchild;
+					Page childpage;
+					bool childIsNew;
+
+					childBuffer = ReadBufferExtended(rel, MAIN_FORKNUM, child,
+							RBM_NORMAL, info->strategy);
+
+					LockBuffer(childBuffer, GIST_EXCLUSIVE);
+
+					childpage = (Page) BufferGetPage(childBuffer);
+					childIsNew = PageIsNew(childpage) || PageIsEmpty(childpage);
+
+					if (GistPageIsLeaf(childpage)) {
+						maxoffchild = PageGetMaxOffsetNumber(childpage);
+						for (j = FirstOffsetNumber; j <= maxoffchild; j =
+								OffsetNumberNext(j)) {
+							iidchild = PageGetItemId(childpage, j);
+							idxtuplechild = (IndexTuple) PageGetItem(childpage,
+									iidchild);
+
+							if (callback(&(idxtuplechild->t_tid),
+									callback_state)) {
+								todeletechild[ntodeletechild] = j
+										- ntodeletechild;
+								ntodeletechild++;
+							}
+						}
+						if (ntodeletechild || childIsNew) {
+							START_CRIT_SECTION();
+
+							MarkBufferDirty(childBuffer);
+
+							for (j = 0; j < ntodeletechild; j++)
+								PageIndexTupleDelete(childpage,
+										todeletechild[j]);
+							GistMarkTuplesDeleted(childpage);
+
+							if (RelationNeedsWAL(rel)) {
+								XLogRecPtr recptr;
+
+								recptr = gistXLogUpdate(rel->rd_node,
+										childBuffer, todeletechild,
+										ntodeletechild,
+										NULL, 0, InvalidBuffer);
+								PageSetLSN(childpage, recptr);
+							} else
+								PageSetLSN(childpage, gistGetFakeLSN(rel));
+
+							END_CRIT_SECTION();
+
+							if ((ntodeletechild == maxoffchild) || childIsNew) {
+								/*
+								 *
+								 * if there is right link on this page but not rightlink from this page. remove rightlink from left page.
+								 * if there is right link on this page and there is a right link . right link of left page must be rightlink to rightlink of this page.
+								 * */
+								todelete[ntodelete] = i - ntodelete;
+								setdeletedblkno[ntodelete] = child;
+								ntodelete++;
+							}
+						}
+					}
+					UnlockReleaseBuffer(childBuffer);
+				}
+			}
+		}
+		isNew = PageIsNew(page) || PageIsEmpty(page);
+		if (ntodelete || isNew) {
+			if(GistPageIsLeaf(page)) {
+				item = (GistBDSItem *) palloc(sizeof(GistBDSItem));
+
+				item->isParent = false;
+				item->blkno = gistGetParentTab(infomap, blkno);
+				item->next = rescanstack->next;
+				rescanstack->next = item;
+			} else {
+				/*
+				 * delete links to pages
+				 * */
+				if(ntodelete && (ntodelete == maxoff) ) {
+					// save 1 link on inner page
+					ntodelete--;
+				}
+				START_CRIT_SECTION();
+
+				MarkBufferDirty(buffer);
+
+				for (i = 0; i < ntodelete; i++)
+					PageIndexTupleDelete(page, todelete[i]);
+				GistMarkTuplesDeleted(page);
+
+				if (RelationNeedsWAL(rel)) {
+					XLogRecPtr recptr;
+
+					recptr = gistXLogUpdate(rel->rd_node, buffer, todelete,
+							ntodelete,
+							NULL, 0, InvalidBuffer);
+					PageSetLSN(page, recptr);
+				} else
+					PageSetLSN(page, gistGetFakeLSN(rel));
+				END_CRIT_SECTION();
+
+				/*
+				 * ok. links has been deleted. and this in wal! now we can set deleted and repair rightlinks
+				 * */
+				for (i = 0; i < ntodelete; i++) {
+					Buffer childBuffertodelete;
+					Page childpagetodelete;
+					PageHeader p;
+					gistMemorizeLinkToDelete(infomap, setdeletedblkno[i], PROCESSED);
+
+					childBuffertodelete = ReadBufferExtended(rel, MAIN_FORKNUM, setdeletedblkno[i],
+							RBM_NORMAL, info->strategy);
+
+					LockBuffer(childBuffertodelete, GIST_EXCLUSIVE);
+
+					childpagetodelete = (Page) BufferGetPage(childBuffertodelete);
+
+					p = (PageHeader) childpagetodelete;
+
+					p->pd_prune_xid = GetCurrentTransactionId();
+
+					gistvacuumcheckrightlink(rel, info, infomap,
+							setdeletedblkno[i], childpagetodelete);
+					GistPageSetDeleted(childpagetodelete);
+					MarkBufferDirty(childBuffertodelete);
+					UnlockReleaseBuffer(childBuffertodelete);
+					stats->pages_deleted++;
+				}
+			}
+		}
+		gistMemorizeLinkToDelete(infomap, blkno, PROCESSED);
+		UnlockReleaseBuffer(buffer);
+
+		ptr = rescanstack->next;
+		pfree(rescanstack);
+		rescanstack = ptr;
+
+		vacuum_delay_point();
+	}
+}
+
+Datum
+gistbulkdelete(PG_FUNCTION_ARGS)
+{
+	IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0);
+	IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
+	IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(2);
+	void	   *callback_state = (void *) PG_GETARG_POINTER(3);
+	Relation	rel = info->index;
+	GistBDSItem *rescanstack = NULL,
+			   *tail = NULL;
+
+	int memoryneeded = 0;
+
+	BlockNumber npages;
+
+	bool needLock;
+	HTAB	   *infomap;
+	HASHCTL		hashCtl;
+
+
+	if (stats == NULL)
+		stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
+	stats->estimated_count = false;
+	stats->num_index_tuples = 0;
+
+	hashCtl.keysize = sizeof(BlockNumber);
+	hashCtl.entrysize = sizeof(GistBlockInfo);
+	hashCtl.hcxt = CurrentMemoryContext;
+
+	needLock = !RELATION_IS_LOCAL(rel);
+
+	if (needLock)
+		LockRelationForExtension(rel, ExclusiveLock);
+	npages = RelationGetNumberOfBlocks(rel);
+	if (needLock)
+		UnlockRelationForExtension(rel, ExclusiveLock);
+
+	/*
+	 * estimate memory limit
+	 * if map more than maintance_mem_work use old version of vacuum
+	 * */
+
+	memoryneeded = npages * (sizeof(GistBlockInfo));
+	if(memoryneeded > maintenance_work_mem * 1024) {
+		return gistbulkdeletelogical(info, stats, callback, callback_state);
+	}
+
+
+	infomap = hash_create("gistvacuum info map",
+										npages,
+										&hashCtl,
+									  HASH_ELEM | HASH_BLOBS | HASH_CONTEXT );
+
+	rescanstack = (GistBDSItem *) palloc(sizeof(GistBDSItem));
+
+	rescanstack->isParent = false;
+	rescanstack->blkno = GIST_ROOT_BLKNO;
+	rescanstack->next = NULL;
+	tail = rescanstack;
+
+	/*
+	 * this part of the vacuum use scan in physical order. Also this function fill hashmap `infomap`
+	 * that stores information about parent, rightlinks and etc. Pages is needed to rescan will be pushed to tail of rescanstack.
+	 * this function don't set flag gist_deleted.
+	 * */
+	gistphysicalvacuum(rel, info, stats, callback, callback_state, npages, infomap, rescanstack, tail);
+	/*
+	 * this part of the vacuum is not in physical order. It scans only pages from rescanstack.
+	 * we get page if this page is leaf we use usual procedure, but if pages is inner that we scan
+	 * it and delete links to childrens(but firstly recheck children and if all is ok).
+	 * if any pages is empty or new after processing set flag gist_delete , store prune_xid number
+	 * and etc. if all links from pages are deleted push parent of page to rescan stack to processing.
+	 * special case is when all tuples are deleted from index. in this case root block will be setted in leaf.
+	 * */
+	gistrescanvacuum(rel, info, stats, callback, callback_state, infomap, rescanstack, tail);
+
+	hash_destroy(infomap);
+	PG_RETURN_POINTER(stats);
+}
diff --git a/src/backend/access/gist/gistxlog.c b/src/backend/access/gist/gistxlog.c
index fbdbb3c..bd47550 100644
--- a/src/backend/access/gist/gistxlog.c
+++ b/src/backend/access/gist/gistxlog.c
@@ -58,6 +58,50 @@ gistRedoClearFollowRight(XLogReaderState *record, uint8 block_id)
 		UnlockReleaseBuffer(buffer);
 }
 
+static void
+gistRedoRightlinkChange(XLogReaderState *record) {
+	XLogRecPtr	lsn = record->EndRecPtr;
+	gistxlogPageRightlinkChange *xldata = (gistxlogPageRightlinkChange *) XLogRecGetData(record);
+	Buffer		buffer;
+	Page		page;
+	BlockNumber	newright;
+	GISTPageOpaque opaque;
+
+	if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
+	{
+		newright = xldata->newRightLink;
+		page = BufferGetPage(buffer);
+		opaque = GistPageGetOpaque(page);
+		opaque->rightlink = newright;
+		/*if(newright == InvalidBlockNumber) {
+			GistClearFollowRight(page);
+		}*/
+		PageSetLSN(page, lsn);
+		MarkBufferDirty(buffer);
+	}
+}
+
+static void
+gistRedoPageSetDeleted(XLogReaderState *record) {
+	XLogRecPtr	lsn = record->EndRecPtr;
+	gistxlogPageDelete *xldata = (gistxlogPageDelete *) XLogRecGetData(record);
+	Buffer		buffer;
+	Page		page;
+	PageHeader		header;
+
+	if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
+	{
+		page = (Page) BufferGetPage(buffer);
+		header = (PageHeader) page;
+
+		header->pd_prune_xid = xldata->id;
+		GistPageSetDeleted(page);
+
+		PageSetLSN(page, lsn);
+		MarkBufferDirty(buffer);
+
+	}
+}
 /*
  * redo any page update (except page split)
  */
@@ -75,23 +119,26 @@ gistRedoPageUpdateRecord(XLogReaderState *record)
 		char	   *data;
 		Size		datalen;
 		int			ninserted = 0;
+		//BlockNumber blkno;
 
 		data = begin = XLogRecGetBlockData(record, 0, &datalen);
 
 		page = (Page) BufferGetPage(buffer);
 
+		//blkno = BufferGetBlockNumber(buffer);
 		/* Delete old tuples */
 		if (xldata->ntodelete > 0)
 		{
 			int			i;
+			//OffsetNumber max = PageGetMaxOffsetNumber(page);
 			OffsetNumber *todelete = (OffsetNumber *) data;
 
 			data += sizeof(OffsetNumber) * xldata->ntodelete;
 
 			for (i = 0; i < xldata->ntodelete; i++)
 				PageIndexTupleDelete(page, todelete[i]);
-			if (GistPageIsLeaf(page))
-				GistMarkTuplesDeleted(page);
+
+			GistMarkTuplesDeleted(page);
 		}
 
 		/* add tuples */
@@ -301,6 +348,12 @@ gist_redo(XLogReaderState *record)
 		case XLOG_GIST_CREATE_INDEX:
 			gistRedoCreateIndex(record);
 			break;
+		case XLOG_GIST_PAGE_DELETE:
+			gistRedoPageSetDeleted(record);
+			break;
+		case XLOG_GIST_RIGHTLINK_CHANGE:
+			gistRedoRightlinkChange(record);
+			break;
 		default:
 			elog(PANIC, "gist_redo: unknown op code %u", info);
 	}
@@ -377,6 +430,40 @@ gistXLogSplit(RelFileNode node, BlockNumber blkno, bool page_is_leaf,
 	return recptr;
 }
 
+
+XLogRecPtr
+gistXLogSetDeleted(RelFileNode node, Buffer buffer, TransactionId xid) {
+	gistxlogPageDelete xlrec;
+	XLogRecPtr	recptr;
+
+	xlrec.id = xid;
+
+	XLogBeginInsert();
+	XLogRegisterData((char *) &xlrec, sizeof(gistxlogPageDelete));
+
+	XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+	/* new tuples */
+	recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_DELETE);
+	return recptr;
+}
+
+XLogRecPtr
+gistXLogRightLinkChange(RelFileNode node, Buffer buffer,
+					BlockNumber newRightLink) {
+	gistxlogPageRightlinkChange xlrec;
+	XLogRecPtr	recptr;
+
+	xlrec.newRightLink = newRightLink;
+
+	XLogBeginInsert();
+	XLogRegisterData((char *) &xlrec, sizeof(gistxlogPageRightlinkChange));
+
+	XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+	/* new tuples */
+	recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_RIGHTLINK_CHANGE);
+	return recptr;
+}
+
 /*
  * Write XLOG record describing a page update. The update can include any
  * number of deletions and/or insertions of tuples on a single index page.
@@ -388,6 +475,7 @@ gistXLogSplit(RelFileNode node, BlockNumber blkno, bool page_is_leaf,
  * to the target buffer; they need not be stored in XLOG if XLogInsert decides
  * to log the whole buffer contents instead.
  */
+
 XLogRecPtr
 gistXLogUpdate(RelFileNode node, Buffer buffer,
 			   OffsetNumber *todelete, int ntodelete,
diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h
index 4f1a5c3..61a2e50 100644
--- a/src/include/access/gist_private.h
+++ b/src/include/access/gist_private.h
@@ -50,6 +50,11 @@ typedef struct
 	char		tupledata[FLEXIBLE_ARRAY_MEMBER];
 } GISTNodeBufferPage;
 
+typedef struct
+{
+	BlockNumber childblkno;		/* hash key */
+	BlockNumber parentblkno;
+} ParentMapEntry;
 #define BUFFER_PAGE_DATA_OFFSET MAXALIGN(offsetof(GISTNodeBufferPage, tupledata))
 /* Returns free space in node buffer page */
 #define PAGE_FREE_SPACE(nbp) (nbp->freespace)
@@ -179,7 +184,8 @@ typedef GISTScanOpaqueData *GISTScanOpaque;
 #define XLOG_GIST_PAGE_SPLIT		0x30
  /* #define XLOG_GIST_INSERT_COMPLETE	 0x40 */	/* not used anymore */
 #define XLOG_GIST_CREATE_INDEX		0x50
- /* #define XLOG_GIST_PAGE_DELETE		 0x60 */	/* not used anymore */
+#define XLOG_GIST_PAGE_DELETE		 0x60
+#define XLOG_GIST_RIGHTLINK_CHANGE		 0x70
 
 /*
  * Backup Blk 0: updated page.
@@ -216,6 +222,18 @@ typedef struct gistxlogPageSplit
 	 */
 } gistxlogPageSplit;
 
+typedef struct gistxlogPageDelete
+{
+	TransactionId id;
+} gistxlogPageDelete;
+
+typedef struct gistxlogPageRightlinkChange
+{
+	BlockNumber newRightLink;
+
+} gistxlogPageRightlinkChange;
+
+
 typedef struct gistxlogPage
 {
 	BlockNumber blkno;
@@ -453,6 +471,12 @@ extern const char *gist_identify(uint8 info);
 extern void gist_xlog_startup(void);
 extern void gist_xlog_cleanup(void);
 
+extern XLogRecPtr gistXLogSetDeleted(RelFileNode node, Buffer buffer,
+					TransactionId xid);
+
+extern XLogRecPtr gistXLogRightLinkChange(RelFileNode node, Buffer buffer,
+					BlockNumber newRightLink) ;
+
 extern XLogRecPtr gistXLogUpdate(RelFileNode node, Buffer buffer,
 			   OffsetNumber *todelete, int ntodelete,
 			   IndexTuple *itup, int ntup,
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to