Fixed patch is attached.

08.09.2015 13:47, Teodor Sigaev:
BTW, I slightly modify your test to provide more stable results.

Thank you, I tried it, Results are nearly the same.

Without microvacuum
Time: 312,955 ms
Time: 264,597 ms
Time: 243,286 ms
Time: 243,679 ms

With microvacuum:
Time: 354,097 ms
Time: 82,206 ms
Time: 11,714 ms
Time: 11,277 ms


--
Anastasia Lubennikova
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c
index 0e49959..bb48782 100644
--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -36,6 +36,7 @@ static bool gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
 				 bool unlockbuf, bool unlockleftchild);
 static void gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack,
 				GISTSTATE *giststate, List *splitinfo, bool releasebuf);
+static void gistvacuumpage(Relation rel, Page page, Buffer buffer);
 
 
 #define ROTATEDIST(d) do { \
@@ -209,6 +210,17 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
 	 * because the tuple vector passed to gistSplit won't include this tuple.
 	 */
 	is_split = gistnospace(page, itup, ntup, oldoffnum, freespace);
+
+	/*
+	 * If leaf page is full, try at first to delete dead tuples. And then
+	 * check again.
+	 */
+	if (is_split && GistPageIsLeaf(page) && GistPageHasGarbage(page))
+	{
+		gistvacuumpage(rel, page, buffer);
+		is_split = gistnospace(page, itup, ntup, oldoffnum, freespace);
+	}
+
 	if (is_split)
 	{
 		/* no space for insertion */
@@ -1440,3 +1452,70 @@ freeGISTstate(GISTSTATE *giststate)
 	/* It's sufficient to delete the scanCxt */
 	MemoryContextDelete(giststate->scanCxt);
 }
+
+/*
+ * gistvacuumpage() -- try to remove LP_DEAD items from the given page.
+ */
+static void
+gistvacuumpage(Relation rel, Page page, Buffer buffer)
+{
+	OffsetNumber deletable[MaxOffsetNumber];
+	int			ndeletable = 0;
+	OffsetNumber offnum, maxoff;
+
+	/*
+	 * Scan over all items to see which ones need to be deleted according to
+	 * LP_DEAD flags.
+	 */
+	maxoff = PageGetMaxOffsetNumber(page);
+	for (offnum = FirstOffsetNumber;
+		 offnum <= maxoff;
+		 offnum = OffsetNumberNext(offnum))
+	{
+		ItemId		itemId = PageGetItemId(page, offnum);
+
+		if (ItemIdIsDead(itemId))
+			deletable[ndeletable++] = offnum;
+	}
+
+	if (ndeletable > 0)
+	{
+		START_CRIT_SECTION();
+
+		PageIndexMultiDelete(page, deletable, ndeletable);
+
+		/*
+		 * Mark the page as not containing any LP_DEAD items.  This is not
+		 * certainly true (there might be some that have recently been marked,
+		 * but weren't included in our target-item list), but it will almost
+		 * always be true and it doesn't seem worth an additional page scan to
+		 * check it. Remember that F_HAS_GARBAGE is only a hint anyway.
+		 */
+		GistClearPageHasGarbage(page);
+
+		MarkBufferDirty(buffer);
+
+		/* XLOG stuff */
+		if (RelationNeedsWAL(rel))
+		{
+			XLogRecPtr	recptr;
+
+			recptr = gistXLogUpdate(rel->rd_node, buffer,
+									deletable, ndeletable,
+									NULL, 0, InvalidBuffer);
+
+			PageSetLSN(page, recptr);
+		}
+		else
+			PageSetLSN(page, gistGetFakeLSN(rel));
+
+		END_CRIT_SECTION();
+	}
+
+	/*
+	 * Note: if we didn't find any LP_DEAD items, then the page's
+	 * F_HAS_GARBAGE hint bit is falsely set.  We do not bother expending a
+	 * separate write to clear it, however.  We will clear it when we split
+	 * the page.
+	 */
+}
diff --git a/src/backend/access/gist/gistget.c b/src/backend/access/gist/gistget.c
index 20f695c..8a8d121 100644
--- a/src/backend/access/gist/gistget.c
+++ b/src/backend/access/gist/gistget.c
@@ -24,6 +24,74 @@
 #include "utils/memutils.h"
 #include "utils/rel.h"
 
+/*
+ * gistkillitems() -- set LP_DEAD state for items an indexscan caller has
+ * told us were killed.
+ *
+ * We re-read page here, so it's important to check page LSN. If the page
+ * has been modified since the last read (as determined by LSN), we cannot
+ * flag any entries because it is possible that the old entry was vacuumed
+ * away and the TID was re-used by a completely different heap tuple.
+ */
+static void
+gistkillitems(IndexScanDesc scan)
+{
+	GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
+	Buffer		buffer;
+	Page		page;
+	OffsetNumber	offnum;
+	ItemId		iid;
+	int		i;
+	bool		killedsomething = false;
+
+	Assert(so->curBlkno != InvalidBlockNumber);
+	Assert(so->killedItems != NULL);
+
+	buffer = ReadBuffer(scan->indexRelation, so->curBlkno);
+	if (!BufferIsValid(buffer))
+		return;
+
+	LockBuffer(buffer, GIST_SHARE);
+	gistcheckpage(scan->indexRelation, buffer);
+	page = BufferGetPage(buffer);
+
+	/*
+	 * If page LSN differs it means that the page was modified since the last read.
+	 * killedItems could be not valid so LP_DEAD hints applying is not safe.
+	 */
+	if(PageGetLSN(page) != so->curPageLSN)
+	{
+		UnlockReleaseBuffer(buffer);
+		so->numKilled = 0; /* reset counter */
+		return;
+	}
+
+	/*
+	 * Mark all killedItems as dead. We need no additional recheck,
+	 * because, if page was modified, pageLSN must have changed.
+	 */
+	for (i = 0; i < so->numKilled; i++)
+	{
+		offnum = so->killedItems[i];
+		iid = PageGetItemId(page, offnum);
+		ItemIdMarkDead(iid);
+		killedsomething = true;
+	}
+
+	if (killedsomething)
+	{
+		GistMarkPageHasGarbage(page);
+		MarkBufferDirtyHint(buffer, true);
+	}
+
+	UnlockReleaseBuffer(buffer);
+
+	/*
+	 * Always reset the scan state, so we don't look for same items on other
+	 * pages.
+	 */
+	so->numKilled = 0;
+}
 
 /*
  * gistindex_keytest() -- does this index tuple satisfy the scan key(s)?
@@ -306,17 +374,33 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
 		MemoryContextReset(so->pageDataCxt);
 
 	/*
+	 * We save the LSN of the page as we read it, so that we know whether it
+	 * safe to apply LP_DEAD hints to the page later. This allows us to drop
+	 * the pin for MVCC scans, which allows vacuum to avoid blocking.
+	 */
+	so->curPageLSN = PageGetLSN(page);
+
+	/*
 	 * check all tuples on page
 	 */
 	maxoff = PageGetMaxOffsetNumber(page);
 	for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
 	{
-		IndexTuple	it = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
+		ItemId      iid = PageGetItemId(page, i);
+		IndexTuple	it;
 		bool		match;
 		bool		recheck;
 		bool		recheck_distances;
 
 		/*
+		 * If the scan specifies not to return killed tuples, then we treat a
+		 * killed tuple as not passing the qual.
+		 */
+		if(scan->ignore_killed_tuples && ItemIdIsDead(iid))
+			continue;
+
+		it = (IndexTuple) PageGetItem(page, iid);
+		/*
 		 * Must call gistindex_keytest in tempCxt, and clean up any leftover
 		 * junk afterward.
 		 */
@@ -331,7 +415,6 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
 		/* Ignore tuple if it doesn't match */
 		if (!match)
 			continue;
-
 		if (tbm && GistPageIsLeaf(page))
 		{
 			/*
@@ -348,6 +431,7 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
 			 */
 			so->pageData[so->nPageData].heapPtr = it->t_tid;
 			so->pageData[so->nPageData].recheck = recheck;
+			so->pageData[so->nPageData].offnum = i;
 
 			/*
 			 * In an index-only scan, also fetch the data from the tuple.
@@ -572,7 +656,24 @@ gistgettuple(PG_FUNCTION_ARGS)
 		{
 			if (so->curPageData < so->nPageData)
 			{
+				if (scan->kill_prior_tuple && so->curPageData > 0)
+				{
 
+					if (so->killedItems == NULL)
+					{
+						MemoryContext oldCxt =
+							MemoryContextSwitchTo(so->giststate->scanCxt);
+
+						so->killedItems =
+							(OffsetNumber *) palloc(MaxIndexTuplesPerPage
+								* sizeof(OffsetNumber));
+
+						MemoryContextSwitchTo(oldCxt);
+					}
+					if (so->numKilled < MaxIndexTuplesPerPage)
+						so->killedItems[so->numKilled++] =
+							so->pageData[so->curPageData - 1].offnum;
+				}
 				/* continuing to return tuples from a leaf page */
 				scan->xs_ctup.t_self = so->pageData[so->curPageData].heapPtr;
 				scan->xs_recheck = so->pageData[so->curPageData].recheck;
@@ -586,9 +687,36 @@ gistgettuple(PG_FUNCTION_ARGS)
 				PG_RETURN_BOOL(true);
 			}
 
+			/*
+			 * Check the last returned tuple and add it to killitems if
+			 * necessary
+			 */
+			if (scan->kill_prior_tuple
+				&& so->curPageData > 0
+				&& so->curPageData == so->nPageData)
+			{
+
+				if (so->killedItems == NULL)
+				{
+					MemoryContext oldCxt =
+						MemoryContextSwitchTo(so->giststate->scanCxt);
+
+					so->killedItems =
+						(OffsetNumber *) palloc(MaxIndexTuplesPerPage
+							* sizeof(OffsetNumber));
+
+					MemoryContextSwitchTo(oldCxt);
+				}
+				if (so->numKilled < MaxIndexTuplesPerPage)
+					so->killedItems[so->numKilled++] =
+						so->pageData[so->curPageData - 1].offnum;
+			}
 			/* find and process the next index page */
 			do
 			{
+				if ((so->curBlkno != InvalidBlockNumber) && (so->numKilled > 0))
+					gistkillitems(scan);
+
 				GISTSearchItem *item = getNextGISTSearchItem(so);
 
 				if (!item)
@@ -596,6 +724,9 @@ gistgettuple(PG_FUNCTION_ARGS)
 
 				CHECK_FOR_INTERRUPTS();
 
+				/* save current item BlockNumber for next gistkillitems() call */
+				so->curBlkno = item->blkno;
+
 				/*
 				 * While scanning a leaf page, ItemPointers of matching heap
 				 * tuples are stored in so->pageData.  If there are any on
diff --git a/src/backend/access/gist/gistscan.c b/src/backend/access/gist/gistscan.c
index ad39294..a17c5bc 100644
--- a/src/backend/access/gist/gistscan.c
+++ b/src/backend/access/gist/gistscan.c
@@ -93,6 +93,11 @@ gistbeginscan(PG_FUNCTION_ARGS)
 		memset(scan->xs_orderbynulls, true, sizeof(bool) * scan->numberOfOrderBys);
 	}
 
+	so->killedItems = NULL;		/* until needed */
+	so->numKilled = 0;
+	so->curBlkno = InvalidBlockNumber;
+	so->curPageLSN = InvalidXLogRecPtr;
+
 	scan->opaque = so;
 
 	/*
diff --git a/src/include/access/gist.h b/src/include/access/gist.h
index 81e559b..ea3a3b0 100644
--- a/src/include/access/gist.h
+++ b/src/include/access/gist.h
@@ -41,8 +41,11 @@
  */
 #define F_LEAF				(1 << 0)	/* leaf page */
 #define F_DELETED			(1 << 1)	/* the page has been deleted */
-#define F_TUPLES_DELETED	(1 << 2)	/* some tuples on the page are dead */
+#define F_TUPLES_DELETED	(1 << 2)	/* some tuples on the page were
+										 * deleted */
 #define F_FOLLOW_RIGHT		(1 << 3)	/* page to the right has no downlink */
+#define F_HAS_GARBAGE		(1 << 4)	/* some tuples on the page are dead,
+										 * but not deleted yet */
 
 typedef XLogRecPtr GistNSN;
 
@@ -137,6 +140,10 @@ typedef struct GISTENTRY
 #define GistMarkTuplesDeleted(page) ( GistPageGetOpaque(page)->flags |= F_TUPLES_DELETED)
 #define GistClearTuplesDeleted(page)	( GistPageGetOpaque(page)->flags &= ~F_TUPLES_DELETED)
 
+#define GistPageHasGarbage(page) ( GistPageGetOpaque(page)->flags & F_HAS_GARBAGE)
+#define GistMarkPageHasGarbage(page) ( GistPageGetOpaque(page)->flags |= F_HAS_GARBAGE)
+#define GistClearPageHasGarbage(page)	( GistPageGetOpaque(page)->flags &= ~F_HAS_GARBAGE)
+
 #define GistFollowRight(page) ( GistPageGetOpaque(page)->flags & F_FOLLOW_RIGHT)
 #define GistMarkFollowRight(page) ( GistPageGetOpaque(page)->flags |= F_FOLLOW_RIGHT)
 #define GistClearFollowRight(page)	( GistPageGetOpaque(page)->flags &= ~F_FOLLOW_RIGHT)
diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h
index 4f1a5c3..86e972e 100644
--- a/src/include/access/gist_private.h
+++ b/src/include/access/gist_private.h
@@ -22,6 +22,7 @@
 #include "storage/bufmgr.h"
 #include "storage/buffile.h"
 #include "utils/hsearch.h"
+#include "access/genam.h"
 
 /*
  * Maximum number of "halves" a page can be split into in one operation.
@@ -124,6 +125,7 @@ typedef struct GISTSearchHeapItem
 	bool		recheckDistances;		/* T if distances must be rechecked */
 	IndexTuple	ftup;			/* data fetched back from the index, used in
 								 * index-only scans */
+	OffsetNumber offnum;
 } GISTSearchHeapItem;
 
 /* Unvisited item, either index page or heap tuple */
@@ -161,6 +163,12 @@ typedef struct GISTScanOpaqueData
 	/* pre-allocated workspace arrays */
 	double	   *distances;		/* output area for gistindex_keytest */
 
+	/* info about killed items if any (killedItems is NULL if never used) */
+	OffsetNumber *killedItems;		/* offset numbers of killed items */
+	int		numKilled;		/* number of currently stored items */
+	BlockNumber curBlkno;		/* current number of block */
+	GistNSN		curPageLSN;	/* pos in the WAL stream when page was read */
+
 	/* In a non-ordered search, returnable heap items are stored here: */
 	GISTSearchHeapItem pageData[BLCKSZ / sizeof(IndexTupleData)];
 	OffsetNumber nPageData;		/* number of valid items in array */
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to