Alvaro Herrera wrote:

> So here's v16, rebased on top of 9bac66020.  As far as I am concerned,
> this is the last version before I start renaming everything to BRIN and
> then commit.

FWIW, in case you or others are interested, here's the diff between your
patch and v16.  Also, for illustrative purposes, the diff between your
version and mine of the code that got moved to mmpageops.c,
because it's difficult to see it from the partial patch.  (That partial
diff isn't meant to be applied to anything — it's only there to be read
directly.)

-- 
Álvaro Herrera                http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
*** a/contrib/pageinspect/mmfuncs.c
--- b/contrib/pageinspect/mmfuncs.c
***************
*** 29,35 ****
  PG_FUNCTION_INFO_V1(minmax_page_type);
  PG_FUNCTION_INFO_V1(minmax_page_items);
  PG_FUNCTION_INFO_V1(minmax_metapage_info);
- PG_FUNCTION_INFO_V1(minmax_revmap_array_data);
  PG_FUNCTION_INFO_V1(minmax_revmap_data);
  
  typedef struct mm_column_state
--- 29,34 ----
***************
*** 388,394 **** minmax_revmap_data(PG_FUNCTION_ARGS)
  	values[0] = Int64GetDatum((uint64) 0);
  
  	/* Extract (possibly empty) list of TIDs in this page. */
! 	for (i = 0; i < REGULAR_REVMAP_PAGE_MAXITEMS; i++)
  	{
  		ItemPointer	tid;
  
--- 387,393 ----
  	values[0] = Int64GetDatum((uint64) 0);
  
  	/* Extract (possibly empty) list of TIDs in this page. */
! 	for (i = 0; i < REVMAP_PAGE_MAXITEMS; i++)
  	{
  		ItemPointer	tid;
  
*** a/src/backend/access/minmax/Makefile
--- b/src/backend/access/minmax/Makefile
***************
*** 12,17 **** subdir = src/backend/access/minmax
  top_builddir = ../../../..
  include $(top_builddir)/src/Makefile.global
  
! OBJS = minmax.o mmrevmap.o mmtuple.o mmxlog.o mmsortable.o
  
  include $(top_srcdir)/src/backend/common.mk
--- 12,17 ----
  top_builddir = ../../../..
  include $(top_builddir)/src/Makefile.global
  
! OBJS = minmax.o mmpageops.o mmrevmap.o mmtuple.o mmxlog.o mmsortable.o
  
  include $(top_srcdir)/src/backend/common.mk
*** a/src/backend/access/minmax/minmax.c
--- b/src/backend/access/minmax/minmax.c
***************
*** 15,45 ****
   */
  #include "postgres.h"
  
- #include "access/htup_details.h"
  #include "access/minmax.h"
  #include "access/minmax_internal.h"
  #include "access/minmax_page.h"
! #include "access/minmax_revmap.h"
! #include "access/minmax_tuple.h"
  #include "access/minmax_xlog.h"
  #include "access/reloptions.h"
  #include "access/relscan.h"
- #include "access/xlogutils.h"
  #include "catalog/index.h"
- #include "catalog/pg_operator.h"
- #include "commands/vacuum.h"
  #include "miscadmin.h"
  #include "pgstat.h"
  #include "storage/bufmgr.h"
  #include "storage/freespace.h"
- #include "storage/indexfsm.h"
- #include "storage/lmgr.h"
- #include "storage/smgr.h"
- #include "utils/datum.h"
- #include "utils/lsyscache.h"
- #include "utils/memutils.h"
  #include "utils/rel.h"
- #include "utils/syscache.h"
  
  
  /*
--- 15,33 ----
   */
  #include "postgres.h"
  
  #include "access/minmax.h"
  #include "access/minmax_internal.h"
  #include "access/minmax_page.h"
! #include "access/minmax_pageops.h"
  #include "access/minmax_xlog.h"
  #include "access/reloptions.h"
  #include "access/relscan.h"
  #include "catalog/index.h"
  #include "miscadmin.h"
  #include "pgstat.h"
  #include "storage/bufmgr.h"
  #include "storage/freespace.h"
  #include "utils/rel.h"
  
  
  /*
***************
*** 75,93 **** static MMBuildState *initialize_mm_buildstate(Relation idxRel,
  static bool terminate_mm_buildstate(MMBuildState *state);
  static void summarize_range(MMBuildState *mmstate, Relation heapRel,
  				BlockNumber heapBlk);
- static bool mm_doupdate(Relation idxrel, BlockNumber pagesPerRange,
- 			mmRevmapAccess *rmAccess, BlockNumber heapBlk,
- 			Buffer oldbuf, OffsetNumber oldoff,
- 			const MMTuple *origtup, Size origsz,
- 			const MMTuple *newtup, Size newsz,
- 			bool samepage, bool *extended);
- static void mm_doinsert(Relation idxrel, BlockNumber pagesPerRange,
- 			mmRevmapAccess *rmAccess, Buffer *buffer, BlockNumber heapblkno,
- 			MMTuple *tup, Size itemsz, bool *extended);
- static Buffer mm_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
- 				   bool *extended);
  static void form_and_insert_tuple(MMBuildState *mmstate);
- static Size mm_page_get_freespace(Page page);
  
  
  /*
--- 63,69 ----
***************
*** 123,128 **** mminsert(PG_FUNCTION_ARGS)
--- 99,105 ----
  	rmAccess = mmRevmapAccessInit(idxRel, &pagesPerRange);
  
  restart:
+ 	CHECK_FOR_INTERRUPTS();
  	heapBlk = ItemPointerGetBlockNumber(heaptid);
  	/* normalize the block number to be the first block in the range */
  	heapBlk = (heapBlk / pagesPerRange) * pagesPerRange;
***************
*** 155,161 **** restart:
  
  		addValue = index_getprocinfo(idxRel, keyno + 1,
  									 MINMAX_PROCNUM_ADDVALUE);
- 
  		result = FunctionCall5Coll(addValue,
  								   idxRel->rd_indcollation[keyno],
  								   PointerGetDatum(mmdesc),
--- 132,137 ----
***************
*** 197,203 **** restart:
  		/*
  		 * Try to update the tuple.  If this doesn't work for whatever reason,
  		 * we need to restart from the top; the revmap might be pointing at a
! 		 * different tuple for this block now.
  		 */
  		if (!mm_doupdate(idxRel, pagesPerRange, rmAccess, heapBlk, buf, off,
  						 origtup, origsz, newtup, newsz, samepage, &extended))
--- 173,182 ----
  		/*
  		 * Try to update the tuple.  If this doesn't work for whatever reason,
  		 * we need to restart from the top; the revmap might be pointing at a
! 		 * different tuple for this block now, so we need to recompute
! 		 * to ensure both our new heap tuple and the other inserter's are
! 		 * covered by the combined tuple.  It might be that we don't need to
! 		 * update at all.
  		 */
  		if (!mm_doupdate(idxRel, pagesPerRange, rmAccess, heapBlk, buf, off,
  						 origtup, origsz, newtup, newsz, samepage, &extended))
***************
*** 212,218 **** restart:
  	minmax_free_mmdesc(mmdesc);
  
  	if (extended)
! 		IndexFreeSpaceMapVacuum(idxRel);
  
  	return BoolGetDatum(false);
  }
--- 191,197 ----
  	minmax_free_mmdesc(mmdesc);
  
  	if (extended)
! 		FreeSpaceMapVacuum(idxRel);
  
  	return BoolGetDatum(false);
  }
***************
*** 313,318 **** mmgetbitmap(PG_FUNCTION_ARGS)
--- 292,299 ----
  		OffsetNumber off;
  		MMTuple	   *tup;
  
+ 		CHECK_FOR_INTERRUPTS();
+ 
  		tup = mmGetMMTupleForHeapBlock(opaque->rmAccess, heapBlk, &buf, &off,
  									   BUFFER_LOCK_SHARE);
  		/*
***************
*** 488,494 **** mmbuildCallback(Relation index,
  
  		/* re-initialize state for it */
  		minmax_dtuple_initialize(mmstate->dtuple, mmstate->mmDesc);
- 		mmstate->seentup = false;
  	}
  
  	/* Accumulate the current tuple into the running state */
--- 469,474 ----
***************
*** 603,609 **** mmbuild(PG_FUNCTION_ARGS)
  	idxtuples = mmstate->numtuples;
  	mmRevmapAccessTerminate(mmstate->rmAccess);
  	if (terminate_mm_buildstate(mmstate))
! 		IndexFreeSpaceMapVacuum(index);
  
  	/*
  	 * Return statistics
--- 583,589 ----
  	idxtuples = mmstate->numtuples;
  	mmRevmapAccessTerminate(mmstate->rmAccess);
  	if (terminate_mm_buildstate(mmstate))
! 		FreeSpaceMapVacuum(index);
  
  	/*
  	 * Return statistics
***************
*** 684,689 **** mmvacuumcleanup(PG_FUNCTION_ARGS)
--- 664,671 ----
  		MMTuple	   *tup;
  		OffsetNumber off;
  
+ 		CHECK_FOR_INTERRUPTS();
+ 
  		tup = mmGetMMTupleForHeapBlock(rmAccess, heapBlk, &buf, &off,
  									   BUFFER_LOCK_SHARE);
  		if (tup == NULL)
***************
*** 704,710 **** mmvacuumcleanup(PG_FUNCTION_ARGS)
  	/* free resources */
  	mmRevmapAccessTerminate(rmAccess);
  	if (mmstate && terminate_mm_buildstate(mmstate))
! 		IndexFreeSpaceMapVacuum(info->index);
  
  	heap_close(heapRel, AccessShareLock);
  
--- 686,692 ----
  	/* free resources */
  	mmRevmapAccessTerminate(rmAccess);
  	if (mmstate && terminate_mm_buildstate(mmstate))
! 		FreeSpaceMapVacuum(info->index);
  
  	heap_close(heapRel, AccessShareLock);
  
***************
*** 759,783 **** mm_page_init(Page page, uint16 type)
  	special->type = type;
  }
  
- /*
-  * Return the amount of free space on a regular minmax index page.
-  *
-  * If the page is not a regular page, or has been marked with the
-  * MINMAX_EVACUATE_PAGE flag, returns 0.
-  */
- static Size
- mm_page_get_freespace(Page page)
- {
- 	MinmaxSpecialSpace *special;
- 
- 	special = (MinmaxSpecialSpace *) PageGetSpecialPointer(page);
- 	if (!MINMAX_IS_REGULAR_PAGE(page) ||
- 		(special->flags & MINMAX_EVACUATE_PAGE) != 0)
- 		return 0;
- 	else
- 		return PageGetFreeSpace(page);
- 
- }
  
  /*
   * Initialize a new minmax index' metapage.
--- 741,746 ----
***************
*** 792,799 **** mm_metapage_init(Page page, BlockNumber pagesPerRange, uint16 version)
  	metadata = (MinmaxMetaPageData *) PageGetContents(page);
  
  	metadata->minmaxMagic = MINMAX_META_MAGIC;
- 	metadata->pagesPerRange = pagesPerRange;
  	metadata->minmaxVersion = version;
  	metadata->lastRevmapPage = 0;
  }
  
--- 755,768 ----
  	metadata = (MinmaxMetaPageData *) PageGetContents(page);
  
  	metadata->minmaxMagic = MINMAX_META_MAGIC;
  	metadata->minmaxVersion = version;
+ 	metadata->pagesPerRange = pagesPerRange;
+ 
+ 	/*
+ 	 * Note we cheat here a little.  0 is not a valid revmap block number
+ 	 * (because it's the metapage buffer), but doing this enables the first
+ 	 * revmap page to be created when the index is.
+ 	 */
  	metadata->lastRevmapPage = 0;
  }
  
***************
*** 876,886 **** initialize_mm_buildstate(Relation idxRel, mmRevmapAccess *rmAccess,
  	mmstate->currRangeStart = 0;
  	mmstate->rmAccess = rmAccess;
  	mmstate->mmDesc = minmax_build_mmdesc(idxRel);
! 	mmstate->dtuple = minmax_new_dtuple(mmstate->mmDesc);
  	mmstate->extended = false;
  
  	minmax_dtuple_initialize(mmstate->dtuple, mmstate->mmDesc);
- 	mmstate->seentup = false;
  
  	return mmstate;
  }
--- 845,855 ----
  	mmstate->currRangeStart = 0;
  	mmstate->rmAccess = rmAccess;
  	mmstate->mmDesc = minmax_build_mmdesc(idxRel);
! 	mmstate->seentup = false;
  	mmstate->extended = false;
+ 	mmstate->dtuple = minmax_new_dtuple(mmstate->mmDesc);
  
  	minmax_dtuple_initialize(mmstate->dtuple, mmstate->mmDesc);
  
  	return mmstate;
  }
***************
*** 902,908 **** terminate_mm_buildstate(MMBuildState *mmstate)
  		page = BufferGetPage(mmstate->currentInsertBuf);
  		RecordPageWithFreeSpace(mmstate->irel,
  								BufferGetBlockNumber(mmstate->currentInsertBuf),
! 								mm_page_get_freespace(page));
  		ReleaseBuffer(mmstate->currentInsertBuf);
  	}
  	vacuumfsm = mmstate->extended;
--- 871,877 ----
  		page = BufferGetPage(mmstate->currentInsertBuf);
  		RecordPageWithFreeSpace(mmstate->irel,
  								BufferGetBlockNumber(mmstate->currentInsertBuf),
! 								PageGetFreeSpace(page));
  		ReleaseBuffer(mmstate->currentInsertBuf);
  	}
  	vacuumfsm = mmstate->extended;
***************
*** 945,1525 **** summarize_range(MMBuildState *mmstate, Relation heapRel, BlockNumber heapBlk)
  
  	/* and re-initialize state for the next range */
  	minmax_dtuple_initialize(mmstate->dtuple, mmstate->mmDesc);
- 	mmstate->seentup = false;
- }
- 
- /*
-  * Update tuple origtup (size origsz), located in offset oldoff of buffer
-  * oldbuf, to newtup (size newsz) as summary tuple for the page range starting
-  * at heapBlk.  If samepage is true, then attempt to put the new tuple in the same
-  * page, otherwise get a new one.
-  *
-  * If the update is done, return true; the revmap is updated to point to the
-  * new tuple.  If the update is not done for whatever reason, return false.
-  * Caller may retry the update if this happens.
-  *
-  * If the index had to be extended in the course of this operation, *extended
-  * is set to true.
-  */
- static bool
- mm_doupdate(Relation idxrel, BlockNumber pagesPerRange,
- 			mmRevmapAccess *rmAccess, BlockNumber heapBlk,
- 			Buffer oldbuf, OffsetNumber oldoff,
- 			const MMTuple *origtup, Size origsz,
- 			const MMTuple *newtup, Size newsz,
- 			bool samepage, bool *extended)
- {
- 	Page		oldpage;
- 	ItemId		origlp;
- 	MMTuple	   *oldtup;
- 	Size		oldsz;
- 	Buffer		newbuf;
- 	MinmaxSpecialSpace *special;
- 
- 	if (!samepage)
- 	{
- 		/* need a page on which to put the item */
- 		newbuf = mm_getinsertbuffer(idxrel, oldbuf, newsz, extended);
- 		if (!BufferIsValid(newbuf))
- 			return false;
- 
- 		/*
- 		 * Note: it's possible (though unlikely) that the returned newbuf is
- 		 * the same as oldbuf, if mm_getinsertbuffer determined that the old
- 		 * buffer does in fact have enough space.
- 		 */
- 		if (newbuf == oldbuf)
- 			newbuf = InvalidBuffer;
- 	}
- 	else
- 	{
- 		LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);
- 		newbuf = InvalidBuffer;
- 	}
- 	oldpage = BufferGetPage(oldbuf);
- 	origlp = PageGetItemId(oldpage, oldoff);
- 
- 	/* Check that the old tuple wasn't updated concurrently */
- 	if (!ItemIdIsNormal(origlp))
- 	{
- 		LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
- 		return false;
- 	}
- 
- 	oldsz = ItemIdGetLength(origlp);
- 	oldtup = (MMTuple *) PageGetItem(oldpage, origlp);
- 
- 	/* If both tuples are in fact equal, there is nothing to do */
- 	if (!minmax_tuples_equal(oldtup, oldsz, origtup, origsz))
- 	{
- 		LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
- 		return false;
- 	}
- 
- 	special = (MinmaxSpecialSpace *) PageGetSpecialPointer(oldpage);
- 
- 	/*
- 	 * Great, the old tuple is intact.  We can proceed with the update.
- 	 *
- 	 * If there's enough room on the old page for the new tuple, replace it.
- 	 *
- 	 * Note that there might now be enough space on the page even though
- 	 * the caller told us there isn't, if a concurrent updated moved a tuple
- 	 * elsewhere or replaced a tuple with a smaller one.
- 	 */
- 	if ((special->flags & MINMAX_EVACUATE_PAGE) == 0 &&
- 		(newsz <= origsz || PageGetExactFreeSpace(oldpage) >= (origsz - newsz)))
- 	{
- 		if (BufferIsValid(newbuf))
- 			UnlockReleaseBuffer(newbuf);
- 
- 		START_CRIT_SECTION();
- 		PageIndexDeleteNoCompact(oldpage, &oldoff, 1);
- 		if (PageAddItem(oldpage, (Item) newtup, newsz, oldoff, true, false) == InvalidOffsetNumber)
- 			elog(ERROR, "failed to add mmtuple");
- 		MarkBufferDirty(oldbuf);
- 
- 		/* XLOG stuff */
- 		if (RelationNeedsWAL(idxrel))
- 		{
- 			BlockNumber blk = BufferGetBlockNumber(oldbuf);
- 			xl_minmax_samepage_update xlrec;
- 			XLogRecPtr	recptr;
- 			XLogRecData	rdata[2];
- 			uint8		info = XLOG_MINMAX_SAMEPAGE_UPDATE;
- 
- 			xlrec.node = idxrel->rd_node;
- 			ItemPointerSetBlockNumber(&xlrec.tid, blk);
- 			ItemPointerSetOffsetNumber(&xlrec.tid, oldoff);
- 			rdata[0].data = (char *) &xlrec;
- 			rdata[0].len = SizeOfMinmaxSamepageUpdate;
- 			rdata[0].buffer = InvalidBuffer;
- 			rdata[0].next = &(rdata[1]);
- 
- 			rdata[1].data = (char *) newtup;
- 			rdata[1].len = newsz;
- 			rdata[1].buffer = oldbuf;
- 			rdata[1].buffer_std = true;
- 			rdata[1].next = NULL;
- 
- 			recptr = XLogInsert(RM_MINMAX_ID, info, rdata);
- 
- 			PageSetLSN(oldpage, recptr);
- 		}
- 
- 		END_CRIT_SECTION();
- 
- 		LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
- 		return true;
- 	}
- 	else if (newbuf == InvalidBuffer)
- 	{
- 		/*
- 		 * Not enough space, but caller said that there was. Tell them to
- 		 * start over
- 		 */
- 		LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
- 		return false;
- 	}
- 	else
- 	{
- 		/*
- 		 * Not enough free space on the oldpage. Put the new tuple on the
- 		 * new page, and update the revmap.
- 		 */
- 		Page		newpage = BufferGetPage(newbuf);
- 		Buffer		revmapbuf;
- 		ItemPointerData newtid;
- 		OffsetNumber newoff;
- 
- 		revmapbuf = mmLockRevmapPageForUpdate(rmAccess, heapBlk);
- 
- 		START_CRIT_SECTION();
- 
- 		PageIndexDeleteNoCompact(oldpage, &oldoff, 1);
- 		newoff = PageAddItem(newpage, (Item) newtup, newsz, InvalidOffsetNumber, false, false);
- 		if (newoff == InvalidOffsetNumber)
- 			elog(ERROR, "failed to add mmtuple to new page");
- 		MarkBufferDirty(oldbuf);
- 		MarkBufferDirty(newbuf);
- 
- 		ItemPointerSet(&newtid, BufferGetBlockNumber(newbuf), newoff);
- 		mmSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, newtid);
- 		MarkBufferDirty(revmapbuf);
- 
- 		/* XLOG stuff */
- 		if (RelationNeedsWAL(idxrel))
- 		{
- 			xl_minmax_update	xlrec;
- 			XLogRecPtr	recptr;
- 			XLogRecData	rdata[4];
- 			uint8		info = XLOG_MINMAX_UPDATE;
- 
- 			xlrec.new.node = idxrel->rd_node;
- 			ItemPointerSet(&xlrec.new.tid, BufferGetBlockNumber(newbuf), newoff);
- 			xlrec.new.heapBlk = heapBlk;
- 			xlrec.new.revmapBlk = BufferGetBlockNumber(revmapbuf);
- 			xlrec.new.pagesPerRange = pagesPerRange;
- 			ItemPointerSet(&xlrec.oldtid, BufferGetBlockNumber(oldbuf), oldoff);
- 
- 			rdata[0].data = (char *) &xlrec;
- 			rdata[0].len = SizeOfMinmaxUpdate;
- 			rdata[0].buffer = InvalidBuffer;
- 			rdata[0].next = &(rdata[1]);
- 
- 			rdata[1].data = (char *) newtup;
- 			rdata[1].len = newsz;
- 			rdata[1].buffer = newbuf;
- 			rdata[1].buffer_std = true;
- 			rdata[1].next = &(rdata[2]);
- 
- 			rdata[2].data = (char *) NULL;
- 			rdata[2].len = 0;
- 			rdata[2].buffer = revmapbuf;
- 			rdata[2].buffer_std = true;
- 			rdata[2].next = &(rdata[3]);
- 
- 			rdata[3].data = (char *) NULL;
- 			rdata[3].len = 0;
- 			rdata[3].buffer = oldbuf;
- 			rdata[3].buffer_std = true;
- 			rdata[3].next = NULL;
- 
- 			recptr = XLogInsert(RM_MINMAX_ID, info, rdata);
- 
- 			PageSetLSN(oldpage, recptr);
- 			PageSetLSN(newpage, recptr);
- 			PageSetLSN(BufferGetPage(revmapbuf), recptr);
- 		}
- 
- 		END_CRIT_SECTION();
- 
- 		LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK);
- 		LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
- 		UnlockReleaseBuffer(newbuf);
- 		return true;
- 	}
- }
- 
- /*
-  * Insert an index tuple into the index relation.  The revmap is updated to
-  * mark the range containing the given page as pointing to the inserted entry.
-  * A WAL record is written.
-  *
-  * The buffer, if valid, is first checked for free space to insert the new
-  * entry; if there isn't enough, a new buffer is obtained and pinned.
-  *
-  * If the relation had to be extended to make room for the new index tuple,
-  * *extended is set to true.
-  */
- static void
- mm_doinsert(Relation idxrel, BlockNumber pagesPerRange,
- 			mmRevmapAccess *rmAccess, Buffer *buffer,
- 			BlockNumber heapBlk, MMTuple *tup, Size itemsz, bool *extended)
- {
- 	Page		page;
- 	BlockNumber blk;
- 	OffsetNumber off;
- 	Buffer		revmapbuf;
- 	ItemPointerData tid;
- 
- 	itemsz = MAXALIGN(itemsz);
- 
- 	/*
- 	 * Lock the revmap page for the update. Note that this may require
- 	 * extending the revmap, which in turn may require moving the currently
- 	 * pinned index block out of the way.
- 	 */
- 	revmapbuf = mmLockRevmapPageForUpdate(rmAccess, heapBlk);
- 
- 	/*
- 	 * Obtain a locked buffer to insert the new tuple.  Note mm_getinsertbuffer
- 	 * ensures there's enough space in the returned buffer.
- 	 */
- 	if (BufferIsValid(*buffer))
- 	{
- 		page = BufferGetPage(*buffer);
- 		LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
- 
- 		/*
- 		 * It's possible that another backend (or ourselves!) extended the
- 		 * revmap over the page we held a pin on, so we cannot assume that
- 		 * it's still a regular page.
- 		 */
- 		if (mm_page_get_freespace(page) < itemsz)
- 		{
- 			UnlockReleaseBuffer(*buffer);
- 			*buffer = InvalidBuffer;
- 		}
- 	}
- 	if (!BufferIsValid(*buffer))
- 	{
- 		*buffer = mm_getinsertbuffer(idxrel, InvalidBuffer, itemsz, extended);
- 		Assert(BufferIsValid(*buffer));
- 		page = BufferGetPage(*buffer);
- 		Assert(mm_page_get_freespace(page) >= itemsz);
- 	}
- 
- 	page = BufferGetPage(*buffer);
- 	blk = BufferGetBlockNumber(*buffer);
- 
- 	START_CRIT_SECTION();
- 	off = PageAddItem(page, (Item) tup, itemsz, InvalidOffsetNumber,
- 					  false, false);
- 	if (off == InvalidOffsetNumber)
- 		elog(ERROR, "could not insert new index tuple to page");
- 	MarkBufferDirty(*buffer);
- 
- 	MINMAX_elog(DEBUG2, "inserted tuple (%u,%u) for range starting at %u",
- 				blk, off, heapBlk);
- 
- 	ItemPointerSet(&tid, blk, off);
- 	mmSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, tid);
- 	MarkBufferDirty(revmapbuf);
- 
- 	/* XLOG stuff */
- 	if (RelationNeedsWAL(idxrel))
- 	{
- 		xl_minmax_insert	xlrec;
- 		XLogRecPtr	recptr;
- 		XLogRecData	rdata[2];
- 		uint8		info = XLOG_MINMAX_INSERT;
- 
- 		xlrec.node = idxrel->rd_node;
- 		xlrec.heapBlk = heapBlk;
- 		xlrec.pagesPerRange = pagesPerRange;
- 		xlrec.revmapBlk = BufferGetBlockNumber(revmapbuf);
- 		ItemPointerSet(&xlrec.tid, blk, off);
- 
- 		rdata[0].data = (char *) &xlrec;
- 		rdata[0].len = SizeOfMinmaxInsert;
- 		rdata[0].buffer = InvalidBuffer;
- 		rdata[0].buffer_std = false;
- 		rdata[0].next = &(rdata[1]);
- 
- 		rdata[1].data = (char *) tup;
- 		rdata[1].len = itemsz;
- 		rdata[1].buffer = *buffer;
- 		rdata[1].buffer_std = true;
- 		rdata[1].next = NULL;
- 
- 		recptr = XLogInsert(RM_MINMAX_ID, info, rdata);
- 
- 		PageSetLSN(page, recptr);
- 		PageSetLSN(BufferGetPage(revmapbuf), recptr);
- 	}
- 
- 	END_CRIT_SECTION();
- 
- 	/* Tuple is firmly on buffer; we can release our locks */
- 	LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
- 	LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK);
- }
- 
- /*
-  * Checks if a regular minmax index page is empty.
-  *
-  * If it's not, it's marked for "evacuation", meaning that no new tuples will
-  * be added to it.
-  */
- bool
- mm_start_evacuating_page(Relation idxRel, Buffer buf)
- {
- 	OffsetNumber off;
- 	OffsetNumber maxoff;
- 	MinmaxSpecialSpace *special;
- 	Page		page;
- 
- 	page = BufferGetPage(buf);
- 
- 	if (PageIsNew(page))
- 		return false;
- 
- 	special = (MinmaxSpecialSpace *) PageGetSpecialPointer(page);
- 
- 	maxoff = PageGetMaxOffsetNumber(page);
- 	for (off = FirstOffsetNumber; off <= maxoff; off++)
- 	{
- 		ItemId		lp;
- 
- 		lp = PageGetItemId(page, off);
- 		if (ItemIdIsUsed(lp))
- 		{
- 			/* prevent other backends from adding more stuff to this page. */
- 			special->flags |= MINMAX_EVACUATE_PAGE;
- 			MarkBufferDirtyHint(buf, true);
- 
- 			return true;
- 		}
- 	}
- 	return false;
- }
- 
- /*
-  * Move all tuples out of a page.
-  *
-  * The caller must hold an exclusive lock on the page. The lock and pin are
-  * released.
-  */
- void
- mm_evacuate_page(Relation idxRel, Buffer buf)
- {
- 	OffsetNumber off;
- 	OffsetNumber maxoff;
- 	MinmaxSpecialSpace *special;
- 	Page		page;
- 	mmRevmapAccess *rmAccess;
- 	BlockNumber pagesPerRange;
- 
- 	rmAccess = mmRevmapAccessInit(idxRel, &pagesPerRange);
- 
- 	page = BufferGetPage(buf);
- 	special = (MinmaxSpecialSpace *) PageGetSpecialPointer(page);
- 
- 	Assert(special->flags & MINMAX_EVACUATE_PAGE);
- 
- 	maxoff = PageGetMaxOffsetNumber(page);
- 	for (off = FirstOffsetNumber; off <= maxoff; off++)
- 	{
- 		MMTuple	   *tup;
- 		Size		sz;
- 		ItemId		lp;
- 		bool		extended = false;
- 
- 		lp = PageGetItemId(page, off);
- 		if (ItemIdIsUsed(lp))
- 		{
- 			tup = (MMTuple *) PageGetItem(page, lp);
- 			sz = ItemIdGetLength(lp);
- 
- 			tup = minmax_copy_tuple(tup, sz);
- 
- 			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
- 
- 			if (!mm_doupdate(idxRel, pagesPerRange, rmAccess, tup->mt_blkno, buf,
- 							 off, tup, sz, tup, sz, false, &extended))
- 				off--; /* retry */
- 
- 			LockBuffer(buf, BUFFER_LOCK_SHARE);
- 
- 			if (extended)
- 				IndexFreeSpaceMapVacuum(idxRel);
- 
- 			/* It's possible that someone extended the revmap over this page */
- 			if (!MINMAX_IS_REGULAR_PAGE(page))
- 				break;
- 		}
- 	}
- 
- 	mmRevmapAccessTerminate(rmAccess);
- 
- 	UnlockReleaseBuffer(buf);
- }
- 
- /*
-  * Return a pinned and locked buffer which can be used to insert an index item
-  * of size itemsz.  If oldbuf is a valid buffer, it is also locked (in a order
-  * determined to avoid deadlocks.)
-  *
-  * If there's no existing page with enough free space to accomodate the new
-  * item, the relation is extended.  If this happens, *extended is set to true.
-  *
-  * If we find that the old page is no longer a regular index page (because
-  * of a revmap extension), the old buffer is unlocked and we return
-  * InvalidBuffer.
-  */
- static Buffer
- mm_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
- 				   bool *was_extended)
- {
- 	BlockNumber oldblk;
- 	BlockNumber newblk;
- 	Page		page;
- 	int			freespace;
- 	bool		extended = false;
- 
- 	if (BufferIsValid(oldbuf))
- 		oldblk = BufferGetBlockNumber(oldbuf);
- 	else
- 		oldblk = InvalidBlockNumber;
- 
- 	/*
- 	 * Loop until we find a page with sufficient free space.  By the time we
- 	 * return to caller out of this loop, both buffers are valid and locked;
- 	 * if we have to restart here, neither buffer is locked and buf is not
- 	 * a pinned buffer.
- 	 */
- 	newblk = RelationGetTargetBlock(irel);
- 	if (newblk == InvalidBlockNumber)
- 		newblk = GetPageWithFreeSpace(irel, itemsz);
- 	for (;;)
- 	{
- 		Buffer		buf;
- 		bool		extensionLockHeld = false;
- 
- 		if (newblk == InvalidBlockNumber)
- 		{
- 			/*
- 			 * There's not enough free space in any existing index page,
- 			 * according to the FSM: extend the relation to obtain a shiny
- 			 * new page.
- 			 */
- 			if (!RELATION_IS_LOCAL(irel))
- 			{
- 				LockRelationForExtension(irel, ExclusiveLock);
- 				extensionLockHeld = true;
- 			}
- 			buf = ReadBuffer(irel, P_NEW);
- 			extended = true;
- 
- 			MINMAX_elog(DEBUG2, "mm_getinsertbuffer: extending to page %u",
- 						BufferGetBlockNumber(buf));
- 		}
- 		else if (newblk == oldblk)
- 		{
- 			/*
- 			 * There's an odd corner-case here where the FSM is out-of-date,
- 			 * and gave us the old page.
- 			 */
- 			buf = oldbuf;
- 		}
- 		else
- 		{
- 			buf = ReadBuffer(irel, newblk);
- 		}
- 
- 		if (BufferIsValid(oldbuf) && oldblk < newblk)
- 		{
- 			LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);
- 			if (!MINMAX_IS_REGULAR_PAGE(BufferGetPage(oldbuf)))
- 			{
- 				LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
- 				ReleaseBuffer(buf);
- 				return InvalidBuffer;
- 			}
- 		}
- 
- 		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
- 
- 		if (extensionLockHeld)
- 			UnlockRelationForExtension(irel, ExclusiveLock);
- 
- 		page = BufferGetPage(buf);
- 
- 		if (extended)
- 			mm_page_init(page, MINMAX_PAGETYPE_REGULAR);
- 
- 		/*
- 		 * We have a new buffer from FSM now, and both pages are locked.
- 		 * Check that the new page has enough free space, and return it if it
- 		 * does; otherwise start over.  Note that we allow for the FSM to be
- 		 * out of date here, and in that case we update it and move on.
- 		 *
- 		 * (mm_page_get_freespace also checks that the FSM didn't hand us a
- 		 * page that has since been repurposed for the revmap.)
- 		 */
- 		freespace = mm_page_get_freespace(page);
- 		if (freespace >= itemsz)
- 		{
- 			if (extended)
- 				*was_extended = true;
- 			RelationSetTargetBlock(irel, BufferGetBlockNumber(buf));
- 
- 			/* Lock the old buffer if not locked already */
- 			if (BufferIsValid(oldbuf) && newblk < oldblk)
- 				LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);
- 
- 			return buf;
- 		}
- 
- 		/* This page is no good. */
- 
- 		/*
- 		 * If an entirely new page does not contain enough free space for
- 		 * the new item, then surely that item is oversized.  Complain
- 		 * loudly; but first make sure we record the page as free, for
- 		 * next time.
- 		 */
- 		if (extended)
- 		{
- 			RecordPageWithFreeSpace(irel, BufferGetBlockNumber(buf),
- 									freespace);
- 			ereport(ERROR,
- 					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
- 					 errmsg("index row size %lu exceeds maximum %lu for index \"%s\"",
- 							(unsigned long) itemsz,
- 							(unsigned long) freespace,
- 							RelationGetRelationName(irel))));
- 			return InvalidBuffer;	/* keep compiler quiet */
- 		}
- 
- 		if (newblk != oldblk)
- 			UnlockReleaseBuffer(buf);
- 		if (BufferIsValid(oldbuf) && oldblk < newblk)
- 			LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
- 
- 		newblk = RecordAndGetPageWithFreeSpace(irel, newblk, freespace, itemsz);
- 	}
  }
  
  /*
--- 914,919 ----
***************
*** 1543,1546 **** form_and_insert_tuple(MMBuildState *mmstate)
--- 937,942 ----
  				tup, size, &mmstate->extended);
  	mmstate->numtuples++;
  	pfree(tup);
+ 
+ 	mmstate->seentup = false;
  }
*** /dev/null
--- b/src/backend/access/minmax/mmpageops.c
***************
*** 0 ****
--- 1,638 ----
+ /*
+  * mmpageops.c
+  *		Page-handling routines for Minmax indexes
+  *
+  * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  * IDENTIFICATION
+  *	  src/backend/access/minmax/mmpageops.c
+  */
+ #include "postgres.h"
+ 
+ #include "access/minmax_pageops.h"
+ #include "access/minmax_page.h"
+ #include "access/minmax_revmap.h"
+ #include "access/minmax_xlog.h"
+ #include "miscadmin.h"
+ #include "storage/bufmgr.h"
+ #include "storage/freespace.h"
+ #include "storage/lmgr.h"
+ #include "storage/smgr.h"
+ #include "utils/rel.h"
+ 
+ 
+ static Buffer mm_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
+ 				   bool *was_extended);
+ static Size mm_page_get_freespace(Page page);
+ 
+ 
+ /*
+  * Update tuple origtup (size origsz), located in offset oldoff of buffer
+  * oldbuf, to newtup (size newsz) as summary tuple for the page range starting
+  * at heapBlk.  If samepage is true, then attempt to put the new tuple in the same
+  * page, otherwise use some other one.
+  *
+  * If the update is done, return true; the revmap is updated to point to the
+  * new tuple.  If the update is not done for whatever reason, return false.
+  * Caller may retry the update if this happens.
+  *
+  * If the index had to be extended in the course of this operation, *extended
+  * is set to true.
+  */
+ bool
+ mm_doupdate(Relation idxrel, BlockNumber pagesPerRange,
+ 			mmRevmapAccess *rmAccess, BlockNumber heapBlk,
+ 			Buffer oldbuf, OffsetNumber oldoff,
+ 			const MMTuple *origtup, Size origsz,
+ 			const MMTuple *newtup, Size newsz,
+ 			bool samepage, bool *extended)
+ {
+ 	Page		oldpage;
+ 	ItemId		origlp;
+ 	MMTuple	   *oldtup;
+ 	Size		oldsz;
+ 	Buffer		newbuf;
+ 	MinmaxSpecialSpace *special;
+ 
+ 	if (!samepage)
+ 	{
+ 		/* need a page on which to put the item */
+ 		newbuf = mm_getinsertbuffer(idxrel, oldbuf, newsz, extended);
+ 		if (!BufferIsValid(newbuf))
+ 			return false;
+ 
+ 		/*
+ 		 * Note: it's possible (though unlikely) that the returned newbuf is
+ 		 * the same as oldbuf, if mm_getinsertbuffer determined that the old
+ 		 * buffer does in fact have enough space.
+ 		 */
+ 		if (newbuf == oldbuf)
+ 			newbuf = InvalidBuffer;
+ 	}
+ 	else
+ 	{
+ 		LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);
+ 		newbuf = InvalidBuffer;
+ 	}
+ 	oldpage = BufferGetPage(oldbuf);
+ 	origlp = PageGetItemId(oldpage, oldoff);
+ 
+ 	/* Check that the old tuple wasn't updated concurrently */
+ 	if (!ItemIdIsNormal(origlp))
+ 	{
+ 		LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
+ 		return false;
+ 	}
+ 
+ 	oldsz = ItemIdGetLength(origlp);
+ 	oldtup = (MMTuple *) PageGetItem(oldpage, origlp);
+ 
+ 	/*
+ 	 * If both tuples are identical, there is nothing to do; except that if we
+ 	 * were requested to move the tuple across pages, we do it even if they are
+ 	 * equal.
+ 	 */
+ 	if (samepage && minmax_tuples_equal(oldtup, oldsz, origtup, origsz))
+ 	{
+ 		LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
+ 		return false;
+ 	}
+ 
+ 	special = (MinmaxSpecialSpace *) PageGetSpecialPointer(oldpage);
+ 
+ 	/*
+ 	 * Great, the old tuple is intact.  We can proceed with the update.
+ 	 *
+ 	 * If there's enough room on the old page for the new tuple, replace it.
+ 	 *
+ 	 * Note that there might now be enough space on the page even though
+ 	 * the caller told us there isn't, if a concurrent updated moved a tuple
+ 	 * elsewhere or replaced a tuple with a smaller one.
+ 	 */
+ 	if ((special->flags & MINMAX_EVACUATE_PAGE) == 0 &&
+ 		(newsz <= origsz || PageGetExactFreeSpace(oldpage) >= (origsz - newsz)))
+ 	{
+ 		if (BufferIsValid(newbuf))
+ 			UnlockReleaseBuffer(newbuf);
+ 
+ 		START_CRIT_SECTION();
+ 		PageIndexDeleteNoCompact(oldpage, &oldoff, 1);
+ 		if (PageAddItem(oldpage, (Item) newtup, newsz, oldoff, true, false) == InvalidOffsetNumber)
+ 			elog(ERROR, "failed to add mmtuple");
+ 		MarkBufferDirty(oldbuf);
+ 
+ 		/* XLOG stuff */
+ 		if (RelationNeedsWAL(idxrel))
+ 		{
+ 			BlockNumber blk = BufferGetBlockNumber(oldbuf);
+ 			xl_minmax_samepage_update xlrec;
+ 			XLogRecPtr	recptr;
+ 			XLogRecData	rdata[2];
+ 			uint8		info = XLOG_MINMAX_SAMEPAGE_UPDATE;
+ 
+ 			xlrec.node = idxrel->rd_node;
+ 			ItemPointerSetBlockNumber(&xlrec.tid, blk);
+ 			ItemPointerSetOffsetNumber(&xlrec.tid, oldoff);
+ 			rdata[0].data = (char *) &xlrec;
+ 			rdata[0].len = SizeOfMinmaxSamepageUpdate;
+ 			rdata[0].buffer = InvalidBuffer;
+ 			rdata[0].next = &(rdata[1]);
+ 
+ 			rdata[1].data = (char *) newtup;
+ 			rdata[1].len = newsz;
+ 			rdata[1].buffer = oldbuf;
+ 			rdata[1].buffer_std = true;
+ 			rdata[1].next = NULL;
+ 
+ 			recptr = XLogInsert(RM_MINMAX_ID, info, rdata);
+ 
+ 			PageSetLSN(oldpage, recptr);
+ 		}
+ 
+ 		END_CRIT_SECTION();
+ 
+ 		LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
+ 		return true;
+ 	}
+ 	else if (newbuf == InvalidBuffer)
+ 	{
+ 		/*
+ 		 * Not enough space, but caller said that there was. Tell them to
+ 		 * start over.
+ 		 */
+ 		LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
+ 		return false;
+ 	}
+ 	else
+ 	{
+ 		/*
+ 		 * Not enough free space on the oldpage. Put the new tuple on the
+ 		 * new page, and update the revmap.
+ 		 */
+ 		Page		newpage = BufferGetPage(newbuf);
+ 		Buffer		revmapbuf;
+ 		ItemPointerData newtid;
+ 		OffsetNumber newoff;
+ 
+ 		revmapbuf = mmLockRevmapPageForUpdate(rmAccess, heapBlk);
+ 
+ 		START_CRIT_SECTION();
+ 
+ 		PageIndexDeleteNoCompact(oldpage, &oldoff, 1);
+ 		newoff = PageAddItem(newpage, (Item) newtup, newsz, InvalidOffsetNumber, false, false);
+ 		if (newoff == InvalidOffsetNumber)
+ 			elog(ERROR, "failed to add mmtuple to new page");
+ 		MarkBufferDirty(oldbuf);
+ 		MarkBufferDirty(newbuf);
+ 
+ 		ItemPointerSet(&newtid, BufferGetBlockNumber(newbuf), newoff);
+ 		mmSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, newtid);
+ 		MarkBufferDirty(revmapbuf);
+ 
+ 		/* XLOG stuff */
+ 		if (RelationNeedsWAL(idxrel))
+ 		{
+ 			xl_minmax_update	xlrec;
+ 			XLogRecPtr	recptr;
+ 			XLogRecData	rdata[4];
+ 			uint8		info = XLOG_MINMAX_UPDATE;
+ 
+ 			xlrec.new.node = idxrel->rd_node;
+ 			ItemPointerSet(&xlrec.new.tid, BufferGetBlockNumber(newbuf), newoff);
+ 			xlrec.new.heapBlk = heapBlk;
+ 			xlrec.new.revmapBlk = BufferGetBlockNumber(revmapbuf);
+ 			xlrec.new.pagesPerRange = pagesPerRange;
+ 			ItemPointerSet(&xlrec.oldtid, BufferGetBlockNumber(oldbuf), oldoff);
+ 
+ 			rdata[0].data = (char *) &xlrec;
+ 			rdata[0].len = SizeOfMinmaxUpdate;
+ 			rdata[0].buffer = InvalidBuffer;
+ 			rdata[0].next = &(rdata[1]);
+ 
+ 			rdata[1].data = (char *) newtup;
+ 			rdata[1].len = newsz;
+ 			rdata[1].buffer = newbuf;
+ 			rdata[1].buffer_std = true;
+ 			rdata[1].next = &(rdata[2]);
+ 
+ 			rdata[2].data = (char *) NULL;
+ 			rdata[2].len = 0;
+ 			rdata[2].buffer = revmapbuf;
+ 			rdata[2].buffer_std = true;
+ 			rdata[2].next = &(rdata[3]);
+ 
+ 			rdata[3].data = (char *) NULL;
+ 			rdata[3].len = 0;
+ 			rdata[3].buffer = oldbuf;
+ 			rdata[3].buffer_std = true;
+ 			rdata[3].next = NULL;
+ 
+ 			recptr = XLogInsert(RM_MINMAX_ID, info, rdata);
+ 
+ 			PageSetLSN(oldpage, recptr);
+ 			PageSetLSN(newpage, recptr);
+ 			PageSetLSN(BufferGetPage(revmapbuf), recptr);
+ 		}
+ 
+ 		END_CRIT_SECTION();
+ 
+ 		LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK);
+ 		LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
+ 		UnlockReleaseBuffer(newbuf);
+ 		return true;
+ 	}
+ }
+ 
+ /*
+  * Insert an index tuple into the index relation.  The revmap is updated to
+  * mark the range containing the given page as pointing to the inserted entry.
+  * A WAL record is written.
+  *
+  * The buffer, if valid, is first checked for free space; if there isn't
+  * enough, a new buffer is obtained and pinned.  On exit, *buffer remains
+  * pinned but unlocked, so the caller may reuse it for later insertions.
+  * If the relation had to be extended to make room for the new index tuple,
+  * *extended is set to true.
+  */
+ void
+ mm_doinsert(Relation idxrel, BlockNumber pagesPerRange,
+ 			mmRevmapAccess *rmAccess, Buffer *buffer, BlockNumber heapBlk,
+ 			MMTuple *tup, Size itemsz, bool *extended)
+ {
+ 	Page		page;
+ 	BlockNumber blk;
+ 	OffsetNumber off;
+ 	Buffer		revmapbuf;
+ 	ItemPointerData tid;
+ 
+ 	itemsz = MAXALIGN(itemsz);
+ 
+ 	/*
+ 	 * Lock the revmap page for the update. Note that this may require
+ 	 * extending the revmap, which in turn may require moving the currently
+ 	 * pinned index block out of the way.
+ 	 */
+ 	revmapbuf = mmLockRevmapPageForUpdate(rmAccess, heapBlk);
+ 
+ 	/*
+ 	 * Obtain a locked buffer to insert the new tuple.  Note mm_getinsertbuffer
+ 	 * ensures there's enough space in the returned buffer.
+ 	 */
+ 	if (BufferIsValid(*buffer))
+ 	{
+ 		/*
+ 		 * It's possible that another backend (or ourselves!) extended the
+ 		 * revmap over the page we held a pin on, so we cannot assume that
+ 		 * it's still a regular page.
+ 		 */
+ 		LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+ 		if (mm_page_get_freespace(BufferGetPage(*buffer)) < itemsz)
+ 		{
+ 			UnlockReleaseBuffer(*buffer);
+ 			*buffer = InvalidBuffer;
+ 		}
+ 	}
+ 
+ 	if (!BufferIsValid(*buffer))
+ 	{
+ 		*buffer = mm_getinsertbuffer(idxrel, InvalidBuffer, itemsz, extended);
+ 		Assert(BufferIsValid(*buffer));
+ 		Assert(mm_page_get_freespace(BufferGetPage(*buffer)) >= itemsz);
+ 	}
+ 
+ 	page = BufferGetPage(*buffer);
+ 	blk = BufferGetBlockNumber(*buffer);
+ 
+ 	START_CRIT_SECTION();
+ 	off = PageAddItem(page, (Item) tup, itemsz, InvalidOffsetNumber,
+ 					  false, false);
+ 	if (off == InvalidOffsetNumber)
+ 		elog(ERROR, "could not insert new index tuple to page");
+ 	MarkBufferDirty(*buffer);
+ 
+ 	MINMAX_elog(DEBUG2, "inserted tuple (%u,%u) for range starting at %u",
+ 				blk, off, heapBlk);
+ 
+ 	ItemPointerSet(&tid, blk, off);
+ 	mmSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, tid);
+ 	MarkBufferDirty(revmapbuf);
+ 
+ 	/* XLOG stuff */
+ 	if (RelationNeedsWAL(idxrel))
+ 	{
+ 		xl_minmax_insert	xlrec;
+ 		XLogRecPtr	recptr;
+ 		XLogRecData	rdata[2];
+ 		uint8		info = XLOG_MINMAX_INSERT;
+ 
+ 		xlrec.node = idxrel->rd_node;
+ 		xlrec.heapBlk = heapBlk;
+ 		xlrec.pagesPerRange = pagesPerRange;
+ 		xlrec.revmapBlk = BufferGetBlockNumber(revmapbuf);
+ 		ItemPointerSet(&xlrec.tid, blk, off);
+ 
+ 		rdata[0].data = (char *) &xlrec;
+ 		rdata[0].len = SizeOfMinmaxInsert;
+ 		rdata[0].buffer = InvalidBuffer;
+ 		rdata[0].buffer_std = false;
+ 		rdata[0].next = &(rdata[1]);
+ 
+ 		rdata[1].data = (char *) tup;
+ 		rdata[1].len = itemsz;
+ 		rdata[1].buffer = *buffer;
+ 		rdata[1].buffer_std = true;
+ 		rdata[1].next = NULL;
+ 
+ 		recptr = XLogInsert(RM_MINMAX_ID, info, rdata);
+ 
+ 		PageSetLSN(page, recptr);
+ 		PageSetLSN(BufferGetPage(revmapbuf), recptr);
+ 	}
+ 
+ 	END_CRIT_SECTION();
+ 
+ 	/* Tuple is firmly on buffer; we can release our locks */
+ 	LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
+ 	LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK);
+ }
+ 
+ /*
+  * Initiate page evacuation protocol.
+  *
+  * The page must be locked in exclusive mode by the caller.
+  *
+  * If the page is not yet initialized or empty, return false without doing
+  * anything; it can be used for revmap without any further changes.  If it
+  * contains tuples, mark it for evacuation and return true.
+  */
+ bool
+ mm_start_evacuating_page(Relation idxRel, Buffer buf)
+ {
+ 	OffsetNumber off;
+ 	OffsetNumber maxoff;
+ 	MinmaxSpecialSpace *special;
+ 	Page		page;
+ 
+ 	page = BufferGetPage(buf);
+ 
+ 	if (PageIsNew(page))
+ 		return false;
+ 
+ 	special = (MinmaxSpecialSpace *) PageGetSpecialPointer(page);
+ 
+ 	maxoff = PageGetMaxOffsetNumber(page);
+ 	for (off = FirstOffsetNumber; off <= maxoff; off++)
+ 	{
+ 		ItemId		lp;
+ 
+ 		lp = PageGetItemId(page, off);
+ 		if (ItemIdIsUsed(lp))
+ 		{
+ 			/* flag the page, to keep other backends from inserting into it */
+ 			special->flags |= MINMAX_EVACUATE_PAGE;
+ 			MarkBufferDirtyHint(buf, true);
+ 
+ 			return true;
+ 		}
+ 	}
+ 	return false;
+ }
+ 
+ /*
+  * Move all tuples out of a page.
+  *
+  * The caller must hold lock on the page; both lock and pin are released.
+  */
+ void
+ mm_evacuate_page(Relation idxRel, BlockNumber pagesPerRange, mmRevmapAccess *rmAccess, Buffer buf)
+ {
+ 	OffsetNumber off;
+ 	OffsetNumber maxoff;
+ 	MinmaxSpecialSpace *special;
+ 	Page		page;
+ 	bool		extended = false;
+ 
+ 	page = BufferGetPage(buf);
+ 	special = (MinmaxSpecialSpace *) PageGetSpecialPointer(page);
+ 
+ 	Assert(special->flags & MINMAX_EVACUATE_PAGE);
+ 
+ 	maxoff = PageGetMaxOffsetNumber(page);
+ 	for (off = FirstOffsetNumber; off <= maxoff; off++)
+ 	{
+ 		MMTuple	   *tup;
+ 		Size		sz;
+ 		ItemId		lp;
+ 
+ 		CHECK_FOR_INTERRUPTS();
+ 
+ 		lp = PageGetItemId(page, off);
+ 		if (ItemIdIsUsed(lp))
+ 		{
+ 			sz = ItemIdGetLength(lp);
+ 			tup = (MMTuple *) PageGetItem(page, lp);
+ 			tup = minmax_copy_tuple(tup, sz);
+ 
+ 			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+ 
+ 			if (!mm_doupdate(idxRel, pagesPerRange, rmAccess, tup->mt_blkno, buf,
+ 							 off, tup, sz, tup, sz, false, &extended))
+ 				off--; /* update failed concurrently; retry this item */
+ 
+ 			LockBuffer(buf, BUFFER_LOCK_SHARE);
+ 
+ 			/* It's possible that someone extended the revmap over this page */
+ 			if (!MINMAX_IS_REGULAR_PAGE(page))
+ 				break;
+ 		}
+ 	}
+ 
+ 	UnlockReleaseBuffer(buf);
+ 
+ 	if (extended)
+ 		FreeSpaceMapVacuum(idxRel);
+ }
+ 
+ /*
+  * Return a pinned and locked buffer which can be used to insert an index item
+  * of size itemsz.  If oldbuf is a valid buffer, it is also locked (in an order
+  * determined to avoid deadlocks.)
+  *
+  * If there's no existing page with enough free space to accommodate the new
+  * item, the relation is extended.  If this happens, *was_extended is set true.
+  *
+  * If we find that the old page is no longer a regular index page (because
+  * of a revmap extension), the old buffer is unlocked and we return
+  * InvalidBuffer.
+  */
+ static Buffer
+ mm_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
+ 				   bool *was_extended)
+ {
+ 	BlockNumber oldblk;
+ 	BlockNumber newblk;
+ 	Page		page;
+ 	int			freespace;
+ 	bool		extended = false;
+ 
+ 	if (BufferIsValid(oldbuf))
+ 		oldblk = BufferGetBlockNumber(oldbuf);
+ 	else
+ 		oldblk = InvalidBlockNumber;
+ 
+ 	/*
+ 	 * Loop until we find a page with sufficient free space.  By the time we
+ 	 * return to caller out of this loop, both buffers are valid and locked;
+ 	 * each time we loop back here, no buffer remains locked, and any pin on
+ 	 * a rejected new buffer has been dropped.
+ 	 */
+ 	newblk = RelationGetTargetBlock(irel);
+ 	if (newblk == InvalidBlockNumber)
+ 		newblk = GetPageWithFreeSpace(irel, itemsz);
+ 	for (;;)
+ 	{
+ 		Buffer		buf;
+ 		bool		extensionLockHeld = false;
+ 
+ 		CHECK_FOR_INTERRUPTS();
+ 
+ 		if (newblk == InvalidBlockNumber)
+ 		{
+ 			/*
+ 			 * There's not enough free space in any existing index page,
+ 			 * according to the FSM: extend the relation to obtain a shiny
+ 			 * new page.
+ 			 */
+ 			if (!RELATION_IS_LOCAL(irel))
+ 			{
+ 				LockRelationForExtension(irel, ExclusiveLock);
+ 				extensionLockHeld = true;
+ 			}
+ 			buf = ReadBuffer(irel, P_NEW);
+ 			extended = true;
+ 
+ 			MINMAX_elog(DEBUG2, "mm_getinsertbuffer: extending to page %u",
+ 						BufferGetBlockNumber(buf));
+ 		}
+ 		else if (newblk == oldblk)
+ 		{
+ 			/*
+ 			 * There's an odd corner-case here where the FSM is out-of-date,
+ 			 * and gave us the old page.
+ 			 */
+ 			buf = oldbuf;
+ 		}
+ 		else
+ 		{
+ 			buf = ReadBuffer(irel, newblk);
+ 		}
+ 
+ 		/*
+ 		 * We lock the old buffer first, if it's earlier than the new one.
+ 		 * We also need to check that it hasn't been turned into a revmap
+ 		 * page concurrently; if we detect that it happened, give up and
+ 		 * tell caller to start over.
+ 		 */
+ 		if (BufferIsValid(oldbuf) && oldblk < newblk)
+ 		{
+ 			LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);
+ 			if (!MINMAX_IS_REGULAR_PAGE(BufferGetPage(oldbuf)))
+ 			{
+ 				LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
+ 				ReleaseBuffer(buf);
+ 				return InvalidBuffer;
+ 			}
+ 		}
+ 
+ 		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+ 
+ 		if (extensionLockHeld)
+ 			UnlockRelationForExtension(irel, ExclusiveLock);
+ 
+ 		page = BufferGetPage(buf);
+ 
+ 		if (extended)
+ 			mm_page_init(page, MINMAX_PAGETYPE_REGULAR);
+ 
+ 		/*
+ 		 * We have a new buffer from FSM now.  Check that the new page has
+ 		 * enough free space, and return it if it does; otherwise start over.
+ 		 * Note that we allow for the FSM to be out of date here, and in that
+ 		 * case we update it and move on.
+ 		 *
+ 		 * (mm_page_get_freespace also checks that the FSM didn't hand us a
+ 		 * page that has since been repurposed for the revmap.)
+ 		 */
+ 		freespace = mm_page_get_freespace(page);
+ 		if (freespace >= itemsz)
+ 		{
+ 			if (extended)
+ 				*was_extended = true;
+ 
+ 			RelationSetTargetBlock(irel, BufferGetBlockNumber(buf));
+ 
+ 			/*
+ 			 * Lock the old buffer if not locked already.  Note that in this
+ 			 * case we know for sure it's a regular page: it's later than the
+ 			 * new page we just got, which is not a revmap page, and revmap
+ 			 * pages are always consecutive.
+ 			 */
+ 			if (BufferIsValid(oldbuf) && oldblk > newblk)
+ 			{
+ 				LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);
+ 				Assert(MINMAX_IS_REGULAR_PAGE(BufferGetPage(oldbuf)));
+ 			}
+ 
+ 			return buf;
+ 		}
+ 
+ 		/* This page is no good; drop our locks (note buf may equal oldbuf) */
+ 
+ 		/*
+ 		 * If an entirely new page does not contain enough free space for
+ 		 * the new item, then surely that item is oversized.  Complain
+ 		 * loudly; but first make sure we record the page as free, for
+ 		 * next time.
+ 		 */
+ 		if (extended)
+ 		{
+ 			RecordPageWithFreeSpace(irel, BufferGetBlockNumber(buf),
+ 									freespace);
+ 			ereport(ERROR,
+ 					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ 					 errmsg("index row size %lu exceeds maximum %lu for index \"%s\"",
+ 							(unsigned long) itemsz,
+ 							(unsigned long) freespace,
+ 							RelationGetRelationName(irel))));
+ 			return InvalidBuffer;	/* keep compiler quiet */
+ 		}
+ 
+ 		if (newblk != oldblk)
+ 			UnlockReleaseBuffer(buf);
+ 		if (BufferIsValid(oldbuf) && oldblk <= newblk)
+ 			LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
+ 
+ 		newblk = RecordAndGetPageWithFreeSpace(irel, newblk, freespace, itemsz);
+ 	}
+ }
+ 
+ /*
+  * Return the amount of free space (in bytes) on a regular minmax index page.
+  *
+  * If the page is not a regular page, or has been marked with the
+  * MINMAX_EVACUATE_PAGE flag, returns 0 so callers won't try to insert here.
+  */
+ static Size
+ mm_page_get_freespace(Page page)
+ {
+ 	MinmaxSpecialSpace *special;
+ 
+ 	special = (MinmaxSpecialSpace *) PageGetSpecialPointer(page);
+ 	if (!MINMAX_IS_REGULAR_PAGE(page) ||
+ 		(special->flags & MINMAX_EVACUATE_PAGE) != 0)
+ 		return 0;
+ 	else
+ 		return PageGetFreeSpace(page);
+ 
+ }
*** a/src/backend/access/minmax/mmrevmap.c
--- b/src/backend/access/minmax/mmrevmap.c
***************
*** 3,17 ****
   *		Reverse range map for MinMax indexes
   *
   * The reverse range map (revmap) is a translation structure for minmax
!  * indexes: for each page range, there is one most-up-to-date summary tuple,
!  * and its location is tracked by the revmap.  Whenever a new tuple is inserted
!  * into a table that violates the previously recorded min/max values, a new
!  * tuple is inserted into the index and the revmap is updated to point to it.
   *
!  * The pages of the revmap are in the beginning of the index, starting at
!  * immediately after the metapage at block 1.  When the revmap needs to be
!  * expanded, all tuples on the regular minmax page at that block (if any) are
!  * moved out of the way.
   *
   * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
--- 3,16 ----
   *		Reverse range map for MinMax indexes
   *
   * The reverse range map (revmap) is a translation structure for minmax
!  * indexes: for each page range there is one summary tuple, and its location is
!  * tracked by the revmap.  Whenever a new tuple is inserted into a table that
!  * violates the previously recorded summary values, a new tuple is inserted
!  * into the index and the revmap is updated to point to it.
   *
!  * The revmap is stored in the first pages of the index, immediately following
!  * the metapage.  When the revmap needs to be expanded, all tuples on the
!  * regular minmax page at that block (if any) are moved out of the way.
   *
   * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
***************
*** 21,50 ****
   */
  #include "postgres.h"
  
! #include "access/heapam_xlog.h"
! #include "access/minmax.h"
! #include "access/minmax_internal.h"
  #include "access/minmax_page.h"
  #include "access/minmax_revmap.h"
  #include "access/minmax_xlog.h"
  #include "access/rmgr.h"
  #include "miscadmin.h"
  #include "storage/bufmgr.h"
  #include "storage/lmgr.h"
! #include "storage/relfilenode.h"
! #include "storage/smgr.h"
! #include "utils/memutils.h"
  
  
  /*
!  * In revmap pages, each item stores an ItemPointerData.  These defines
!  * let one find the logical revmap page number and index number of the revmap
!  * item for the given heap block number.
   */
  #define HEAPBLK_TO_REVMAP_BLK(pagesPerRange, heapBlk) \
! 	((heapBlk / pagesPerRange) / REGULAR_REVMAP_PAGE_MAXITEMS)
  #define HEAPBLK_TO_REVMAP_INDEX(pagesPerRange, heapBlk) \
! 	((heapBlk / pagesPerRange) % REGULAR_REVMAP_PAGE_MAXITEMS)
  
  
  struct mmRevmapAccess
--- 20,47 ----
   */
  #include "postgres.h"
  
! #include "access/xlog.h"
  #include "access/minmax_page.h"
+ #include "access/minmax_pageops.h"
  #include "access/minmax_revmap.h"
+ #include "access/minmax_tuple.h"
  #include "access/minmax_xlog.h"
  #include "access/rmgr.h"
  #include "miscadmin.h"
  #include "storage/bufmgr.h"
  #include "storage/lmgr.h"
! #include "utils/rel.h"
  
  
  /*
!  * In revmap pages, each item stores an ItemPointerData.  These defines let one
!  * find the logical revmap page number and index number of the revmap item for
!  * the given heap block number.
   */
  #define HEAPBLK_TO_REVMAP_BLK(pagesPerRange, heapBlk) \
! 	((heapBlk / pagesPerRange) / REVMAP_PAGE_MAXITEMS)
  #define HEAPBLK_TO_REVMAP_INDEX(pagesPerRange, heapBlk) \
! 	((heapBlk / pagesPerRange) % REVMAP_PAGE_MAXITEMS)
  
  
  struct mmRevmapAccess
***************
*** 58,63 **** struct mmRevmapAccess
--- 55,62 ----
  /* typedef appears in minmax_revmap.h */
  
  
+ static BlockNumber rm_get_phys_blkno(mmRevmapAccess *rmAccess,
+ 				  BlockNumber mapBlk, bool extend);
  static void rm_extend(mmRevmapAccess *rmAccess);
  
  /*
***************
*** 73,89 **** mmRevmapAccessInit(Relation idxrel, BlockNumber *pagesPerRange)
  	MinmaxMetaPageData *metadata;
  
  	meta = ReadBuffer(idxrel, MINMAX_METAPAGE_BLKNO);
  	metadata = (MinmaxMetaPageData *) PageGetContents(BufferGetPage(meta));
  
  	rmAccess = palloc(sizeof(mmRevmapAccess));
- 	rmAccess->metaBuf = meta;
  	rmAccess->idxrel = idxrel;
  	rmAccess->pagesPerRange = metadata->pagesPerRange;
  	rmAccess->currBuf = InvalidBuffer;
- 	rmAccess->lastRevmapPage = InvalidBlockNumber;
  
! 	if (pagesPerRange)
! 		*pagesPerRange = metadata->pagesPerRange;
  
  	return rmAccess;
  }
--- 72,90 ----
  	MinmaxMetaPageData *metadata;
  
  	meta = ReadBuffer(idxrel, MINMAX_METAPAGE_BLKNO);
+ 	LockBuffer(meta, BUFFER_LOCK_SHARE);
  	metadata = (MinmaxMetaPageData *) PageGetContents(BufferGetPage(meta));
  
  	rmAccess = palloc(sizeof(mmRevmapAccess));
  	rmAccess->idxrel = idxrel;
  	rmAccess->pagesPerRange = metadata->pagesPerRange;
+ 	rmAccess->lastRevmapPage = metadata->lastRevmapPage;
+ 	rmAccess->metaBuf = meta;
  	rmAccess->currBuf = InvalidBuffer;
  
! 	*pagesPerRange = metadata->pagesPerRange;
! 
! 	LockBuffer(meta, BUFFER_LOCK_UNLOCK);
  
  	return rmAccess;
  }
***************
*** 94,281 **** mmRevmapAccessInit(Relation idxrel, BlockNumber *pagesPerRange)
  void
  mmRevmapAccessTerminate(mmRevmapAccess *rmAccess)
  {
! 	if (rmAccess->metaBuf != InvalidBuffer)
! 		ReleaseBuffer(rmAccess->metaBuf);
  	if (rmAccess->currBuf != InvalidBuffer)
  		ReleaseBuffer(rmAccess->currBuf);
  	pfree(rmAccess);
  }
  
  /*
-  * Read the metapage and update the given rmAccess with the metapage data.
-  */
- static void
- rmaccess_read_metapage(mmRevmapAccess *rmAccess)
- {
- 	MinmaxMetaPageData *metadata;
- 	MinmaxSpecialSpace *special PG_USED_FOR_ASSERTS_ONLY;
- 	Page		metapage;
- 
- 	LockBuffer(rmAccess->metaBuf, BUFFER_LOCK_SHARE);
- 	metapage = BufferGetPage(rmAccess->metaBuf);
- 
- #ifdef USE_ASSERT_CHECKING
- 	/* ensure we really got the metapage */
- 	special = (MinmaxSpecialSpace *) PageGetSpecialPointer(metapage);
- 	Assert(special->type == MINMAX_PAGETYPE_META);
- #endif
- 
- 	metadata = (MinmaxMetaPageData *) PageGetContents(metapage);
- 
- 	rmAccess->lastRevmapPage = metadata->lastRevmapPage;
- 
- 	LockBuffer(rmAccess->metaBuf, BUFFER_LOCK_UNLOCK);
- }
- 
- /*
-  * Given a logical revmap block number, find its physical block number.
-  *
-  * Note this might involve up to two buffer reads, including a possible
-  * update to the metapage.
-  *
-  * If extend is set to true, and the page hasn't been set yet, extend the
-  * array to point to a newly allocated page.
-  */
- static BlockNumber
- rm_get_phys_blkno(mmRevmapAccess *rmAccess, BlockNumber mapBlk, bool extend)
- {
- 	BlockNumber targetblk;
- 
- 	if (rmAccess->lastRevmapPage == InvalidBlockNumber)
- 		rmaccess_read_metapage(rmAccess);
- 
- 	/* the first revmap page is always block number 1 */
- 	targetblk = mapBlk + 1;
- 
- 	if (targetblk <= rmAccess->lastRevmapPage)
- 		return targetblk;
- 
- 	if (!extend)
- 		return InvalidBlockNumber;
- 
- 	/* Extend the revmap */
- 	while (targetblk > rmAccess->lastRevmapPage)
- 		rm_extend(rmAccess);
- 
- 	return targetblk;
- }
- 
- /*
-  * Extend the revmap by one page.
-  *
-  * If there is an existing minmax page at that block, it is atomically moved
-  * out of the way, and the redirect pointer on the new revmap page is set
-  * to point to its new location.
-  *
-  * If rmAccess->lastRevmapPage is out-of-date, it's updated and nothing else
-  * is done.
-  */
- static void
- rm_extend(mmRevmapAccess *rmAccess)
- {
- 	Buffer		buf;
- 	Page		page;
- 	Page		metapage;
- 	MinmaxMetaPageData *metadata;
- 	BlockNumber	mapBlk;
- 	BlockNumber nblocks;
- 	Relation	irel = rmAccess->idxrel;
- 	bool		needLock = !RELATION_IS_LOCAL(irel);
- 
- 	/*
- 	 * Lock the metapage. This locks out concurrent extensions of the revmap,
- 	 * but note that we still need to grab the relation extension lock because
- 	 * another backend can still extend the index with regular minmax pages.
- 	 */
- 	LockBuffer(rmAccess->metaBuf, BUFFER_LOCK_EXCLUSIVE);
- 	metapage = BufferGetPage(rmAccess->metaBuf);
- 	metadata = (MinmaxMetaPageData *) PageGetContents(metapage);
- 
- 	/* Check that our cached lastRevmapPage value was up-to-date */
- 	if (metadata->lastRevmapPage != rmAccess->lastRevmapPage)
- 	{
- 		rmAccess->lastRevmapPage = metadata->lastRevmapPage;
- 
- 		LockBuffer(rmAccess->metaBuf, BUFFER_LOCK_UNLOCK);
- 		return;
- 	}
- 	mapBlk = metadata->lastRevmapPage + 1;
- 
- 	nblocks = RelationGetNumberOfBlocks(irel);
- 	if (mapBlk < nblocks)
- 	{
- 		/* Check that the existing index block is sane. */
- 		buf = ReadBuffer(rmAccess->idxrel, mapBlk);
- 		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
- 		page = BufferGetPage(buf);
- 	}
- 	else
- 	{
- 		if (needLock)
- 			LockRelationForExtension(irel, ExclusiveLock);
- 
- 		buf = ReadBuffer(irel, P_NEW);
- 		Assert(BufferGetBlockNumber(buf) == mapBlk);
- 		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
- 		page = BufferGetPage(buf);
- 
- 		if (needLock)
- 			UnlockRelationForExtension(irel, ExclusiveLock);
- 	}
- 
- 	/* Check that it's a regular block (or an empty page) */
- 	if (!PageIsNew(page) && !MINMAX_IS_REGULAR_PAGE(page))
- 		elog(ERROR, "unexpected minmax page type: 0x%04X",
- 			 MINMAX_PAGE_TYPE(page));
- 
- 	/* If the page is in use, evacuate it and restart */
- 	if (mm_start_evacuating_page(rmAccess->idxrel, buf))
- 	{
- 		LockBuffer(rmAccess->metaBuf, BUFFER_LOCK_UNLOCK);
- 		mm_evacuate_page(rmAccess->idxrel, buf);
- 		return;
- 	}
- 
- 	/*
- 	 * Ok, we have now locked the metapage and the target block. Re-initialize
- 	 * it as a revmap page.
- 	 */
- 	START_CRIT_SECTION();
- 
- 	/* the rmr_tids array is initialized to all invalid by PageInit */
- 	mm_page_init(page, MINMAX_PAGETYPE_REVMAP);
- 	MarkBufferDirty(buf);
- 
- 	metadata->lastRevmapPage = mapBlk;
- 	MarkBufferDirty(rmAccess->metaBuf);
- 
- 	if (RelationNeedsWAL(rmAccess->idxrel))
- 	{
- 		xl_minmax_revmap_extend xlrec;
- 		XLogRecPtr	recptr;
- 		XLogRecData	rdata;
- 
- 		xlrec.node = rmAccess->idxrel->rd_node;
- 		xlrec.targetBlk = mapBlk;
- 
- 		rdata.data = (char *) &xlrec;
- 		rdata.len = SizeOfMinmaxRevmapExtend;
- 		rdata.buffer = InvalidBuffer;
- 		rdata.buffer_std = false;
- 		rdata.next = NULL;
- 
- 		recptr = XLogInsert(RM_MINMAX_ID, XLOG_MINMAX_REVMAP_EXTEND, &rdata);
- 		PageSetLSN(metapage, recptr);
- 		PageSetLSN(page, recptr);
- 	}
- 
- 	END_CRIT_SECTION();
- 
- 	LockBuffer(rmAccess->metaBuf, BUFFER_LOCK_UNLOCK);
- 	UnlockReleaseBuffer(buf);
- }
- 
- /*
   * Prepare for updating an entry in the revmap.
   *
   * The map is extended, if necessary.
--- 95,107 ----
  void
  mmRevmapAccessTerminate(mmRevmapAccess *rmAccess)
  {
! 	ReleaseBuffer(rmAccess->metaBuf);
  	if (rmAccess->currBuf != InvalidBuffer)
  		ReleaseBuffer(rmAccess->currBuf);
  	pfree(rmAccess);
  }
  
  /*
   * Prepare for updating an entry in the revmap.
   *
   * The map is extended, if necessary.
***************
*** 285,294 **** mmLockRevmapPageForUpdate(mmRevmapAccess *rmAccess, BlockNumber heapBlk)
  {
  	BlockNumber mapBlk;
  
  	mapBlk = HEAPBLK_TO_REVMAP_BLK(rmAccess->pagesPerRange, heapBlk);
- 
- 	/* Translate the map block number to physical location */
  	mapBlk = rm_get_phys_blkno(rmAccess, mapBlk, true);
  
  	MINMAX_elog(DEBUG2, "locking revmap page for logical page %lu (physical %u) for heap %u",
  				HEAPBLK_TO_REVMAP_BLK(rmAccess->pagesPerRange, heapBlk),
--- 111,123 ----
  {
  	BlockNumber mapBlk;
  
+ 	/*
+ 	 * Translate the map block number to physical location.  Note this extends
+ 	 * the revmap, if necessary.
+ 	 */
  	mapBlk = HEAPBLK_TO_REVMAP_BLK(rmAccess->pagesPerRange, heapBlk);
  	mapBlk = rm_get_phys_blkno(rmAccess, mapBlk, true);
+ 	Assert(mapBlk != InvalidBlockNumber);
  
  	MINMAX_elog(DEBUG2, "locking revmap page for logical page %lu (physical %u) for heap %u",
  				HEAPBLK_TO_REVMAP_BLK(rmAccess->pagesPerRange, heapBlk),
***************
*** 305,311 **** mmLockRevmapPageForUpdate(mmRevmapAccess *rmAccess, BlockNumber heapBlk)
  		if (rmAccess->currBuf != InvalidBuffer)
  			ReleaseBuffer(rmAccess->currBuf);
  
- 		Assert(mapBlk != InvalidBlockNumber);
  		rmAccess->currBuf = ReadBuffer(rmAccess->idxrel, mapBlk);
  	}
  
--- 134,139 ----
***************
*** 373,380 **** mmGetMMTupleForHeapBlock(mmRevmapAccess *rmAccess, BlockNumber heapBlk,
  	/* normalize the heap block number to be the first page in the range */
  	heapBlk = (heapBlk / rmAccess->pagesPerRange) * rmAccess->pagesPerRange;
  
  	mapBlk = HEAPBLK_TO_REVMAP_BLK(rmAccess->pagesPerRange, heapBlk);
- 	/* Translate the map block number to physical location */
  	mapBlk = rm_get_phys_blkno(rmAccess, mapBlk, false);
  	if (mapBlk == InvalidBlockNumber)
  	{
--- 201,208 ----
  	/* normalize the heap block number to be the first page in the range */
  	heapBlk = (heapBlk / rmAccess->pagesPerRange) * rmAccess->pagesPerRange;
  
+ 	/* Compute the revmap page number we need */
  	mapBlk = HEAPBLK_TO_REVMAP_BLK(rmAccess->pagesPerRange, heapBlk);
  	mapBlk = rm_get_phys_blkno(rmAccess, mapBlk, false);
  	if (mapBlk == InvalidBlockNumber)
  	{
***************
*** 385,390 **** mmGetMMTupleForHeapBlock(mmRevmapAccess *rmAccess, BlockNumber heapBlk,
--- 213,220 ----
  	ItemPointerSetInvalid(&previptr);
  	for (;;)
  	{
+ 		CHECK_FOR_INTERRUPTS();
+ 
  		if (rmAccess->currBuf == InvalidBuffer ||
  			BufferGetBlockNumber(rmAccess->currBuf) != mapBlk)
  		{
***************
*** 452,464 **** mmGetMMTupleForHeapBlock(mmRevmapAccess *rmAccess, BlockNumber heapBlk,
  
  		/*
  		 * No luck. Assume that the revmap was updated concurrently.
- 		 *
- 		 * XXX: it would be nice to add some kind of a sanity check here to
- 		 * avoid looping infinitely, if the revmap points to wrong tuple for
- 		 * some reason.
  		 */
  		LockBuffer(*buf, BUFFER_LOCK_UNLOCK);
  	}
  	/* not reached, but keep compiler quiet */
  	return NULL;
  }
--- 282,451 ----
  
  		/*
  		 * No luck. Assume that the revmap was updated concurrently.
  		 */
  		LockBuffer(*buf, BUFFER_LOCK_UNLOCK);
  	}
  	/* not reached, but keep compiler quiet */
  	return NULL;
  }
+ 
+ /*
+  * Given a logical revmap block number, find its physical block number.
+  *
+  * If extend is true and the revmap doesn't yet cover the requested block,
+  * extend it; otherwise InvalidBlockNumber is returned for uncovered blocks.
+  */
+ static BlockNumber
+ rm_get_phys_blkno(mmRevmapAccess *rmAccess, BlockNumber mapBlk, bool extend)
+ {
+ 	BlockNumber targetblk;
+ 
+ 	/* skip the metapage to obtain physical block numbers of revmap pages */
+ 	targetblk = mapBlk + 1;
+ 
+ 	/* Normal case: the revmap page is already allocated */
+ 	if (targetblk <= rmAccess->lastRevmapPage)
+ 		return targetblk;
+ 
+ 	if (!extend)
+ 		return InvalidBlockNumber;
+ 
+ 	/* Extend the revmap */
+ 	while (targetblk > rmAccess->lastRevmapPage)
+ 		rm_extend(rmAccess);
+ 
+ 	return targetblk;
+ }
+ 
+ /*
+  * Extend the revmap by one page.
+  *
+  * However, if the revmap was extended by someone else concurrently, we might
+  * return without actually doing anything.
+  *
+  * If there is an existing minmax page at that block, it is atomically moved
+  * out of the way, and the redirect pointer on the new revmap page is set
+  * to point to its new location.
+  */
+ static void
+ rm_extend(mmRevmapAccess *rmAccess)
+ {
+ 	Buffer		buf;
+ 	Page		page;
+ 	Page		metapage;
+ 	MinmaxMetaPageData *metadata;
+ 	BlockNumber	mapBlk;
+ 	BlockNumber nblocks;
+ 	Relation	irel = rmAccess->idxrel;
+ 	bool		needLock = !RELATION_IS_LOCAL(irel);
+ 
+ 	/*
+ 	 * Lock the metapage. This locks out concurrent extensions of the revmap,
+ 	 * but note that we still need to grab the relation extension lock because
+ 	 * another backend can extend the index with regular minmax pages.
+ 	 */
+ 	LockBuffer(rmAccess->metaBuf, BUFFER_LOCK_EXCLUSIVE);
+ 	metapage = BufferGetPage(rmAccess->metaBuf);
+ 	metadata = (MinmaxMetaPageData *) PageGetContents(metapage);
+ 
+ 	/*
+ 	 * Check that our cached lastRevmapPage value was up-to-date; if it wasn't,
+ 	 * update the cached copy and have caller start over.
+ 	 */
+ 	if (metadata->lastRevmapPage != rmAccess->lastRevmapPage)
+ 	{
+ 		rmAccess->lastRevmapPage = metadata->lastRevmapPage;
+ 		LockBuffer(rmAccess->metaBuf, BUFFER_LOCK_UNLOCK);
+ 		return;
+ 	}
+ 	mapBlk = metadata->lastRevmapPage + 1;
+ 
+ 	nblocks = RelationGetNumberOfBlocks(irel);
+ 	if (mapBlk < nblocks)
+ 	{
+ 		buf = ReadBuffer(irel, mapBlk);
+ 		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+ 		page = BufferGetPage(buf);
+ 	}
+ 	else
+ 	{
+ 		if (needLock)
+ 			LockRelationForExtension(irel, ExclusiveLock);
+ 
+ 		buf = ReadBuffer(irel, P_NEW);
+ 		if (BufferGetBlockNumber(buf) != mapBlk)
+ 		{
+ 			/*
+ 			 * Very rare corner case: somebody extended the relation
+ 			 * concurrently after we read its length.  If this happens, give up
+ 			 * and have caller start over.  We will have to evacuate that page
+ 			 * from under whoever is using it.  Be sure to drop the pin on the
+ 			 * just-read buffer, else it would be leaked.
+ 			 */
+ 			if (needLock)
+ 				UnlockRelationForExtension(irel, ExclusiveLock);
+ 			LockBuffer(rmAccess->metaBuf, BUFFER_LOCK_UNLOCK);
+ 			ReleaseBuffer(buf);
+ 			return;
+ 		}
+ 		LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+ 		page = BufferGetPage(buf);
+ 
+ 		if (needLock)
+ 			UnlockRelationForExtension(irel, ExclusiveLock);
+ 	}
+ 
+ 	/* Check that it's a regular block (or an empty page) */
+ 	if (!PageIsNew(page) && !MINMAX_IS_REGULAR_PAGE(page))
+ 		elog(ERROR, "unexpected minmax page type: 0x%04X",
+ 			 MINMAX_PAGE_TYPE(page));
+ 
+ 	/* If the page is in use, evacuate it and restart */
+ 	if (mm_start_evacuating_page(irel, buf))
+ 	{
+ 		LockBuffer(rmAccess->metaBuf, BUFFER_LOCK_UNLOCK);
+ 		mm_evacuate_page(irel, rmAccess->pagesPerRange, rmAccess, buf);
+ 
+ 		/* have caller start over */
+ 		return;
+ 	}
+ 
+ 	/*
+ 	 * Ok, we have now locked the metapage and the target block. Re-initialize
+ 	 * it as a revmap page.
+ 	 */
+ 	START_CRIT_SECTION();
+ 
+ 	/* the rmr_tids array is initialized to all invalid by PageInit */
+ 	mm_page_init(page, MINMAX_PAGETYPE_REVMAP);
+ 	MarkBufferDirty(buf);
+ 
+ 	metadata->lastRevmapPage = mapBlk;
+ 	MarkBufferDirty(rmAccess->metaBuf);
+ 
+ 	if (RelationNeedsWAL(rmAccess->idxrel))
+ 	{
+ 		xl_minmax_revmap_extend xlrec;
+ 		XLogRecPtr	recptr;
+ 		XLogRecData	rdata;
+ 
+ 		xlrec.node = rmAccess->idxrel->rd_node;
+ 		xlrec.targetBlk = mapBlk;
+ 
+ 		rdata.data = (char *) &xlrec;
+ 		rdata.len = SizeOfMinmaxRevmapExtend;
+ 		rdata.buffer = InvalidBuffer;
+ 		rdata.buffer_std = false;
+ 		rdata.next = NULL;
+ 
+ 		/* FIXME don't we need to log the metapage buffer also? */
+ 
+ 		recptr = XLogInsert(RM_MINMAX_ID, XLOG_MINMAX_REVMAP_EXTEND, &rdata);
+ 		PageSetLSN(metapage, recptr);
+ 		PageSetLSN(page, recptr);
+ 	}
+ 
+ 	END_CRIT_SECTION();
+ 
+ 	LockBuffer(rmAccess->metaBuf, BUFFER_LOCK_UNLOCK);
+ 
+ 	UnlockReleaseBuffer(buf);
+ }
*** a/src/backend/access/minmax/mmxlog.c
--- b/src/backend/access/minmax/mmxlog.c
***************
*** 279,285 **** minmax_xlog_revmap_extend(XLogRecPtr lsn, XLogRecord *record)
  		}
  	}
  
! 	/* Re-init the target block as a revmap page */
  
  	buf = XLogReadBuffer(xlrec->node, xlrec->targetBlk, true);
  	page = (Page) BufferGetPage(buf);
--- 279,288 ----
  		}
  	}
  
! 	/*
! 	 * Re-init the target block as a revmap page.  There's never a full-
! 	 * page image here.
! 	 */
  
  	buf = XLogReadBuffer(xlrec->node, xlrec->targetBlk, true);
  	page = (Page) BufferGetPage(buf);
***************
*** 288,297 **** minmax_xlog_revmap_extend(XLogRecPtr lsn, XLogRecord *record)
  	PageSetLSN(page, lsn);
  	MarkBufferDirty(buf);
  
- 	metadata->lastRevmapPage = xlrec->targetBlk;
- 	PageSetLSN(metapg, lsn);
- 	MarkBufferDirty(metabuf);
- 
  	UnlockReleaseBuffer(buf);
  	UnlockReleaseBuffer(metabuf);
  }
--- 291,296 ----
*** a/src/backend/access/rmgrdesc/minmaxdesc.c
--- b/src/backend/access/rmgrdesc/minmaxdesc.c
***************
*** 40,46 **** minmax_desc(StringInfo buf, XLogRecord *record)
  			appendStringInfo(buf, "insert(init): ");
  		else
  			appendStringInfo(buf, "insert: ");
! 		appendStringInfo(buf, "%u/%u/%u blk %u revmapBlk %u pagesPerRange %u TID (%u,%u)",
  						 xlrec->node.spcNode, xlrec->node.dbNode,
  						 xlrec->node.relNode,
  						 xlrec->heapBlk, xlrec->revmapBlk,
--- 40,46 ----
  			appendStringInfo(buf, "insert(init): ");
  		else
  			appendStringInfo(buf, "insert: ");
! 		appendStringInfo(buf, "%u/%u/%u heapBlk %u revmapBlk %u pagesPerRange %u TID (%u,%u)",
  						 xlrec->node.spcNode, xlrec->node.dbNode,
  						 xlrec->node.relNode,
  						 xlrec->heapBlk, xlrec->revmapBlk,
***************
*** 56,70 **** minmax_desc(StringInfo buf, XLogRecord *record)
  			appendStringInfo(buf, "update(init): ");
  		else
  			appendStringInfo(buf, "update: ");
! 		appendStringInfo(buf, "rel %u/%u/%u heapBlk %u revmapBlk %u pagesPerRange %u TID (%u,%u) old TID (%u,%u)",
  						 xlrec->new.node.spcNode, xlrec->new.node.dbNode,
  						 xlrec->new.node.relNode,
  						 xlrec->new.heapBlk, xlrec->new.revmapBlk,
  						 xlrec->new.pagesPerRange,
- 						 ItemPointerGetBlockNumber(&xlrec->new.tid),
- 						 ItemPointerGetOffsetNumber(&xlrec->new.tid),
  						 ItemPointerGetBlockNumber(&xlrec->oldtid),
! 						 ItemPointerGetOffsetNumber(&xlrec->oldtid));
  	}
  	else if (info == XLOG_MINMAX_SAMEPAGE_UPDATE)
  	{
--- 56,70 ----
  			appendStringInfo(buf, "update(init): ");
  		else
  			appendStringInfo(buf, "update: ");
! 		appendStringInfo(buf, "rel %u/%u/%u heapBlk %u revmapBlk %u pagesPerRange %u old TID (%u,%u) TID (%u,%u)",
  						 xlrec->new.node.spcNode, xlrec->new.node.dbNode,
  						 xlrec->new.node.relNode,
  						 xlrec->new.heapBlk, xlrec->new.revmapBlk,
  						 xlrec->new.pagesPerRange,
  						 ItemPointerGetBlockNumber(&xlrec->oldtid),
! 						 ItemPointerGetOffsetNumber(&xlrec->oldtid),
! 						 ItemPointerGetBlockNumber(&xlrec->new.tid),
! 						 ItemPointerGetOffsetNumber(&xlrec->new.tid));
  	}
  	else if (info == XLOG_MINMAX_SAMEPAGE_UPDATE)
  	{
***************
*** 76,112 **** minmax_desc(StringInfo buf, XLogRecord *record)
  						 ItemPointerGetBlockNumber(&xlrec->tid),
  						 ItemPointerGetOffsetNumber(&xlrec->tid));
  	}
! 	else if (info == XLOG_MINMAX_METAPG_SET)
! 	{
! 		xl_minmax_metapg_set *xlrec = (xl_minmax_metapg_set *) rec;
! 
! 		appendStringInfo(buf, "metapg: rel %u/%u/%u array revmap idx %d block %u",
! 						 xlrec->node.spcNode, xlrec->node.dbNode,
! 						 xlrec->node.relNode,
! 						 xlrec->blkidx, xlrec->newpg);
! 	}
! 	else if (info == XLOG_MINMAX_RMARRAY_SET)
  	{
! 		xl_minmax_rmarray_set *xlrec = (xl_minmax_rmarray_set *) rec;
  
! 		appendStringInfoString(buf, "revmap array: ");
! 		appendStringInfo(buf, "rel %u/%u/%u array pg %u revmap idx %d block %u",
  						 xlrec->node.spcNode, xlrec->node.dbNode,
! 						 xlrec->node.relNode,
! 						 xlrec->rmarray,
! 						 xlrec->blkidx, xlrec->newpg);
! 	}
! 	else if (info == XLOG_MINMAX_INIT_RMPG)
! 	{
! 		xl_minmax_init_rmpg *xlrec = (xl_minmax_init_rmpg *) rec;
! 
! 		appendStringInfo(buf, "init_rmpg: rel %u/%u/%u blk %u",
! 						 xlrec->node.spcNode, xlrec->node.dbNode,
! 						 xlrec->node.relNode, xlrec->blkno);
! 		if (xlrec->array)
! 			appendStringInfoString(buf, " (array)");
! 		else
! 			appendStringInfo(buf, "(regular) logblk %u", xlrec->logblk);
  	}
  	else
  		appendStringInfo(buf, "UNKNOWN");
--- 76,88 ----
  						 ItemPointerGetBlockNumber(&xlrec->tid),
  						 ItemPointerGetOffsetNumber(&xlrec->tid));
  	}
! 	else if (info == XLOG_MINMAX_REVMAP_EXTEND)
  	{
! 		xl_minmax_revmap_extend *xlrec = (xl_minmax_revmap_extend *) rec;
  
! 		appendStringInfo(buf, "revmap extend: rel %u/%u/%u targetBlk %u",
  						 xlrec->node.spcNode, xlrec->node.dbNode,
! 						 xlrec->node.relNode, xlrec->targetBlk);
  	}
  	else
  		appendStringInfo(buf, "UNKNOWN");
*** a/src/include/access/minmax_internal.h
--- b/src/include/access/minmax_internal.h
***************
*** 21,31 ****
  /*
   * A MinmaxDesc is a struct designed to enable decoding a MinMax tuple from the
   * on-disk format to a DeformedMMTuple and vice-versa.
-  *
-  * Note: we assume, for now, that the data stored for each column is the same
-  * datatype as the indexed heap column.  This restriction can be lifted by
-  * having an Oid array pointer on the PerCol struct, where each member of the
-  * array indicates the typid of the stored data.
   */
  
  /* struct returned by "OpcInfo" amproc */
--- 21,26 ----
***************
*** 60,66 **** typedef struct MinmaxDesc
  	int			md_totalstored;
  
  	/* per-column info */
! 	MinmaxOpcInfo *md_info[FLEXIBLE_ARRAY_MEMBER];	/* tupdesc->natts entries long */
  } MinmaxDesc;
  
  /*
--- 55,61 ----
  	int			md_totalstored;
  
  	/* per-column info */
! 	MinmaxOpcInfo *md_info[FLEXIBLE_ARRAY_MEMBER];	/* md_tupdesc->natts entries long */
  } MinmaxDesc;
  
  /*
***************
*** 87,93 **** extern void minmax_free_mmdesc(MinmaxDesc *mmdesc);
  extern void mm_page_init(Page page, uint16 type);
  extern void mm_metapage_init(Page page, BlockNumber pagesPerRange,
  				 uint16 version);
- extern bool mm_start_evacuating_page(Relation idxRel, Buffer buf);
- extern void mm_evacuate_page(Relation idxRel, Buffer buf);
  
  #endif   /* MINMAX_INTERNAL_H */
--- 82,86 ----
*** a/src/include/access/minmax_page.h
--- b/src/include/access/minmax_page.h
***************
*** 16,21 ****
--- 16,23 ----
  #ifndef MINMAX_PAGE_H
  #define MINMAX_PAGE_H
  
+ #include "storage/block.h"
+ #include "storage/itemptr.h"
  
  /* special space on all minmax pages stores a "type" identifier */
  #define		MINMAX_PAGETYPE_META			0xF091
***************
*** 54,68 **** typedef struct MinmaxMetaPageData
  /* Definitions for regular revmap pages */
  typedef struct RevmapContents
  {
! 	ItemPointerData rmr_tids[1];	/* really REGULAR_REVMAP_PAGE_MAXITEMS */
  } RevmapContents;
  
! #define REGULAR_REVMAP_CONTENT_SIZE	\
  	(BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - \
  	 offsetof(RevmapContents, rmr_tids) - \
  	 MAXALIGN(sizeof(MinmaxSpecialSpace)))
  /* max num of items in the array */
! #define REGULAR_REVMAP_PAGE_MAXITEMS \
! 	(REGULAR_REVMAP_CONTENT_SIZE / sizeof(ItemPointerData))
  
  #endif		/* MINMAX_PAGE_H */
--- 56,70 ----
  /* Definitions for regular revmap pages */
  typedef struct RevmapContents
  {
! 	ItemPointerData rmr_tids[1];	/* really REVMAP_PAGE_MAXITEMS */
  } RevmapContents;
  
! #define REVMAP_CONTENT_SIZE	\
  	(BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - \
  	 offsetof(RevmapContents, rmr_tids) - \
  	 MAXALIGN(sizeof(MinmaxSpecialSpace)))
  /* max num of items in the array */
! #define REVMAP_PAGE_MAXITEMS \
! 	(REVMAP_CONTENT_SIZE / sizeof(ItemPointerData))
  
  #endif		/* MINMAX_PAGE_H */
*** /dev/null
--- b/src/include/access/minmax_pageops.h
***************
*** 0 ****
--- 1,29 ----
+ /*
+  * Prototypes for operating on minmax pages.
+  *
+  * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  * IDENTIFICATION
+  *	  src/include/access/minmax_pageops.h
+  */
+ #ifndef MINMAX_PAGEOPS_H
+ #define MINMAX_PAGEOPS_H
+ 
+ #include "access/minmax_revmap.h"
+ 
+ extern bool mm_doupdate(Relation idxrel, BlockNumber pagesPerRange,
+ 			mmRevmapAccess *rmAccess, BlockNumber heapBlk,
+ 			Buffer oldbuf, OffsetNumber oldoff,
+ 			const MMTuple *origtup, Size origsz,
+ 			const MMTuple *newtup, Size newsz,
+ 			bool samepage, bool *extended);
+ extern void mm_doinsert(Relation idxrel, BlockNumber pagesPerRange,
+ 			mmRevmapAccess *rmAccess, Buffer *buffer, BlockNumber heapBlk,
+ 			MMTuple *tup, Size itemsz, bool *extended);
+ 
+ extern bool mm_start_evacuating_page(Relation idxRel, Buffer buf);
+ extern void mm_evacuate_page(Relation idxRel, BlockNumber pagesPerRange,
+ 				 mmRevmapAccess *rmAccess, Buffer buf);
+ 
+ #endif	/* MINMAX_PAGEOPS_H */
*** a/src/include/access/minmax_revmap.h
--- b/src/include/access/minmax_revmap.h
***************
*** 13,18 ****
--- 13,19 ----
  
  #include "access/minmax_tuple.h"
  #include "storage/block.h"
+ #include "storage/buf.h"
  #include "storage/itemptr.h"
  #include "storage/off.h"
  #include "utils/relcache.h"
***************
*** 24,30 **** extern mmRevmapAccess *mmRevmapAccessInit(Relation idxrel,
  				   BlockNumber *pagesPerRange);
  extern void mmRevmapAccessTerminate(mmRevmapAccess *rmAccess);
  
- extern void mmRevmapCreate(Relation idxrel);
  extern Buffer mmLockRevmapPageForUpdate(mmRevmapAccess *rmAccess,
  						  BlockNumber heapBlk);
  extern void mmSetHeapBlockItemptr(Buffer rmbuf, BlockNumber pagesPerRange,
--- 25,30 ----
*** minmax.c.heikki	2014-08-20 19:06:27.000000000 -0400
--- src/backend/access/minmax/mmpageops.c	2014-08-20 17:10:55.000000000 -0400
***************
*** 1,8 ****
  /*
   * Update tuple origtup (size origsz), located in offset oldoff of buffer
   * oldbuf, to newtup (size newsz) as summary tuple for the page range starting
   * at heapBlk.  If samepage is true, then attempt to put the new tuple in the same
!  * page, otherwise get a new one.
   *
   * If the update is done, return true; the revmap is updated to point to the
   * new tuple.  If the update is not done for whatever reason, return false.
--- 1,37 ----
  /*
+  * mmpageops.c
+  *		Page-handling routines for Minmax indexes
+  *
+  * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  * IDENTIFICATION
+  *	  src/backend/access/minmax/mmpageops.c
+  */
+ #include "postgres.h"
+ 
+ #include "access/minmax_pageops.h"
+ #include "access/minmax_page.h"
+ #include "access/minmax_revmap.h"
+ #include "access/minmax_xlog.h"
+ #include "miscadmin.h"
+ #include "storage/bufmgr.h"
+ #include "storage/freespace.h"
+ #include "storage/lmgr.h"
+ #include "storage/smgr.h"
+ #include "utils/rel.h"
+ 
+ 
+ static Buffer mm_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
+ 				   bool *was_extended);
+ static Size mm_page_get_freespace(Page page);
+ 
+ 
+ /*
   * Update tuple origtup (size origsz), located in offset oldoff of buffer
   * oldbuf, to newtup (size newsz) as summary tuple for the page range starting
   * at heapBlk.  If samepage is true, then attempt to put the new tuple in the same
!  * page, otherwise use some other one.
   *
   * If the update is done, return true; the revmap is updated to point to the
   * new tuple.  If the update is not done for whatever reason, return false.
***************
*** 11,17 ****
   * If the index had to be extended in the course of this operation, *extended
   * is set to true.
   */
! static bool
  mm_doupdate(Relation idxrel, BlockNumber pagesPerRange,
  			mmRevmapAccess *rmAccess, BlockNumber heapBlk,
  			Buffer oldbuf, OffsetNumber oldoff,
--- 40,46 ----
   * If the index had to be extended in the course of this operation, *extended
   * is set to true.
   */
! bool
  mm_doupdate(Relation idxrel, BlockNumber pagesPerRange,
  			mmRevmapAccess *rmAccess, BlockNumber heapBlk,
  			Buffer oldbuf, OffsetNumber oldoff,
***************
*** 59,66 ****
  	oldsz = ItemIdGetLength(origlp);
  	oldtup = (MMTuple *) PageGetItem(oldpage, origlp);
  
! 	/* If both tuples are in fact equal, there is nothing to do */
! 	if (!minmax_tuples_equal(oldtup, oldsz, origtup, origsz))
  	{
  		LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
  		return false;
--- 88,99 ----
  	oldsz = ItemIdGetLength(origlp);
  	oldtup = (MMTuple *) PageGetItem(oldpage, origlp);
  
! 	/*
! 	 * If both tuples are identical, there is nothing to do; except that if we
! 	 * were requested to move the tuple across pages, we do it even if they are
! 	 * equal.
! 	 */
! 	if (samepage && minmax_tuples_equal(oldtup, oldsz, origtup, origsz))
  	{
  		LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
  		return false;
***************
*** 126,132 ****
  	{
  		/*
  		 * Not enough space, but caller said that there was. Tell them to
! 		 * start over
  		 */
  		LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
  		return false;
--- 159,165 ----
  	{
  		/*
  		 * Not enough space, but caller said that there was. Tell them to
! 		 * start over.
  		 */
  		LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
  		return false;
***************
*** 222,231 ****
   * If the relation had to be extended to make room for the new index tuple,
   * *extended is set to true.
   */
! static void
  mm_doinsert(Relation idxrel, BlockNumber pagesPerRange,
! 			mmRevmapAccess *rmAccess, Buffer *buffer,
! 			BlockNumber heapBlk, MMTuple *tup, Size itemsz, bool *extended)
  {
  	Page		page;
  	BlockNumber blk;
--- 255,264 ----
   * If the relation had to be extended to make room for the new index tuple,
   * *extended is set to true.
   */
! void
  mm_doinsert(Relation idxrel, BlockNumber pagesPerRange,
! 			mmRevmapAccess *rmAccess, Buffer *buffer, BlockNumber heapBlk,
! 			MMTuple *tup, Size itemsz, bool *extended)
  {
  	Page		page;
  	BlockNumber blk;
***************
*** 248,273 ****
  	 */
  	if (BufferIsValid(*buffer))
  	{
- 		page = BufferGetPage(*buffer);
- 		LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
- 
  		/*
  		 * It's possible that another backend (or ourselves!) extended the
  		 * revmap over the page we held a pin on, so we cannot assume that
  		 * it's still a regular page.
  		 */
! 		if (mm_page_get_freespace(page) < itemsz)
  		{
  			UnlockReleaseBuffer(*buffer);
  			*buffer = InvalidBuffer;
  		}
  	}
  	if (!BufferIsValid(*buffer))
  	{
  		*buffer = mm_getinsertbuffer(idxrel, InvalidBuffer, itemsz, extended);
  		Assert(BufferIsValid(*buffer));
! 		page = BufferGetPage(*buffer);
! 		Assert(mm_page_get_freespace(page) >= itemsz);
  	}
  
  	page = BufferGetPage(*buffer);
--- 281,304 ----
  	 */
  	if (BufferIsValid(*buffer))
  	{
  		/*
  		 * It's possible that another backend (or ourselves!) extended the
  		 * revmap over the page we held a pin on, so we cannot assume that
  		 * it's still a regular page.
  		 */
! 		LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
! 		if (mm_page_get_freespace(BufferGetPage(*buffer)) < itemsz)
  		{
  			UnlockReleaseBuffer(*buffer);
  			*buffer = InvalidBuffer;
  		}
  	}
+ 
  	if (!BufferIsValid(*buffer))
  	{
  		*buffer = mm_getinsertbuffer(idxrel, InvalidBuffer, itemsz, extended);
  		Assert(BufferIsValid(*buffer));
! 		Assert(mm_page_get_freespace(BufferGetPage(*buffer)) >= itemsz);
  	}
  
  	page = BufferGetPage(*buffer);
***************
*** 327,336 ****
  }
  
  /*
!  * Checks if a regular minmax index page is empty.
   *
!  * If it's not, it's marked for "evacuation", meaning that no new tuples will
!  * be added to it.
   */
  bool
  mm_start_evacuating_page(Relation idxRel, Buffer buf)
--- 358,370 ----
  }
  
  /*
!  * Initiate page evacuation protocol.
   *
!  * The page must be locked in exclusive mode by the caller.
!  *
!  * If the page is not yet initialized or empty, return false without doing
!  * anything; it can be used for revmap without any further changes.  If it
!  * contains tuples, mark it for evacuation and return true.
   */
  bool
  mm_start_evacuating_page(Relation idxRel, Buffer buf)
***************
*** 355,361 ****
  		lp = PageGetItemId(page, off);
  		if (ItemIdIsUsed(lp))
  		{
! 			/* prevent other backends from adding more stuff to this page. */
  			special->flags |= MINMAX_EVACUATE_PAGE;
  			MarkBufferDirtyHint(buf, true);
  
--- 389,395 ----
  		lp = PageGetItemId(page, off);
  		if (ItemIdIsUsed(lp))
  		{
! 			/* prevent other backends from adding more stuff to this page */
  			special->flags |= MINMAX_EVACUATE_PAGE;
  			MarkBufferDirtyHint(buf, true);
  
***************
*** 368,387 ****
  /*
   * Move all tuples out of a page.
   *
!  * The caller must hold an exclusive lock on the page. The lock and pin are
!  * released.
   */
  void
! mm_evacuate_page(Relation idxRel, Buffer buf)
  {
  	OffsetNumber off;
  	OffsetNumber maxoff;
  	MinmaxSpecialSpace *special;
  	Page		page;
! 	mmRevmapAccess *rmAccess;
! 	BlockNumber pagesPerRange;
! 
! 	rmAccess = mmRevmapAccessInit(idxRel, &pagesPerRange);
  
  	page = BufferGetPage(buf);
  	special = (MinmaxSpecialSpace *) PageGetSpecialPointer(page);
--- 402,417 ----
  /*
   * Move all tuples out of a page.
   *
!  * The caller must hold lock on the page. The lock and pin are released.
   */
  void
! mm_evacuate_page(Relation idxRel, BlockNumber pagesPerRange, mmRevmapAccess *rmAccess, Buffer buf)
  {
  	OffsetNumber off;
  	OffsetNumber maxoff;
  	MinmaxSpecialSpace *special;
  	Page		page;
! 	bool		extended = false;
  
  	page = BufferGetPage(buf);
  	special = (MinmaxSpecialSpace *) PageGetSpecialPointer(page);
***************
*** 394,407 ****
  		MMTuple	   *tup;
  		Size		sz;
  		ItemId		lp;
! 		bool		extended = false;
  
  		lp = PageGetItemId(page, off);
  		if (ItemIdIsUsed(lp))
  		{
- 			tup = (MMTuple *) PageGetItem(page, lp);
  			sz = ItemIdGetLength(lp);
! 
  			tup = minmax_copy_tuple(tup, sz);
  
  			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
--- 424,437 ----
  		MMTuple	   *tup;
  		Size		sz;
  		ItemId		lp;
! 
! 		CHECK_FOR_INTERRUPTS();
  
  		lp = PageGetItemId(page, off);
  		if (ItemIdIsUsed(lp))
  		{
  			sz = ItemIdGetLength(lp);
! 			tup = (MMTuple *) PageGetItem(page, lp);
  			tup = minmax_copy_tuple(tup, sz);
  
  			LockBuffer(buf, BUFFER_LOCK_UNLOCK);
***************
*** 412,429 ****
  
  			LockBuffer(buf, BUFFER_LOCK_SHARE);
  
- 			if (extended)
- 				IndexFreeSpaceMapVacuum(idxRel);
- 
  			/* It's possible that someone extended the revmap over this page */
  			if (!MINMAX_IS_REGULAR_PAGE(page))
  				break;
  		}
  	}
  
- 	mmRevmapAccessTerminate(rmAccess);
- 
  	UnlockReleaseBuffer(buf);
  }
  
  /*
--- 442,457 ----
  
  			LockBuffer(buf, BUFFER_LOCK_SHARE);
  
  			/* It's possible that someone extended the revmap over this page */
  			if (!MINMAX_IS_REGULAR_PAGE(page))
  				break;
  		}
  	}
  
  	UnlockReleaseBuffer(buf);
+ 
+ 	if (extended)
+ 		FreeSpaceMapVacuum(idxRel);
  }
  
  /*
***************
*** 467,472 ****
--- 495,502 ----
  		Buffer		buf;
  		bool		extensionLockHeld = false;
  
+ 		CHECK_FOR_INTERRUPTS();
+ 
  		if (newblk == InvalidBlockNumber)
  		{
  			/*
***************
*** 498,503 ****
--- 528,539 ----
  			buf = ReadBuffer(irel, newblk);
  		}
  
+ 		/*
+ 		 * We lock the old buffer first, if it's earlier than the new one.
+ 		 * We also need to check that it hasn't been turned into a revmap
+ 		 * page concurrently; if we detect that it happened, give up and
+ 		 * tell caller to start over.
+ 		 */
  		if (BufferIsValid(oldbuf) && oldblk < newblk)
  		{
  			LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);
***************
*** 520,529 ****
  			mm_page_init(page, MINMAX_PAGETYPE_REGULAR);
  
  		/*
! 		 * We have a new buffer from FSM now, and both pages are locked.
! 		 * Check that the new page has enough free space, and return it if it
! 		 * does; otherwise start over.  Note that we allow for the FSM to be
! 		 * out of date here, and in that case we update it and move on.
  		 *
  		 * (mm_page_get_freespace also checks that the FSM didn't hand us a
  		 * page that has since been repurposed for the revmap.)
--- 556,565 ----
  			mm_page_init(page, MINMAX_PAGETYPE_REGULAR);
  
  		/*
! 		 * We have a new buffer from FSM now.  Check that the new page has
! 		 * enough free space, and return it if it does; otherwise start over.
! 		 * Note that we allow for the FSM to be out of date here, and in that
! 		 * case we update it and move on.
  		 *
  		 * (mm_page_get_freespace also checks that the FSM didn't hand us a
  		 * page that has since been repurposed for the revmap.)
***************
*** 533,543 ****
  		{
  			if (extended)
  				*was_extended = true;
  			RelationSetTargetBlock(irel, BufferGetBlockNumber(buf));
  
! 			/* Lock the old buffer if not locked already */
! 			if (BufferIsValid(oldbuf) && newblk < oldblk)
  				LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);
  
  			return buf;
  		}
--- 569,588 ----
  		{
  			if (extended)
  				*was_extended = true;
+ 
  			RelationSetTargetBlock(irel, BufferGetBlockNumber(buf));
  
! 			/*
! 			 * Lock the old buffer if not locked already.  Note that in this
! 			 * case we know for sure it's a regular page: it's later than the
! 			 * new page we just got, which is not a revmap page, and revmap
! 			 * pages are always consecutive.
! 			 */
! 			if (BufferIsValid(oldbuf) && oldblk > newblk)
! 			{
  				LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);
+ 				Assert(MINMAX_IS_REGULAR_PAGE(BufferGetPage(oldbuf)));
+ 			}
  
  			return buf;
  		}
***************
*** 571,573 ****
--- 616,638 ----
  		newblk = RecordAndGetPageWithFreeSpace(irel, newblk, freespace, itemsz);
  	}
  }
+ 
+ /*
+  * Return the amount of free space on a regular minmax index page.
+  *
+  * If the page is not a regular page, or has been marked with the
+  * MINMAX_EVACUATE_PAGE flag, returns 0.
+  */
+ static Size
+ mm_page_get_freespace(Page page)
+ {
+ 	MinmaxSpecialSpace *special;
+ 
+ 	special = (MinmaxSpecialSpace *) PageGetSpecialPointer(page);
+ 	if (!MINMAX_IS_REGULAR_PAGE(page) ||
+ 		(special->flags & MINMAX_EVACUATE_PAGE) != 0)
+ 		return 0;
+ 	else
+ 		return PageGetFreeSpace(page);
+ 
+ }
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to