On Mon, Aug 10, 2015 at 11:05 AM, Michael Paquier
<michael.paqu...@gmail.com> wrote:
> On Mon, Aug 10, 2015 at 12:39 AM, Robert Haas <robertmh...@gmail.com> wrote:
>> On Thu, Aug 6, 2015 at 11:33 AM, Jim Nasby <jim.na...@bluetreble.com> wrote:
>>> They also provide a level of control over what is and isn't installed in a
>>> cluster. Personally, I'd prefer that most users not even be aware of the
>>> existence of things like pageinspect.
>>
>> +1.
>>
>> [...]
>>
>> Extensions are a useful packaging mechanism for functionality that is
>> useful but not required, and debugging facilities are definitely very
>> useful but should not be required.
>
> +1.

Sorry for coming to this discussion late.

I have encountered many cases where pg_stat_statements and
pgstattuple are required in production, so I basically agree with
moving such extensions into core.
But IMO, the diagnostic tools for the visibility map, the heap
(pageinspect), and so on are a kind of debugging tool.

Attached are the latest v11 patches, separated into two patches: a
frozen-bit patch and a diagnostic-function patch.
Moving the diagnostic functions into core is still under discussion,
but this patch puts them into core anyway, because the diagnostic
function for the visibility map needs to be in core at least to run
the regression tests.
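
To make the new format concrete: with the frozen bit patch each heap
page gets two adjacent bits in the visibility map (BITS_PER_HEAPBLOCK
becomes 2, so HEAPBLOCKS_PER_BYTE drops from 8 to 4). Below is a
minimal standalone sketch of the bit arithmetic that
visibilitymap_set() and visibilitymap_test() use in the patch. The
flag values are my assumption here (the real definitions live in
visibilitymap.h, which is not part of the hunks below), so please read
it as an illustration, not as the patch itself.

#include <stdint.h>
#include <stdio.h>

/* Assumed flag values; the real ones are defined in visibilitymap.h. */
#define VISIBILITYMAP_ALL_VISIBLE	0x01
#define VISIBILITYMAP_ALL_FROZEN	0x02

#define BITS_PER_HEAPBLOCK	2	/* one bit all-visible, one all-frozen */
#define HEAPBLOCKS_PER_BYTE	4	/* was 8 with one bit per heap block */

int
main(void)
{
	uint8_t		map[1] = {0};	/* one map byte covers four heap pages */
	uint32_t	heapBlk = 2;	/* third heap page */
	int			mapBit = heapBlk % HEAPBLOCKS_PER_BYTE;

	/* Set both bits, as VACUUM would for an all-visible, all-frozen page. */
	map[0] |= (VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN)
		<< (BITS_PER_HEAPBLOCK * mapBit);

	/* Test the all-frozen bit the same way visibilitymap_test() does. */
	printf("all-frozen: %d\n",
		   (map[0] & (VISIBILITYMAP_ALL_FROZEN <<
					  (BITS_PER_HEAPBLOCK * mapBit))) != 0);
	return 0;
}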

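Similarly, since pg_upgrade has to rewrite old single-bit "vm" files
into the new two-bit "vfm" format, each old byte (8 heap pages)
expands into two bytes (4 pages each): every old all-visible bit moves
to an even bit position and a cleared all-frozen bit is interleaved
after it. A small sketch of that expansion, equivalent to the
rewrite_vm_to_vfm_table[] lookup in the patch (e.g. 0xFF expands to
0x5555 = 21845, which matches table entry 255):

#include <stdint.h>

/*
 * Expand one old-format visibility map byte: old bit i becomes new bit 2*i,
 * and the interleaved all-frozen bits start out cleared.
 */
static uint16_t
expand_vm_byte(uint8_t old)
{
	uint16_t	out = 0;
	int			i;

	for (i = 0; i < 8; i++)
		if (old & (1 << i))
			out |= (uint16_t) 1 << (2 * i);
	return out;
}
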
Regards,

--
Masahiko Sawada
diff --git a/contrib/pgstattuple/pgstatapprox.c b/contrib/pgstattuple/pgstatapprox.c
index 22c5f7a..b1b6a06 100644
--- a/contrib/pgstattuple/pgstatapprox.c
+++ b/contrib/pgstattuple/pgstatapprox.c
@@ -87,7 +87,7 @@ statapprox_heap(Relation rel, output_type *stat)
 		 * If the page has only visible tuples, then we can find out the free
 		 * space from the FSM and move on.
 		 */
-		if (visibilitymap_test(rel, blkno, &vmbuffer))
+		if (visibilitymap_test(rel, blkno, &vmbuffer, VISIBILITYMAP_ALL_VISIBLE))
 		{
 			freespace = GetRecordedFreeSpace(rel, blkno);
 			stat->tuple_len += BLCKSZ - freespace;
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 3701d8e..dabd632 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -2176,8 +2176,9 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
 	CheckForSerializableConflictIn(relation, NULL, InvalidBuffer);
 
 	/*
-	 * Find buffer to insert this tuple into.  If the page is all visible,
-	 * this will also pin the requisite visibility map page.
+	 * Find buffer to insert this tuple into.  If the page is all visible
+	 * or all frozen, this will also pin the requisite visibility map
+	 * page.
 	 */
 	buffer = RelationGetBufferForTuple(relation, heaptup->t_len,
 									   InvalidBuffer, options, bistate,
@@ -2192,7 +2193,11 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
 	if (PageIsAllVisible(BufferGetPage(buffer)))
 	{
 		all_visible_cleared = true;
+
+		/* all-frozen information is also cleared at the same time */
 		PageClearAllVisible(BufferGetPage(buffer));
+		PageClearAllFrozen(BufferGetPage(buffer));
+
 		visibilitymap_clear(relation,
 							ItemPointerGetBlockNumber(&(heaptup->t_self)),
 							vmbuffer);
@@ -2493,7 +2498,11 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
 		if (PageIsAllVisible(page))
 		{
 			all_visible_cleared = true;
+
+			/* all-frozen information is also cleared at the same time */
 			PageClearAllVisible(page);
+			PageClearAllFrozen(page);
+
 			visibilitymap_clear(relation,
 								BufferGetBlockNumber(buffer),
 								vmbuffer);
@@ -2776,9 +2785,9 @@ heap_delete(Relation relation, ItemPointer tid,
 
 	/*
 	 * If we didn't pin the visibility map page and the page has become all
-	 * visible while we were busy locking the buffer, we'll have to unlock and
-	 * re-lock, to avoid holding the buffer lock across an I/O.  That's a bit
-	 * unfortunate, but hopefully shouldn't happen often.
+	 * visible or all frozen while we were busy locking the buffer, we'll
+	 * have to unlock and re-lock, to avoid holding the buffer lock across an
+	 * I/O.  That's a bit unfortunate, but hopefully shouldn't happen often.
 	 */
 	if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
 	{
@@ -2972,10 +2981,15 @@ l1:
 	 */
 	PageSetPrunable(page, xid);
 
+	/* clear PD_ALL_VISIBLE and PD_ALL_FROZEN flags */
 	if (PageIsAllVisible(page))
 	{
 		all_visible_cleared = true;
+
+		/* all-frozen information is also cleared at the same time */
 		PageClearAllVisible(page);
+		PageClearAllFrozen(page);
+
 		visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
 							vmbuffer);
 	}
@@ -3254,7 +3268,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
 	 * in the middle of changing this, so we'll need to recheck after we have
 	 * the lock.
 	 */
-	if (PageIsAllVisible(page))
+	if (PageIsAllVisible(page) || PageIsAllFrozen(page))
 		visibilitymap_pin(relation, block, &vmbuffer);
 
 	LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
@@ -3850,14 +3864,22 @@ l2:
 	if (PageIsAllVisible(BufferGetPage(buffer)))
 	{
 		all_visible_cleared = true;
+
+		/* all-frozen information is also cleared at the same time */
 		PageClearAllVisible(BufferGetPage(buffer));
+		PageClearAllFrozen(BufferGetPage(buffer));
+
 		visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
 							vmbuffer);
 	}
 	if (newbuf != buffer && PageIsAllVisible(BufferGetPage(newbuf)))
 	{
 		all_visible_cleared_new = true;
+
+		/* all-frozen information is also cleared at the same time */
 		PageClearAllVisible(BufferGetPage(newbuf));
+		PageClearAllFrozen(BufferGetPage(newbuf));
+
 		visibilitymap_clear(relation, BufferGetBlockNumber(newbuf),
 							vmbuffer_new);
 	}
@@ -6942,7 +6964,7 @@ log_heap_freeze(Relation reln, Buffer buffer, TransactionId cutoff_xid,
  */
 XLogRecPtr
 log_heap_visible(RelFileNode rnode, Buffer heap_buffer, Buffer vm_buffer,
-				 TransactionId cutoff_xid)
+				 TransactionId cutoff_xid, uint8 vmflags)
 {
 	xl_heap_visible xlrec;
 	XLogRecPtr	recptr;
@@ -6952,6 +6974,7 @@ log_heap_visible(RelFileNode rnode, Buffer heap_buffer, Buffer vm_buffer,
 	Assert(BufferIsValid(vm_buffer));
 
 	xlrec.cutoff_xid = cutoff_xid;
+	xlrec.flags = vmflags;
 	XLogBeginInsert();
 	XLogRegisterData((char *) &xlrec, SizeOfHeapVisible);
 
@@ -7541,8 +7564,14 @@ heap_xlog_visible(XLogReaderState *record)
 		 * the subsequent update won't be replayed to clear the flag.
 		 */
 		page = BufferGetPage(buffer);
-		PageSetAllVisible(page);
+
+		if (xlrec->flags & VISIBILITYMAP_ALL_VISIBLE)
+			PageSetAllVisible(page);
+		if (xlrec->flags & VISIBILITYMAP_ALL_FROZEN)
+			PageSetAllFrozen(page);
+
 		MarkBufferDirty(buffer);
+
 	}
 	else if (action == BLK_RESTORED)
 	{
@@ -7593,7 +7622,7 @@ heap_xlog_visible(XLogReaderState *record)
 		 */
 		if (lsn > PageGetLSN(vmpage))
 			visibilitymap_set(reln, blkno, InvalidBuffer, lsn, vmbuffer,
-							  xlrec->cutoff_xid);
+							  xlrec->cutoff_xid, xlrec->flags);
 
 		ReleaseBuffer(vmbuffer);
 		FreeFakeRelcacheEntry(reln);
@@ -7743,7 +7772,10 @@ heap_xlog_delete(XLogReaderState *record)
 		PageSetPrunable(page, XLogRecGetXid(record));
 
 		if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
+		{
 			PageClearAllVisible(page);
+			PageClearAllFrozen(page);
+		}
 
 		/* Make sure there is no forward chain link in t_ctid */
 		htup->t_ctid = target_tid;
@@ -7847,7 +7879,10 @@ heap_xlog_insert(XLogReaderState *record)
 		PageSetLSN(page, lsn);
 
 		if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
+		{
 			PageClearAllVisible(page);
+			PageClearAllFrozen(page);
+		}
 
 		MarkBufferDirty(buffer);
 	}
@@ -7986,7 +8021,10 @@ heap_xlog_multi_insert(XLogReaderState *record)
 		PageSetLSN(page, lsn);
 
 		if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
+		{
 			PageClearAllVisible(page);
+			PageClearAllFrozen(page);
+		}
 
 		MarkBufferDirty(buffer);
 	}
@@ -8114,7 +8152,10 @@ heap_xlog_update(XLogReaderState *record, bool hot_update)
 		PageSetPrunable(page, XLogRecGetXid(record));
 
 		if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
+		{
 			PageClearAllVisible(page);
+			PageClearAllFrozen(page);
+		}
 
 		PageSetLSN(page, lsn);
 		MarkBufferDirty(obuffer);
@@ -8249,7 +8290,10 @@ heap_xlog_update(XLogReaderState *record, bool hot_update)
 			elog(PANIC, "heap_update_redo: failed to add tuple");
 
 		if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)
+		{
 			PageClearAllVisible(page);
+			PageClearAllFrozen(page);
+		}
 
 		freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
 
diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c
index 7c38772..a284b85 100644
--- a/src/backend/access/heap/visibilitymap.c
+++ b/src/backend/access/heap/visibilitymap.c
@@ -21,33 +21,45 @@
  *
  * NOTES
  *
- * The visibility map is a bitmap with one bit per heap page. A set bit means
- * that all tuples on the page are known visible to all transactions, and
- * therefore the page doesn't need to be vacuumed. The map is conservative in
- * the sense that we make sure that whenever a bit is set, we know the
- * condition is true, but if a bit is not set, it might or might not be true.
+ * The visibility map is a bitmap with two bits (all-visible and all-frozen)
+ * per heap page. A set all-visible bit means that all tuples on the page are
+ * known visible to all transactions, and therefore the page doesn't need to
+ * be vacuumed. A set all-frozen bit means that all tuples on the page are
+ * completely frozen, and therefore the page doesn't need to be vacuumed even
+ * when a whole-table-scanning vacuum (e.g. an anti-wraparound vacuum) is
+ * required. The all-frozen bit must be set only when the page is already
+ * all-visible; that is, it is always set together with the all-visible bit.
+ *
+ * The map is conservative in the sense that we make sure that whenever a bit
+ * is set, we know the condition is true, but if a bit is not set, it might or
+ * might not be true.
  *
  * Clearing a visibility map bit is not separately WAL-logged.  The callers
  * must make sure that whenever a bit is cleared, the bit is cleared on WAL
- * replay of the updating operation as well.
+ * replay of the updating operation as well.  The all-frozen bit must be
+ * cleared together with the all-visible bit.
  *
  * When we *set* a visibility map during VACUUM, we must write WAL.  This may
  * seem counterintuitive, since the bit is basically a hint: if it is clear,
- * it may still be the case that every tuple on the page is visible to all
- * transactions; we just don't know that for certain.  The difficulty is that
- * there are two bits which are typically set together: the PD_ALL_VISIBLE bit
- * on the page itself, and the visibility map bit.  If a crash occurs after the
- * visibility map page makes it to disk and before the updated heap page makes
- * it to disk, redo must set the bit on the heap page.  Otherwise, the next
- * insert, update, or delete on the heap page will fail to realize that the
- * visibility map bit must be cleared, possibly causing index-only scans to
- * return wrong answers.
+ * it may still be the case that every tuple on the page is visible to all
+ * transactions, or frozen; we just don't know that for certain.  The difficulty is
+ * that there are two bits which are typically set together: the PD_ALL_VISIBLE
+ * or PD_ALL_FROZEN bit on the page itself, and the visibility map bit.  If a
+ * crash occurs after the visibility map page makes it to disk and before the
+ * updated heap page makes it to disk, redo must set the bit on the heap page.
+ * Otherwise, the next insert, update, or delete on the heap page will fail to
+ * realize that the visibility map bit must be cleared, possibly causing index-only
+ * scans to return wrong answers.
  *
  * VACUUM will normally skip pages for which the visibility map bit is set;
  * such pages can't contain any dead tuples and therefore don't need vacuuming.
- * The visibility map is not used for anti-wraparound vacuums, because
+ * The visibility map is not used for anti-wraparound vacuums before 9.6, because
  * an anti-wraparound vacuum needs to freeze tuples and observe the latest xid
  * present in the table, even on pages that don't have any dead tuples.
+ * In 9.6 or later, the visibility map has an additional bit which indicates
+ * that all tuples on a page have been completely frozen, so the visibility
+ * map is also used for anti-wraparound vacuums.
+ *
  *
  * LOCKING
  *
@@ -58,14 +70,14 @@
  * section that logs the page modification. However, we don't want to hold
  * the buffer lock over any I/O that may be required to read in the visibility
  * map page.  To avoid this, we examine the heap page before locking it;
- * if the page-level PD_ALL_VISIBLE bit is set, we pin the visibility map
- * bit.  Then, we lock the buffer.  But this creates a race condition: there
- * is a possibility that in the time it takes to lock the buffer, the
- * PD_ALL_VISIBLE bit gets set.  If that happens, we have to unlock the
- * buffer, pin the visibility map page, and relock the buffer.  This shouldn't
- * happen often, because only VACUUM currently sets visibility map bits,
- * and the race will only occur if VACUUM processes a given page at almost
- * exactly the same time that someone tries to further modify it.
+ * if the page-level PD_ALL_VISIBLE or PD_ALL_FROZEN bit is set, we pin the
+ * visibility map bit.  Then, we lock the buffer.  But this creates a race
+ * condition: there is a possibility that in the time it takes to lock the
+ * buffer, the PD_ALL_VISIBLE or PD_ALL_FROZEN bit gets set.  If that happens,
+ * we have to unlock the buffer, pin the visibility map page, and relock the
+ * buffer.  This shouldn't happen often, because only VACUUM currently sets
+ * visibility map bits, and the race will only occur if VACUUM processes a given
+ * page at almost exactly the same time that someone tries to further modify it.
  *
  * To set a bit, you need to hold a lock on the heap page. That prevents
  * the race condition where VACUUM sees that all tuples on the page are
@@ -101,11 +113,14 @@
  */
 #define MAPSIZE (BLCKSZ - MAXALIGN(SizeOfPageHeaderData))
 
-/* Number of bits allocated for each heap block. */
-#define BITS_PER_HEAPBLOCK 1
+/*
+ * Number of bits allocated for each heap block.
+ * One bit for all-visible, the other for all-frozen.
+ */
+#define BITS_PER_HEAPBLOCK 2
 
 /* Number of heap blocks we can represent in one byte. */
-#define HEAPBLOCKS_PER_BYTE 8
+#define HEAPBLOCKS_PER_BYTE 4
 
 /* Number of heap blocks we can represent in one visibility map page. */
 #define HEAPBLOCKS_PER_PAGE (MAPSIZE * HEAPBLOCKS_PER_BYTE)
@@ -115,24 +130,42 @@
 #define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
 #define HEAPBLK_TO_MAPBIT(x) ((x) % HEAPBLOCKS_PER_BYTE)
 
-/* table for fast counting of set bits */
-static const uint8 number_of_ones[256] = {
-	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
-	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-	4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
+/* tables for fast counting of set bits for visible and frozen */
+static const uint8 number_of_ones_for_visible[256] = {
+	0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
+	1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+	0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
+	1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+	1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+	2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
+	1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+	2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
+	0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
+	1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+	0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
+	1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+	1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+	2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
+	1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+	2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4
+};
+static const uint8 number_of_ones_for_frozen[256] = {
+	0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
+	0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
+	1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+	1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+	0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
+	0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
+	1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+	1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+	1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+	1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+	2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
+	2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
+	1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+	1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+	2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
+	2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4
 };
 
 /* prototypes for internal routines */
@@ -141,7 +174,7 @@ static void vm_extend(Relation rel, BlockNumber nvmblocks);
 
 
 /*
- *	visibilitymap_clear - clear a bit in visibility map
+ *	visibilitymap_clear - clear all bits in visibility map
  *
  * You must pass a buffer containing the correct map page to this function.
  * Call visibilitymap_pin first to pin the right one. This function doesn't do
@@ -153,7 +186,8 @@ visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer buf)
 	BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
 	int			mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
 	int			mapBit = HEAPBLK_TO_MAPBIT(heapBlk);
-	uint8		mask = 1 << mapBit;
+	uint8		mask = (VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN) <<
+		(BITS_PER_HEAPBLOCK * mapBit);
 	char	   *map;
 
 #ifdef TRACE_VISIBILITYMAP
@@ -225,7 +259,7 @@ visibilitymap_pin_ok(BlockNumber heapBlk, Buffer buf)
 }
 
 /*
- *	visibilitymap_set - set a bit on a previously pinned page
+ *	visibilitymap_set - set bit(s) on a previously pinned page
  *
  * recptr is the LSN of the XLOG record we're replaying, if we're in recovery,
  * or InvalidXLogRecPtr in normal running.  The page LSN is advanced to the
@@ -234,10 +268,11 @@ visibilitymap_pin_ok(BlockNumber heapBlk, Buffer buf)
  * marked all-visible; it is needed for Hot Standby, and can be
  * InvalidTransactionId if the page contains no tuples.
  *
- * Caller is expected to set the heap page's PD_ALL_VISIBLE bit before calling
- * this function. Except in recovery, caller should also pass the heap
- * buffer. When checksums are enabled and we're not in recovery, we must add
- * the heap buffer to the WAL chain to protect it from being torn.
+ * Caller is expected to set the heap page's PD_ALL_VISIBLE or PD_ALL_FROZEN
+ * bit before calling this function. Except in recovery, caller should also
+ * pass the heap buffer and the flags indicating which bits to set.
+ * When checksums are enabled and we're not in recovery, we must add the heap
+ * buffer to the WAL chain to protect it from being torn.
  *
  * You must pass a buffer containing the correct map page to this function.
  * Call visibilitymap_pin first to pin the right one. This function doesn't do
@@ -245,7 +280,8 @@ visibilitymap_pin_ok(BlockNumber heapBlk, Buffer buf)
  */
 void
 visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
-				  XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid)
+				  XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid,
+				  uint8 flags)
 {
 	BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
 	uint32		mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
@@ -254,7 +290,7 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
 	char	   *map;
 
 #ifdef TRACE_VISIBILITYMAP
-	elog(DEBUG1, "vm_set %s %d", RelationGetRelationName(rel), heapBlk);
+	elog(DEBUG1, "vm_set %s %d %u", RelationGetRelationName(rel), heapBlk, flags);
 #endif
 
 	Assert(InRecovery || XLogRecPtrIsInvalid(recptr));
@@ -272,11 +308,11 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
 	map = PageGetContents(page);
 	LockBuffer(vmBuf, BUFFER_LOCK_EXCLUSIVE);
 
-	if (!(map[mapByte] & (1 << mapBit)))
+	if (flags != (map[mapByte] & (flags << (BITS_PER_HEAPBLOCK * mapBit))))
 	{
 		START_CRIT_SECTION();
 
-		map[mapByte] |= (1 << mapBit);
+		map[mapByte] |= (flags << (BITS_PER_HEAPBLOCK * mapBit));
 		MarkBufferDirty(vmBuf);
 
 		if (RelationNeedsWAL(rel))
@@ -285,7 +321,7 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
 			{
 				Assert(!InRecovery);
 				recptr = log_heap_visible(rel->rd_node, heapBuf, vmBuf,
-										  cutoff_xid);
+										  cutoff_xid, flags);
 
 				/*
 				 * If data checksums are enabled (or wal_log_hints=on), we
@@ -295,11 +331,15 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
 				{
 					Page		heapPage = BufferGetPage(heapBuf);
 
-					/* caller is expected to set PD_ALL_VISIBLE first */
-					Assert(PageIsAllVisible(heapPage));
+					/*
+					 * caller is expected to set PD_ALL_VISIBLE or
+					 * PD_ALL_FROZEN first.
+					 */
+					Assert(PageIsAllVisible(heapPage) || PageIsAllFrozen(heapPage));
 					PageSetLSN(heapPage, recptr);
 				}
 			}
+
 			PageSetLSN(page, recptr);
 		}
 
@@ -310,15 +350,16 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
 }
 
 /*
- *	visibilitymap_test - test if a bit is set
+ *	visibilitymap_test - test if bit(s) are set
  *
- * Are all tuples on heapBlk visible to all, according to the visibility map?
+ * Are all tuples on heapBlk visible to all transactions, or all frozen,
+ * according to the visibility map?
  *
  * On entry, *buf should be InvalidBuffer or a valid buffer returned by an
  * earlier call to visibilitymap_pin or visibilitymap_test on the same
  * relation. On return, *buf is a valid buffer with the map page containing
  * the bit for heapBlk, or InvalidBuffer. The caller is responsible for
- * releasing *buf after it's done testing and setting bits.
+ * releasing *buf after it's done testing and setting bits, and must pass the
+ * flags indicating which bits to test.
  *
  * NOTE: This function is typically called without a lock on the heap page,
  * so somebody else could change the bit just after we look at it.  In fact,
@@ -328,7 +369,7 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
  * all concurrency issues!
  */
 bool
-visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf)
+visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf, uint8 flags)
 {
 	BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
 	uint32		mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
@@ -337,7 +378,7 @@ visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf)
 	char	   *map;
 
 #ifdef TRACE_VISIBILITYMAP
-	elog(DEBUG1, "vm_test %s %d", RelationGetRelationName(rel), heapBlk);
+	elog(DEBUG1, "vm_test %s %d %u", RelationGetRelationName(rel), heapBlk, flags);
 #endif
 
 	/* Reuse the old pinned buffer if possible */
@@ -360,11 +401,12 @@ visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf)
 	map = PageGetContents(BufferGetPage(*buf));
 
 	/*
-	 * A single-bit read is atomic.  There could be memory-ordering effects
+	 * A one- or two-bit read is atomic.  There could be memory-ordering effects
 	 * here, but for performance reasons we make it the caller's job to worry
 	 * about that.
 	 */
-	result = (map[mapByte] & (1 << mapBit)) ? true : false;
+	result = (map[mapByte] & (flags << (BITS_PER_HEAPBLOCK * mapBit))) ?
+		true : false;
 
 	return result;
 }
@@ -374,10 +416,11 @@ visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf)
  *
  * Note: we ignore the possibility of race conditions when the table is being
  * extended concurrently with the call.  New pages added to the table aren't
- * going to be marked all-visible, so they won't affect the result.
+ * going to be marked all-visible or all-frozen, so they won't affect the result.
+ * The caller must pass the flags indicating which bits to count.
  */
 BlockNumber
-visibilitymap_count(Relation rel)
+visibilitymap_count(Relation rel, uint8 flags)
 {
 	BlockNumber result = 0;
 	BlockNumber mapBlock;
@@ -406,7 +449,10 @@ visibilitymap_count(Relation rel)
 
 		for (i = 0; i < MAPSIZE; i++)
 		{
-			result += number_of_ones[map[i]];
+			if (flags & VISIBILITYMAP_ALL_VISIBLE)
+				result += number_of_ones_for_visible[map[i]];
+			if (flags & VISIBILITYMAP_ALL_FROZEN)
+				result += number_of_ones_for_frozen[map[i]];
 		}
 
 		ReleaseBuffer(mapBuffer);
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index e59b163..10f8dc9 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -1919,11 +1919,18 @@ index_update_stats(Relation rel,
 	{
 		BlockNumber relpages = RelationGetNumberOfBlocks(rel);
 		BlockNumber relallvisible;
+		BlockNumber relallfrozen;
 
 		if (rd_rel->relkind != RELKIND_INDEX)
-			relallvisible = visibilitymap_count(rel);
+		{
+			relallvisible = visibilitymap_count(rel, VISIBILITYMAP_ALL_VISIBLE);
+			relallfrozen = visibilitymap_count(rel, VISIBILITYMAP_ALL_FROZEN);
+		}
 		else	/* don't bother for indexes */
+		{
 			relallvisible = 0;
+			relallfrozen = 0;
+		}
 
 		if (rd_rel->relpages != (int32) relpages)
 		{
@@ -1940,6 +1947,11 @@ index_update_stats(Relation rel,
 			rd_rel->relallvisible = (int32) relallvisible;
 			dirty = true;
 		}
+		if (rd_rel->relallfrozen != (int32) relallfrozen)
+		{
+			rd_rel->relallfrozen = (int32) relallfrozen;
+			dirty = true;
+		}
 	}
 
 	/*
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 861048f..392c2a4 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -572,7 +572,8 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params,
 		vac_update_relstats(onerel,
 							relpages,
 							totalrows,
-							visibilitymap_count(onerel),
+							visibilitymap_count(onerel, VISIBILITYMAP_ALL_VISIBLE),
+							visibilitymap_count(onerel, VISIBILITYMAP_ALL_FROZEN),
 							hasindex,
 							InvalidTransactionId,
 							InvalidMultiXactId,
@@ -595,6 +596,7 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params,
 								RelationGetNumberOfBlocks(Irel[ind]),
 								totalindexrows,
 								0,
+								0,
 								false,
 								InvalidTransactionId,
 								InvalidMultiXactId,
diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index 7ab4874..d3725dd 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -22,6 +22,7 @@
 #include "access/rewriteheap.h"
 #include "access/transam.h"
 #include "access/tuptoaster.h"
+#include "access/visibilitymap.h"
 #include "access/xact.h"
 #include "access/xlog.h"
 #include "catalog/catalog.h"
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 85b0483..744bfff 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -747,6 +747,7 @@ void
 vac_update_relstats(Relation relation,
 					BlockNumber num_pages, double num_tuples,
 					BlockNumber num_all_visible_pages,
+					BlockNumber num_all_frozen_pages,
 					bool hasindex, TransactionId frozenxid,
 					MultiXactId minmulti,
 					bool in_outer_xact)
@@ -784,6 +785,11 @@ vac_update_relstats(Relation relation,
 		pgcform->relallvisible = (int32) num_all_visible_pages;
 		dirty = true;
 	}
+	if (pgcform->relallfrozen != (int32) num_all_frozen_pages)
+	{
+		pgcform->relallfrozen = (int32) num_all_frozen_pages;
+		dirty = true;
+	}
 
 	/* Apply DDL updates, but not inside an outer transaction (see above) */
 
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c
index a01cfb4..120de63 100644
--- a/src/backend/commands/vacuumlazy.c
+++ b/src/backend/commands/vacuumlazy.c
@@ -106,6 +106,8 @@ typedef struct LVRelStats
 	BlockNumber rel_pages;		/* total number of pages */
 	BlockNumber scanned_pages;	/* number of pages we examined */
 	BlockNumber pinskipped_pages;		/* # of pages we skipped due to a pin */
+	BlockNumber vmskipped_frozen_pages; /* # of pages we skipped due to the
+									all-frozen bit of the visibility map */
 	double		scanned_tuples; /* counts only tuples on scanned pages */
 	double		old_rel_tuples; /* previous value of pg_class.reltuples */
 	double		new_rel_tuples; /* new estimated total # of tuples */
@@ -156,7 +158,7 @@ static void lazy_record_dead_tuple(LVRelStats *vacrelstats,
 static bool lazy_tid_reaped(ItemPointer itemptr, void *state);
 static int	vac_cmp_itemptr(const void *left, const void *right);
 static bool heap_page_is_all_visible(Relation rel, Buffer buf,
-						 TransactionId *visibility_cutoff_xid);
+						 TransactionId *visibility_cutoff_xid, bool *all_frozen);
 
 
 /*
@@ -188,7 +190,8 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
 	MultiXactId mxactFullScanLimit;
 	BlockNumber new_rel_pages;
 	double		new_rel_tuples;
-	BlockNumber new_rel_allvisible;
+	BlockNumber new_rel_allvisible,
+				new_rel_allfrozen;
 	double		new_live_tuples;
 	TransactionId new_frozen_xid;
 	MultiXactId new_min_multi;
@@ -222,6 +225,8 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
 	 * than or equal to the requested Xid full-table scan limit; or if the
 	 * table's minimum MultiXactId is older than or equal to the requested
 	 * mxid full-table scan limit.
+	 * Even if scan_all is set, we can still skip pages that the visibility
+	 * map shows as all-frozen.
 	 */
 	scan_all = TransactionIdPrecedesOrEquals(onerel->rd_rel->relfrozenxid,
 											 xidFullScanLimit);
@@ -253,7 +258,8 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
 	 * NB: We need to check this before truncating the relation, because that
 	 * will change ->rel_pages.
 	 */
-	if (vacrelstats->scanned_pages < vacrelstats->rel_pages)
+	if ((vacrelstats->scanned_pages + vacrelstats->vmskipped_frozen_pages)
+		< vacrelstats->rel_pages)
 	{
 		Assert(!scan_all);
 		scanned_all = false;
@@ -301,10 +307,14 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
 		new_rel_tuples = vacrelstats->old_rel_tuples;
 	}
 
-	new_rel_allvisible = visibilitymap_count(onerel);
+	new_rel_allvisible = visibilitymap_count(onerel, VISIBILITYMAP_ALL_VISIBLE);
 	if (new_rel_allvisible > new_rel_pages)
 		new_rel_allvisible = new_rel_pages;
 
+	new_rel_allfrozen = visibilitymap_count(onerel, VISIBILITYMAP_ALL_FROZEN);
+	if (new_rel_allfrozen > new_rel_pages)
+		new_rel_allfrozen = new_rel_pages;
+
 	new_frozen_xid = scanned_all ? FreezeLimit : InvalidTransactionId;
 	new_min_multi = scanned_all ? MultiXactCutoff : InvalidMultiXactId;
 
@@ -312,6 +322,7 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
 						new_rel_pages,
 						new_rel_tuples,
 						new_rel_allvisible,
+						new_rel_allfrozen,
 						vacrelstats->hasindex,
 						new_frozen_xid,
 						new_min_multi,
@@ -360,10 +371,11 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
 							 get_namespace_name(RelationGetNamespace(onerel)),
 							 RelationGetRelationName(onerel),
 							 vacrelstats->num_index_scans);
-			appendStringInfo(&buf, _("pages: %u removed, %u remain, %u skipped due to pins\n"),
+			appendStringInfo(&buf, _("pages: %u removed, %u remain, %u skipped due to pins, %u skipped according to vm\n"),
 							 vacrelstats->pages_removed,
 							 vacrelstats->rel_pages,
-							 vacrelstats->pinskipped_pages);
+							 vacrelstats->pinskipped_pages,
+							 vacrelstats->vmskipped_frozen_pages);
 			appendStringInfo(&buf,
 							 _("tuples: %.0f removed, %.0f remain, %.0f are dead but not yet removable\n"),
 							 vacrelstats->tuples_deleted,
@@ -486,9 +498,12 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 	 * consecutive pages.  Since we're reading sequentially, the OS should be
 	 * doing readahead for us, so there's no gain in skipping a page now and
 	 * then; that's likely to disable readahead and so be counterproductive.
-	 * Also, skipping even a single page means that we can't update
-	 * relfrozenxid, so we only want to do it if we can skip a goodly number
-	 * of pages.
+	 * Also, skipping even a single page according to the all-visible bit of
+	 * the visibility map means that we can't update relfrozenxid, so we only
+	 * want to do it if we can skip a goodly number. On the other hand, we
+	 * count both how many pages we skipped according to the all-frozen bit of
+	 * the visibility map and how many pages we froze, so we can update
+	 * relfrozenxid if their sum equals the number of pages in the table.
 	 *
 	 * Before entering the main loop, establish the invariant that
 	 * next_not_all_visible_block is the next block number >= blkno that's not
@@ -515,7 +530,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 		 next_not_all_visible_block < nblocks;
 		 next_not_all_visible_block++)
 	{
-		if (!visibilitymap_test(onerel, next_not_all_visible_block, &vmbuffer))
+		if (!visibilitymap_test(onerel, next_not_all_visible_block, &vmbuffer,
+								VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN))
 			break;
 		vacuum_delay_point();
 	}
@@ -533,7 +549,10 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 		bool		tupgone,
 					hastup;
 		int			prev_dead_count;
-		int			nfrozen;
+		int			nfrozen; /* # of tuples we froze on this page */
+		int			nalready_frozen; /* # of tuples already frozen */
+		int			ntotal_frozen; /* total # of frozen tuples on this page */
+		int			ntup_per_page; /* # of tuples on this page */
 		Size		freespace;
 		bool		all_visible_according_to_vm;
 		bool		all_visible;
@@ -548,7 +567,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 				 next_not_all_visible_block++)
 			{
 				if (!visibilitymap_test(onerel, next_not_all_visible_block,
-										&vmbuffer))
+										&vmbuffer,
+										VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN))
 					break;
 				vacuum_delay_point();
 			}
@@ -566,9 +586,25 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 		}
 		else
 		{
-			/* Current block is all-visible */
-			if (skipping_all_visible_blocks && !scan_all)
-				continue;
+			/*
+			 * This block is at least all-visible according to the visibility
+			 * map. We check whether it is also all-frozen, so that we can skip
+			 * it even when a whole-table scan is required.
+			 */
+			if (scan_all)
+			{
+				if (visibilitymap_test(onerel, blkno, &vmbuffer, VISIBILITYMAP_ALL_FROZEN))
+				{
+					vacrelstats->vmskipped_frozen_pages++;
+					continue;
+				}
+			}
+			else
+			{
+				if (skipping_all_visible_blocks)
+					continue;
+			}
+
 			all_visible_according_to_vm = true;
 		}
 
@@ -740,7 +776,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 
 				PageSetAllVisible(page);
 				visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
-								  vmbuffer, InvalidTransactionId);
+								  vmbuffer, InvalidTransactionId,
+								  VISIBILITYMAP_ALL_VISIBLE);
 				END_CRIT_SECTION();
 			}
 
@@ -764,6 +801,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 		all_visible = true;
 		has_dead_tuples = false;
 		nfrozen = 0;
+		nalready_frozen = 0;
+		ntup_per_page = 0;
 		hastup = false;
 		prev_dead_count = vacrelstats->num_dead_tuples;
 		maxoff = PageGetMaxOffsetNumber(page);
@@ -918,8 +957,13 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 			else
 			{
 				num_tuples += 1;
+				ntup_per_page += 1;
 				hastup = true;
 
+				/* Check whether this tuple is already frozen */
+				if (HeapTupleHeaderXminFrozen(tuple.t_data))
+					nalready_frozen += 1;
+
 				/*
 				 * Each non-removable tuple must be checked to see if it needs
 				 * freezing.  Note we already have exclusive buffer lock.
@@ -931,9 +975,10 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 		}						/* scan along page */
 
 		/*
-		 * If we froze any tuples, mark the buffer dirty, and write a WAL
-		 * record recording the changes.  We must log the changes to be
-		 * crash-safe against future truncation of CLOG.
+		 * If we froze any tuples or any tuples are already frozen,
+		 * mark the buffer dirty, and write a WAL record recording the changes.
+		 * We must log the changes to be crash-safe against future truncation
+		 * of CLOG.
 		 */
 		if (nfrozen > 0)
 		{
@@ -966,6 +1011,9 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 			END_CRIT_SECTION();
 		}
 
+		/* Compute the total number of frozen tuples on this page */
+		ntotal_frozen = nfrozen + nalready_frozen;
+
 		/*
 		 * If there are no indexes then we can vacuum the page right now
 		 * instead of doing a second scan.
@@ -988,26 +1036,47 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 
 		freespace = PageGetHeapFreeSpace(page);
 
-		/* mark page all-visible, if appropriate */
-		if (all_visible && !all_visible_according_to_vm)
+		/* This page is all visible */
+		if (all_visible)
 		{
-			/*
-			 * It should never be the case that the visibility map page is set
-			 * while the page-level bit is clear, but the reverse is allowed
-			 * (if checksums are not enabled).  Regardless, set the both bits
-			 * so that we get back in sync.
-			 *
-			 * NB: If the heap page is all-visible but the VM bit is not set,
-			 * we don't need to dirty the heap page.  However, if checksums
-			 * are enabled, we do need to make sure that the heap page is
-			 * dirtied before passing it to visibilitymap_set(), because it
-			 * may be logged.  Given that this situation should only happen in
-			 * rare cases after a crash, it is not worth optimizing.
-			 */
-			PageSetAllVisible(page);
-			MarkBufferDirty(buf);
-			visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
-							  vmbuffer, visibility_cutoff_xid);
+			uint8 flags = 0;
+
+			/* mark page all-visible, if appropriate */
+			if (!all_visible_according_to_vm)
+			{
+				/*
+				 * It should never be the case that the visibility map page is set
+				 * while the page-level bit is clear, but the reverse is allowed
+				 * (if checksums are not enabled).  Regardless, set the both bits
+				 * so that we get back in sync.
+				 *
+				 * NB: If the heap page is all-visible but the VM bit is not set,
+				 * we don't need to dirty the heap page.  However, if checksums
+				 * are enabled, we do need to make sure that the heap page is
+				 * dirtied before passing it to visibilitymap_set(), because it
+				 * may be logged.  Given that this situation should only happen in
+				 * rare cases after a crash, it is not worth optimizing.
+				 */
+				PageSetAllVisible(page);
+				flags |= VISIBILITYMAP_ALL_VISIBLE;
+			}
+
+			/* mark page all-frozen, if all tuples are frozen in total */
+			if ((ntotal_frozen == ntup_per_page) &&
+				!visibilitymap_test(onerel, blkno, &vmbuffer, VISIBILITYMAP_ALL_FROZEN))
+			{
+				Assert(PageIsAllVisible(page));
+
+				PageSetAllFrozen(page);
+				flags |= VISIBILITYMAP_ALL_FROZEN;
+			}
+
+			if (flags)
+			{
+				MarkBufferDirty(buf);
+				visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
+								  vmbuffer, visibility_cutoff_xid, flags);
+			}
 		}
 
 		/*
@@ -1018,7 +1087,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 		 * that something bad has happened.
 		 */
 		else if (all_visible_according_to_vm && !PageIsAllVisible(page)
-				 && visibilitymap_test(onerel, blkno, &vmbuffer))
+				 && visibilitymap_test(onerel, blkno, &vmbuffer, VISIBILITYMAP_ALL_VISIBLE))
 		{
 			elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
 				 relname, blkno);
@@ -1047,6 +1116,17 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 			visibilitymap_clear(onerel, blkno, vmbuffer);
 		}
 
+		/*
+		 * As a result of scanning this page, set the VM all-frozen bit and the
+		 * page-header flag if all tuples on the page are frozen.
+		 */
+		if (ntotal_frozen == ntup_per_page)
+		{
+			PageSetAllFrozen(page);
+			visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr, vmbuffer,
+							  InvalidTransactionId, VISIBILITYMAP_ALL_FROZEN);
+		}
+
 		UnlockReleaseBuffer(buf);
 
 		/* Remember the location of the last page with nonremovable tuples */
@@ -1078,7 +1158,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 														 num_tuples);
 
 	/*
-	 * Release any remaining pin on visibility map page.
+	 * Release any remaining pin on the visibility map page.
 	 */
 	if (BufferIsValid(vmbuffer))
 	{
@@ -1115,6 +1195,14 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 						tups_vacuumed, vacuumed_pages)));
 
 	/*
+	 * This information shows how effective the all-frozen bit of the
+	 * visibility map was at letting us skip already-frozen pages.
+	 */
+	ereport(elevel,
+			(errmsg("Skipped %d frozen pages acoording to visibility map",
+					vacrelstats->vmskipped_frozen_pages)));
+
+	/*
 	 * This is pretty messy, but we split it up so that we can skip emitting
 	 * individual parts of the message when not applicable.
 	 */
@@ -1226,6 +1314,7 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
 	OffsetNumber unused[MaxOffsetNumber];
 	int			uncnt = 0;
 	TransactionId visibility_cutoff_xid;
+	bool		all_frozen;
 
 	START_CRIT_SECTION();
 
@@ -1277,19 +1366,31 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
 	 * dirty, exclusively locked, and, if needed, a full page image has been
 	 * emitted in the log_heap_clean() above.
 	 */
-	if (heap_page_is_all_visible(onerel, buffer, &visibility_cutoff_xid))
+	if (heap_page_is_all_visible(onerel, buffer, &visibility_cutoff_xid, &all_frozen))
 		PageSetAllVisible(page);
 
 	/*
 	 * All the changes to the heap page have been done. If the all-visible
-	 * flag is now set, also set the VM bit.
+	 * flag is now set, also set the VM all-visible bit.
+	 * If the page is also all-frozen, set the VM all-frozen bit and page flag.
 	 */
-	if (PageIsAllVisible(page) &&
-		!visibilitymap_test(onerel, blkno, vmbuffer))
+	if (PageIsAllVisible(page))
 	{
-		Assert(BufferIsValid(*vmbuffer));
-		visibilitymap_set(onerel, blkno, buffer, InvalidXLogRecPtr, *vmbuffer,
-						  visibility_cutoff_xid);
+		uint8 flags = 0;
+
+		if (!visibilitymap_test(onerel, blkno, vmbuffer, VISIBILITYMAP_ALL_VISIBLE))
+			flags |= VISIBILITYMAP_ALL_VISIBLE;
+
+		/* mark page all-frozen, and set VM all-frozen bit */
+		if (all_frozen)
+		{
+			PageSetAllFrozen(page);
+			flags |= VISIBILITYMAP_ALL_FROZEN;
+		}
+
+		if (flags)
+			visibilitymap_set(onerel, blkno, buffer, InvalidXLogRecPtr, *vmbuffer,
+							  visibility_cutoff_xid, flags);
 	}
 
 	return tupindex;
@@ -1408,6 +1509,7 @@ lazy_cleanup_index(Relation indrel,
 							stats->num_pages,
 							stats->num_index_tuples,
 							0,
+							0,
 							false,
 							InvalidTransactionId,
 							InvalidMultiXactId,
@@ -1782,7 +1884,8 @@ vac_cmp_itemptr(const void *left, const void *right)
  * xmin amongst the visible tuples.
  */
 static bool
-heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cutoff_xid)
+heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cutoff_xid,
+						 bool *all_frozen)
 {
 	Page		page = BufferGetPage(buf);
 	BlockNumber blockno = BufferGetBlockNumber(buf);
@@ -1791,6 +1894,7 @@ heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cut
 	bool		all_visible = true;
 
 	*visibility_cutoff_xid = InvalidTransactionId;
+	*all_frozen = true;
 
 	/*
 	 * This is a stripped down version of the line pointer scan in
@@ -1814,7 +1918,7 @@ heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cut
 
 		/*
 		 * Dead line pointers can have index pointers pointing to them. So
-		 * they can't be treated as visible
+		 * they can't be treated as visible and frozen.
 		 */
 		if (ItemIdIsDead(itemid))
 		{
@@ -1855,6 +1959,10 @@ heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cut
 					/* Track newest xmin on page. */
 					if (TransactionIdFollows(xmin, *visibility_cutoff_xid))
 						*visibility_cutoff_xid = xmin;
+
+					/* Check whether this tuple is already frozen */
+					if (!HeapTupleHeaderXminFrozen(tuple.t_data))
+						*all_frozen = false;
 				}
 				break;
 
@@ -1863,6 +1971,7 @@ heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cut
 			case HEAPTUPLE_INSERT_IN_PROGRESS:
 			case HEAPTUPLE_DELETE_IN_PROGRESS:
 				all_visible = false;
+				*all_frozen = false;
 				break;
 
 			default:
diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c
index 9f54c46..08df289 100644
--- a/src/backend/executor/nodeIndexonlyscan.c
+++ b/src/backend/executor/nodeIndexonlyscan.c
@@ -116,7 +116,7 @@ IndexOnlyNext(IndexOnlyScanState *node)
 		 */
 		if (!visibilitymap_test(scandesc->heapRelation,
 								ItemPointerGetBlockNumber(tid),
-								&node->ioss_VMBuffer))
+								&node->ioss_VMBuffer, VISIBILITYMAP_ALL_VISIBLE))
 		{
 			/*
 			 * Rats, we have to visit the heap to check visibility.
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 1ef76d0..ee49ddf 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -127,7 +127,7 @@ ExecCheckPlanOutput(Relation resultRel, List *targetList)
 	if (attno != resultDesc->natts)
 		ereport(ERROR,
 				(errcode(ERRCODE_DATATYPE_MISMATCH),
-		  errmsg("table row type and query-specified row type do not match"),
+				 errmsg("table row type and query-specified row type do not match"),
 				 errdetail("Query has too few columns.")));
 }
 
diff --git a/src/bin/pg_upgrade/file.c b/src/bin/pg_upgrade/file.c
index 79d9390..8fededc 100644
--- a/src/bin/pg_upgrade/file.c
+++ b/src/bin/pg_upgrade/file.c
@@ -10,6 +10,7 @@
 #include "postgres_fe.h"
 
 #include "pg_upgrade.h"
+#include "storage/bufpage.h"
 
 #include <fcntl.h>
 
@@ -21,6 +22,27 @@ static int	copy_file(const char *fromfile, const char *tofile, bool force);
 static int	win32_pghardlink(const char *src, const char *dst);
 #endif
 
+static int rewrite_vm_to_vfm(const char *fromfile, const char *tofile, bool force);
+
+/* table for fast rewriting vm file to vfm file */
+static const uint16 rewrite_vm_to_vfm_table[256] = {
+	0,     1,     4,     5,     16,    17,    20,    21,    64,    65,    68,    69,    80,    81,    84,    85,
+	256,   257,   260,   261,   272,   273,   276,   277,   320,   321,   324,   325,   336,   337,   340,   341,
+	1024,  1025,  1028,  1029,  1040,  1041,  1044,  1045,  1088,  1089,  1092,  1093,  1104,  1105,  1108,  1109,
+	1280,  1281,  1284,  1285,  1296,  1297,  1300,  1301,  1344,  1345,  1348,  1349,  1360,  1361,  1364,  1365,
+	4096,  4097,  4100,  4101,  4112,  4113,  4116,  4117,  4160,  4161,  4164,  4165,  4176,  4177,  4180,  4181,
+	4352,  4353,  4356,  4357,  4368,  4369,  4372,  4373,  4416,  4417,  4420,  4421,  4432,  4433,  4436,  4437,
+	5120,  5121,  5124,  5125,  5136,  5137,  5140,  5141,  5184,  5185,  5188,  5189,  5200,  5201,  5204,  5205,
+	5376,  5377,  5380,  5381,  5392,  5393,  5396,  5397,  5440,  5441,  5444,  5445,  5456,  5457,  5460,  5461,
+	16384, 16385, 16388, 16389, 16400, 16401, 16404, 16405, 16448, 16449, 16452, 16453, 16464, 16465, 16468, 16469,
+	16640, 16641, 16644, 16645, 16656, 16657, 16660, 16661, 16704, 16705, 16708, 16709, 16720, 16721, 16724, 16725,
+	17408, 17409, 17412, 17413, 17424, 17425, 17428, 17429, 17472, 17473, 17476, 17477, 17488, 17489, 17492, 17493,
+	17664, 17665, 17668, 17669, 17680, 17681, 17684, 17685, 17728, 17729, 17732, 17733, 17744, 17745, 17748, 17749,
+	20480, 20481, 20484, 20485, 20496, 20497, 20500, 20501, 20544, 20545, 20548, 20549, 20560, 20561, 20564, 20565,
+	20736, 20737, 20740, 20741, 20752, 20753, 20756, 20757, 20800, 20801, 20804, 20805, 20816, 20817, 20820, 20821,
+	21504, 21505, 21508, 21509, 21520, 21521, 21524, 21525, 21568, 21569, 21572, 21573, 21584, 21585, 21588, 21589,
+	21760, 21761, 21764, 21765, 21776, 21777, 21780, 21781, 21824, 21825, 21828, 21829, 21840, 21841, 21844, 21845
+};
 
 /*
  * copyAndUpdateFile()
@@ -30,11 +52,19 @@ static int	win32_pghardlink(const char *src, const char *dst);
  */
 const char *
 copyAndUpdateFile(pageCnvCtx *pageConverter,
-				  const char *src, const char *dst, bool force)
+				  const char *src, const char *dst, bool force, bool rewrite_vm)
 {
+
 	if (pageConverter == NULL)
 	{
-		if (pg_copy_file(src, dst, force) == -1)
+		int ret;
+
+		if (rewrite_vm)
+			ret = rewrite_vm_to_vfm(src, dst, force);
+		else
+			ret = pg_copy_file(src, dst, force);
+
+		if (ret)
 			return getErrorText(errno);
 		else
 			return NULL;
@@ -99,7 +129,6 @@ copyAndUpdateFile(pageCnvCtx *pageConverter,
 	}
 }
 
-
 /*
  * linkAndUpdateFile()
  *
@@ -201,6 +230,110 @@ copy_file(const char *srcfile, const char *dstfile, bool force)
 #endif
 
 
+/*
+ * rewrite_vm_to_vfm()
+ *
+ * An additional bit indicating that all tuples on the page are completely
+ * frozen was added to the visibility map in PG 9.6, so the format of the
+ * visibility map has changed.  Copy a visibility map file while inserting a
+ * cleared all-frozen bit (0) after each all-visible bit.
+ */
+static int
+rewrite_vm_to_vfm(const char *fromfile, const char *tofile, bool force)
+{
+#define REWRITE_BUF_SIZE (50 * BLCKSZ)
+#define BITS_PER_HEAPBLOCK 2
+
+	int			src_fd, dst_fd;
+	uint16 		vfm_bits;
+	ssize_t 	nbytes;
+	char 		*buffer;
+	int			ret = 0;
+	int			save_errno = 0;
+
+	if ((fromfile == NULL) || (tofile == NULL))
+	{
+		errno = EINVAL;
+		return -1;
+	}
+
+	if ((src_fd = open(fromfile, O_RDONLY, 0)) < 0)
+		return -1;
+
+	if ((dst_fd = open(tofile, O_RDWR | O_CREAT | (force ? 0 : O_EXCL), S_IRUSR | S_IWUSR)) < 0)
+	{
+		save_errno = errno;
+		if (src_fd != 0)
+			close(src_fd);
+
+		errno = save_errno;
+		return -1;
+	}
+
+	buffer = (char *) pg_malloc(REWRITE_BUF_SIZE);
+
+	/* Copy page header data in advance */
+	if ((nbytes = read(src_fd, buffer, MAXALIGN(SizeOfPageHeaderData))) <= 0)
+	{
+		save_errno = errno;
+		return -1;
+	}
+
+	if (write(dst_fd, buffer, nbytes) != nbytes)
+	{
+		/* if write didn't set errno, assume problem is no disk space */
+		if (errno == 0)
+			errno = ENOSPC;
+		save_errno = errno;
+		return -1;
+	}
+
+	/* Perform the data rewriting, i.e. read from source, write to destination */
+	while (true)
+	{
+		ssize_t nbytes = read(src_fd, buffer, REWRITE_BUF_SIZE);
+		char *cur, *end;
+
+		if (nbytes < 0)
+		{
+			ret = -1;
+			break;
+		}
+
+		if (nbytes == 0)
+			break;
+
+		cur = buffer;
+		end = buffer + nbytes;
+
+		/*
+		 * Rewrite each source byte and write BITS_PER_HEAPBLOCK bytes to dst_fd.
+		 */
+		while (end > cur)
+		{
+			/* Get the rewritten bits from the conversion table */
+			vfm_bits = rewrite_vm_to_vfm_table[(uint8) *cur];
+
+			if (write(dst_fd, &vfm_bits, BITS_PER_HEAPBLOCK) != BITS_PER_HEAPBLOCK)
+			{
+				ret = -1;
+				break;
+			}
+			cur++;
+		}
+	}
+
+	pg_free(buffer);
+
+	if (src_fd != 0)
+		close(src_fd);
+
+	if (dst_fd != 0)
+		close(dst_fd);
+
+	return ret;
+}
+
 void
 check_hard_link(void)
 {
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index 13aa891..090422d 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -112,6 +112,11 @@ extern char *output_files[];
 #define VISIBILITY_MAP_CRASHSAFE_CAT_VER 201107031
 
 /*
+ * The format of the visibility map changed with this 9.6 commit,
+ *
+ */
+#define VISIBILITY_MAP_FROZEN_BIT_CAT_VER 201508181
+/*
  * pg_multixact format changed in 9.3 commit 0ac5ad5134f2769ccbaefec73844f85,
  * ("Improve concurrency of foreign key locking") which also updated catalog
  * version to this value.  pg_upgrade behavior depends on whether old and new
@@ -397,7 +402,7 @@ typedef void *pageCnvCtx;
 #endif
 
 const char *copyAndUpdateFile(pageCnvCtx *pageConverter, const char *src,
-				  const char *dst, bool force);
+				  const char *dst, bool force, bool rewrite_vm);
 const char *linkAndUpdateFile(pageCnvCtx *pageConverter, const char *src,
 				  const char *dst);
 
diff --git a/src/bin/pg_upgrade/relfilenode.c b/src/bin/pg_upgrade/relfilenode.c
index c22df42..766a473 100644
--- a/src/bin/pg_upgrade/relfilenode.c
+++ b/src/bin/pg_upgrade/relfilenode.c
@@ -18,7 +18,7 @@
 static void transfer_single_new_db(pageCnvCtx *pageConverter,
 					   FileNameMap *maps, int size, char *old_tablespace);
 static void transfer_relfile(pageCnvCtx *pageConverter, FileNameMap *map,
-				 const char *suffix);
+				 const char *type_old_suffix, const char *type_new_suffix);
 
 
 /*
@@ -171,6 +171,7 @@ transfer_single_new_db(pageCnvCtx *pageConverter,
 {
 	int			mapnum;
 	bool		vm_crashsafe_match = true;
+	bool		vm_rewrite_needed = false;
 
 	/*
 	 * Do the old and new cluster disagree on the crash-safetiness of the vm
@@ -180,13 +181,20 @@ transfer_single_new_db(pageCnvCtx *pageConverter,
 		new_cluster.controldata.cat_ver >= VISIBILITY_MAP_CRASHSAFE_CAT_VER)
 		vm_crashsafe_match = false;
 
+	/*
+	 * Do we need to rewrite "vm" files to "vfm" files?
+	 */
+	if (old_cluster.controldata.cat_ver < VISIBILITY_MAP_FROZEN_BIT_CAT_VER &&
+		new_cluster.controldata.cat_ver >= VISIBILITY_MAP_FROZEN_BIT_CAT_VER)
+		vm_rewrite_needed = true;
+
 	for (mapnum = 0; mapnum < size; mapnum++)
 	{
 		if (old_tablespace == NULL ||
 			strcmp(maps[mapnum].old_tablespace, old_tablespace) == 0)
 		{
 			/* transfer primary file */
-			transfer_relfile(pageConverter, &maps[mapnum], "");
+			transfer_relfile(pageConverter, &maps[mapnum], "", "");
 
 			/* fsm/vm files added in PG 8.4 */
 			if (GET_MAJOR_VERSION(old_cluster.major_version) >= 804)
@@ -194,9 +202,17 @@ transfer_single_new_db(pageCnvCtx *pageConverter,
 				/*
 				 * Copy/link any fsm and vm files, if they exist
 				 */
-				transfer_relfile(pageConverter, &maps[mapnum], "_fsm");
+				transfer_relfile(pageConverter, &maps[mapnum], "_fsm", "_fsm");
 				if (vm_crashsafe_match)
-					transfer_relfile(pageConverter, &maps[mapnum], "_vm");
+				{
+					/*
+					 * The vm file was renamed to vfm in PG 9.6.
+					 */
+					if (vm_rewrite_needed)
+						transfer_relfile(pageConverter, &maps[mapnum], "_vm", "_vfm");
+					else
+						transfer_relfile(pageConverter, &maps[mapnum], "_vm", "_vm");
+				}
 			}
 		}
 	}
@@ -210,7 +226,7 @@ transfer_single_new_db(pageCnvCtx *pageConverter,
  */
 static void
 transfer_relfile(pageCnvCtx *pageConverter, FileNameMap *map,
-				 const char *type_suffix)
+				 const char *type_old_suffix, const char *type_new_suffix)
 {
 	const char *msg;
 	char		old_file[MAXPGPATH];
@@ -218,6 +234,7 @@ transfer_relfile(pageCnvCtx *pageConverter, FileNameMap *map,
 	int			fd;
 	int			segno;
 	char		extent_suffix[65];
+	bool		rewrite_vm = false;
 
 	/*
 	 * Now copy/link any related segments as well. Remember, PG breaks large
@@ -236,18 +253,18 @@ transfer_relfile(pageCnvCtx *pageConverter, FileNameMap *map,
 				 map->old_tablespace_suffix,
 				 map->old_db_oid,
 				 map->old_relfilenode,
-				 type_suffix,
+				 type_old_suffix,
 				 extent_suffix);
 		snprintf(new_file, sizeof(new_file), "%s%s/%u/%u%s%s",
 				 map->new_tablespace,
 				 map->new_tablespace_suffix,
 				 map->new_db_oid,
 				 map->new_relfilenode,
-				 type_suffix,
+				 type_new_suffix,
 				 extent_suffix);
 
 		/* Is it an extent, fsm, or vm file? */
-		if (type_suffix[0] != '\0' || segno != 0)
+		if (type_old_suffix[0] != '\0' || segno != 0)
 		{
 			/* Did file open fail? */
 			if ((fd = open(old_file, O_RDONLY, 0)) == -1)
@@ -276,7 +293,11 @@ transfer_relfile(pageCnvCtx *pageConverter, FileNameMap *map,
 		{
 			pg_log(PG_VERBOSE, "copying \"%s\" to \"%s\"\n", old_file, new_file);
 
-			if ((msg = copyAndUpdateFile(pageConverter, old_file, new_file, true)) != NULL)
+			/* Rewrite the vm file to a vfm file if the fork suffix changed. */
+			if (strcmp(type_old_suffix, type_new_suffix) != 0)
+				rewrite_vm = true;
+
+			if ((msg = copyAndUpdateFile(pageConverter, old_file, new_file, true, rewrite_vm)) != NULL)
 				pg_fatal("error while copying relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
 						 map->nspname, map->relname, old_file, new_file, msg);
 		}
diff --git a/src/bin/pg_upgrade/test.sh b/src/bin/pg_upgrade/test.sh
index ec3a7ed..508757e 100644
--- a/src/bin/pg_upgrade/test.sh
+++ b/src/bin/pg_upgrade/test.sh
@@ -170,6 +170,11 @@ if "$MAKE" -C "$oldsrc" installcheck; then
 		mv "$temp_root"/dump1.sql "$temp_root"/dump1.sql.orig
 		sed "s;$oldsrc;$newsrc;g" "$temp_root"/dump1.sql.orig >"$temp_root"/dump1.sql
 	fi
+
+	# Test for rewriting the visibility map across the upgrade
+	vm_sql="SELECT c.relname, c.relallvisible FROM pg_class as c, pg_namespace as n WHERE c.relnamespace = n.oid AND n.nspname NOT IN ('information_schema', 'pg_toast', 'pg_catalog') ORDER BY c.relname;"
+	vacuumdb -d regression || visibilitymap_vacuum1_status=$?
+	psql -d regression -c "$vm_sql" > "$temp_root"/vm_test1.txt || visibilitymap_test1_status=$?
 else
 	make_installcheck_status=$?
 fi
@@ -184,6 +189,14 @@ if [ -n "$pg_dumpall1_status" ]; then
 	echo "pg_dumpall of pre-upgrade database cluster failed"
 	exit 1
 fi
+if [ -n "$visibilitymap_vacuum1_status" ]; then
+	echo "VACUUM of pre-upgrade database cluster for visibility map test failed"
+	exit 1
+fi
+if [ -n "$visibilitymap_test1_status" ]; then
+	echo "SELECT of pre-upgrade database cluster for visibility map test failed"
+	exit 1
+fi
 
 PGDATA=$BASE_PGDATA
 
@@ -199,6 +212,8 @@ case $testhost in
 esac
 
 pg_dumpall -f "$temp_root"/dump2.sql || pg_dumpall2_status=$?
+vacuumdb -d regression || visibilitymap_vacuum2_status=$?
+psql -d regression -c "$vm_sql" > "$temp_root"/vm_test2.txt || visibilitymap_test2_status=$?
 pg_ctl -m fast stop
 
 # no need to echo commands anymore
@@ -210,11 +225,26 @@ if [ -n "$pg_dumpall2_status" ]; then
 	exit 1
 fi
 
+if [ -n "$visibilitymap_vacuum2_status" ]; then
+	echo "VACUUM of post-upgrade database cluster for visibility map test failed"
+	exit 1
+fi
+
+if [ -n "$visibilitymap_test2_status" ]; then
+	echo "SELECT of post-upgrade database cluster for visibility map test failed"
+	exit 1
+fi
+
 case $testhost in
 	MINGW*)	cmd /c delete_old_cluster.bat ;;
 	*)	    sh ./delete_old_cluster.sh ;;
 esac
 
+if ! diff "$temp_root"/vm_test1.txt "$temp_root"/vm_test2.txt >/dev/null; then
+	echo "Visibility map rewriting test failed"
+	exit 1
+fi
+
 if diff "$temp_root"/dump1.sql "$temp_root"/dump2.sql >/dev/null; then
 	echo PASSED
 	exit 0
diff --git a/src/common/relpath.c b/src/common/relpath.c
index 66dfef1..5898f1b 100644
--- a/src/common/relpath.c
+++ b/src/common/relpath.c
@@ -30,11 +30,14 @@
  * If you add a new entry, remember to update the errhint in
  * forkname_to_number() below, and update the SGML documentation for
  * pg_relation_size().
+ * In 9.6 or later, the visibility map fork name is changed from "vm" to
+ * "vfm" because the visibility map now carries not only all-visible
+ * information but also all-frozen information.
  */
 const char *const forkNames[] = {
 	"main",						/* MAIN_FORKNUM */
 	"fsm",						/* FSM_FORKNUM */
-	"vm",						/* VISIBILITYMAP_FORKNUM */
+	"vfm",						/* VISIBILITYMAP_FORKNUM */
 	"init"						/* INIT_FORKNUM */
 };
 
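
(For reference, the fork name above becomes the on-disk file-name suffix;
a minimal runnable sketch, with made-up OIDs, of the resulting path:)

#include <stdio.h>

int
main(void)
{
	unsigned	dboid = 16384;			/* made-up database OID */
	unsigned	relfilenode = 16385;	/* made-up relfilenode */

	/* non-main forks get "_<forkname>" appended to the relation file */
	printf("base/%u/%u_%s\n", dboid, relfilenode, "vfm");
	return 0;
}
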
diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h
index caa0f14..93afb10 100644
--- a/src/include/access/heapam_xlog.h
+++ b/src/include/access/heapam_xlog.h
@@ -320,9 +320,10 @@ typedef struct xl_heap_freeze_page
 typedef struct xl_heap_visible
 {
 	TransactionId cutoff_xid;
+	uint8		  flags;
 } xl_heap_visible;
 
-#define SizeOfHeapVisible (offsetof(xl_heap_visible, cutoff_xid) + sizeof(TransactionId))
+#define SizeOfHeapVisible (offsetof(xl_heap_visible, flags) + sizeof(uint8))
 
 typedef struct xl_heap_new_cid
 {
@@ -389,6 +390,6 @@ extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple,
 extern void heap_execute_freeze_tuple(HeapTupleHeader tuple,
 						  xl_heap_freeze_tuple *xlrec_tp);
 extern XLogRecPtr log_heap_visible(RelFileNode rnode, Buffer heap_buffer,
-				 Buffer vm_buffer, TransactionId cutoff_xid);
+				 Buffer vm_buffer, TransactionId cutoff_xid, uint8 flags);
 
 #endif   /* HEAPAM_XLOG_H */
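
(A fragment showing how a caller would now fill the WAL record, assuming
the usual XLogRegisterData() flow inside log_heap_visible(), with
cutoff_xid in scope:)

	xl_heap_visible xlrec;

	xlrec.cutoff_xid = cutoff_xid;
	/* record which visibility map bits this operation sets */
	xlrec.flags = VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN;

	/* SizeOfHeapVisible now extends through the new flags byte */
	XLogRegisterData((char *) &xlrec, SizeOfHeapVisible);
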
diff --git a/src/include/access/visibilitymap.h b/src/include/access/visibilitymap.h
index 0c0e0ef..7270609 100644
--- a/src/include/access/visibilitymap.h
+++ b/src/include/access/visibilitymap.h
@@ -19,15 +19,20 @@
 #include "storage/buf.h"
 #include "utils/relcache.h"
 
-extern void visibilitymap_clear(Relation rel, BlockNumber heapBlk,
-					Buffer vmbuf);
+/* Flag bits stored in the visibility map */
+#define VISIBILITYMAP_ALL_VISIBLE	0x01
+#define VISIBILITYMAP_ALL_FROZEN	0x02
+
+extern void visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer vmbuf);
 extern void visibilitymap_pin(Relation rel, BlockNumber heapBlk,
 				  Buffer *vmbuf);
 extern bool visibilitymap_pin_ok(BlockNumber heapBlk, Buffer vmbuf);
 extern void visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
-				  XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid);
-extern bool visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *vmbuf);
-extern BlockNumber visibilitymap_count(Relation rel);
+							  XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid,
+							  uint8 flags);
+extern bool visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *vmbuf,
+							   uint8 flags);
+extern BlockNumber visibilitymap_count(Relation rel, uint8 flags);
 extern void visibilitymap_truncate(Relation rel, BlockNumber nheapblocks);
 
 #endif   /* VISIBILITYMAP_H */
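
(A minimal usage fragment for the extended visibilitymap_test(), with rel
and blkno assumed in scope; this is the same pattern the new heapfuncs.c
below uses:)

	Buffer		vmbuffer = InvalidBuffer;
	bool		all_frozen;

	/* test one heap block's all-frozen bit in the visibility map */
	all_frozen = visibilitymap_test(rel, blkno, &vmbuffer,
									VISIBILITYMAP_ALL_FROZEN);

	if (BufferIsValid(vmbuffer))
		ReleaseBuffer(vmbuffer);
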
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index b58fe46..98d93c5 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
  */
 
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	201508111
+#define CATALOG_VERSION_NO	201508181
 
 #endif
diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h
index e526cd9..ea0f7c1 100644
--- a/src/include/catalog/pg_class.h
+++ b/src/include/catalog/pg_class.h
@@ -47,6 +47,8 @@ CATALOG(pg_class,1259) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83) BKI_SCHEMA_MACRO
 	float4		reltuples;		/* # of tuples (not always up-to-date) */
 	int32		relallvisible;	/* # of all-visible blocks (not always
 								 * up-to-date) */
+	int32		relallfrozen;	/* # of all-frozen blocks (not always
+								 * up-to-date) */
 	Oid			reltoastrelid;	/* OID of toast table; 0 if none */
 	bool		relhasindex;	/* T if has (or has had) any indexes */
 	bool		relisshared;	/* T if shared across databases */
@@ -95,7 +97,7 @@ typedef FormData_pg_class *Form_pg_class;
  * ----------------
  */
 
-#define Natts_pg_class					30
+#define Natts_pg_class					31
 #define Anum_pg_class_relname			1
 #define Anum_pg_class_relnamespace		2
 #define Anum_pg_class_reltype			3
@@ -107,25 +109,26 @@ typedef FormData_pg_class *Form_pg_class;
 #define Anum_pg_class_relpages			9
 #define Anum_pg_class_reltuples			10
 #define Anum_pg_class_relallvisible		11
-#define Anum_pg_class_reltoastrelid		12
-#define Anum_pg_class_relhasindex		13
-#define Anum_pg_class_relisshared		14
-#define Anum_pg_class_relpersistence	15
-#define Anum_pg_class_relkind			16
-#define Anum_pg_class_relnatts			17
-#define Anum_pg_class_relchecks			18
-#define Anum_pg_class_relhasoids		19
-#define Anum_pg_class_relhaspkey		20
-#define Anum_pg_class_relhasrules		21
-#define Anum_pg_class_relhastriggers	22
-#define Anum_pg_class_relhassubclass	23
-#define Anum_pg_class_relrowsecurity	24
-#define Anum_pg_class_relispopulated	25
-#define Anum_pg_class_relreplident		26
-#define Anum_pg_class_relfrozenxid		27
-#define Anum_pg_class_relminmxid		28
-#define Anum_pg_class_relacl			29
-#define Anum_pg_class_reloptions		30
+#define Anum_pg_class_relallfrozen		12
+#define Anum_pg_class_reltoastrelid		13
+#define Anum_pg_class_relhasindex		14
+#define Anum_pg_class_relisshared		15
+#define Anum_pg_class_relpersistence	16
+#define Anum_pg_class_relkind			17
+#define Anum_pg_class_relnatts			18
+#define Anum_pg_class_relchecks			19
+#define Anum_pg_class_relhasoids		20
+#define Anum_pg_class_relhaspkey		21
+#define Anum_pg_class_relhasrules		22
+#define Anum_pg_class_relhastriggers	23
+#define Anum_pg_class_relhassubclass	24
+#define Anum_pg_class_relrowsecurity	25
+#define Anum_pg_class_relispopulated	26
+#define Anum_pg_class_relreplident		27
+#define Anum_pg_class_relfrozenxid		28
+#define Anum_pg_class_relminmxid		29
+#define Anum_pg_class_relacl			30
+#define Anum_pg_class_reloptions		31
 
 /* ----------------
  *		initial contents of pg_class
@@ -140,13 +143,13 @@ typedef FormData_pg_class *Form_pg_class;
  * Note: "3" in the relfrozenxid column stands for FirstNormalTransactionId;
  * similarly, "1" in relminmxid stands for FirstMultiXactId
  */
-DATA(insert OID = 1247 (  pg_type		PGNSP 71 0 PGUID 0 0 0 0 0 0 0 f f p r 30 0 t f f f f f t n 3 1 _null_ _null_ ));
+DATA(insert OID = 1247 (  pg_type		PGNSP 71 0 PGUID 0 0 0 0 0 0 0 0 f f p r 30 0 t f f f f f t n 3 1 _null_ _null_ ));
 DESCR("");
-DATA(insert OID = 1249 (  pg_attribute	PGNSP 75 0 PGUID 0 0 0 0 0 0 0 f f p r 21 0 f f f f f f t n 3 1 _null_ _null_ ));
+DATA(insert OID = 1249 (  pg_attribute	PGNSP 75 0 PGUID 0 0 0 0 0 0 0 0 f f p r 21 0 f f f f f f t n 3 1 _null_ _null_ ));
 DESCR("");
-DATA(insert OID = 1255 (  pg_proc		PGNSP 81 0 PGUID 0 0 0 0 0 0 0 f f p r 28 0 t f f f f f t n 3 1 _null_ _null_ ));
+DATA(insert OID = 1255 (  pg_proc		PGNSP 81 0 PGUID 0 0 0 0 0 0 0 0 f f p r 28 0 t f f f f f t n 3 1 _null_ _null_ ));
 DESCR("");
-DATA(insert OID = 1259 (  pg_class		PGNSP 83 0 PGUID 0 0 0 0 0 0 0 f f p r 30 0 t f f f f f t n 3 1 _null_ _null_ ));
+DATA(insert OID = 1259 (  pg_class		PGNSP 83 0 PGUID 0 0 0 0 0 0 0 0 f f p r 31 0 t f f f f f t n 3 1 _null_ _null_ ));
 DESCR("");
 
 
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index ddf7c67..e320149 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -3213,6 +3213,11 @@ DESCR("sleep until the specified time");
 DATA(insert OID = 2971 (  text				PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 25 "16" _null_ _null_ _null_ _null_ _null_ booltext _null_ _null_ _null_ ));
 DESCR("convert boolean to text");
 
+DATA(insert OID = 3308 (  pg_is_all_visible		PGNSP PGUID 12 1 0 0 0 f f f f t f v 2 0 16 "2205 20" _null_ _null_ _null_ _null_ _null_ pg_is_all_visible _null_ _null_ _null_ ));
+DESCR("true if the page is all visible");
+DATA(insert OID = 3309 (  pg_is_all_frozen		PGNSP PGUID 12 1 0 0 0 f f f f t f v 2 0 16 "2205 20" _null_ _null_ _null_ _null_ _null_ pg_is_all_frozen _null_ _null_ _null_ ));
+DESCR("true if the page is all frozen");
+
 /* Aggregates (moved here from pg_aggregate for 7.3) */
 
 DATA(insert OID = 2100 (  avg				PGNSP PGUID 12 1 0 0 0 t f f f f f i 1 0 1700 "20" _null_ _null_ _null_ _null_ _null_	aggregate_dummy _null_ _null_ _null_ ));
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index e3a31af..d2bae2d 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -172,6 +172,7 @@ extern void vac_update_relstats(Relation relation,
 					BlockNumber num_pages,
 					double num_tuples,
 					BlockNumber num_all_visible_pages,
+					BlockNumber num_all_frozen_pages,
 					bool hasindex,
 					TransactionId frozenxid,
 					MultiXactId minmulti,
diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h
index a2f78ee..7bf2718 100644
--- a/src/include/storage/bufpage.h
+++ b/src/include/storage/bufpage.h
@@ -178,8 +178,10 @@ typedef PageHeaderData *PageHeader;
 										 * tuple? */
 #define PD_ALL_VISIBLE		0x0004		/* all tuples on page are visible to
 										 * everyone */
+#define PD_ALL_FROZEN		0x0008		/* all tuples on page are completely
+										 * frozen */
 
-#define PD_VALID_FLAG_BITS	0x0007		/* OR of all valid pd_flags bits */
+#define PD_VALID_FLAG_BITS	0x000F		/* OR of all valid pd_flags bits */
 
 /*
  * Page layout version number 0 is for pre-7.3 Postgres releases.
@@ -369,6 +371,13 @@ typedef PageHeaderData *PageHeader;
 #define PageClearAllVisible(page) \
 	(((PageHeader) (page))->pd_flags &= ~PD_ALL_VISIBLE)
 
+#define PageIsAllFrozen(page) \
+	(((PageHeader) (page))->pd_flags & PD_ALL_FROZEN)
+#define PageSetAllFrozen(page) \
+	(((PageHeader) (page))->pd_flags |= PD_ALL_FROZEN)
+#define PageClearAllFrozen(page) \
+	(((PageHeader) (page))->pd_flags &= ~PD_ALL_FROZEN)
+
 #define PageIsPrunable(page, oldestxmin) \
 ( \
 	AssertMacro(TransactionIdIsNormal(oldestxmin)), \
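
(The new bit is only meaningful together with PD_ALL_VISIBLE; a fragment,
with page assumed in scope, showing the intended invariant that all-frozen
implies all-visible:)

	/* a page that is no longer all-visible cannot stay all-frozen */
	PageClearAllVisible(page);
	PageClearAllFrozen(page);

	/* only an all-visible page may be marked all-frozen */
	Assert(!PageIsAllFrozen(page) || PageIsAllVisible(page));
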
diff --git a/src/test/regress/expected/visibilitymap.out b/src/test/regress/expected/visibilitymap.out
new file mode 100644
index 0000000..543eeaa
--- /dev/null
+++ b/src/test/regress/expected/visibilitymap.out
@@ -0,0 +1,75 @@
+--
+-- Visibility map
+--
+CREATE FUNCTION
+  pg_visibilitymap(rel regclass, blkno OUT bigint, all_visible OUT bool, all_frozen OUT bool)
+RETURNS SETOF RECORD
+AS $$
+  SELECT blkno, pg_is_all_visible($1, blkno) AS all_visible, pg_is_all_frozen($1, blkno) AS all_frozen
+  FROM generate_series(0, pg_relation_size($1) / current_setting('block_size')::bigint - 1) AS blkno;
+$$
+LANGUAGE SQL;
+CREATE TABLE vmtest (i INT primary key);
+INSERT INTO vmtest SELECT generate_series(1,10000);
+\set VERBOSITY terse
+-- All pages become all-visible
+VACUUM vmtest;
+SELECT count(all_visible) = (pg_relation_size('vmtest') / current_setting('block_size')::int)
+       FROM pg_visibilitymap('vmtest')
+       WHERE all_visible;
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT relallvisible = (pg_relation_size('vmtest') / current_setting('block_size')::int)
+       FROM pg_class
+       WHERE relname = 'vmtest';
+ ?column? 
+----------
+ t
+(1 row)
+
+VACUUM FREEZE vmtest;
+SELECT count(all_visible) = (pg_relation_size('vmtest') / current_setting('block_size')::int)
+       FROM pg_visibilitymap('vmtest')
+       WHERE all_visible
+       GROUP BY all_visible;
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT count(all_frozen) = (pg_relation_size('vmtest') / current_setting('block_size')::int)
+       FROM pg_visibilitymap('vmtest')
+       WHERE all_frozen
+       GROUP BY all_frozen;
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT relallvisible = (pg_relation_size('vmtest') / current_setting('block_size')::int)
+       FROM pg_class
+       WHERE relname = 'vmtest';
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT relallfrozen = (pg_relation_size('vmtest') / current_setting('block_size')::int)
+       FROM pg_class
+       WHERE relname = 'vmtest';
+ ?column? 
+----------
+ t
+(1 row)
+
+-- All pages are skipped according to the VM
+VACUUM FREEZE VERBOSE vmtest;
+INFO:  vacuuming "public.vmtest"
+INFO:  index "vmtest_pkey" now contains 10000 row versions in 30 pages
+INFO:  Skipped 45 frozen pages according to visibility map
+INFO:  "vmtest": found 0 removable, 0 nonremovable row versions in 0 out of 45 pages
+DROP FUNCTION pg_visibilitymap(regclass);
+DROP TABLE vmtest;
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index 4df15de..893d773 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -108,5 +108,8 @@ test: plancache limit plpgsql copy2 temp domain rangefuncs prepare without_oid c
 # event triggers cannot run concurrently with any test that runs DDL
 test: event_trigger
 
+# the visibility map test cannot run concurrently with any other test; concurrent transactions would keep pages from becoming all-visible
+test: visibilitymap
+
 # run stats by itself because its delay may be insufficient under heavy load
 test: stats
diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule
index 15d74d4..da84aa6 100644
--- a/src/test/regress/serial_schedule
+++ b/src/test/regress/serial_schedule
@@ -157,3 +157,4 @@ test: with
 test: xml
 test: event_trigger
 test: stats
+test: visibilitymap
\ No newline at end of file
diff --git a/src/test/regress/sql/visibilitymap.sql b/src/test/regress/sql/visibilitymap.sql
new file mode 100644
index 0000000..11b552e
--- /dev/null
+++ b/src/test/regress/sql/visibilitymap.sql
@@ -0,0 +1,49 @@
+--
+-- Visibility map
+--
+
+CREATE FUNCTION
+  pg_visibilitymap(rel regclass, blkno OUT bigint, all_visible OUT bool, all_frozen OUT bool)
+RETURNS SETOF RECORD
+AS $$
+  SELECT blkno, pg_is_all_visible($1, blkno) AS all_visible, pg_is_all_frozen($1, blkno) AS all_frozen
+  FROM generate_series(0, pg_relation_size($1) / current_setting('block_size')::bigint - 1) AS blkno;
+$$
+LANGUAGE SQL;
+
+CREATE TABLE vmtest (i INT primary key);
+INSERT INTO vmtest SELECT generate_series(1,10000);
+
+\set VERBOSITY terse
+
+-- All pages become all-visible
+VACUUM vmtest;
+SELECT count(all_visible) = (pg_relation_size('vmtest') / current_setting('block_size')::int)
+       FROM pg_visibilitymap('vmtest')
+       WHERE all_visible;
+SELECT relallvisible = (pg_relation_size('vmtest') / current_setting('block_size')::int)
+       FROM pg_class
+       WHERE relname = 'vmtest';
+
+VACUUM FREEZE vmtest;
+SELECT count(all_visible) = (pg_relation_size('vmtest') / current_setting('block_size')::int)
+       FROM pg_visibilitymap('vmtest')
+       WHERE all_visible
+       GROUP BY all_visible;
+SELECT count(all_frozen) = (pg_relation_size('vmtest') / current_setting('block_size')::int)
+       FROM pg_visibilitymap('vmtest')
+       WHERE all_frozen
+       GROUP BY all_frozen;
+
+SELECT relallvisible = (pg_relation_size('vmtest') / current_setting('block_size')::int)
+       FROM pg_class
+       WHERE relname = 'vmtest';
+SELECT relallfrozen = (pg_relation_size('vmtest') / current_setting('block_size')::int)
+       FROM pg_class
+       WHERE relname = 'vmtest';
+
+-- All pages are skipped according to the VM
+VACUUM FREEZE VERBOSE vmtest;
+
+DROP FUNCTION pg_visibilitymap(regclass);
+DROP TABLE vmtest;
diff --git a/src/backend/access/heap/Makefile b/src/backend/access/heap/Makefile
index b83d496..806ce27 100644
--- a/src/backend/access/heap/Makefile
+++ b/src/backend/access/heap/Makefile
@@ -12,6 +12,7 @@ subdir = src/backend/access/heap
 top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global
 
-OBJS = heapam.o hio.o pruneheap.o rewriteheap.o syncscan.o tuptoaster.o visibilitymap.o
+OBJS = heapam.o hio.o pruneheap.o rewriteheap.o syncscan.o tuptoaster.o visibilitymap.o \
+	heapfuncs.o
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/access/heap/heapfuncs.c b/src/backend/access/heap/heapfuncs.c
new file mode 100644
index 0000000..6c3753b
--- /dev/null
+++ b/src/backend/access/heap/heapfuncs.c
@@ -0,0 +1,84 @@
+/*-------------------------------------------------------------------------
+ *
+ * heapfuncs.c
+ *	  Functions for examining the visibility map state of heap pages
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/access/heap/heapfuncs.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/heapam.h"
+#include "access/visibilitymap.h"
+#include "funcapi.h"
+#include "storage/bufmgr.h"
+
+/* Functions for visibilitymap */
+extern Datum pg_is_all_visible(PG_FUNCTION_ARGS);
+extern Datum pg_is_all_frozen(PG_FUNCTION_ARGS);
+
+static bool visibilitymap_test_internal(Oid relid, int64 blkno, uint8 flag);
+
+/*
+ * Return whether the page is all-visible, according to the visibility map.
+ */
+Datum
+pg_is_all_visible(PG_FUNCTION_ARGS)
+{
+	Oid			relid = PG_GETARG_OID(0);
+	int64		blkno = PG_GETARG_INT64(1);
+	bool		all_visible;
+
+	all_visible = visibilitymap_test_internal(relid, blkno, VISIBILITYMAP_ALL_VISIBLE);
+
+	PG_RETURN_BOOL(all_visible);
+}
+
+/*
+ * Return whether the page is all-frozen, according to the visibility map.
+ */
+Datum
+pg_is_all_frozen(PG_FUNCTION_ARGS)
+{
+	Oid			relid = PG_GETARG_OID(0);
+	int64		blkno = PG_GETARG_INT64(1);
+	bool		all_frozen;
+
+	all_frozen = visibilitymap_test_internal(relid, blkno, VISIBILITYMAP_ALL_FROZEN);
+
+	PG_RETURN_BOOL(all_frozen);
+}
+
+/*
+ * Workhorse for pg_is_all_visible and pg_is_all_frozen: test one bit of
+ * the visibility map for the given block of the given relation.
+ */
+static bool
+visibilitymap_test_internal(Oid relid, int64 blkno, uint8 flag)
+{
+	Relation	rel;
+	Buffer		vmbuffer = InvalidBuffer;
+	bool		result;
+
+	rel = relation_open(relid, AccessShareLock);
+
+	if (blkno < 0 || blkno > MaxBlockNumber)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("invalid block number")));
+
+	result = visibilitymap_test(rel, blkno, &vmbuffer, flag);
+
+	if (BufferIsValid(vmbuffer))
+		ReleaseBuffer(vmbuffer);
+	relation_close(rel, AccessShareLock);
+
+	return result;
+}