Thank you for reviewing!

On Thu, Jul 7, 2016 at 7:06 AM, Andres Freund <and...@anarazel.de> wrote:
> On 2016-07-05 23:37:59 +0900, Masahiko Sawada wrote:
>> diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
>> index 57da57a..fd66527 100644
>> --- a/src/backend/access/heap/heapam.c
>> +++ b/src/backend/access/heap/heapam.c
>> @@ -3923,6 +3923,17 @@ l2:
>>
>>       if (need_toast || newtupsize > pagefree)
>>       {
>> +             /*
>> +              * To prevent data corruption due to updating old tuple by
>> +              * other backends after released buffer
>
> That's not really the reason, is it? The prime problem is crash safety /
> replication. The row-lock we're faking (by setting xmax to our xid),
> prevents concurrent updates until this backend died.

Fixed.

>>                 , we need to emit that
>> +              * xmax of old tuple is set and clear visibility map bits if
>> +              * needed before releasing buffer. We can reuse xl_heap_lock
>> +              * for this purpose. It should be fine even if we crash midway
>> +              * from this section and the actual updating one later, since
>> +              * the xmax will appear to come from an aborted xid.
>> +              */
>> +             START_CRIT_SECTION();
>> +
>>               /* Clear obsolete visibility flags ... */
>>               oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
>>               oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
>> @@ -3936,6 +3947,46 @@ l2:
>>               /* temporarily make it look not-updated */
>>               oldtup.t_data->t_ctid = oldtup.t_self;
>>               already_marked = true;
>> +
>> +             /* Clear PD_ALL_VISIBLE flags */
>> +             if (PageIsAllVisible(BufferGetPage(buffer)))
>> +             {
>> +                     all_visible_cleared = true;
>> +                     PageClearAllVisible(BufferGetPage(buffer));
>> +                     visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
>> +                                                             vmbuffer);
>> +             }
>> +
>> +             MarkBufferDirty(buffer);
>> +
>> +             if (RelationNeedsWAL(relation))
>> +             {
>> +                     xl_heap_lock xlrec;
>> +                     XLogRecPtr recptr;
>> +
>> +                     /*
>> +                      * For logical decoding we need combocids to properly decode the
>> +                      * catalog.
>> +                      */
>> +                     if (RelationIsAccessibleInLogicalDecoding(relation))
>> +                             log_heap_new_cid(relation, &oldtup);
>
> Hm, I don't see that being necessary here. Row locks aren't logically
> decoded, so there's no need to emit this here.

Fixed.

>
>> +     /* Clear PD_ALL_VISIBLE flags */
>> +     if (PageIsAllVisible(page))
>> +     {
>> +             Buffer  vmbuffer = InvalidBuffer;
>> +             BlockNumber     block = BufferGetBlockNumber(*buffer);
>> +
>> +             all_visible_cleared = true;
>> +             PageClearAllVisible(page);
>> +             visibilitymap_pin(relation, block, &vmbuffer);
>> +             visibilitymap_clear(relation, block, vmbuffer);
>> +     }
>> +
>
> That clears all-visible unnecessarily, we only need to clear all-frozen.
>

Fixed.

>
>> @@ -8694,6 +8761,23 @@ heap_xlog_lock(XLogReaderState *record)
>>               }
>>               HeapTupleHeaderSetXmax(htup, xlrec->locking_xid);
>>               HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
>> +
>> +             /* The visibility map need to be cleared */
>> +             if ((xlrec->infobits_set & XLHL_ALL_VISIBLE_CLEARED) != 0)
>> +             {
>> +                     RelFileNode     rnode;
>> +                     Buffer          vmbuffer = InvalidBuffer;
>> +                     BlockNumber     blkno;
>> +                     Relation        reln;
>> +
>> +                     XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
>> +                     reln = CreateFakeRelcacheEntry(rnode);
>> +
>> +                     visibilitymap_pin(reln, blkno, &vmbuffer);
>> +                     visibilitymap_clear(reln, blkno, vmbuffer);
>> +                     PageClearAllVisible(page);
>> +             }
>> +
>
>
>>               PageSetLSN(page, lsn);
>>               MarkBufferDirty(buffer);
>>       }
>> diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h
>> index a822d0b..41b3c54 100644
>> --- a/src/include/access/heapam_xlog.h
>> +++ b/src/include/access/heapam_xlog.h
>> @@ -242,6 +242,7 @@ typedef struct xl_heap_cleanup_info
>>  #define XLHL_XMAX_EXCL_LOCK          0x04
>>  #define XLHL_XMAX_KEYSHR_LOCK        0x08
>>  #define XLHL_KEYS_UPDATED            0x10
>> +#define XLHL_ALL_VISIBLE_CLEARED 0x20
>
> Hm. We can't easily do that in the back-patched version; because a
> standby won't know to check for the flag. That's kinda ok, since we
> don't yet need to clear all-visible yet at that point of
> heap_update. But that better means we don't do so on the master either.
>

Attached is the latest version of the patch.
I changed the visibilitymap_clear function so that the caller can
specify which bits to clear.
A function that needs to clear only the all-frozen bit in the
visibility map calls visibilitymap_clear_extended to clear that
particular bit, while other callers keep using visibilitymap_clear
to clear all bits for one page.
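
For example, with this split a caller can do either of the following
(just a sketch; relation/block/vmbuffer stand for whatever the caller
already has at hand):

    /* clear only the all-frozen bit; the all-visible bit stays set */
    visibilitymap_clear_extended(relation, block, vmbuffer,
                                 VISIBILITYMAP_ALL_FROZEN);

    /* clear all bits for the page, same as before */
    visibilitymap_clear(relation, block, vmbuffer);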

Instead of adding XLHL_ALL_VISIBLE_CLEARED, we do a visibility map
lookup for the back branches.
To reduce unnecessary visibility map lookups, I changed it so that we
first check PD_ALL_VISIBLE on the heap page, and only look up the
all-frozen bit in the visibility map if necessary.
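
So on master the check ends up looking roughly like this (a sketch of
the pattern used in the hunks below; the vmbuffer pinning is handled
as in each call site):

    if (PageIsAllVisible(page) &&
        VM_ALL_FROZEN(relation, block, &vmbuffer))
        visibilitymap_clear_extended(relation, block, vmbuffer,
                                     VISIBILITYMAP_ALL_FROZEN);

Checking PD_ALL_VISIBLE first is cheap because that flag is on the
heap page we already have locked, so we only touch the visibility map
for pages that are actually marked all-visible.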

Regards,

--
Masahiko Sawada
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 57da57a..e2efba3 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -3923,6 +3923,16 @@ l2:
 
 	if (need_toast || newtupsize > pagefree)
 	{
+		/*
+		 * For crash safety, we need to WAL-log that the old tuple's xmax
+		 * has been set, and clear only the all-frozen bit on the
+		 * visibility map if needed, before releasing the buffer. We can
+		 * reuse xl_heap_lock for this purpose. It should be fine even if
+		 * we crash midway between this section and the actual update
+		 * later, since the xmax will appear to come from an aborted xid.
+		 */
+		START_CRIT_SECTION();
+
 		/* Clear obsolete visibility flags ... */
 		oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
 		oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
@@ -3936,6 +3946,36 @@ l2:
 		/* temporarily make it look not-updated */
 		oldtup.t_data->t_ctid = oldtup.t_self;
 		already_marked = true;
+
+		/* Clear only the all-frozen bit on the visibility map if needed */
+		if (PageIsAllVisible(BufferGetPage(buffer)) &&
+			VM_ALL_FROZEN(relation, block, &vmbuffer))
+		{
+			visibilitymap_clear_extended(relation, block, vmbuffer,
+										 VISIBILITYMAP_ALL_FROZEN);
+		}
+
+		MarkBufferDirty(buffer);
+
+		if (RelationNeedsWAL(relation))
+		{
+			xl_heap_lock xlrec;
+			XLogRecPtr recptr;
+
+			XLogBeginInsert();
+			XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+
+			xlrec.offnum = ItemPointerGetOffsetNumber(&oldtup.t_self);
+			xlrec.locking_xid = xmax_old_tuple;
+			xlrec.infobits_set = compute_infobits(oldtup.t_data->t_infomask,
+												  oldtup.t_data->t_infomask2);
+			XLogRegisterData((char *) &xlrec, SizeOfHeapLock);
+			recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK);
+			PageSetLSN(page, recptr);
+		}
+
+		END_CRIT_SECTION();
+
 		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 
 		/*
@@ -4506,6 +4546,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
 	ItemPointer tid = &(tuple->t_self);
 	ItemId		lp;
 	Page		page;
+	Buffer		vmbuffer = InvalidBuffer;
 	TransactionId xid,
 				xmax;
 	uint16		old_infomask,
@@ -5034,6 +5075,17 @@ failed:
 	if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask))
 		tuple->t_data->t_ctid = *tid;
 
+	/* Clear only the all-frozen bit on the visibility map if needed */
+	if (PageIsAllVisible(page) &&
+		VM_ALL_FROZEN(relation, BufferGetBlockNumber(*buffer), &vmbuffer))
+	{
+		BlockNumber	block = BufferGetBlockNumber(*buffer);
+
+		visibilitymap_pin(relation, block, &vmbuffer);
+		visibilitymap_clear_extended(relation, block, vmbuffer,
+									 VISIBILITYMAP_ALL_FROZEN);
+	}
+
 	MarkBufferDirty(*buffer);
 
 	/*
@@ -5072,6 +5124,8 @@ failed:
 	END_CRIT_SECTION();
 
 	LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
+	if (BufferIsValid(vmbuffer))
+		ReleaseBuffer(vmbuffer);
 
 	/*
 	 * Don't update the visibility map here. Locking a tuple doesn't change
@@ -8694,6 +8748,24 @@ heap_xlog_lock(XLogReaderState *record)
 		}
 		HeapTupleHeaderSetXmax(htup, xlrec->locking_xid);
 		HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
+
+		/* Clear the all-frozen bit on the visibility map if needed */
+		if (PageIsAllVisible(BufferGetPage(buffer)))
+		{
+			RelFileNode	rnode;
+			Buffer		vmbuffer = InvalidBuffer;
+			BlockNumber	blkno;
+			Relation	reln;
+
+			XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+			reln = CreateFakeRelcacheEntry(rnode);
+			visibilitymap_pin(reln, blkno, &vmbuffer);
+			visibilitymap_clear_extended(reln, blkno, vmbuffer,
+										 VISIBILITYMAP_ALL_FROZEN);
+			ReleaseBuffer(vmbuffer);
+			FreeFakeRelcacheEntry(reln);
+		}
+
 		PageSetLSN(page, lsn);
 		MarkBufferDirty(buffer);
 	}
diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c
index b472d31..c52c0b0 100644
--- a/src/backend/access/heap/visibilitymap.c
+++ b/src/backend/access/heap/visibilitymap.c
@@ -11,7 +11,7 @@
  *	  src/backend/access/heap/visibilitymap.c
  *
  * INTERFACE ROUTINES
- *		visibilitymap_clear  - clear a bit in the visibility map
+ *		visibilitymap_clear  - clear all bits for one page in the visibility map
  *		visibilitymap_pin	 - pin a map page for setting a bit
  *		visibilitymap_pin_ok - check whether correct map page is already pinned
  *		visibilitymap_set	 - set a bit in a previously pinned page
@@ -157,23 +157,34 @@ static const uint8 number_of_ones_for_frozen[256] = {
 static Buffer vm_readbuf(Relation rel, BlockNumber blkno, bool extend);
 static void vm_extend(Relation rel, BlockNumber nvmblocks);
 
+/*
+ * A shorthand for visibilitymap_clear_extended, clearing all bits for one
+ * page in the visibility map (VISIBILITYMAP_VALID_BITS).
+ */
+void
+visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer buf)
+{
+	visibilitymap_clear_extended(rel, heapBlk, buf, VISIBILITYMAP_VALID_BITS);
+}
 
 /*
- *	visibilitymap_clear - clear all bits for one page in visibility map
+ *	visibilitymap_clear_extended - clear bit(s) for one page in visibility map
  *
  * You must pass a buffer containing the correct map page to this function.
  * Call visibilitymap_pin first to pin the right one. This function doesn't do
  * any I/O.
  */
 void
-visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer buf)
+visibilitymap_clear_extended(Relation rel, BlockNumber heapBlk, Buffer buf, uint8 flags)
 {
 	BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
 	int			mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
 	int			mapOffset = HEAPBLK_TO_OFFSET(heapBlk);
-	uint8		mask = VISIBILITYMAP_VALID_BITS << mapOffset;
+	uint8		mask = flags << mapOffset;
 	char	   *map;
 
+	Assert(flags & VISIBILITYMAP_VALID_BITS);
+
 #ifdef TRACE_VISIBILITYMAP
 	elog(DEBUG1, "vm_clear %s %d", RelationGetRelationName(rel), heapBlk);
 #endif
diff --git a/src/include/access/visibilitymap.h b/src/include/access/visibilitymap.h
index fca99ca..f305b03 100644
--- a/src/include/access/visibilitymap.h
+++ b/src/include/access/visibilitymap.h
@@ -36,6 +36,8 @@
 
 extern void visibilitymap_clear(Relation rel, BlockNumber heapBlk,
 					Buffer vmbuf);
+extern void visibilitymap_clear_extended(Relation rel, BlockNumber heapBlk,
+					Buffer vmbuf, uint8 flags);
 extern void visibilitymap_pin(Relation rel, BlockNumber heapBlk,
 				  Buffer *vmbuf);
 extern bool visibilitymap_pin_ok(BlockNumber heapBlk, Buffer vmbuf);