On Mon, Jul 4, 2016 at 5:44 PM, Masahiko Sawada <sawada.m...@gmail.com> wrote:
> On Sat, Jul 2, 2016 at 12:34 PM, Amit Kapila <amit.kapil...@gmail.com> wrote:
>> On Fri, Jul 1, 2016 at 7:52 PM, Masahiko Sawada <sawada.m...@gmail.com> 
>> wrote:
>>> On Fri, Jul 1, 2016 at 11:12 AM, Amit Kapila <amit.kapil...@gmail.com> 
>>> wrote:
>>>>
>>>> Why do you think IndexOnlyScan will return wrong result?  If the
>>>> server crash in the way as you described, the transaction that has
>>>> made modifications will anyway be considered aborted, so the result of
>>>> IndexOnlyScan should not be wrong.
>>>>
>>>
>>> Ah, you're right, I misunderstood.
>>>
>>> Attached updated patch incorporating your comments.
>>> I've changed it so that heap_xlog_lock clears vm flags if page is
>>> marked all frozen.
>>>
>>
>> I think we should make a similar change in heap_lock_tuple API as
>> well.
>> Also, currently by default heap_xlog_lock tuple tries to clear
>> the visibility flags, isn't it better to handle it as we do at all
>> other places (ex. see log_heap_update), by logging the information
>> about same.
>
> I will deal with them.
>
>> Though, it is important to get the patch right, but I feel in the
>> meantime, it might be better to start benchmarking.  AFAIU, even if
>> change some part of information while WAL logging it, the benchmark
>> results won't be much different.
>
> Okay, I will do the benchmark test as well.
>

I measured the throughput and the output quantity of WAL with HEAD and
HEAD+patch(attached) on my virtual environment.
I used pgbench with the attached custom script file, which sets a
3200-character string in the filler column in order to generate toast data.
The scale factor is 1000 and pgbench options are, -c 4 -T 600 -f toast_test.sql.
I ran it after changing the filler column to the text data type.

* Throughput
HEAD : 1833.204172
Patched : 1827.399482

* Output quantity of WAL
HEAD :  7771 MB
Patched : 8082 MB

The throughput is almost the same, but the output quantity of WAL is
slightly increased (about 4%).

Regards,

--
Masahiko Sawada

Attachment: toast_test.sql
Description: Binary data

diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 57da57a..fd66527 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -3923,6 +3923,17 @@ l2:
 
 	if (need_toast || newtupsize > pagefree)
 	{
+		/*
+		 * To prevent data corruption due to updating old tuple by
+		 * other backends after released buffer, we need to emit that
+		 * xmax of old tuple is set and clear visibility map bits if
+		 * needed before releasing buffer. We can reuse xl_heap_lock
+		 * for this purpose. It should be fine even if we crash midway
+		 * from this section and the actual updating one later, since
+		 * the xmax will appear to come from an aborted xid.
+		 */
+		START_CRIT_SECTION();
+
 		/* Clear obsolete visibility flags ... */
 		oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
 		oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
@@ -3936,6 +3947,46 @@ l2:
 		/* temporarily make it look not-updated */
 		oldtup.t_data->t_ctid = oldtup.t_self;
 		already_marked = true;
+
+		/* Clear PD_ALL_VISIBLE flags */
+		if (PageIsAllVisible(BufferGetPage(buffer)))
+		{
+			all_visible_cleared = true;
+			PageClearAllVisible(BufferGetPage(buffer));
+			visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
+								vmbuffer);
+		}
+
+		MarkBufferDirty(buffer);
+
+		if (RelationNeedsWAL(relation))
+		{
+			xl_heap_lock xlrec;
+			XLogRecPtr recptr;
+
+			/*
+			 * For logical decoding we need combocids to properly decode the
+			 * catalog.
+			 */
+			if (RelationIsAccessibleInLogicalDecoding(relation))
+				log_heap_new_cid(relation, &oldtup);
+
+			XLogBeginInsert();
+			XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+
+			xlrec.offnum = ItemPointerGetOffsetNumber(&oldtup.t_self);
+			xlrec.locking_xid = xmax_old_tuple;
+			xlrec.infobits_set = compute_infobits(oldtup.t_data->t_infomask,
+												  oldtup.t_data->t_infomask2);
+			if (all_visible_cleared)
+				xlrec.infobits_set |= XLHL_ALL_VISIBLE_CLEARED;
+			XLogRegisterData((char *) &xlrec, SizeOfHeapLock);
+			recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK);
+			PageSetLSN(page, recptr);
+		}
+
+		END_CRIT_SECTION();
+
 		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 
 		/*
@@ -4140,7 +4191,8 @@ l2:
 		 */
 		if (RelationIsAccessibleInLogicalDecoding(relation))
 		{
-			log_heap_new_cid(relation, &oldtup);
+			if (!already_marked)
+				log_heap_new_cid(relation, &oldtup);
 			log_heap_new_cid(relation, heaptup);
 		}
 
@@ -4513,6 +4565,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
 				new_infomask2;
 	bool		first_time = true;
 	bool		have_tuple_lock = false;
+	bool		all_visible_cleared = false;
 
 	*buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
 	LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
@@ -5034,6 +5087,18 @@ failed:
 	if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask))
 		tuple->t_data->t_ctid = *tid;
 
+	/* Clear PD_ALL_VISIBLE flags */
+	if (PageIsAllVisible(page))
+	{
+		Buffer	vmbuffer = InvalidBuffer;
+		BlockNumber	block = BufferGetBlockNumber(*buffer);
+
+		all_visible_cleared = true;
+		PageClearAllVisible(page);
+		visibilitymap_pin(relation, block, &vmbuffer);
+		visibilitymap_clear(relation, block, vmbuffer);
+	}
+
 	MarkBufferDirty(*buffer);
 
 	/*
@@ -5060,6 +5125,8 @@ failed:
 		xlrec.locking_xid = xid;
 		xlrec.infobits_set = compute_infobits(new_infomask,
 											  tuple->t_data->t_infomask2);
+		if (all_visible_cleared)
+			xlrec.infobits_set |= XLHL_ALL_VISIBLE_CLEARED;
 		XLogRegisterData((char *) &xlrec, SizeOfHeapLock);
 
 		/* we don't decode row locks atm, so no need to log the origin */
@@ -8694,6 +8761,23 @@ heap_xlog_lock(XLogReaderState *record)
 		}
 		HeapTupleHeaderSetXmax(htup, xlrec->locking_xid);
 		HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
+
+		/* The visibility map need to be cleared */
+		if ((xlrec->infobits_set & XLHL_ALL_VISIBLE_CLEARED) != 0)
+		{
+			RelFileNode	rnode;
+			Buffer		vmbuffer = InvalidBuffer;
+			BlockNumber	blkno;
+			Relation	reln;
+
+			XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
+			reln = CreateFakeRelcacheEntry(rnode);
+
+			visibilitymap_pin(reln, blkno, &vmbuffer);
+			visibilitymap_clear(reln, blkno, vmbuffer);
+			PageClearAllVisible(page);
+		}
+
 		PageSetLSN(page, lsn);
 		MarkBufferDirty(buffer);
 	}
diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h
index a822d0b..41b3c54 100644
--- a/src/include/access/heapam_xlog.h
+++ b/src/include/access/heapam_xlog.h
@@ -242,6 +242,7 @@ typedef struct xl_heap_cleanup_info
 #define XLHL_XMAX_EXCL_LOCK		0x04
 #define XLHL_XMAX_KEYSHR_LOCK	0x08
 #define XLHL_KEYS_UPDATED		0x10
+#define XLHL_ALL_VISIBLE_CLEARED 0x20
 
 /* This is what we need to know about lock */
 typedef struct xl_heap_lock
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to