On Fri, Jul 8, 2016 at 10:24 PM, Amit Kapila <amit.kapil...@gmail.com> wrote:
> On Thu, Jul 7, 2016 at 12:34 PM, Masahiko Sawada <sawada.m...@gmail.com> 
> wrote:
>> Thank you for reviewing!
>>
>> On Thu, Jul 7, 2016 at 7:06 AM, Andres Freund <and...@anarazel.de> wrote:
>>> On 2016-07-05 23:37:59 +0900, Masahiko Sawada wrote:
>>>> diff --git a/src/backend/access/heap/heapam.c 
>>>> b/src/backend/access/heap/heapam.c
>>>> index 57da57a..fd66527 100644
>>>> --- a/src/backend/access/heap/heapam.c
>>>> +++ b/src/backend/access/heap/heapam.c
>>>> @@ -3923,6 +3923,17 @@ l2:
>>>>
>>>>       if (need_toast || newtupsize > pagefree)
>>>>       {
>>>> +             /*
>>>> +              * To prevent data corruption due to updating old tuple by
>>>> +              * other backends after released buffer
>>>
>>> That's not really the reason, is it? The prime problem is crash safety /
>>> replication. The row-lock we're faking (by setting xmax to our xid),
>>> prevents concurrent updates until this backend died.
>>
>> Fixed.
>>
>>>>                 , we need to emit that
>>>> +              * xmax of old tuple is set and clear visibility map bits if
>>>> +              * needed before releasing buffer. We can reuse xl_heap_lock
>>>> +              * for this purpose. It should be fine even if we crash 
>>>> midway
>>>> +              * from this section and the actual updating one later, since
>>>> +              * the xmax will appear to come from an aborted xid.
>>>> +              */
>>>> +             START_CRIT_SECTION();
>>>> +
>>>>               /* Clear obsolete visibility flags ... */
>>>>               oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
>>>>               oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
>>>> @@ -3936,6 +3947,46 @@ l2:
>>>>               /* temporarily make it look not-updated */
>>>>               oldtup.t_data->t_ctid = oldtup.t_self;
>>>>               already_marked = true;
>>>> +
>>>> +             /* Clear PD_ALL_VISIBLE flags */
>>>> +             if (PageIsAllVisible(BufferGetPage(buffer)))
>>>> +             {
>>>> +                     all_visible_cleared = true;
>>>> +                     PageClearAllVisible(BufferGetPage(buffer));
>>>> +                     visibilitymap_clear(relation, 
>>>> BufferGetBlockNumber(buffer),
>>>> +                                                             vmbuffer);
>>>> +             }
>>>> +
>>>> +             MarkBufferDirty(buffer);
>>>> +
>>>> +             if (RelationNeedsWAL(relation))
>>>> +             {
>>>> +                     xl_heap_lock xlrec;
>>>> +                     XLogRecPtr recptr;
>>>> +
>>>> +                     /*
>>>> +                      * For logical decoding we need combocids to 
>>>> properly decode the
>>>> +                      * catalog.
>>>> +                      */
>>>> +                     if (RelationIsAccessibleInLogicalDecoding(relation))
>>>> +                             log_heap_new_cid(relation, &oldtup);
>>>
>>> Hm, I don't see that being necessary here. Row locks aren't logically
>>> decoded, so there's no need to emit this here.
>>
>> Fixed.
>>
>>>
>>>> +     /* Clear PD_ALL_VISIBLE flags */
>>>> +     if (PageIsAllVisible(page))
>>>> +     {
>>>> +             Buffer  vmbuffer = InvalidBuffer;
>>>> +             BlockNumber     block = BufferGetBlockNumber(*buffer);
>>>> +
>>>> +             all_visible_cleared = true;
>>>> +             PageClearAllVisible(page);
>>>> +             visibilitymap_pin(relation, block, &vmbuffer);
>>>> +             visibilitymap_clear(relation, block, vmbuffer);
>>>> +     }
>>>> +
>>>
>>> That clears all-visible unnecessarily, we only need to clear all-frozen.
>>>
>>
>> Fixed.
>>
>>>
>>>> @@ -8694,6 +8761,23 @@ heap_xlog_lock(XLogReaderState *record)
>>>>               }
>>>>               HeapTupleHeaderSetXmax(htup, xlrec->locking_xid);
>>>>               HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
>>>> +
>>>> +             /* The visibility map need to be cleared */
>>>> +             if ((xlrec->infobits_set & XLHL_ALL_VISIBLE_CLEARED) != 0)
>>>> +             {
>>>> +                     RelFileNode     rnode;
>>>> +                     Buffer          vmbuffer = InvalidBuffer;
>>>> +                     BlockNumber     blkno;
>>>> +                     Relation        reln;
>>>> +
>>>> +                     XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
>>>> +                     reln = CreateFakeRelcacheEntry(rnode);
>>>> +
>>>> +                     visibilitymap_pin(reln, blkno, &vmbuffer);
>>>> +                     visibilitymap_clear(reln, blkno, vmbuffer);
>>>> +                     PageClearAllVisible(page);
>>>> +             }
>>>> +
>>>
>>>
>>>>               PageSetLSN(page, lsn);
>>>>               MarkBufferDirty(buffer);
>>>>       }
>>>> diff --git a/src/include/access/heapam_xlog.h 
>>>> b/src/include/access/heapam_xlog.h
>>>> index a822d0b..41b3c54 100644
>>>> --- a/src/include/access/heapam_xlog.h
>>>> +++ b/src/include/access/heapam_xlog.h
>>>> @@ -242,6 +242,7 @@ typedef struct xl_heap_cleanup_info
>>>>  #define XLHL_XMAX_EXCL_LOCK          0x04
>>>>  #define XLHL_XMAX_KEYSHR_LOCK        0x08
>>>>  #define XLHL_KEYS_UPDATED            0x10
>>>> +#define XLHL_ALL_VISIBLE_CLEARED 0x20
>>>
>>> Hm. We can't easily do that in the back-patched version; because a
>>> standby won't know to check for the flag . That's kinda ok, since we
>>> don't yet need to clear all-visible yet at that point of
>>> heap_update. But that better means we don't do so on the master either.
>>>
>>
>> Attached latest version patch.
>
> + /* Clear only the all-frozen bit on visibility map if needed */
>
> + if (PageIsAllVisible(BufferGetPage(buffer)) &&
>
> + VM_ALL_FROZEN(relation, block, &vmbuffer))
> + {
> + visibilitymap_clear_extended(relation, block, vmbuffer,
> + VISIBILITYMAP_ALL_FROZEN);
> + }
> +
>
> + if (RelationNeedsWAL(relation))
> + {
> ..
>
> + XLogBeginInsert();
> + XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
> +
> + xlrec.offnum = ItemPointerGetOffsetNumber(&oldtup.t_self);
> + xlrec.locking_xid = xmax_old_tuple;
> + xlrec.infobits_set = compute_infobits(oldtup.t_data->t_infomask,
> +   oldtup.t_data->t_infomask2);
> + XLogRegisterData((char *) &xlrec, SizeOfHeapLock);
> + recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK);
> ..
>
> One thing that looks awkward in this code is that it doesn't record
> whether the frozen bit is actually cleared during the actual operation
> and then during replay, it always clear the frozen bit irrespective of
> whether it has been cleared by the actual operation or not.
>

I changed it so that we look all-frozen bit up first, and then clear
it if needed.

> + /* Clear only the all-frozen bit on visibility map if needed */
> + if (PageIsAllVisible(page) &&
> + VM_ALL_FROZEN(relation, BufferGetBlockNumber(*buffer), &vmbuffer))
> + {
> + BlockNumber block = BufferGetBlockNumber(*buffer);
> +
> + visibilitymap_pin(relation, block, &vmbuffer);
>
> I think it is not right to call visibilitymap_pin after holding a
> buffer lock (visibilitymap_pin can perform I/O).  Refer heap_update
> for how to pin the visibility map.
>

Thank you for your advice!
Fixed.

Attached are two separate patches; please give me your feedback.

Regards,

--
Masahiko Sawada
From 2020960bc7688afa8b0526c857a0e0af8b20d370 Mon Sep 17 00:00:00 2001
From: Masahiko Sawada <sawada.m...@gmail.com>
Date: Mon, 11 Jul 2016 12:39:32 -0700
Subject: [PATCH 1/2] Fix heap_update set xmax without WAL logging in the
 already_marked = true case.

---
 src/backend/access/heap/heapam.c | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 57da57a..e7cb8ca 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -3923,6 +3923,16 @@ l2:
 
        if (need_toast || newtupsize > pagefree)
        {
+               /*
+                * For crash safety, we need to emit that xmax of old tuple is 
set
+                * and clear only the all-frozen bit on visibility map if needed
+                * before releasing the buffer. We can reuse xl_heap_lock for 
this
+                * purpose. It should be fine even if we crash midway from this
+                * section and the actual updating one later, since the xmax 
will
+                * appear to come from an aborted xid.
+                */
+               START_CRIT_SECTION();
+
                /* Clear obsolete visibility flags ... */
                oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
                oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
@@ -3936,6 +3946,28 @@ l2:
                /* temporarily make it look not-updated */
                oldtup.t_data->t_ctid = oldtup.t_self;
                already_marked = true;
+
+               MarkBufferDirty(buffer);
+
+               if (RelationNeedsWAL(relation))
+               {
+                       xl_heap_lock xlrec;
+                       XLogRecPtr recptr;
+
+                       XLogBeginInsert();
+                       XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+
+                       xlrec.offnum = 
ItemPointerGetOffsetNumber(&oldtup.t_self);
+                       xlrec.locking_xid = xmax_old_tuple;
+                       xlrec.infobits_set = 
compute_infobits(oldtup.t_data->t_infomask,
+                                                                               
                  oldtup.t_data->t_infomask2);
+                       XLogRegisterData((char *) &xlrec, SizeOfHeapLock);
+                       recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK);
+                       PageSetLSN(page, recptr);
+               }
+
+               END_CRIT_SECTION();
+
                LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 
                /*
-- 
2.8.1

From 37e8901df9fe6aa6125059257dc6b8d659bb2929 Mon Sep 17 00:00:00 2001
From: Masahiko Sawada <sawada.m...@gmail.com>
Date: Mon, 11 Jul 2016 12:44:57 -0700
Subject: [PATCH 2/2] Clear all-frozen bit on visibility map when xmax is set.

---
 src/backend/access/heap/heapam.c        | 49 +++++++++++++++++++++++++++++++++
 src/backend/access/heap/visibilitymap.c | 19 ++++++++++---
 src/include/access/visibilitymap.h      |  2 ++
 3 files changed, 66 insertions(+), 4 deletions(-)

diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index e7cb8ca..23e9b75 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -3947,6 +3947,14 @@ l2:
                oldtup.t_data->t_ctid = oldtup.t_self;
                already_marked = true;
 
+               /* Clear only the all-frozen bit on visibility map if needed */
+               if (PageIsAllVisible(BufferGetPage(buffer)) &&
+                       VM_ALL_FROZEN(relation, block, &vmbuffer))
+               {
+                       visibilitymap_clear_extended(relation, block, vmbuffer,
+                                                                               
 VISIBILITYMAP_ALL_FROZEN);
+               }
+
                MarkBufferDirty(buffer);
 
                if (RelationNeedsWAL(relation))
@@ -4538,6 +4546,8 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
        ItemPointer tid = &(tuple->t_self);
        ItemId          lp;
        Page            page;
+       Buffer          vmbuffer = InvalidBuffer;
+       BlockNumber     block;
        TransactionId xid,
                                xmax;
        uint16          old_infomask,
@@ -4547,6 +4557,15 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
        bool            have_tuple_lock = false;
 
        *buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
+       block = ItemPointerGetBlockNumber(tid);
+
+       /*
+        * Before locking the buffer, pin the visibility map page if it appears 
to
+        * be necessary
+        */
+       if (PageIsAllVisible(BufferGetPage(*buffer)))
+               visibilitymap_pin(relation, block, &vmbuffer);
+
        LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
 
        page = BufferGetPage(*buffer);
@@ -5066,6 +5085,15 @@ failed:
        if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask))
                tuple->t_data->t_ctid = *tid;
 
+       /* Clear only the all-frozen bit on visibility map if needed */
+       if (PageIsAllVisible(page) &&
+               VM_ALL_FROZEN(relation, block, &vmbuffer))
+       {
+               visibilitymap_clear_extended(relation, block, vmbuffer,
+                                                                        
VISIBILITYMAP_ALL_FROZEN);
+       }
+
+
        MarkBufferDirty(*buffer);
 
        /*
@@ -5104,6 +5132,8 @@ failed:
        END_CRIT_SECTION();
 
        LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
+       if (BufferIsValid(vmbuffer))
+               ReleaseBuffer(vmbuffer);
 
        /*
         * Don't update the visibility map here. Locking a tuple doesn't change
@@ -8726,6 +8756,25 @@ heap_xlog_lock(XLogReaderState *record)
                }
                HeapTupleHeaderSetXmax(htup, xlrec->locking_xid);
                HeapTupleHeaderSetCmax(htup, FirstCommandId, false);
+
+               /* The all-frozen bit on the visibility map needs to be cleared if needed */
+               if (PageIsAllVisible(BufferGetPage(buffer)))
+               {
+                       RelFileNode     rnode;
+                       Buffer          vmbuffer = InvalidBuffer;
+                       BlockNumber     block;
+                       Relation        reln;
+
+                       XLogRecGetBlockTag(record, 0, &rnode, NULL, &block);
+                       reln = CreateFakeRelcacheEntry(rnode);
+                       visibilitymap_pin(reln, block, &vmbuffer);
+
+                       if (VM_ALL_FROZEN(reln, block, &vmbuffer))
+                               visibilitymap_clear_extended(reln, block, 
vmbuffer,
+                                                                               
         VISIBILITYMAP_ALL_FROZEN);
+                       ReleaseBuffer(vmbuffer);
+               }
+
                PageSetLSN(page, lsn);
                MarkBufferDirty(buffer);
        }
diff --git a/src/backend/access/heap/visibilitymap.c 
b/src/backend/access/heap/visibilitymap.c
index b472d31..c52c0b0 100644
--- a/src/backend/access/heap/visibilitymap.c
+++ b/src/backend/access/heap/visibilitymap.c
@@ -11,7 +11,7 @@
  *       src/backend/access/heap/visibilitymap.c
  *
  * INTERFACE ROUTINES
- *             visibilitymap_clear  - clear a bit in the visibility map
+ *             visibilitymap_clear  - clear all bits for one page in the 
visibility map
  *             visibilitymap_pin        - pin a map page for setting a bit
  *             visibilitymap_pin_ok - check whether correct map page is 
already pinned
  *             visibilitymap_set        - set a bit in a previously pinned page
@@ -157,23 +157,34 @@ static const uint8 number_of_ones_for_frozen[256] = {
 static Buffer vm_readbuf(Relation rel, BlockNumber blkno, bool extend);
 static void vm_extend(Relation rel, BlockNumber nvmblocks);
 
+/*
+ * A shorthand for visibilitymap_clear_extended, for clearing all bits for one
+ * page in visibility map with VISIBILITYMAP_VALID_BITS.
+ */
+void
+visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer buf)
+{
+       visibilitymap_clear_extended(rel, heapBlk, buf, 
VISIBILITYMAP_VALID_BITS);
+}
 
 /*
- *     visibilitymap_clear - clear all bits for one page in visibility map
+ *     visibilitymap_clear_extended - clear bit(s) for one page in visibility 
map
  *
  * You must pass a buffer containing the correct map page to this function.
  * Call visibilitymap_pin first to pin the right one. This function doesn't do
  * any I/O.
  */
 void
-visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer buf)
+visibilitymap_clear_extended(Relation rel, BlockNumber heapBlk, Buffer buf, 
uint8 flags)
 {
        BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
        int                     mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
        int                     mapOffset = HEAPBLK_TO_OFFSET(heapBlk);
-       uint8           mask = VISIBILITYMAP_VALID_BITS << mapOffset;
+       uint8           mask = flags << mapOffset;
        char       *map;
 
+       Assert(flags & VISIBILITYMAP_VALID_BITS);
+
 #ifdef TRACE_VISIBILITYMAP
        elog(DEBUG1, "vm_clear %s %d", RelationGetRelationName(rel), heapBlk);
 #endif
diff --git a/src/include/access/visibilitymap.h 
b/src/include/access/visibilitymap.h
index fca99ca..f305b03 100644
--- a/src/include/access/visibilitymap.h
+++ b/src/include/access/visibilitymap.h
@@ -36,6 +36,8 @@
 
 extern void visibilitymap_clear(Relation rel, BlockNumber heapBlk,
                                        Buffer vmbuf);
+extern void visibilitymap_clear_extended(Relation rel, BlockNumber heapBlk,
+                                       Buffer vmbuf, uint8 flags);
 extern void visibilitymap_pin(Relation rel, BlockNumber heapBlk,
                                  Buffer *vmbuf);
 extern bool visibilitymap_pin_ok(BlockNumber heapBlk, Buffer vmbuf);
-- 
2.8.1

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to