On Tue, Jun 24, 2025 at 4:01 PM Melanie Plageman
<melanieplage...@gmail.com> wrote:
>
> visibilitymap_set() arguably breaks a few of the coding rules for
> modifying and WAL logging buffers set out in
> src/backend/access/transam/README.

Here is a rebased version of this (it had some conflicts with recent
commits).
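To give a feel for the shape of the change before diving into the diff:
at a typical call site (this sketch is excerpted from the
lazy_scan_prune() hunk in the patch below), the open-coded sequence of
setting PD_ALL_VISIBLE, dirtying the buffer, and calling
visibilitymap_set() collapses into a single call to the new wrapper:

    /* Before: heap page changes open-coded at each call site */
    PageSetAllVisible(page);
    MarkBufferDirty(buf);
    old_vmbits = visibilitymap_set(vacrel->rel, blkno, buf,
                                   InvalidXLogRecPtr,
                                   vmbuffer, presult.vm_conflict_horizon,
                                   flags);

    /*
     * After: the wrapper sets PD_ALL_VISIBLE, dirties the buffer only if
     * needed (hint-style when no WAL is required), and sets the heap page
     * LSN only when the heap page was actually covered by the WAL record.
     */
    old_vmbits = heap_page_set_vm_and_log(vacrel->rel, blkno, buf,
                                          vmbuffer, presult.vm_conflict_horizon,
                                          flags);

- Melanie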
From 0b4998028f30001d455f8856a58ce909725a427a Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplage...@gmail.com>
Date: Thu, 26 Jun 2025 15:45:35 -0400
Subject: [PATCH v2] Introduce heap-specific wrapper for visibilitymap_set()

visibilitymap_set(), which sets bits in the visibility map
corresponding to the heap block of the table passed in, arguably
breaks a few of the coding rules for modifying and WAL logging
buffers set out in access/transam/README.

In several of the places where visibilitymap_set() is called, setting
the heap page's PD_ALL_VISIBLE bit and marking the buffer dirty are
done outside of a critical section. In some places, MarkBufferDirty()
is used before calling visibilitymap_set() when MarkBufferDirtyHint()
would be appropriate. And in some places where PD_ALL_VISIBLE may
already be set and the buffer is not marked dirty,
visibilitymap_set() will still set the heap page LSN when
checksums/wal_log_hints are enabled -- even though it was correct not
to dirty the buffer.

Besides all of these issues, having these operations open-coded all
over the place is error-prone.

This commit introduces a wrapper that performs the correct operations
on the heap page itself and invokes visibilitymap_set() to make the
changes to the VM page.
---
 src/backend/access/heap/heapam.c        | 92 ++++++++++++++++++++-----
 src/backend/access/heap/heapam_xlog.c   |  2 +-
 src/backend/access/heap/vacuumlazy.c    | 66 +++++------------
 src/backend/access/heap/visibilitymap.c | 58 ++++++----------
 src/include/access/heapam.h             |  3 +
 src/include/access/visibilitymap.h      |  2 +-
 6 files changed, 117 insertions(+), 106 deletions(-)

diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 0dcd6ee817e..112f946dab0 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -2505,8 +2505,6 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
                                 BufferGetBlockNumber(buffer),
                                 vmbuffer, VISIBILITYMAP_VALID_BITS);
         }
-        else if (all_frozen_set)
-            PageSetAllVisible(page);
 
         /*
          * XXX Should we set PageSetPrunable on this page ? See heap_insert()
@@ -2632,23 +2630,16 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
         /*
          * If we've frozen everything on the page, update the visibilitymap.
-         * We're already holding pin on the vmbuffer.
+         * We're already holding pin on the vmbuffer. It's fine to use
+         * InvalidTransactionId as the cutoff_xid here - this is only used
+         * when HEAP_INSERT_FROZEN is specified, which intentionally violates
+         * visibility rules.
          */
         if (all_frozen_set)
-        {
-            Assert(PageIsAllVisible(page));
-            Assert(visibilitymap_pin_ok(BufferGetBlockNumber(buffer), vmbuffer));
-
-            /*
-             * It's fine to use InvalidTransactionId here - this is only used
-             * when HEAP_INSERT_FROZEN is specified, which intentionally
-             * violates visibility rules.
-             */
-            visibilitymap_set(relation, BufferGetBlockNumber(buffer), buffer,
-                              InvalidXLogRecPtr, vmbuffer,
-                              InvalidTransactionId,
-                              VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
-        }
+            heap_page_set_vm_and_log(relation, BufferGetBlockNumber(buffer), buffer,
+                                     vmbuffer,
+                                     InvalidTransactionId,
+                                     VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
 
         UnlockReleaseBuffer(buffer);
 
         ndone += nthispage;
@@ -7840,6 +7831,73 @@ heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple)
     return false;
 }
 
+/*
+ * Make the heap and VM page changes needed to set a page all-visible.
+ * Do not call in recovery.
+ */
+uint8
+heap_page_set_vm_and_log(Relation rel, BlockNumber heap_blk, Buffer heap_buf,
+                         Buffer vmbuf, TransactionId cutoff_xid,
+                         uint8 vmflags)
+{
+    Page        heap_page = BufferGetPage(heap_buf);
+    bool        set_heap_lsn = false;
+    XLogRecPtr  recptr = InvalidXLogRecPtr;
+    uint8       old_vmbits = 0;
+
+    Assert(BufferIsValid(heap_buf));
+
+    START_CRIT_SECTION();
+
+    /* Check that we have the right heap page pinned */
+    if (BufferGetBlockNumber(heap_buf) != heap_blk)
+        elog(ERROR, "wrong heap buffer passed to heap_page_set_vm_and_log");
+
+    /*
+     * We must never end up with the VM bit set and the page-level
+     * PD_ALL_VISIBLE bit clear. If that were to occur, a subsequent page
+     * modification would fail to clear the VM bit. Though it is possible for
+     * the page-level bit to be set and the VM bit to be clear if checksums
+     * and wal_log_hints are not enabled.
+     */
+    if (!PageIsAllVisible(heap_page))
+    {
+        PageSetAllVisible(heap_page);
+
+        /*
+         * The buffer will usually be dirty from other changes, so it is
+         * worth the extra check.
+         */
+        if (!BufferIsDirty(heap_buf))
+        {
+            if (XLogHintBitIsNeeded())
+                MarkBufferDirty(heap_buf);
+            else
+                MarkBufferDirtyHint(heap_buf, true);
+        }
+
+        set_heap_lsn = XLogHintBitIsNeeded();
+    }
+
+    old_vmbits = visibilitymap_set(rel, heap_blk, heap_buf,
+                                   &recptr, vmbuf, cutoff_xid, vmflags);
+
+    /*
+     * If we modified the heap page and data checksums are enabled (or
+     * wal_log_hints=on), we need to protect the heap page from being torn.
+     *
+     * If not, then we must *not* update the heap page's LSN. In this case,
+     * the FPI for the heap page was omitted from the WAL record inserted for
+     * the VM change, so it would be incorrect to update the heap page's LSN.
+     */
+    if (set_heap_lsn)
+        PageSetLSN(heap_page, recptr);
+
+    END_CRIT_SECTION();
+
+    return old_vmbits;
+}
+
 /*
  * heap_tuple_should_freeze
  *
diff --git a/src/backend/access/heap/heapam_xlog.c b/src/backend/access/heap/heapam_xlog.c
index eb4bd3d6ae3..f2bc1bd06ee 100644
--- a/src/backend/access/heap/heapam_xlog.c
+++ b/src/backend/access/heap/heapam_xlog.c
@@ -297,7 +297,7 @@ heap_xlog_visible(XLogReaderState *record)
         reln = CreateFakeRelcacheEntry(rlocator);
         visibilitymap_pin(reln, blkno, &vmbuffer);
 
-        visibilitymap_set(reln, blkno, InvalidBuffer, lsn, vmbuffer,
+        visibilitymap_set(reln, blkno, InvalidBuffer, &lsn, vmbuffer,
                           xlrec->snapshotConflictHorizon, vmbits);
 
         ReleaseBuffer(vmbuffer);
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 8a42e17aec2..c0608af7d29 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -1874,9 +1874,6 @@ lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno,
     {
         START_CRIT_SECTION();
 
-        /* mark buffer dirty before writing a WAL record */
-        MarkBufferDirty(buf);
-
         /*
          * It's possible that another backend has extended the heap,
          * initialized the page, and then failed to WAL-log the page due
@@ -1888,14 +1885,15 @@
          */
         if (RelationNeedsWAL(vacrel->rel) &&
             PageGetLSN(page) == InvalidXLogRecPtr)
+        {
+            MarkBufferDirty(buf);
             log_newpage_buffer(buf, true);
+        }
 
-        PageSetAllVisible(page);
-        visibilitymap_set(vacrel->rel, blkno, buf,
-                          InvalidXLogRecPtr,
-                          vmbuffer, InvalidTransactionId,
-                          VISIBILITYMAP_ALL_VISIBLE |
-                          VISIBILITYMAP_ALL_FROZEN);
+        heap_page_set_vm_and_log(vacrel->rel, blkno, buf,
+                                 vmbuffer, InvalidTransactionId,
+                                 VISIBILITYMAP_ALL_VISIBLE |
+                                 VISIBILITYMAP_ALL_FROZEN);
         END_CRIT_SECTION();
 
         /* Count the newly all-frozen pages for logging */
@@ -2069,25 +2067,9 @@ lazy_scan_prune(LVRelState *vacrel,
             flags |= VISIBILITYMAP_ALL_FROZEN;
         }
 
-        /*
-         * It should never be the case that the visibility map page is set
-         * while the page-level bit is clear, but the reverse is allowed (if
-         * checksums are not enabled). Regardless, set both bits so that we
-         * get back in sync.
-         *
-         * NB: If the heap page is all-visible but the VM bit is not set, we
-         * don't need to dirty the heap page. However, if checksums are
-         * enabled, we do need to make sure that the heap page is dirtied
-         * before passing it to visibilitymap_set(), because it may be logged.
-         * Given that this situation should only happen in rare cases after a
-         * crash, it is not worth optimizing.
-         */
-        PageSetAllVisible(page);
-        MarkBufferDirty(buf);
-        old_vmbits = visibilitymap_set(vacrel->rel, blkno, buf,
-                                       InvalidXLogRecPtr,
-                                       vmbuffer, presult.vm_conflict_horizon,
-                                       flags);
+        old_vmbits = heap_page_set_vm_and_log(vacrel->rel, blkno, buf,
+                                              vmbuffer, presult.vm_conflict_horizon,
+                                              flags);
 
         /*
          * If the page wasn't already set all-visible and/or all-frozen in the
@@ -2159,17 +2141,6 @@ lazy_scan_prune(LVRelState *vacrel,
     {
         uint8       old_vmbits;
 
-        /*
-         * Avoid relying on all_visible_according_to_vm as a proxy for the
-         * page-level PD_ALL_VISIBLE bit being set, since it might have become
-         * stale -- even when all_visible is set
-         */
-        if (!PageIsAllVisible(page))
-        {
-            PageSetAllVisible(page);
-            MarkBufferDirty(buf);
-        }
-
         /*
          * Set the page all-frozen (and all-visible) in the VM.
          *
@@ -2178,11 +2149,10 @@ lazy_scan_prune(LVRelState *vacrel,
          * was logged when the page's tuples were frozen.
          */
         Assert(!TransactionIdIsValid(presult.vm_conflict_horizon));
-        old_vmbits = visibilitymap_set(vacrel->rel, blkno, buf,
-                                       InvalidXLogRecPtr,
-                                       vmbuffer, InvalidTransactionId,
-                                       VISIBILITYMAP_ALL_VISIBLE |
-                                       VISIBILITYMAP_ALL_FROZEN);
+        old_vmbits = heap_page_set_vm_and_log(vacrel->rel, blkno, buf,
+                                              vmbuffer, InvalidTransactionId,
+                                              VISIBILITYMAP_ALL_VISIBLE |
+                                              VISIBILITYMAP_ALL_FROZEN);
 
         /*
          * The page was likely already set all-visible in the VM. However,
@@ -2913,11 +2883,9 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
             flags |= VISIBILITYMAP_ALL_FROZEN;
         }
 
-        PageSetAllVisible(page);
-        visibilitymap_set(vacrel->rel, blkno, buffer,
-                          InvalidXLogRecPtr,
-                          vmbuffer, visibility_cutoff_xid,
-                          flags);
+        heap_page_set_vm_and_log(vacrel->rel, blkno, buffer,
+                                 vmbuffer, visibility_cutoff_xid,
+                                 flags);
 
         /* Count the newly set VM page for logging */
         vacrel->vm_new_visible_pages++;
diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c
index 745a04ef26e..c57632168c7 100644
--- a/src/backend/access/heap/visibilitymap.c
+++ b/src/backend/access/heap/visibilitymap.c
@@ -222,29 +222,31 @@ visibilitymap_pin_ok(BlockNumber heapBlk, Buffer vmbuf)
 /*
  * visibilitymap_set - set bit(s) on a previously pinned page
  *
- * recptr is the LSN of the XLOG record we're replaying, if we're in recovery,
- * or InvalidXLogRecPtr in normal running. The VM page LSN is advanced to the
- * one provided; in normal running, we generate a new XLOG record and set the
- * page LSN to that value (though the heap page's LSN may *not* be updated;
- * see below). cutoff_xid is the largest xmin on the page being marked
- * all-visible; it is needed for Hot Standby, and can be InvalidTransactionId
- * if the page contains no tuples. It can also be set to InvalidTransactionId
- * when a page that is already all-visible is being marked all-frozen.
- *
  * Caller is expected to set the heap page's PD_ALL_VISIBLE bit before calling
- * this function. Except in recovery, caller should also pass the heap
- * buffer. When checksums are enabled and we're not in recovery, we must add
- * the heap buffer to the WAL chain to protect it from being torn.
+ * this function. Except in recovery, caller should also pass the heap buffer.
+ * When checksums are enabled and we're not in recovery, we must add the heap
+ * buffer to the WAL chain to protect it from being torn.
  *
  * You must pass a buffer containing the correct map page to this function.
  * Call visibilitymap_pin first to pin the right one. This function doesn't do
  * any I/O.
  *
- * Returns the state of the page's VM bits before setting flags.
+ * cutoff_xid is the largest xmin on the page being marked all-visible; it is
+ * needed for Hot Standby, and can be InvalidTransactionId if the page
+ * contains no tuples. It can also be set to InvalidTransactionId when a page
+ * that is already all-visible is being marked all-frozen.
+ *
+ * If we're in recovery, recptr points to the LSN of the XLOG record we're
+ * replaying and the VM page LSN is advanced to this LSN. During normal
+ * running, we'll generate a new XLOG record for the changes to the VM and set
+ * the VM page LSN. We will return this LSN in recptr, and the caller may use
+ * this to set the heap page LSN.
+ *
+ * Returns the state of the page's VM bits before setting flags.
  */
 uint8
 visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
-                  XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid,
+                  XLogRecPtr *recptr, Buffer vmBuf, TransactionId cutoff_xid,
                   uint8 flags)
 {
     BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
@@ -258,17 +260,13 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
     elog(DEBUG1, "vm_set %s %d", RelationGetRelationName(rel), heapBlk);
 #endif
 
-    Assert(InRecovery || XLogRecPtrIsInvalid(recptr));
+    Assert(InRecovery || XLogRecPtrIsInvalid(*recptr));
     Assert(InRecovery || PageIsAllVisible((Page) BufferGetPage(heapBuf)));
     Assert((flags & VISIBILITYMAP_VALID_BITS) == flags);
 
     /* Must never set all_frozen bit without also setting all_visible bit */
    Assert(flags != VISIBILITYMAP_ALL_FROZEN);
 
-    /* Check that we have the right heap page pinned, if present */
-    if (BufferIsValid(heapBuf) && BufferGetBlockNumber(heapBuf) != heapBlk)
-        elog(ERROR, "wrong heap buffer passed to visibilitymap_set");
-
     /* Check that we have the right VM page pinned */
     if (!BufferIsValid(vmBuf) || BufferGetBlockNumber(vmBuf) != mapBlock)
         elog(ERROR, "wrong VM buffer passed to visibilitymap_set");
@@ -287,28 +285,12 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
 
         if (RelationNeedsWAL(rel))
         {
-            if (XLogRecPtrIsInvalid(recptr))
+            if (XLogRecPtrIsInvalid(*recptr))
             {
                 Assert(!InRecovery);
-                recptr = log_heap_visible(rel, heapBuf, vmBuf, cutoff_xid, flags);
-
-                /*
-                 * If data checksums are enabled (or wal_log_hints=on), we
-                 * need to protect the heap page from being torn.
-                 *
-                 * If not, then we must *not* update the heap page's LSN. In
-                 * this case, the FPI for the heap page was omitted from the
-                 * WAL record inserted above, so it would be incorrect to
-                 * update the heap page's LSN.
-                 */
-                if (XLogHintBitIsNeeded())
-                {
-                    Page        heapPage = BufferGetPage(heapBuf);
-
-                    PageSetLSN(heapPage, recptr);
-                }
+                *recptr = log_heap_visible(rel, heapBuf, vmBuf, cutoff_xid, flags);
             }
-            PageSetLSN(page, recptr);
+            PageSetLSN(page, *recptr);
         }
 
         END_CRIT_SECTION();
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 3a9424c19c9..9375296062f 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -360,6 +360,9 @@ extern bool heap_tuple_should_freeze(HeapTupleHeader tuple,
                                      TransactionId *NoFreezePageRelfrozenXid,
                                      MultiXactId *NoFreezePageRelminMxid);
 extern bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple);
+extern uint8 heap_page_set_vm_and_log(Relation rel, BlockNumber heap_blk, Buffer heap_buf,
+                                      Buffer vmbuf, TransactionId cutoff_xid,
+                                      uint8 vmflags);
 
 extern void simple_heap_insert(Relation relation, HeapTuple tup);
 extern void simple_heap_delete(Relation relation, ItemPointer tid);
diff --git a/src/include/access/visibilitymap.h b/src/include/access/visibilitymap.h
index be21c6dd1a3..4c7472e0b51 100644
--- a/src/include/access/visibilitymap.h
+++ b/src/include/access/visibilitymap.h
@@ -33,7 +33,7 @@ extern void visibilitymap_pin(Relation rel, BlockNumber heapBlk,
 extern bool visibilitymap_pin_ok(BlockNumber heapBlk, Buffer vmbuf);
 extern uint8 visibilitymap_set(Relation rel, BlockNumber heapBlk,
                                Buffer heapBuf,
-                               XLogRecPtr recptr,
+                               XLogRecPtr *recptr,
                                Buffer vmBuf, TransactionId cutoff_xid,
                                uint8 flags);
-- 
2.34.1