Rebased against 40d964ec997f64227bc0ff5e058dc4a5770a70a9. I moved some unrelated patches to a separate thread ("vacuum verbose detail logs are unclear").
>From 33c7166e3c8f056a8eb6295ec92fed8c85eda7d6 Mon Sep 17 00:00:00 2001 From: Justin Pryzby <pryz...@telsasoft.com> Date: Mon, 23 Dec 2019 14:38:01 -0600 Subject: [PATCH v9 1/3] dedup2: skip_blocks
--- src/backend/access/heap/vacuumlazy.c | 187 ++++++++++++++++------------------- 1 file changed, 84 insertions(+), 103 deletions(-) diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index b331f4c..9849685 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -660,6 +660,88 @@ vacuum_log_cleanup_info(Relation rel, LVRelStats *vacrelstats) } /* + * Return whether skipping blocks or not. + * Except when aggressive is set, we want to skip pages that are + * all-visible according to the visibility map, but only when we can skip + * at least SKIP_PAGES_THRESHOLD consecutive pages. Since we're reading + * sequentially, the OS should be doing readahead for us, so there's no + * gain in skipping a page now and then; that's likely to disable + * readahead and so be counterproductive. Also, skipping even a single + * page means that we can't update relfrozenxid, so we only want to do it + * if we can skip a goodly number of pages. + * + * When aggressive is set, we can't skip pages just because they are + * all-visible, but we can still skip pages that are all-frozen, since + * such pages do not need freezing and do not affect the value that we can + * safely set for relfrozenxid or relminmxid. + * + * Before entering the main loop, establish the invariant that + * next_unskippable_block is the next block number >= blkno that we can't + * skip based on the visibility map, either all-visible for a regular scan + * or all-frozen for an aggressive scan. We set it to nblocks if there's + * no such block. We also set up the skipping_blocks flag correctly at + * this stage. + * + * Note: The value returned by visibilitymap_get_status could be slightly + * out-of-date, since we make this test before reading the corresponding + * heap page or locking the buffer. This is OK. 
If we mistakenly think + * that the page is all-visible or all-frozen when in fact the flag's just + * been cleared, we might fail to vacuum the page. It's easy to see that + * skipping a page when aggressive is not set is not a very big deal; we + * might leave some dead tuples lying around, but the next vacuum will + * find them. But even when aggressive *is* set, it's still OK if we miss + * a page whose all-frozen marking has just been cleared. Any new XIDs + * just added to that page are necessarily newer than the GlobalXmin we + * computed, so they'll have no effect on the value to which we can safely + * set relfrozenxid. A similar argument applies for MXIDs and relminmxid. + * + * We will scan the table's last page, at least to the extent of + * determining whether it has tuples or not, even if it should be skipped + * according to the above rules; except when we've already determined that + * it's not worth trying to truncate the table. This avoids having + * lazy_truncate_heap() take access-exclusive lock on the table to attempt + * a truncation that just fails immediately because there are tuples in + * the last page. This is worth avoiding mainly because such a lock must + * be replayed on any hot standby, where it can be disruptive. + */ +static bool +skip_blocks(Relation onerel, VacuumParams *params, BlockNumber *next_unskippable_block, BlockNumber nblocks, Buffer *vmbuffer, bool aggressive) +{ + if ((params->options & VACOPT_DISABLE_PAGE_SKIPPING) == 0) + { + while (*next_unskippable_block < nblocks) + { + uint8 vmstatus; + + vmstatus = visibilitymap_get_status(onerel, *next_unskippable_block, + vmbuffer); + if (aggressive) + { + if ((vmstatus & VISIBILITYMAP_ALL_FROZEN) == 0) + break; + } + else + { + if ((vmstatus & VISIBILITYMAP_ALL_VISIBLE) == 0) + break; + } + vacuum_delay_point(); + ++*next_unskippable_block; + } + } + + + /* + * We know we can't skip the current block. 
But set up + * skipping_blocks to do the right thing at the following blocks. + */ + if (*next_unskippable_block >= SKIP_PAGES_THRESHOLD) + return true; + else + return false; +} + +/* * lazy_scan_heap() -- scan an open heap relation * * This routine prunes each page in the heap, which will among other @@ -794,78 +876,8 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, initprog_val[2] = dead_tuples->max_tuples; pgstat_progress_update_multi_param(3, initprog_index, initprog_val); - /* - * Except when aggressive is set, we want to skip pages that are - * all-visible according to the visibility map, but only when we can skip - * at least SKIP_PAGES_THRESHOLD consecutive pages. Since we're reading - * sequentially, the OS should be doing readahead for us, so there's no - * gain in skipping a page now and then; that's likely to disable - * readahead and so be counterproductive. Also, skipping even a single - * page means that we can't update relfrozenxid, so we only want to do it - * if we can skip a goodly number of pages. - * - * When aggressive is set, we can't skip pages just because they are - * all-visible, but we can still skip pages that are all-frozen, since - * such pages do not need freezing and do not affect the value that we can - * safely set for relfrozenxid or relminmxid. - * - * Before entering the main loop, establish the invariant that - * next_unskippable_block is the next block number >= blkno that we can't - * skip based on the visibility map, either all-visible for a regular scan - * or all-frozen for an aggressive scan. We set it to nblocks if there's - * no such block. We also set up the skipping_blocks flag correctly at - * this stage. - * - * Note: The value returned by visibilitymap_get_status could be slightly - * out-of-date, since we make this test before reading the corresponding - * heap page or locking the buffer. This is OK. 
If we mistakenly think - * that the page is all-visible or all-frozen when in fact the flag's just - * been cleared, we might fail to vacuum the page. It's easy to see that - * skipping a page when aggressive is not set is not a very big deal; we - * might leave some dead tuples lying around, but the next vacuum will - * find them. But even when aggressive *is* set, it's still OK if we miss - * a page whose all-frozen marking has just been cleared. Any new XIDs - * just added to that page are necessarily newer than the GlobalXmin we - * computed, so they'll have no effect on the value to which we can safely - * set relfrozenxid. A similar argument applies for MXIDs and relminmxid. - * - * We will scan the table's last page, at least to the extent of - * determining whether it has tuples or not, even if it should be skipped - * according to the above rules; except when we've already determined that - * it's not worth trying to truncate the table. This avoids having - * lazy_truncate_heap() take access-exclusive lock on the table to attempt - * a truncation that just fails immediately because there are tuples in - * the last page. This is worth avoiding mainly because such a lock must - * be replayed on any hot standby, where it can be disruptive. 
- */ next_unskippable_block = 0; - if ((params->options & VACOPT_DISABLE_PAGE_SKIPPING) == 0) - { - while (next_unskippable_block < nblocks) - { - uint8 vmstatus; - - vmstatus = visibilitymap_get_status(onerel, next_unskippable_block, - &vmbuffer); - if (aggressive) - { - if ((vmstatus & VISIBILITYMAP_ALL_FROZEN) == 0) - break; - } - else - { - if ((vmstatus & VISIBILITYMAP_ALL_VISIBLE) == 0) - break; - } - vacuum_delay_point(); - next_unskippable_block++; - } - } - - if (next_unskippable_block >= SKIP_PAGES_THRESHOLD) - skipping_blocks = true; - else - skipping_blocks = false; + skipping_blocks = skip_blocks(onerel, params, &next_unskippable_block, nblocks, &vmbuffer, aggressive); for (blkno = 0; blkno < nblocks; blkno++) { @@ -894,38 +906,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, { /* Time to advance next_unskippable_block */ next_unskippable_block++; - if ((params->options & VACOPT_DISABLE_PAGE_SKIPPING) == 0) - { - while (next_unskippable_block < nblocks) - { - uint8 vmskipflags; - - vmskipflags = visibilitymap_get_status(onerel, - next_unskippable_block, - &vmbuffer); - if (aggressive) - { - if ((vmskipflags & VISIBILITYMAP_ALL_FROZEN) == 0) - break; - } - else - { - if ((vmskipflags & VISIBILITYMAP_ALL_VISIBLE) == 0) - break; - } - vacuum_delay_point(); - next_unskippable_block++; - } - } - - /* - * We know we can't skip the current block. But set up - * skipping_blocks to do the right thing at the following blocks. - */ - if (next_unskippable_block - blkno > SKIP_PAGES_THRESHOLD) - skipping_blocks = true; - else - skipping_blocks = false; + skipping_blocks = skip_blocks(onerel, params, &next_unskippable_block, nblocks, &vmbuffer, aggressive); /* * Normally, the fact that we can't skip this block must mean that -- 2.7.4
>From 623c725c8add0670b28cdbfceca1824ba5b0647c Mon Sep 17 00:00:00 2001 From: Justin Pryzby <pryz...@telsasoft.com> Date: Thu, 12 Dec 2019 20:54:37 -0600 Subject: [PATCH v9 2/3] vacuum errcontext to show block being processed As requested here. https://www.postgresql.org/message-id/20190807235154.erbmr4o4bo6vgnjv%40alap3.anarazel.de --- src/backend/access/heap/vacuumlazy.c | 37 ++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 9849685..c96abdf 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -289,6 +289,12 @@ typedef struct LVRelStats bool lock_waiter_detected; } LVRelStats; +typedef struct +{ + char *relname; + char *relnamespace; + BlockNumber blkno; +} vacuum_error_callback_arg; /* A few variables that don't seem worth passing around as parameters */ static int elevel = -1; @@ -358,6 +364,7 @@ static void end_parallel_vacuum(Relation *Irel, IndexBulkDeleteResult **stats, LVParallelState *lps, int nindexes); static LVSharedIndStats *get_indstats(LVShared *lvshared, int n); static bool skip_parallel_vacuum_index(Relation indrel, LVShared *lvshared); +static void vacuum_error_callback(void *arg); /* @@ -803,6 +810,8 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, PROGRESS_VACUUM_MAX_DEAD_TUPLES }; int64 initprog_val[3]; + ErrorContextCallback errcallback; + vacuum_error_callback_arg errcbarg; pg_rusage_init(&ru0); @@ -879,6 +888,15 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, next_unskippable_block = 0; skipping_blocks = skip_blocks(onerel, params, &next_unskippable_block, nblocks, &vmbuffer, aggressive); + /* Setup error traceback support for ereport() */ + errcbarg.relnamespace = get_namespace_name(RelationGetNamespace(onerel)); + errcbarg.relname = relname; + errcbarg.blkno = InvalidBlockNumber; /* Not known yet */ + 
errcallback.callback = vacuum_error_callback; + errcallback.arg = (void *) &errcbarg; + errcallback.previous = error_context_stack; + error_context_stack = &errcallback; + for (blkno = 0; blkno < nblocks; blkno++) { Buffer buf; @@ -900,6 +918,8 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, #define FORCE_CHECK_PAGE() \ (blkno == nblocks - 1 && should_attempt_truncation(params, vacrelstats)) + errcbarg.blkno = blkno; + pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno); if (blkno == next_unskippable_block) @@ -966,8 +986,11 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, } /* Work on all the indexes, then the heap */ + /* Don't use the errcontext handler outside this function */ + error_context_stack = errcallback.previous; lazy_vacuum_all_indexes(onerel, Irel, indstats, vacrelstats, lps, nindexes); + error_context_stack = &errcallback; /* Remove tuples from heap */ lazy_vacuum_heap(onerel, vacrelstats); @@ -1575,6 +1598,9 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, RecordPageWithFreeSpace(onerel, blkno, freespace); } + /* Pop the error context stack */ + error_context_stack = errcallback.previous; + /* report that everything is scanned and vacuumed */ pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno); @@ -3353,3 +3379,14 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc) table_close(onerel, ShareUpdateExclusiveLock); pfree(stats); } + +/* + * Error context callback for errors occurring during vacuum. + */ +static void +vacuum_error_callback(void *arg) +{ + vacuum_error_callback_arg *cbarg = arg; + errcontext("while scanning block %u of relation \"%s.%s\"", + cbarg->blkno, cbarg->relnamespace, cbarg->relname); +} -- 2.7.4
>From 27a0c085d8d965252ebb8eb2e47362f27fa4203e Mon Sep 17 00:00:00 2001 From: Justin Pryzby <pryz...@telsasoft.com> Date: Thu, 12 Dec 2019 20:34:03 -0600 Subject: [PATCH v9 3/3] add errcontext callback in lazy_vacuum_heap, too --- src/backend/access/heap/vacuumlazy.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index c96abdf..f380437 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -1639,6 +1639,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, /* Remove tuples from heap */ lazy_vacuum_heap(onerel, vacrelstats); + error_context_stack = errcallback.previous; } /* @@ -1777,10 +1778,22 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats) PGRUsage ru0; Buffer vmbuffer = InvalidBuffer; + ErrorContextCallback errcallback; + vacuum_error_callback_arg errcbarg; + /* Report that we are now vacuuming the heap */ pgstat_progress_update_param(PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_VACUUM_HEAP); + /* Setup error traceback support for ereport() */ + errcbarg.relnamespace = get_namespace_name(RelationGetNamespace(onerel)); + errcbarg.relname = RelationGetRelationName(onerel); + errcbarg.blkno = InvalidBlockNumber; /* Not known yet */ + errcallback.callback = vacuum_error_callback; + errcallback.arg = (void *) &errcbarg; + errcallback.previous = error_context_stack; + error_context_stack = &errcallback; + pg_rusage_init(&ru0); npages = 0; @@ -1795,6 +1808,7 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats) vacuum_delay_point(); tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples->itemptrs[tupindex]); + errcbarg.blkno = tblk; buf = ReadBufferExtended(onerel, MAIN_FORKNUM, tblk, RBM_NORMAL, vac_strategy); if (!ConditionalLockBufferForCleanup(buf)) -- 2.7.4