On Mon, Jan 20, 2020 at 11:11:20AM -0800, Andres Freund wrote: > This I do not get. I didn't yet fully wake up, so I might just be slow?
It was needlessly cute at the cost of clarity (meant to avoid setting error_context_stack in lazy_scan_heap and again immediately on its return). On Mon, Jan 20, 2020 at 11:13:05AM -0800, Andres Freund wrote: > I was thinking that you could just use LVRelStats. Done. On Mon, Jan 20, 2020 at 11:11:20AM -0800, Andres Freund wrote: > Alternatively we could push another context for each index inside > lazy_vacuum_all_indexes(). There's been plenty bugs in indexes > triggering problems, so that could be worthwhile. Did this too, although I'm not sure what kind of errors it'd find (?) I considered elimating other uses of RelationGetRelationName, or looping over vacrelstats->blkno instead of local blkno. I did that in an additional patch (that will cause conflicts if you try to apply it, due to other vacuum patch in this branch). CREATE TABLE t AS SELECT generate_series(1,99999)a; postgres=# SET client_min_messages=debug;SET statement_timeout=39; VACUUM (VERBOSE, PARALLEL 0) t; INFO: vacuuming "public.t" 2020-01-20 15:46:14.993 CST [20056] ERROR: canceling statement due to statement timeout 2020-01-20 15:46:14.993 CST [20056] CONTEXT: while scanning block 211 of relation "public.t" 2020-01-20 15:46:14.993 CST [20056] STATEMENT: VACUUM (VERBOSE, PARALLEL 0) t; ERROR: canceling statement due to statement timeout CONTEXT: while scanning block 211 of relation "public.t" SELECT 'CREATE INDEX ON t(a)' FROM generate_series(1,11);\gexec UPDATE t SET a=a+1; postgres=# SET client_min_messages=debug;SET statement_timeout=99; VACUUM (VERBOSE, PARALLEL 0) t; INFO: vacuuming "public.t" DEBUG: "t_a_idx": vacuuming index 2020-01-20 15:47:36.338 CST [20139] ERROR: canceling statement due to statement timeout 2020-01-20 15:47:36.338 CST [20139] CONTEXT: while vacuuming relation "public.t_a_idx" 2020-01-20 15:47:36.338 CST [20139] STATEMENT: VACUUM (VERBOSE, PARALLEL 0) t; ERROR: canceling statement due to statement timeout CONTEXT: while vacuuming relation "public.t_a_idx" I haven't found a good way of exercizing the "vacuuming heap" path, though.
>From 44f5eaaef66c570395a9af2bdbe74943c9163c4d Mon Sep 17 00:00:00 2001 From: Justin Pryzby <pryz...@telsasoft.com> Date: Thu, 12 Dec 2019 20:54:37 -0600 Subject: [PATCH v10 1/5] vacuum errcontext to show block being processed As requested here. https://www.postgresql.org/message-id/20190807235154.erbmr4o4bo6vgnjv%40alap3.anarazel.de --- src/backend/access/heap/vacuumlazy.c | 38 ++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index bd2e7fb..2eb3caa 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -290,8 +290,12 @@ typedef struct LVRelStats int num_index_scans; TransactionId latestRemovedXid; bool lock_waiter_detected; -} LVRelStats; + /* Used by the error callback */ + char *relname; + char *relnamespace; + BlockNumber blkno; +} LVRelStats; /* A few variables that don't seem worth passing around as parameters */ static int elevel = -1; @@ -361,6 +365,7 @@ static void end_parallel_vacuum(Relation *Irel, IndexBulkDeleteResult **stats, LVParallelState *lps, int nindexes); static LVSharedIndStats *get_indstats(LVShared *lvshared, int n); static bool skip_parallel_vacuum_index(Relation indrel, LVShared *lvshared); +static void vacuum_error_callback(void *arg); /* @@ -813,6 +818,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, PROGRESS_VACUUM_MAX_DEAD_TUPLES }; int64 initprog_val[3]; + ErrorContextCallback errcallback; pg_rusage_init(&ru0); @@ -892,6 +898,16 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, next_unskippable_block = 0; skipping_blocks = skip_blocks(onerel, params, &next_unskippable_block, nblocks, &vmbuffer, aggressive); + /* Setup error traceback support for ereport() */ + vacrelstats->relnamespace = get_namespace_name(RelationGetNamespace(onerel)); + vacrelstats->relname = relname; + vacrelstats->blkno = InvalidBlockNumber; /* Not known yet */ + + errcallback.callback = vacuum_error_callback; + errcallback.arg = (void *) vacrelstats; + errcallback.previous = error_context_stack; + error_context_stack = &errcallback; + for (blkno = 0; blkno < nblocks; blkno++) { Buffer buf; @@ -913,6 +929,8 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, #define FORCE_CHECK_PAGE() \ (blkno == nblocks - 1 && should_attempt_truncation(params, vacrelstats)) + vacrelstats->blkno = blkno; + pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno); if (blkno == next_unskippable_block) @@ -979,11 +997,13 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, } /* Work on all the indexes, then the heap */ + /* Don't use the errcontext handler outside this function */ + error_context_stack = errcallback.previous; lazy_vacuum_all_indexes(onerel, Irel, indstats, vacrelstats, lps, nindexes); - /* Remove tuples from heap */ lazy_vacuum_heap(onerel, vacrelstats); + error_context_stack = &errcallback; /* * Forget the now-vacuumed tuples, and press on, but be careful @@ -1602,6 +1622,9 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, RecordPageWithFreeSpace(onerel, blkno, freespace); } + /* Pop the error context stack */ + error_context_stack = errcallback.previous; + /* report that everything is scanned and vacuumed */ pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno); @@ -3416,3 +3439,14 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc) table_close(onerel, ShareUpdateExclusiveLock); pfree(stats); } + +/* + * Error context callback for errors occurring during vacuum. + */ +static void +vacuum_error_callback(void *arg) +{ + LVRelStats *cbarg = arg; + errcontext("while scanning block %u of relation \"%s.%s\"", + cbarg->blkno, cbarg->relnamespace, cbarg->relname); +} -- 2.7.4
>From 4a7cd7e3d883277a7018206245624137c6cdb8f4 Mon Sep 17 00:00:00 2001 From: Justin Pryzby <pryz...@telsasoft.com> Date: Thu, 12 Dec 2019 20:34:03 -0600 Subject: [PATCH v10 2/5] add errcontext callback in lazy_vacuum_heap, too --- src/backend/access/heap/vacuumlazy.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 2eb3caa..32d9ef1 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -1827,12 +1827,23 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats) PGRUsage ru0; Buffer vmbuffer = InvalidBuffer; + ErrorContextCallback errcallback; + /* Report that we are now vacuuming the heap */ pgstat_progress_update_param(PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_VACUUM_HEAP); ereport(DEBUG1, (errmsg("\"%s\": vacuuming heap", RelationGetRelationName(onerel)))); + /* Setup error traceback support for ereport() */ + vacrelstats->relnamespace = get_namespace_name(RelationGetNamespace(onerel)); + vacrelstats->relname = RelationGetRelationName(onerel); + vacrelstats->blkno = InvalidBlockNumber; /* Not known yet */ + errcallback.callback = vacuum_error_callback; + errcallback.arg = (void *) vacrelstats; + errcallback.previous = error_context_stack; + error_context_stack = &errcallback; + pg_rusage_init(&ru0); npages = 0; @@ -1847,6 +1858,7 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats) vacuum_delay_point(); tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples->itemptrs[tupindex]); + vacrelstats->blkno = tblk; buf = ReadBufferExtended(onerel, MAIN_FORKNUM, tblk, RBM_NORMAL, vac_strategy); if (!ConditionalLockBufferForCleanup(buf)) @@ -1867,6 +1879,8 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats) npages++; } + error_context_stack = errcallback.previous; + if (BufferIsValid(vmbuffer)) { ReleaseBuffer(vmbuffer); -- 2.7.4
>From 55340719bb509f0c5366d2931281f044f9df9823 Mon Sep 17 00:00:00 2001 From: Justin Pryzby <pryz...@telsasoft.com> Date: Mon, 20 Jan 2020 14:28:45 -0600 Subject: [PATCH v10 3/5] Add vacrelstats.stage and distinct context message --- src/backend/access/heap/vacuumlazy.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 32d9ef1..ebf1c9b 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -295,6 +295,7 @@ typedef struct LVRelStats char *relname; char *relnamespace; BlockNumber blkno; + int stage; /* 0: scan heap; 1: vacuum heap */ } LVRelStats; /* A few variables that don't seem worth passing around as parameters */ @@ -902,6 +903,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, vacrelstats->relnamespace = get_namespace_name(RelationGetNamespace(onerel)); vacrelstats->relname = relname; vacrelstats->blkno = InvalidBlockNumber; /* Not known yet */ + vacrelstats->stage = 0; errcallback.callback = vacuum_error_callback; errcallback.arg = (void *) vacrelstats; @@ -1839,6 +1841,8 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats) vacrelstats->relnamespace = get_namespace_name(RelationGetNamespace(onerel)); vacrelstats->relname = RelationGetRelationName(onerel); vacrelstats->blkno = InvalidBlockNumber; /* Not known yet */ + vacrelstats->stage = 1; + errcallback.callback = vacuum_error_callback; errcallback.arg = (void *) vacrelstats; errcallback.previous = error_context_stack; @@ -3461,6 +3465,10 @@ static void vacuum_error_callback(void *arg) { LVRelStats *cbarg = arg; - errcontext("while scanning block %u of relation \"%s.%s\"", - cbarg->blkno, cbarg->relnamespace, cbarg->relname); -} + + if (cbarg->stage == 0) + errcontext(_("while scanning block %u of relation \"%s.%s\""), + cbarg->blkno, cbarg->relnamespace, cbarg->relname); + else if (cbarg->stage == 1) + errcontext(_("while vacuuming block %u of relation \"%s.%s\""), + cbarg->blkno, cbarg->relnamespace, cbarg->relname); -- 2.7.4
>From 2df15f17e234b90c28a98ee58b1b35ee52f0945a Mon Sep 17 00:00:00 2001 From: Justin Pryzby <pryz...@telsasoft.com> Date: Mon, 20 Jan 2020 14:44:06 -0600 Subject: [PATCH v10 4/5] errcontext for lazy_vacuum_index --- src/backend/access/heap/vacuumlazy.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index ebf1c9b..21f56e9 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -295,7 +295,7 @@ typedef struct LVRelStats char *relname; char *relnamespace; BlockNumber blkno; - int stage; /* 0: scan heap; 1: vacuum heap */ + int stage; /* 0: scan heap; 1: vacuum heap; 2: vacuum index */ } LVRelStats; /* A few variables that don't seem worth passing around as parameters */ @@ -2399,6 +2399,8 @@ lazy_vacuum_index(Relation indrel, IndexBulkDeleteResult **stats, IndexVacuumInfo ivinfo; const char *msg; PGRUsage ru0; + ErrorContextCallback errcallback; + LVRelStats vacrelstats; /* Used for error callback, only */ pg_rusage_init(&ru0); @@ -2410,10 +2412,24 @@ lazy_vacuum_index(Relation indrel, IndexBulkDeleteResult **stats, ivinfo.num_heap_tuples = reltuples; ivinfo.strategy = vac_strategy; + /* Setup error traceback support for ereport() */ + vacrelstats.relnamespace = get_namespace_name(RelationGetNamespace(indrel)); + vacrelstats.relname = RelationGetRelationName(indrel); + vacrelstats.blkno = InvalidBlockNumber; /* Not used */ + vacrelstats.stage = 2; + + errcallback.callback = vacuum_error_callback; + errcallback.arg = (void *) &vacrelstats; + errcallback.previous = error_context_stack; + error_context_stack = &errcallback; + /* Do bulk deletion */ *stats = index_bulk_delete(&ivinfo, *stats, lazy_tid_reaped, (void *) dead_tuples); + /* Pop the error context stack */ + error_context_stack = errcallback.previous; + if (IsParallelWorker()) msg = gettext_noop("scanned index \"%s\" to remove %d row versions by parallel vacuum worker"); else @@ -3472,3 +3488,7 @@ vacuum_error_callback(void *arg) else if (cbarg->stage == 1) errcontext(_("while vacuuming block %u of relation \"%s.%s\""), cbarg->blkno, cbarg->relnamespace, cbarg->relname); + else if (cbarg->stage == 2) + errcontext(_("while vacuuming relation \"%s.%s\""), + cbarg->relnamespace, cbarg->relname); +} -- 2.7.4
>From b368ac6744d2553bed686f2aeeeac470a27a80c8 Mon Sep 17 00:00:00 2001 From: Justin Pryzby <pryz...@telsasoft.com> Date: Mon, 20 Jan 2020 15:26:39 -0600 Subject: [PATCH v10 5/5] Avoid extra calls like GetRelationName --- src/backend/access/heap/vacuumlazy.c | 78 ++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 40 deletions(-) diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 21f56e9..20e5c52 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -606,8 +606,8 @@ heap_vacuum_rel(Relation onerel, VacuumParams *params, } appendStringInfo(&buf, msgfmt, get_database_name(MyDatabaseId), - get_namespace_name(RelationGetNamespace(onerel)), - RelationGetRelationName(onerel), + vacrelstats->relnamespace, + vacrelstats->relname, vacrelstats->num_index_scans); appendStringInfo(&buf, _("pages: %u removed, %u remain, %u skipped due to pins, %u skipped frozen, %u marked all visible, %u marked frozen\n"), // scanned_pages ? @@ -793,8 +793,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, BlockNumber nblocks, blkno; HeapTupleData tuple; - char *relname, - *msgprefix; + char *msgprefix; TransactionId relfrozenxid = onerel->rd_rel->relfrozenxid; TransactionId relminmxid = onerel->rd_rel->relminmxid; BlockNumber empty_pages, @@ -823,18 +822,6 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, pg_rusage_init(&ru0); - relname = RelationGetRelationName(onerel); - if (aggressive) - ereport(elevel, - (errmsg("aggressively vacuuming \"%s.%s\"", - get_namespace_name(RelationGetNamespace(onerel)), - relname))); - else - ereport(elevel, - (errmsg("vacuuming \"%s.%s\"", - get_namespace_name(RelationGetNamespace(onerel)), - relname))); - empty_pages = vacuumed_pages = 0; next_fsm_block_to_vacuum = (BlockNumber) 0; num_tuples = live_tuples = tups_vacuumed = nkeep = nunused = 0; @@ -850,8 +837,30 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, vacrelstats->pages_frozen = 0; vacrelstats->pages_allvisible = 0; + /* Setup error traceback support for ereport() */ + vacrelstats->relnamespace = get_namespace_name(RelationGetNamespace(onerel)); + vacrelstats->relname = RelationGetRelationName(onerel); + vacrelstats->blkno = InvalidBlockNumber; /* Not known yet */ + vacrelstats->stage = 0; + + errcallback.callback = vacuum_error_callback; + errcallback.arg = (void *) vacrelstats; + errcallback.previous = error_context_stack; + error_context_stack = &errcallback; + vacrelstats->latestRemovedXid = InvalidTransactionId; + if (aggressive) + ereport(elevel, + (errmsg("aggressively vacuuming \"%s.%s\"", + vacrelstats->relnamespace, + vacrelstats->relname))); + else + ereport(elevel, + (errmsg("vacuuming \"%s.%s\"", + vacrelstats->relnamespace, + vacrelstats->relname))); + /* * Initialize state for a parallel vacuum. As of now, only one worker * can be used for an index, so we invoke parallelism only if there are at @@ -872,7 +881,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, if (params->nworkers > 0) ereport(WARNING, (errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel", - RelationGetRelationName(onerel)))); + vacrelstats->relname))); } else lps = begin_parallel_vacuum(RelationGetRelid(onerel), Irel, @@ -899,17 +908,6 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, next_unskippable_block = 0; skipping_blocks = skip_blocks(onerel, params, &next_unskippable_block, nblocks, &vmbuffer, aggressive); - /* Setup error traceback support for ereport() */ - vacrelstats->relnamespace = get_namespace_name(RelationGetNamespace(onerel)); - vacrelstats->relname = relname; - vacrelstats->blkno = InvalidBlockNumber; /* Not known yet */ - vacrelstats->stage = 0; - - errcallback.callback = vacuum_error_callback; - errcallback.arg = (void *) vacrelstats; - errcallback.previous = error_context_stack; - error_context_stack = &errcallback; - for (blkno = 0; blkno < nblocks; blkno++) { Buffer buf; @@ -1561,7 +1559,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, && VM_ALL_VISIBLE(onerel, blkno, &vmbuffer)) { elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u", - relname, blkno); + vacrelstats->relname, blkno); visibilitymap_clear(onerel, blkno, vmbuffer, VISIBILITYMAP_VALID_BITS); } @@ -1582,7 +1580,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, else if (PageIsAllVisible(page) && has_dead_tuples) { elog(WARNING, "page containing dead tuples is marked as all-visible in relation \"%s\" page %u", - relname, blkno); + vacrelstats->relname, blkno); PageClearAllVisible(page); MarkBufferDirty(buf); visibilitymap_clear(onerel, blkno, vmbuffer, @@ -1695,7 +1693,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, if (vacuumed_pages) ereport(elevel, (errmsg("\"%s\": removed %.0f row versions in %u pages", - RelationGetRelationName(onerel), + vacrelstats->relname, tups_vacuumed, vacuumed_pages))); // tups_vacuumed being reported twice @@ -1746,7 +1744,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, ereport(elevel, (errmsg("\"%s\": removed %.0f, found %.0f nonremovable row versions in %u out of %u pages", - RelationGetRelationName(onerel), + vacrelstats->relname, tups_vacuumed, num_tuples, vacrelstats->scanned_pages, nblocks), // XXX: %.0f are dead but not yet removable vacrelstats->new_dead_tuples, @@ -1835,11 +1833,11 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats) pgstat_progress_update_param(PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_VACUUM_HEAP); ereport(DEBUG1, (errmsg("\"%s\": vacuuming heap", - RelationGetRelationName(onerel)))); + vacrelstats->relname))); /* Setup error traceback support for ereport() */ - vacrelstats->relnamespace = get_namespace_name(RelationGetNamespace(onerel)); - vacrelstats->relname = RelationGetRelationName(onerel); + /* vacrelstats->relnamespace already set */ + /* vacrelstats->relname already set */ vacrelstats->blkno = InvalidBlockNumber; /* Not known yet */ vacrelstats->stage = 1; @@ -1893,7 +1891,7 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats) ereport(DEBUG1, (errmsg("\"%s\": removed %d row versions in %d pages", - RelationGetRelationName(onerel), + vacrelstats->relname, tupindex, npages), errdetail_internal("%s", pg_rusage_show(&ru0)))); } @@ -2437,7 +2435,7 @@ lazy_vacuum_index(Relation indrel, IndexBulkDeleteResult **stats, ereport(DEBUG1, (errmsg(msg, - RelationGetRelationName(indrel), + vacrelstats.relname, dead_tuples->num_tuples), errdetail_internal("%s", pg_rusage_show(&ru0)))); } @@ -2581,7 +2579,7 @@ lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats) vacrelstats->lock_waiter_detected = true; ereport(elevel, (errmsg("\"%s\": stopping truncate due to conflicting lock request", - RelationGetRelationName(onerel)))); + vacrelstats->relname))); return; } @@ -2646,7 +2644,7 @@ lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats) ereport(elevel, (errmsg("\"%s\": truncated %u to %u pages", - RelationGetRelationName(onerel), + vacrelstats->relname, old_rel_pages, new_rel_pages), errdetail_internal("%s", pg_rusage_show(&ru0)))); @@ -2711,7 +2709,7 @@ count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats) { ereport(elevel, (errmsg("\"%s\": suspending truncate due to conflicting lock request", - RelationGetRelationName(onerel)))); + vacrelstats->relname))); vacrelstats->lock_waiter_detected = true; return blkno; -- 2.7.4