Hi,

On 2023-02-06 17:53:00 -0800, Andres Freund wrote:
> Another run hit an issue we've been fighting repeatedly on the buildfarm / CI:
> https://cirrus-ci.com/task/5527490404286464
> https://api.cirrus-ci.com/v1/artifact/task/5527490404286464/testrun/build/testrun/regress-running/regress/regression.diffs
>
> diff -U3 /tmp/cirrus-ci-build/src/test/regress/expected/create_index.out 
> /tmp/cirrus-ci-build/build/testrun/regress-running/regress/results/create_index.out
> --- /tmp/cirrus-ci-build/src/test/regress/expected/create_index.out   
> 2023-02-06 23:52:43.627604000 +0000
> +++ 
> /tmp/cirrus-ci-build/build/testrun/regress-running/regress/results/create_index.out
>        2023-02-07 00:03:04.535232000 +0000
> @@ -1930,12 +1930,13 @@
>  SELECT thousand, tenthous FROM tenk1
>  WHERE thousand < 2 AND tenthous IN (1001,3000)
>  ORDER BY thousand;
> -                      QUERY PLAN
> --------------------------------------------------------
> - Index Only Scan using tenk1_thous_tenthous on tenk1
> -   Index Cond: (thousand < 2)
> -   Filter: (tenthous = ANY ('{1001,3000}'::integer[]))
> -(3 rows)
> +                                      QUERY PLAN
> +--------------------------------------------------------------------------------------
> + Sort
> +   Sort Key: thousand
> +   ->  Index Only Scan using tenk1_thous_tenthous on tenk1
> +         Index Cond: ((thousand < 2) AND (tenthous = ANY 
> ('{1001,3000}'::integer[])))
> +(4 rows)
>
>  SELECT thousand, tenthous FROM tenk1
>  WHERE thousand < 2 AND tenthous IN (1001,3000)
>
>
> I'd be tempted to disable the test, but it also found genuine issues in a
> bunch of CF entries, and all these test instabilities seem like ones we'd also
> see, with a lower occurrence on the buildfarm.

The last occasion we hit this was at: 
https://www.postgresql.org/message-id/1346227.1649887693%40sss.pgh.pa.us

I'm working on cleaning up the patch used for debugging in that thread, to
make VACUUM log to the server log if VERBOSE isn't used.

One thing I'm not quite sure what to do about is that we atm use a hardcoded
DEBUG2 (not controlled by VERBOSE) in a bunch of places:

        ereport(DEBUG2,
                        (errmsg("table \"%s\": removed %lld dead item 
identifiers in %u pages",
                                        vacrel->relname, (long long) index, 
vacuumed_pages)));

    ivinfo.message_level = DEBUG2;

I find DEBUG2 hard to use to run the entire regression tests, it results in a
lot of output. Lots of it far less important than these kinds of details
here. So I'd like to use a different log level for them - but without further
complications that'd mean they'd show up in VACUUUM VERBOSE.

I made them part of VERBOSE for now, but not because I think that's
necessarily the right answer, but because it could be useful for debugging
this stupid flapping test.


I right now set instrument = true when
message_level_is_interesting(DEBUG1). But that probably should be false? I set
it to true because of starttime, but it'd probably be better to just move it
out of the if (instrument). Also would require re-jiggering the condition of
the "main block" doing the logging.


FWIW, running all regression tests that support doing so against a running
server with DEBUG1 results in 8.1MB, DEBUG2 in 17MB.


Greetings,

Andres Freund
>From d627c0692e8ddf6a7759f101f30d8d1c54583778 Mon Sep 17 00:00:00 2001
From: Andres Freund <and...@anarazel.de>
Date: Tue, 7 Feb 2023 18:12:48 -0800
Subject: [PATCH v2 1/2] wip: Log VACUUM information as DEBUG1 when VERBOSE is
 not used

Otherwise it's very hard to debug some kinds of issues, as one cannot use
VACUUM VERBOSE in regression tests.

Author:
Reviewed-by:
Discussion: https://postgr.es/m/
Backpatch:
---
 src/backend/access/heap/vacuumlazy.c | 57 ++++++++++++++++++++++------
 1 file changed, 45 insertions(+), 12 deletions(-)

diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 8f14cf85f38..f6f3bb4c021 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -180,6 +180,7 @@ typedef struct LVRelState
 	OffsetNumber offnum;		/* used only for heap operations */
 	VacErrPhase phase;
 	bool		verbose;		/* VACUUM VERBOSE? */
+	int			elevel;
 
 	/*
 	 * dead_items stores TIDs whose index tuples are deleted by index
@@ -324,10 +325,33 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
 				StartPageDirty = VacuumPageDirty;
 	ErrorContextCallback errcallback;
 	char	  **indnames = NULL;
+	int			elevel;
+
+	if (params->options & VACOPT_VERBOSE)
+	{
+		verbose = true;
+		elevel = INFO;
+		instrument = true;
+	}
+	else if (IsAutoVacuumWorkerProcess() && params->log_min_duration >= 0)
+	{
+		verbose = true;
+		elevel = LOG;
+		instrument = true;
+	}
+	else if (message_level_is_interesting(DEBUG1))
+	{
+		verbose = true;
+		elevel = DEBUG1;
+		instrument = true;
+	}
+	else
+	{
+		verbose = false;
+		elevel = DEBUG5; /* avoid compiler warning, not reached */
+		instrument = false;
+	}
 
-	verbose = (params->options & VACOPT_VERBOSE) != 0;
-	instrument = (verbose || (IsAutoVacuumWorkerProcess() &&
-							  params->log_min_duration >= 0));
 	if (instrument)
 	{
 		pg_rusage_init(&ru0);
@@ -361,6 +385,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
 	vacrel->indname = NULL;
 	vacrel->phase = VACUUM_ERRCB_PHASE_UNKNOWN;
 	vacrel->verbose = verbose;
+	vacrel->elevel = elevel;
 	errcallback.callback = vacuum_error_callback;
 	errcallback.arg = vacrel;
 	errcallback.previous = error_context_stack;
@@ -475,12 +500,12 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
 	if (verbose)
 	{
 		if (vacrel->aggressive)
-			ereport(INFO,
+			ereport(elevel,
 					(errmsg("aggressively vacuuming \"%s.%s.%s\"",
 							vacrel->dbname, vacrel->relnamespace,
 							vacrel->relname)));
 		else
-			ereport(INFO,
+			ereport(elevel,
 					(errmsg("vacuuming \"%s.%s.%s\"",
 							vacrel->dbname, vacrel->relnamespace,
 							vacrel->relname)));
@@ -765,7 +790,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
 							 (unsigned long long) walusage.wal_bytes);
 			appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
 
-			ereport(verbose ? INFO : LOG,
+			ereport(elevel,
 					(errmsg_internal("%s", buf.data)));
 			pfree(buf.data);
 		}
@@ -2707,7 +2732,11 @@ lazy_vacuum_one_index(Relation indrel, IndexBulkDeleteResult *istat,
 	ivinfo.analyze_only = false;
 	ivinfo.report_progress = false;
 	ivinfo.estimated_count = true;
-	ivinfo.message_level = DEBUG2;
+	/*
+	 * FIXME: This shows more information for VACUUM VERBOSE, uses DEBUG1
+	 * instead of DEBUG2.
+	 */
+	ivinfo.message_level = vacrel->elevel;
 	ivinfo.num_heap_tuples = reltuples;
 	ivinfo.strategy = vacrel->bstrategy;
 
@@ -2755,7 +2784,11 @@ lazy_cleanup_one_index(Relation indrel, IndexBulkDeleteResult *istat,
 	ivinfo.analyze_only = false;
 	ivinfo.report_progress = false;
 	ivinfo.estimated_count = estimated_count;
-	ivinfo.message_level = DEBUG2;
+	/*
+	 * FIXME: This shows more information for VACUUM VERBOSE, uses DEBUG1
+	 * instead of DEBUG2.
+	 */
+	ivinfo.message_level = vacrel->elevel;
 
 	ivinfo.num_heap_tuples = reltuples;
 	ivinfo.strategy = vacrel->bstrategy;
@@ -2875,7 +2908,7 @@ lazy_truncate_heap(LVRelState *vacrel)
 				 * We failed to establish the lock in the specified number of
 				 * retries. This means we give up truncating.
 				 */
-				ereport(vacrel->verbose ? INFO : DEBUG2,
+				ereport(vacrel->elevel,
 						(errmsg("\"%s\": stopping truncate due to conflicting lock request",
 								vacrel->relname)));
 				return;
@@ -2945,7 +2978,7 @@ lazy_truncate_heap(LVRelState *vacrel)
 		vacrel->removed_pages += orig_rel_pages - new_rel_pages;
 		vacrel->rel_pages = new_rel_pages;
 
-		ereport(vacrel->verbose ? INFO : DEBUG2,
+		ereport(vacrel->elevel,
 				(errmsg("table \"%s\": truncated %u to %u pages",
 						vacrel->relname,
 						orig_rel_pages, new_rel_pages)));
@@ -3007,7 +3040,7 @@ count_nondeletable_pages(LVRelState *vacrel, bool *lock_waiter_detected)
 			{
 				if (LockHasWaitersRelation(vacrel->rel, AccessExclusiveLock))
 				{
-					ereport(vacrel->verbose ? INFO : DEBUG2,
+					ereport(vacrel->elevel,
 							(errmsg("table \"%s\": suspending truncate due to conflicting lock request",
 									vacrel->relname)));
 
@@ -3175,7 +3208,7 @@ dead_items_alloc(LVRelState *vacrel, int nworkers)
 			vacrel->pvs = parallel_vacuum_init(vacrel->rel, vacrel->indrels,
 											   vacrel->nindexes, nworkers,
 											   max_items,
-											   vacrel->verbose ? INFO : DEBUG2,
+											   vacrel->elevel,
 											   vacrel->bstrategy);
 
 		/* If parallel mode started, dead_items space is allocated in DSM */
-- 
2.38.0

Reply via email to