On Tue, Jul 30, 2024 at 08:53:48AM +0000, Bertrand Drouvot wrote:
> Did a quick check and still LGTM.

Applied 0003 for now to add the redo LSN to the pgstats file, adding
the redo LSN to the two DEBUG2 entries when reading and writing while
on it, that I forgot.  (It was not 01:57 where I am now.)

Attached is the last one.
--
Michael
From 8f1f7a9ca9c19dae88057a5593b0f9c0cd748a88 Mon Sep 17 00:00:00 2001
From: Michael Paquier <mich...@paquier.xyz>
Date: Wed, 17 Jul 2024 12:42:43 +0900
Subject: [PATCH v6] Flush pgstats file during checkpoints

The flushes are done for non-shutdown checkpoints and restart points, so
as it is possible to keep some statistics around in the event of a
crash.  Keeping the before_shmem_exit() callback to flush the pgstats
file in a shutdown sequence is a bit easier than doing so at checkpoint,
as they may be stats to flush before we are completely done with the
shutdown checkpoint.  Keeping the callback also keeps the shutdown of
single-user mode simpler, where the stats also need to be flushed.

At the beginning of recovery, the stats file is loaded when the redo LSN
it stores matches with the point we are replaying from.  The file is not
removed anymore after being read, to ensure that there is still a source
of stats to feed on should the system crash until the next checkpoint
that would update the stats.
---
 src/include/pgstat.h                          |  4 +-
 src/backend/access/transam/xlog.c             | 30 +++++----
 src/backend/utils/activity/pgstat.c           | 65 ++++---------------
 src/test/recovery/t/029_stats_restart.pl      | 26 ++++++--
 .../recovery/t/030_stats_cleanup_replica.pl   |  2 +-
 5 files changed, 57 insertions(+), 70 deletions(-)

diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 043d39a565..f780b56cc3 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -466,9 +466,9 @@ typedef struct PgStat_PendingWalStats
 extern Size StatsShmemSize(void);
 extern void StatsShmemInit(void);
 
-/* Functions called during server startup / shutdown */
+/* Functions called during server startup / checkpoint / shutdown */
 extern void pgstat_restore_stats(XLogRecPtr redo);
-extern void pgstat_discard_stats(void);
+extern void pgstat_flush_stats(XLogRecPtr redo);
 extern void pgstat_before_server_shutdown(int code, Datum arg);
 
 /* Functions for backend initialization */
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 6499eabe4d..f058d68fc3 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -5401,7 +5401,6 @@ StartupXLOG(void)
 	XLogCtlInsert *Insert;
 	CheckPoint	checkPoint;
 	bool		wasShutdown;
-	bool		didCrash;
 	bool		haveTblspcMap;
 	bool		haveBackupLabel;
 	XLogRecPtr	EndOfLog;
@@ -5521,10 +5520,7 @@ StartupXLOG(void)
 	{
 		RemoveTempXlogFiles();
 		SyncDataDirectory();
-		didCrash = true;
 	}
-	else
-		didCrash = false;
 
 	/*
 	 * Prepare for WAL recovery if needed.
@@ -5645,14 +5641,8 @@ StartupXLOG(void)
 	 *
 	 * NB: Restoring replication slot stats relies on slot state to have
 	 * already been restored from disk.
-	 *
-	 * TODO: With a bit of extra work we could just start with a pgstat file
-	 * associated with the checkpoint redo location we're starting from.
 	 */
-	if (didCrash)
-		pgstat_discard_stats();
-	else
-		pgstat_restore_stats(checkPoint.redo);
+	pgstat_restore_stats(checkPoint.redo);
 
 	lastFullPageWrites = checkPoint.fullPageWrites;
 
@@ -7244,6 +7234,15 @@ CreateCheckPoint(int flags)
 	 */
 	END_CRIT_SECTION();
 
+	/*
+	 * The control file update is done, hence it is time to write some fresh
+	 * statistics.  Stats are flushed at shutdown by the checkpointer in a
+	 * dedicated before_shmem_exit callback, combined with sanity checks
+	 * related to the shutdown of pgstats.
+	 */
+	if (!shutdown)
+		pgstat_flush_stats(RedoRecPtr);
+
 	/*
 	 * WAL summaries end when the next XLOG_CHECKPOINT_REDO or
 	 * XLOG_CHECKPOINT_SHUTDOWN record is reached. This is the first point
@@ -7713,6 +7712,15 @@ CreateRestartPoint(int flags)
 	}
 	LWLockRelease(ControlFileLock);
 
+	/*
+	 * The control file update is done, hence it is time to write some fresh
+	 * statistics.  Stats are flushed at shutdown by the checkpointer in a
+	 * dedicated before_shmem_exit callback, combined with sanity checks
+	 * related to the shutdown of pgstats.
+	 */
+	if ((flags & CHECKPOINT_IS_SHUTDOWN) == 0)
+		pgstat_flush_stats(RedoRecPtr);
+
 	/*
 	 * Update the average distance between checkpoints/restartpoints if the
 	 * prior checkpoint exists.
diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c
index 81484222cf..0b02353359 100644
--- a/src/backend/utils/activity/pgstat.c
+++ b/src/backend/utils/activity/pgstat.c
@@ -10,9 +10,10 @@
  * statistics.
  *
  * Statistics are loaded from the filesystem during startup (by the startup
- * process), unless preceded by a crash, in which case all stats are
- * discarded. They are written out by the checkpointer process just before
- * shutting down, except when shutting down in immediate mode.
+ * process) if the stats file has a redo LSN that matches with the one stored
+ * in the control file. They are written out by the checkpointer during
+ * checkpoints and restart points, as well as before shutting down, except
+ * when shutting down in immediate mode.
  *
  * Fixed-numbered stats are stored in plain (non-dynamic) shared memory.
  *
@@ -455,53 +456,19 @@ pgstat_restore_stats(XLogRecPtr redo)
 }
 
 /*
- * Remove the stats file.  This is currently used only if WAL recovery is
- * needed after a crash.
+ * Write stats in memory to disk.
  *
- * Should only be called by the startup process or in single user mode.
+ * This is called by the checkpointer or in single-user mode.
  */
 void
-pgstat_discard_stats(void)
+pgstat_flush_stats(XLogRecPtr redo)
 {
-	int			ret;
-
-	/* NB: this needs to be done even in single user mode */
-
-	ret = unlink(PGSTAT_STAT_PERMANENT_FILENAME);
-	if (ret != 0)
-	{
-		if (errno == ENOENT)
-			elog(DEBUG2,
-				 "didn't need to unlink permanent stats file \"%s\" - didn't exist",
-				 PGSTAT_STAT_PERMANENT_FILENAME);
-		else
-			ereport(LOG,
-					(errcode_for_file_access(),
-					 errmsg("could not unlink permanent statistics file \"%s\": %m",
-							PGSTAT_STAT_PERMANENT_FILENAME)));
-	}
-	else
-	{
-		ereport(DEBUG2,
-				(errcode_for_file_access(),
-				 errmsg_internal("unlinked permanent statistics file \"%s\"",
-								 PGSTAT_STAT_PERMANENT_FILENAME)));
-	}
-
-	/*
-	 * Reset stats contents. This will set reset timestamps of fixed-numbered
-	 * stats to the current time (no variable stats exist).
-	 */
-	pgstat_reset_after_failure();
+	pgstat_write_statsfile(redo);
 }
 
 /*
  * pgstat_before_server_shutdown() needs to be called by exactly one process
- * during regular server shutdowns. Otherwise all stats will be lost.
- *
- * We currently only write out stats for proc_exit(0). We might want to change
- * that at some point... But right now pgstat_discard_stats() would be called
- * during the start after a disorderly shutdown, anyway.
+ * during regular server shutdowns.
  */
 void
 pgstat_before_server_shutdown(int code, Datum arg)
@@ -526,6 +493,9 @@ pgstat_before_server_shutdown(int code, Datum arg)
 	 */
 	if (code == 0)
 	{
+		/* we're shutting down, so it's ok to just override this */
+		pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_NONE;
+
 		pgStatLocal.shmem->is_shutdown = true;
 		pgstat_write_statsfile(GetRedoRecPtr());
 	}
@@ -1346,8 +1316,8 @@ write_chunk(FILE *fpout, void *ptr, size_t len)
 #define write_chunk_s(fpout, ptr) write_chunk(fpout, ptr, sizeof(*ptr))
 
 /*
- * This function is called in the last process that is accessing the shared
- * stats so locking is not required.
+ * This function is called in the checkpointer or in single-user mode,
+ * so locking is not required.
  */
 static void
 pgstat_write_statsfile(XLogRecPtr redo)
@@ -1364,9 +1334,6 @@ pgstat_write_statsfile(XLogRecPtr redo)
 	/* should be called only by the checkpointer or single user mode */
 	Assert(!IsUnderPostmaster || MyBackendType == B_CHECKPOINTER);
 
-	/* we're shutting down, so it's ok to just override this */
-	pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_NONE;
-
 	elog(DEBUG2, "writing stats file \"%s\" with redo %X/%X", statfile,
 		 LSN_FORMAT_ARGS(redo));
 
@@ -1423,7 +1390,6 @@ pgstat_write_statsfile(XLogRecPtr redo)
 		CHECK_FOR_INTERRUPTS();
 
 		/* we may have some "dropped" entries not yet removed, skip them */
-		Assert(!ps->dropped);
 		if (ps->dropped)
 			continue;
 
@@ -1753,9 +1719,6 @@ pgstat_read_statsfile(XLogRecPtr redo)
 done:
 	FreeFile(fpin);
 
-	elog(DEBUG2, "removing permanent stats file \"%s\"", statfile);
-	unlink(statfile);
-
 	return;
 
 error:
diff --git a/src/test/recovery/t/029_stats_restart.pl b/src/test/recovery/t/029_stats_restart.pl
index 93a7209f69..73d059b9f9 100644
--- a/src/test/recovery/t/029_stats_restart.pl
+++ b/src/test/recovery/t/029_stats_restart.pl
@@ -59,7 +59,7 @@ ok(-f "$og_stats", "origin stats file must exist");
 copy($og_stats, $statsfile) or die "Copy failed: $!";
 
 
-## test discarding of stats file after crash etc
+## test retention of stats file after crash etc
 
 $node->start;
 
@@ -69,12 +69,28 @@ is(have_stats('function', $dboid, $funcoid),
 	't', "$sect: function stats do exist");
 is(have_stats('relation', $dboid, $tableoid),
 	't', "$sect: relation stats do exist");
+# Flush the stats to disk.
+$node->psql('postgres', 'checkpoint');
 
 $node->stop('immediate');
 
-ok(!-f "$og_stats", "no stats file should exist after immediate shutdown");
+ok(-f "$og_stats", "stats file should exist after immediate shutdown");
 
-# copy the old stats back to test we discard stats after crash restart
+# Start once, there should be stats restored from the previous checkpoint.
+$node->start;
+
+$sect = "crashrecovery";
+is(have_stats('database', $dboid, 0), 't', "$sect: db stats do exist");
+is(have_stats('function', $dboid, $funcoid),
+	't', "$sect: function stats do exist");
+is(have_stats('relation', $dboid, $tableoid),
+	't', "$sect: relation stats do exist");
+
+$node->stop('immediate');
+
+# Copy the old stats back, and test that we discard stats after crash restart
+# because these don't match with the redo LSN stored in the stats file
+# generated by the previous checkpoint.
 copy($statsfile, $og_stats) or die "Copy failed: $!";
 
 $node->start;
@@ -274,9 +290,9 @@ my $wal_restart_immediate = wal_stats();
 
 cmp_ok(
 	$wal_reset_restart->{reset},
-	'lt',
+	'eq',
 	$wal_restart_immediate->{reset},
-	"$sect: reset timestamp is new");
+	"$sect: reset timestamp is the same");
 
 $node->stop;
 done_testing();
diff --git a/src/test/recovery/t/030_stats_cleanup_replica.pl b/src/test/recovery/t/030_stats_cleanup_replica.pl
index 74b516cc7c..5735921c99 100644
--- a/src/test/recovery/t/030_stats_cleanup_replica.pl
+++ b/src/test/recovery/t/030_stats_cleanup_replica.pl
@@ -115,7 +115,7 @@ $node_standby->start();
 $sect = "post immediate restart";
 
 test_standby_func_tab_stats_status('postgres',
-	$dboid, $tableoid, $funcoid, 'f');
+	$dboid, $tableoid, $funcoid, 't');
 
 
 done_testing();
-- 
2.45.2

Attachment: signature.asc
Description: PGP signature

Reply via email to