Hi,
I created a .patch that will allow me to recover the stat files after a
potential crash.
Depending on the traffic on the server, some records might be lost (roughly
the last 0.5 seconds' worth of records, more or less?).
From what I have read, that is still better than having no stats files at all.
I restricted it to the default recovery scenario only
(RECOVERY_TARGET_TIMELINE_LATEST) to avoid having invalid stats files with
other recovery options.
Am I missing something? File integrity should be fine, because the stats files are put in place by renaming.
--- a/src/include/pgstat.h 2022-02-22 22:22:22.222222222 +0200
+++ b/src/include/pgstat.h 2022-02-22 22:22:22.222222222 +0200
@@ -29,6 +29,7 @@
#define PGSTAT_STAT_PERMANENT_DIRECTORY "pg_stat"
#define PGSTAT_STAT_PERMANENT_FILENAME "pg_stat/global.stat"
#define PGSTAT_STAT_PERMANENT_TMPFILE "pg_stat/global.tmp"
+#define PGSTAT_STAT_RECOVERY_FILENAME "pg_stat/recovery"
/* Default directory to store temporary statistics data in */
#define PG_STAT_TMP_DIR "pg_stat_tmp"
@@ -1091,6 +1092,7 @@
extern void pgstat_init(void);
extern int pgstat_start(void);
extern void pgstat_reset_all(void);
+extern void pgstat_create_recovery_file(void);
extern void allow_immediate_pgstat_restart(void);
#ifdef EXEC_BACKEND
--- a/src/backend/access/transam/xlog.c 2022-02-22 22:22:22.222222222 +0200
+++ b/src/backend/access/transam/xlog.c 2022-02-22 22:22:22.222222222 +0200
@@ -5195,7 +5195,16 @@
/*
* Reset pgstat data, because it may be invalid after recovery.
*/
- pgstat_reset_all();
+ if (recoveryTargetTimeLineGoal == RECOVERY_TARGET_TIMELINE_LATEST)
+ {
+ elog(WARNING, "Doing recovery");
+ pgstat_create_recovery_file();
+ }
+ else
+ {
+ elog(WARNING, "Reseting recovery files");
+ pgstat_reset_all();
+ }
/*
* If there was a backup label file, it's done its job and the info
--- a/src/backend/postmaster/pgstat.c 2022-02-22 22:22:22.222222222 +0200
+++ b/src/backend/postmaster/pgstat.c 2022-02-22 22:22:22.222222222 +0200
@@ -739,6 +739,54 @@
pgstat_reset_remove_files(PGSTAT_STAT_PERMANENT_DIRECTORY);
}
+/* Unlinks the recovery marker (one-shot); true iff the unlink succeeded. */
+static bool
+pgstat_check_recovery_file_exists(void)
+{
+	return unlink(PGSTAT_STAT_RECOVERY_FILENAME) == 0;
+}
+
+/*
+ * pgstat_create_recovery_file() -
+ *
+ * Create the empty marker file PGSTAT_STAT_RECOVERY_FILENAME.  The file
+ * carries no data; only its existence matters.  Failures are reported at
+ * LOG level and the function simply returns, so callers are not interrupted.
+ */
+void
+pgstat_create_recovery_file(void)
+{
+	FILE	   *fpout;
+	const char *stat_rec_file = PGSTAT_STAT_RECOVERY_FILENAME;
+
+	elog(DEBUG2, "writing stats recovery file \"%s\"", stat_rec_file);
+
+	/* Opening for write is enough to create the (empty) marker. */
+	fpout = AllocateFile(stat_rec_file, PG_BINARY_W);
+	if (fpout == NULL)
+	{
+		ereport(LOG,
+				(errcode_for_file_access(),
+				 errmsg("could not open stats recovery file \"%s\": %m",
+						stat_rec_file)));
+		return;
+	}
+
+	/*
+	 * Nothing is written, so there is no stream error to test for; the
+	 * close is the only remaining step that can fail.  If it does, remove
+	 * the marker rather than leave a file in an unknown state.
+	 */
+	if (FreeFile(fpout) < 0)
+	{
+		ereport(LOG,
+				(errcode_for_file_access(),
+				 errmsg("could not close stats recovery file \"%s\": %m",
+						stat_rec_file)));
+		unlink(stat_rec_file);
+	}
+}
+
#ifdef EXEC_BACKEND
/*
@@ -3525,7 +3573,8 @@
* Read in existing stats files or initialize the stats to zero.
*/
pgStatRunningInCollector = true;
- pgStatDBHash = pgstat_read_statsfiles(InvalidOid, true, true);
+ bool recFile = pgstat_check_recovery_file_exists();
+ pgStatDBHash = pgstat_read_statsfiles(InvalidOid, recFile ? false : true, true);
/* Prepare to wait for our latch or data in our socket. */
wes = CreateWaitEventSet(CurrentMemoryContext, 3);