Hello,
Currently pg_rewind refuses to run if full_page_writes is off. This is
to prevent it to run into a torn page during operation.
This is usually a good call, but some file systems like ZFS are
naturally immune to torn page (maybe btrfs too, but I don't know for
sure for this one).
Having the option to use pg_rewind without the cost associated with
full_page_writes when using a system immune to torn page is beneficial:
increased performance and more compact WAL.
This patch adds a new option "--no-ensure-full-page-writes" to pg_rewind
for this situation, as well as patched documentation.
Regards,
Jeremie Grauer
From 4f973b7e2d4bd9c04b6b6ce2c825dfdab4dbad11 Mon Sep 17 00:00:00 2001
From: Jeremie Grauer <[email protected]>
Date: Thu, 3 Nov 2022 16:23:49 +0100
Subject: [PATCH] adds the option --no-ensure-full-page-writes to pg_rewind
---
doc/src/sgml/ref/pg_rewind.sgml | 25 +++++++++++++++++-
src/bin/pg_rewind/libpq_source.c | 44 ++++++++++++++++++--------------
src/bin/pg_rewind/pg_rewind.c | 43 ++++++++++++++++++-------------
src/bin/pg_rewind/pg_rewind.h | 1 +
4 files changed, 75 insertions(+), 38 deletions(-)
diff --git a/doc/src/sgml/ref/pg_rewind.sgml b/doc/src/sgml/ref/pg_rewind.sgml
index 69d6924b3a..3dbdb503cd 100644
--- a/doc/src/sgml/ref/pg_rewind.sgml
+++ b/doc/src/sgml/ref/pg_rewind.sgml
@@ -99,7 +99,11 @@ PostgreSQL documentation
in <filename>postgresql.conf</filename> or data checksums enabled when
the cluster was initialized with <application>initdb</application>. Neither of these
are currently on by default. <xref linkend="guc-full-page-writes"/>
- must also be set to <literal>on</literal>, but is enabled by default.
+ should also be set to <literal>on</literal> (which is the default) except when the
+ underlying system is not succeptible to torn pages. ZFS filesystem for instance doesn't
+ need this parameter. If you are running such a system, you need to add the option
+ --no-enforce-full-page-writes else <application>pg_rewind</application> will refuse
+ to run.
</para>
<warning>
@@ -283,6 +287,25 @@ PostgreSQL documentation
</listitem>
</varlistentry>
+ <varlistentry>
+ <term><option>--no-ensure-full-page-writes</option></term>
+ <listitem>
+ <para>
+ <application>pg_rewind</application> by default requires that <xref linkend="guc-full-page-writes"/>
+ is set to on. This ensures that <application>pg_rewind</application> do
+ not run into torn pages while running. This option is necessary most of
+ the time since very few filesystems enforce the flush of full page to disk.
+ One such system is ZFS where the page is always flushed to disk in its
+ entirety. Other storage system may also have the same warranty. This option
+ is here to allow pg_rewind to run on such systems without enforcing that
+ <xref linkend="guc-full-page-writes"/> is on. It's important to know what
+ you are doing before setting this setting because in case your system does
+ not really warrants that the page is persisted to disk in its entirety, you
+ may run into data corruption.
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry>
<term><option>-V</option></term>
<term><option>--version</option></term>
diff --git a/src/bin/pg_rewind/libpq_source.c b/src/bin/pg_rewind/libpq_source.c
index 011c9cce6e..7b0c1d54f2 100644
--- a/src/bin/pg_rewind/libpq_source.c
+++ b/src/bin/pg_rewind/libpq_source.c
@@ -132,26 +132,32 @@ init_libpq_conn(PGconn *conn)
PQclear(res);
/*
- * Also check that full_page_writes is enabled. We can get torn pages if
- * a page is modified while we read it with pg_read_binary_file(), and we
- * rely on full page images to fix them.
+ * If --no-ensure-full-page-write is false (the default), check that
+ * full_page_writes is enabled. The goal here is to not run into a torn
+ * pages. Torn page can happend if a page is modified while we read it
+ * with pg_read_binary_file(). Some file systems are immune to torn page,
+ * this is why there is an option to disable this check. Outside of those
+ * specific systems, we rely on full_page_writes to avoid torn page.
*/
- str = run_simple_query(conn, "SHOW full_page_writes");
- if (strcmp(str, "on") != 0)
- pg_fatal("full_page_writes must be enabled in the source server");
- pg_free(str);
-
- /* Prepare a statement we'll use to fetch files */
- res = PQprepare(conn, "fetch_chunks_stmt",
- "SELECT path, begin,\n"
- " pg_read_binary_file(path, begin, len, true) AS chunk\n"
- "FROM unnest ($1::text[], $2::int8[], $3::int4[]) as x(path, begin, len)",
- 3, NULL);
-
- if (PQresultStatus(res) != PGRES_COMMAND_OK)
- pg_fatal("could not prepare statement to fetch file contents: %s",
- PQresultErrorMessage(res));
- PQclear(res);
+ if (!no_ensure_full_page_writes)
+ {
+ str = run_simple_query(conn, "SHOW full_page_writes");
+ if (strcmp(str, "on") != 0)
+ pg_fatal("full_page_writes must be enabled in the source server");
+ pg_free(str);
+
+ /* Prepare a statement we'll use to fetch files */
+ res = PQprepare(conn, "fetch_chunks_stmt",
+ "SELECT path, begin,\n"
+ " pg_read_binary_file(path, begin, len, true) AS chunk\n"
+ "FROM unnest ($1::text[], $2::int8[], $3::int4[]) as x(path, begin, len)",
+ 3, NULL);
+
+ if (PQresultStatus(res) != PGRES_COMMAND_OK)
+ pg_fatal("could not prepare statement to fetch file contents: %s",
+ PQresultErrorMessage(res));
+ PQclear(res);
+ }
}
/*
diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c
index 3cd77c09b1..230e48cfc3 100644
--- a/src/bin/pg_rewind/pg_rewind.c
+++ b/src/bin/pg_rewind/pg_rewind.c
@@ -68,6 +68,7 @@ bool showprogress = false;
bool dry_run = false;
bool do_sync = true;
bool restore_wal = false;
+bool no_ensure_full_page_writes = false;
/* Target history */
TimeLineHistoryEntry *targetHistory;
@@ -86,23 +87,24 @@ usage(const char *progname)
printf(_("%s resynchronizes a PostgreSQL cluster with another copy of the cluster.\n\n"), progname);
printf(_("Usage:\n %s [OPTION]...\n\n"), progname);
printf(_("Options:\n"));
- printf(_(" -c, --restore-target-wal use restore_command in target configuration to\n"
- " retrieve WAL files from archives\n"));
- printf(_(" -D, --target-pgdata=DIRECTORY existing data directory to modify\n"));
- printf(_(" --source-pgdata=DIRECTORY source data directory to synchronize with\n"));
- printf(_(" --source-server=CONNSTR source server to synchronize with\n"));
- printf(_(" -n, --dry-run stop before modifying anything\n"));
- printf(_(" -N, --no-sync do not wait for changes to be written\n"
- " safely to disk\n"));
- printf(_(" -P, --progress write progress messages\n"));
- printf(_(" -R, --write-recovery-conf write configuration for replication\n"
- " (requires --source-server)\n"));
- printf(_(" --config-file=FILENAME use specified main server configuration\n"
- " file when running target cluster\n"));
- printf(_(" --debug write a lot of debug messages\n"));
- printf(_(" --no-ensure-shutdown do not automatically fix unclean shutdown\n"));
- printf(_(" -V, --version output version information, then exit\n"));
- printf(_(" -?, --help show this help, then exit\n"));
+ printf(_(" -c, --restore-target-wal use restore_command in target configuration to\n"
+ " retrieve WAL files from archives\n"));
+ printf(_(" -D, --target-pgdata=DIRECTORY existing data directory to modify\n"));
+ printf(_(" --source-pgdata=DIRECTORY source data directory to synchronize with\n"));
+ printf(_(" --source-server=CONNSTR source server to synchronize with\n"));
+ printf(_(" -n, --dry-run stop before modifying anything\n"));
+ printf(_(" -N, --no-sync do not wait for changes to be written\n"
+ " safely to disk\n"));
+ printf(_(" -P, --progress write progress messages\n"));
+ printf(_(" -R, --write-recovery-conf write configuration for replication\n"
+ " (requires --source-server)\n"));
+ printf(_(" --config-file=FILENAME use specified main server configuration\n"
+ " file when running target cluster\n"));
+ printf(_(" --debug write a lot of debug messages\n"));
+ printf(_(" --no-ensure-shutdown do not automatically fix unclean shutdown\n"));
+ printf(_(" --no-ensure-full-page-writes do not make sure that full_page_writes is on\n"));
+ printf(_(" -V, --version output version information, then exit\n"));
+ printf(_(" -?, --help show this help, then exit\n"));
printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL);
}
@@ -118,7 +120,8 @@ main(int argc, char **argv)
{"source-pgdata", required_argument, NULL, 1},
{"source-server", required_argument, NULL, 2},
{"no-ensure-shutdown", no_argument, NULL, 4},
- {"config-file", required_argument, NULL, 5},
+ {"no-ensure-full-page-writes", no_argument, NULL, 5},
+ {"config-file", required_argument, NULL, 6},
{"version", no_argument, NULL, 'V'},
{"restore-target-wal", no_argument, NULL, 'c'},
{"dry-run", no_argument, NULL, 'n'},
@@ -207,6 +210,10 @@ main(int argc, char **argv)
break;
case 5:
+ no_ensure_full_page_writes = true;
+ break;
+
+ case 6:
config_file = pg_strdup(optarg);
break;
diff --git a/src/bin/pg_rewind/pg_rewind.h b/src/bin/pg_rewind/pg_rewind.h
index 80c276285a..8ddb9e34b1 100644
--- a/src/bin/pg_rewind/pg_rewind.h
+++ b/src/bin/pg_rewind/pg_rewind.h
@@ -23,6 +23,7 @@ extern char *datadir_target;
extern bool showprogress;
extern bool dry_run;
extern bool do_sync;
+extern bool no_ensure_full_page_writes;
extern int WalSegSz;
/* Target history */
--
2.38.1