Howdy, all.

I'd like to compress archived WAL segments in an environment set up for
PITR (point-in-time recovery), in order to reduce both network traffic
and storage requirements. However, pg_standby presently checks file
sizes, requiring that an archive segment be exactly the right size to be
considered valid. The idea of compressing log segments is not new --
the clearxlogtail project on pgFoundry provides a tool to make such
compression more effective, and is explicitly intended for that
purpose -- but as of 8.3.4, pg_standby does not appear to support such
environments. I propose adding that support.

To allow pg_standby to operate in an environment where archive
segments are compressed, two behaviors are necessary:

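 - suppressing the file-size checks. With the checks disabled, pg_standby
can no longer use a segment's size to tell whether it has been completely
written, so this puts the onus on the user to create these files via an
atomic mechanism (e.g. rename into place); it is nonetheless necessary to
allow compressed files to be considered.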
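 - allowing a custom restore command to be provided. This permits the
user to specify the mechanism to be used to decompress the segment.
One open question (and potential bikeshed) is whether the user should
pass in a command suitable for use in a pipeline, reading the archived
segment on stdin and writing the restored segment to stdout, or a command
which accepts the input and output paths as arguments. The attached
sample supports both, as -p and -C respectively.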

A sample implementation is attached, intended only to kickstart
discussion; I'm not wedded to either the implementation itself or its
proposed command-line syntax.

Thoughts?
--- pg_standby.c.orig	2008-07-08 10:12:04.000000000 -0500
+++ pg_standby.c	2008-10-22 19:05:41.000000000 -0500
@@ -50,9 +50,11 @@
 bool		triggered = false;	/* have we been triggered? */
 bool		need_cleanup = false;		/* do we need to remove files from
 										 * archive? */
+bool		disable_size_checks = false;	/* avoid checking segment size */
 
 static volatile sig_atomic_t signaled = false;
 
+char	   *customRestore;	/* Filter or command used to restore segments */
 char	   *archiveLocation;	/* where to find the archive? */
 char	   *triggerPath;		/* where to find the trigger file? */
 char	   *xlogFilePath;		/* where we are going to restore to */
@@ -66,6 +68,8 @@
 
 #define RESTORE_COMMAND_COPY 0
 #define RESTORE_COMMAND_LINK 1
+#define RESTORE_COMMAND_PIPE 2
+#define RESTORE_COMMAND_CUST 3
 int			restoreCommandType;
 
 #define XLOG_DATA			 0
@@ -112,8 +116,15 @@
 	snprintf(WALFilePath, MAXPGPATH, "%s\\%s", archiveLocation, nextWALFileName);
 	switch (restoreCommandType)
 	{
+		case RESTORE_COMMAND_PIPE:
+			snprintf(restoreCommand, MAXPGPATH, "%s <\"%s\" >\"%s\"", customRestore, WALFilePath, xlogFilePath);
+			break;
+		case RESTORE_COMMAND_CUST:
+			SET_RESTORE_COMMAND(customRestore, WALFilePath, xlogFilePath);
+			break;
 		case RESTORE_COMMAND_LINK:
 			SET_RESTORE_COMMAND("mklink", WALFilePath, xlogFilePath);
+			break;
 		case RESTORE_COMMAND_COPY:
 		default:
 			SET_RESTORE_COMMAND("copy", WALFilePath, xlogFilePath);
@@ -123,6 +134,12 @@
 	snprintf(WALFilePath, MAXPGPATH, "%s/%s", archiveLocation, nextWALFileName);
 	switch (restoreCommandType)
 	{
+		case RESTORE_COMMAND_PIPE:
+			snprintf(restoreCommand, MAXPGPATH, "%s <\"%s\" >\"%s\"", customRestore, WALFilePath, xlogFilePath);
+			break;
+		case RESTORE_COMMAND_CUST:
+			snprintf(restoreCommand, MAXPGPATH, "%s \"%s\" \"%s\"", customRestore, WALFilePath, xlogFilePath);
+			break;
 		case RESTORE_COMMAND_LINK:
 #if HAVE_WORKING_LINK
 			SET_RESTORE_COMMAND("ln -s -f", WALFilePath, xlogFilePath);
@@ -170,7 +187,7 @@
 			nextWALFileType = XLOG_BACKUP_LABEL;
 			return true;
 		}
-		else if (stat_buf.st_size == XLOG_SEG_SIZE)
+		else if (disable_size_checks || stat_buf.st_size == XLOG_SEG_SIZE)
 		{
 #ifdef WIN32
 
@@ -190,7 +207,7 @@
 		/*
 		 * If still too small, wait until it is the correct size
 		 */
-		if (stat_buf.st_size > XLOG_SEG_SIZE)
+		if ( (!disable_size_checks) && stat_buf.st_size > XLOG_SEG_SIZE)
 		{
 			if (debug)
 			{
@@ -432,12 +449,15 @@
 	fprintf(stderr, "				note space between ARCHIVELOCATION and NEXTWALFILE\n");
 	fprintf(stderr, "with main intended use as a restore_command in the recovery.conf\n");
 	fprintf(stderr, "	 restore_command = 'pg_standby [OPTION]... ARCHIVELOCATION %%f %%p %%r'\n");
-	fprintf(stderr, "e.g. restore_command = 'pg_standby -l /mnt/server/archiverdir %%f %%p %%r'\n");
+	fprintf(stderr, "e.g. restore_command = 'pg_standby -l /mnt/server/archiverdir %%f %%p %%r'\n\n");
+	fprintf(stderr, "If -C or -p are used, the archive must be populated using atomic calls (ie. rename).\n");
 	fprintf(stderr, "\nOptions:\n");
+	fprintf(stderr, "  -C COMMAND		invoke command for retrieval from the archive (as \"COMMAND source dest\")\n");
 	fprintf(stderr, "  -c			copies file from archive (default)\n");
 	fprintf(stderr, "  -d			generate lots of debugging output (testing only)\n");
 	fprintf(stderr, "  -k NUMFILESTOKEEP	if RESTARTWALFILE not used, removes files prior to limit (0 keeps all)\n");
 	fprintf(stderr, "  -l			links into archive (leaves file in archive)\n");
+	fprintf(stderr, "  -p COMMAND		pipe through command on retrieval from the archive (ie. 'gzip -c')\n");
 	fprintf(stderr, "  -r MAXRETRIES		max number of times to retry, with progressive wait (default=3)\n");
 	fprintf(stderr, "  -s SLEEPTIME		seconds to wait between file checks (min=1, max=60, default=5)\n");
 	fprintf(stderr, "  -t TRIGGERFILE	defines a trigger file to initiate failover (no default)\n");
@@ -460,13 +480,23 @@
 	(void) signal(SIGINT, sighandler);
 	(void) signal(SIGQUIT, sighandler);
 
-	while ((c = getopt(argc, argv, "cdk:lr:s:t:w:")) != -1)
+	while ((c = getopt(argc, argv, "C:cdk:lr:s:t:w:p:")) != -1)
 	{
 		switch (c)
 		{
 			case 'c':			/* Use copy */
 				restoreCommandType = RESTORE_COMMAND_COPY;
 				break;
+			case 'C':			/* Use custom command */
+				restoreCommandType = RESTORE_COMMAND_CUST;
+				customRestore = optarg;
+				disable_size_checks = true;
+				break;
+			case 'p':			/* Use pipeline */
+				restoreCommandType = RESTORE_COMMAND_PIPE;
+				customRestore = optarg;
+				disable_size_checks = true;
+				break;
 			case 'd':			/* Debug mode */
 				debug = true;
 				break;
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to