Howdy, all. I'm interested in compressing archived WAL segments in an environment set up for PITR, in order to reduce both network traffic and storage requirements. However, pg_standby presently checks file sizes, requiring that an archive segment be exactly the right size to be considered valid. The idea of compressing log segments is not new -- the clearxlogtail project on pgFoundry provides a tool to make such compression more effective, and is explicitly intended for that purpose -- but as of 8.3.4, pg_standby appears not to support such environments; I propose adding that support.
To allow pg_standby to operate in an environment where archive segments are compressed, two behaviors are necessary:
- Suppressing the file-size checks. This puts the onus on the user to create these files via an atomic mechanism (i.e. rename), but is necessary to allow compressed files to be considered.
- Allowing a custom restore command to be provided. This permits the user to specify the mechanism to be used to decompress the segment. One bikeshed is determining whether the user should pass in a command suitable for use in a pipeline or a command which accepts input and output as arguments; the sample patch below offers both, as -p and -C respectively.

A sample implementation is attached, intended only to kickstart discussion; I'm not attached to either its implementation or its proposed command-line syntax. Thoughts?
--- pg_standby.c.orig 2008-07-08 10:12:04.000000000 -0500 +++ pg_standby.c 2008-10-22 19:05:41.000000000 -0500 @@ -50,9 +50,11 @@ bool triggered = false; /* have we been triggered? */ bool need_cleanup = false; /* do we need to remove files from * archive? */ +bool disable_size_checks = false; /* avoid checking segment size */ static volatile sig_atomic_t signaled = false; +char *customRestore; /* Filter or command used to restore segments */ char *archiveLocation; /* where to find the archive? */ char *triggerPath; /* where to find the trigger file? */ char *xlogFilePath; /* where we are going to restore to */ @@ -66,6 +68,8 @@ #define RESTORE_COMMAND_COPY 0 #define RESTORE_COMMAND_LINK 1 +#define RESTORE_COMMAND_PIPE 2 +#define RESTORE_COMMAND_CUST 3 int restoreCommandType; #define XLOG_DATA 0 @@ -112,8 +116,15 @@ snprintf(WALFilePath, MAXPGPATH, "%s\\%s", archiveLocation, nextWALFileName); switch (restoreCommandType) { + case RESTORE_COMMAND_PIPE: + snprintf(restoreCommand, MAXPGPATH, "%s <\"%s\" >\"%s\"", customRestore, WALFilePath, xlogFilePath); + break; + case RESTORE_COMMAND_CUST: + SET_RESTORE_COMMAND(customRestore, WALFilePath, xlogFilePath); + break; case RESTORE_COMMAND_LINK: SET_RESTORE_COMMAND("mklink", WALFilePath, xlogFilePath); + break; case RESTORE_COMMAND_COPY: default: SET_RESTORE_COMMAND("copy", WALFilePath, xlogFilePath); @@ -123,6 +134,12 @@ snprintf(WALFilePath, MAXPGPATH, "%s/%s", archiveLocation, nextWALFileName); switch (restoreCommandType) { + case RESTORE_COMMAND_PIPE: + snprintf(restoreCommand, MAXPGPATH, "%s <\"%s\" >\"%s\"", customRestore, WALFilePath, xlogFilePath); + break; + case RESTORE_COMMAND_CUST: + snprintf(restoreCommand, MAXPGPATH, "%s \"%s\" \"%s\"", customRestore, WALFilePath, xlogFilePath); + break; case RESTORE_COMMAND_LINK: #if HAVE_WORKING_LINK SET_RESTORE_COMMAND("ln -s -f", WALFilePath, xlogFilePath); @@ -170,7 +187,7 @@ nextWALFileType = XLOG_BACKUP_LABEL; return true; } - else if (stat_buf.st_size == 
XLOG_SEG_SIZE) + else if (disable_size_checks || stat_buf.st_size == XLOG_SEG_SIZE) { #ifdef WIN32 @@ -190,7 +207,7 @@ /* * If still too small, wait until it is the correct size */ - if (stat_buf.st_size > XLOG_SEG_SIZE) + if ( (!disable_size_checks) && stat_buf.st_size > XLOG_SEG_SIZE) { if (debug) { @@ -432,12 +449,15 @@ fprintf(stderr, " note space between ARCHIVELOCATION and NEXTWALFILE\n"); fprintf(stderr, "with main intended use as a restore_command in the recovery.conf\n"); fprintf(stderr, " restore_command = 'pg_standby [OPTION]... ARCHIVELOCATION %%f %%p %%r'\n"); - fprintf(stderr, "e.g. restore_command = 'pg_standby -l /mnt/server/archiverdir %%f %%p %%r'\n"); + fprintf(stderr, "e.g. restore_command = 'pg_standby -l /mnt/server/archiverdir %%f %%p %%r'\n\n"); + fprintf(stderr, "If -C or -p are used, the archive must be populated using atomic calls (ie. rename).\n"); fprintf(stderr, "\nOptions:\n"); + fprintf(stderr, " -C COMMAND invoke command for retrieval from the archive (as \"COMMAND source dest\")\n"); fprintf(stderr, " -c copies file from archive (default)\n"); fprintf(stderr, " -d generate lots of debugging output (testing only)\n"); fprintf(stderr, " -k NUMFILESTOKEEP if RESTARTWALFILE not used, removes files prior to limit (0 keeps all)\n"); fprintf(stderr, " -l links into archive (leaves file in archive)\n"); + fprintf(stderr, " -p COMMAND pipe through command on retrieval from the archive (ie. 
'gzip -c')\n"); fprintf(stderr, " -r MAXRETRIES max number of times to retry, with progressive wait (default=3)\n"); fprintf(stderr, " -s SLEEPTIME seconds to wait between file checks (min=1, max=60, default=5)\n"); fprintf(stderr, " -t TRIGGERFILE defines a trigger file to initiate failover (no default)\n"); @@ -460,13 +480,23 @@ (void) signal(SIGINT, sighandler); (void) signal(SIGQUIT, sighandler); - while ((c = getopt(argc, argv, "cdk:lr:s:t:w:")) != -1) + while ((c = getopt(argc, argv, "C:cdk:lr:s:t:w:p:")) != -1) { switch (c) { case 'c': /* Use copy */ restoreCommandType = RESTORE_COMMAND_COPY; break; + case 'C': /* Use custom command */ + restoreCommandType = RESTORE_COMMAND_CUST; + customRestore = optarg; + disable_size_checks = true; + break; + case 'p': /* Use pipeline */ + restoreCommandType = RESTORE_COMMAND_PIPE; + customRestore = optarg; + disable_size_checks = true; + break; case 'd': /* Debug mode */ debug = true; break;
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers