One awkward omission in the new built-in standby mode, mainly used for
streaming replication, is that there is no easy way to delete old
archived files like you do with the %r parameter to restore_command.
This was discussed at
http://archives.postgresql.org/pgsql-hackers/2010-02/msg01003.php, among
other things.

Per discussion, attached patch adds a new restartpoint_command option to
recovery.conf. That's an external shell command just like
recovery_end_command that's executed at every restartpoint. You can use
the %r parameter to pass the filename of the oldest WAL file that needs
to be retained.

While developing this I noticed that %r in recovery_end_command is not
working correctly:

LOG:  redo done at 0/14000C10
LOG:  last completed transaction was at log time 2000-01-01
02:21:08.816445+02
cp: cannot stat
`/home/hlinnaka/pgsql.cvshead/walarchive/000000010000000000000014': No
such file or directory
cp: cannot stat
`/home/hlinnaka/pgsql.cvshead/walarchive/00000002.history': No such file
or directory
LOG:  selected new timeline ID: 2
cp: cannot stat
`/home/hlinnaka/pgsql.cvshead/walarchive/00000001.history': No such file
or directory
LOG:  archive recovery complete
LOG:  checkpoint starting: end-of-recovery immediate wait
LOG:  checkpoint complete: wrote 0 buffers (0.0%); 0 transaction log
file(s) added, 0 removed, 0 recycled; write=0.000 s, sync=0.000 s,
total=0.003 s
LOG:  executing recovery_end_command "echo recovery_end_command %r"
recovery_end_command 000000000000000000000000
LOG:  database system is ready to accept connections
LOG:  autovacuum launcher started

Note how %r is always expanded to 000000000000000000000000. That's
because %r is expanded only when InRedo is true. That makes sense for
restore_command, from which that piece of code was copy-pasted, but
InRedo is never true by the time recovery_end_command is run. The
attached patch fixes that too.

Barring objections, I will commit this later today.

-- 
  Heikki Linnakangas
  EnterpriseDB   http://www.enterprisedb.com
diff --git a/doc/src/sgml/recovery-config.sgml b/doc/src/sgml/recovery-config.sgml
index 6404768..7b7a8bd 100644
--- a/doc/src/sgml/recovery-config.sgml
+++ b/doc/src/sgml/recovery-config.sgml
@@ -59,14 +59,15 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"'  # Windows
       </listitem>
      </varlistentry>
 
-     <varlistentry id="recovery-end-command" xreflabel="recovery_end_command">
-      <term><varname>recovery_end_command</varname> (<type>string</type>)</term>
+     <varlistentry id="restartpoint-command" xreflabel="restartpoint_command">
+      <term><varname>restartpoint_command</varname> (<type>string</type>)</term>
       <listitem>
        <para>
-        This parameter specifies a shell command that will be executed once only
-        at the end of recovery. This parameter is optional. The purpose of the
-        <varname>recovery_end_command</> is to provide a mechanism for cleanup
-        following replication or recovery.
+        This parameter specifies a shell command that will be executed at
+        every restartpoint. This parameter is optional. The purpose of the
+        <varname>restartpoint_command</> is to provide a mechanism for cleaning
+        up old archived WAL files that are no longer needed by the standby
+        server.
         Any <literal>%r</> is replaced by the name of the file
         containing the last valid restart point. That is the earliest file that
         must be kept to allow a restore to be restartable, so this information
@@ -79,6 +80,24 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"'  # Windows
        </para>
        <para>
         If the command returns a non-zero exit status then a WARNING log
+        message will be written.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry id="recovery-end-command" xreflabel="recovery_end_command">
+      <term><varname>recovery_end_command</varname> (<type>string</type>)</term>
+      <listitem>
+       <para>
+        This parameter specifies a shell command that will be executed once only
+        at the end of recovery. This parameter is optional. The purpose of the
+        <varname>recovery_end_command</> is to provide a mechanism for cleanup
+        following replication or recovery.
+        Any <literal>%r</> is replaced by the name of the file containing the
+        last valid restart point, like in <xref linkend="restartpoint-command">.
+       </para>
+       <para>
+        If the command returns a non-zero exit status then a WARNING log
         message will be written and the database will proceed to start up
         anyway.  An exception is that if the command was terminated by a
         signal, the database will not proceed with startup.
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 995794a..519526e 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -171,6 +171,7 @@ static bool restoredFromArchive = false;
 /* options taken from recovery.conf for archive recovery */
 static char *recoveryRestoreCommand = NULL;
 static char *recoveryEndCommand = NULL;
+static char *restartPointCommand = NULL;
 static bool recoveryTarget = false;
 static bool recoveryTargetExact = false;
 static bool recoveryTargetInclusive = true;
@@ -370,6 +371,11 @@ typedef struct XLogCtlData
 	int			XLogCacheBlck;	/* highest allocated xlog buffer index */
 	TimeLineID	ThisTimeLineID;
 	TimeLineID	RecoveryTargetTLI;
+	/*
+	 * restartPointCommand is read from recovery.conf but needs to be in
+	 * shared memory so that the bgwriter process can access it.
+	 */
+	char		restartPointCommand[MAXPGPATH];
 
 	/*
 	 * SharedRecoveryInProgress indicates if we're still in crash or archive
@@ -520,7 +526,8 @@ static bool XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
 static void XLogFileClose(void);
 static bool RestoreArchivedFile(char *path, const char *xlogfname,
 					const char *recovername, off_t expectedSize);
-static void ExecuteRecoveryEndCommand(void);
+static void ExecuteRecoveryCommand(char *command, char *commandName,
+					   bool failOnerror);
 static void PreallocXlogFiles(XLogRecPtr endptr);
 static void RemoveOldXlogFiles(uint32 log, uint32 seg, XLogRecPtr endptr);
 static void ValidateXLOGDirectoryStructure(void);
@@ -2990,12 +2997,19 @@ not_available:
 }
 
 /*
- * Attempt to execute the recovery_end_command.
+ * Attempt to execute an external recovery command.
+ *
+ * 'command' is the shell command to be executed, 'commandName' is a human-
+ * readable name describing the command emitted in the logs. If 'failOnSignal'
+ * is true and the command is killed by a signal, a FATAL error is thrown.
+ * Otherwise a WARNING is emitted.
+ *
+ * This is currently used for recovery_end_command and restartpoint_command.
  */
 static void
-ExecuteRecoveryEndCommand(void)
+ExecuteRecoveryCommand(char *command, char *commandName, bool failOnSignal)
 {
-	char		xlogRecoveryEndCmd[MAXPGPATH];
+	char		xlogRecoveryCmd[MAXPGPATH];
 	char		lastRestartPointFname[MAXPGPATH];
 	char	   *dp;
 	char	   *endp;
@@ -3005,7 +3019,7 @@ ExecuteRecoveryEndCommand(void)
 	uint32		restartLog;
 	uint32		restartSeg;
 
-	Assert(recoveryEndCommand);
+	Assert(command && commandName);
 
 	/*
 	 * Calculate the archive file cutoff point for use during log shipping
@@ -3023,25 +3037,22 @@ ExecuteRecoveryEndCommand(void)
 	 * flags to signify the point when we can begin deleting WAL files from
 	 * the archive.
 	 */
-	if (InRedo)
-	{
-		XLByteToSeg(ControlFile->checkPointCopy.redo,
-					restartLog, restartSeg);
-		XLogFileName(lastRestartPointFname,
-					 ControlFile->checkPointCopy.ThisTimeLineID,
-					 restartLog, restartSeg);
-	}
-	else
-		XLogFileName(lastRestartPointFname, 0, 0, 0);
+	LWLockAcquire(ControlFileLock, LW_SHARED);
+	XLByteToSeg(ControlFile->checkPointCopy.redo,
+				restartLog, restartSeg);
+	XLogFileName(lastRestartPointFname,
+				 ControlFile->checkPointCopy.ThisTimeLineID,
+				 restartLog, restartSeg);
+	LWLockRelease(ControlFileLock);
 
 	/*
 	 * construct the command to be executed
 	 */
-	dp = xlogRecoveryEndCmd;
-	endp = xlogRecoveryEndCmd + MAXPGPATH - 1;
+	dp = xlogRecoveryCmd;
+	endp = xlogRecoveryCmd + MAXPGPATH - 1;
 	*endp = '\0';
 
-	for (sp = recoveryEndCommand; *sp; sp++)
+	for (sp = command; *sp; sp++)
 	{
 		if (*sp == '%')
 		{
@@ -3075,13 +3086,12 @@ ExecuteRecoveryEndCommand(void)
 	*dp = '\0';
 
 	ereport(DEBUG3,
-			(errmsg_internal("executing recovery end command \"%s\"",
-							 xlogRecoveryEndCmd)));
+			(errmsg_internal("executing %s \"%s\"", commandName, command)));
 
 	/*
 	 * execute the constructed command
 	 */
-	rc = system(xlogRecoveryEndCmd);
+	rc = system(xlogRecoveryCmd);
 	if (rc != 0)
 	{
 		/*
@@ -3091,9 +3101,13 @@ ExecuteRecoveryEndCommand(void)
 		 */
 		signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125;
 
-		ereport(signaled ? FATAL : WARNING,
-				(errmsg("recovery_end_command \"%s\": return code %d",
-						xlogRecoveryEndCmd, rc)));
+		/*
+		 * translator: First %s represents a recovery.conf parameter name like
+		 * "recovery_end_command", and the 2nd is the value of that parameter.
+		 */
+		ereport((signaled && failOnSignal) ? FATAL : WARNING,
+				(errmsg("%s \"%s\": return code %d", commandName,
+						command, rc)));
 	}
 }
 
@@ -4936,6 +4950,13 @@ readRecoveryCommandFile(void)
 					(errmsg("recovery_end_command = '%s'",
 							recoveryEndCommand)));
 		}
+		else if (strcmp(tok1, "restartpoint_command") == 0)
+		{
+			restartPointCommand = pstrdup(tok2);
+			ereport(DEBUG2,
+					(errmsg("restartpoint_command = '%s'",
+							restartPointCommand)));
+		}
 		else if (strcmp(tok1, "recovery_target_timeline") == 0)
 		{
 			rtliGiven = true;
@@ -5505,8 +5526,14 @@ StartupXLOG(void)
 						recoveryTargetTLI,
 						ControlFile->checkPointCopy.ThisTimeLineID)));
 
-	/* Save the selected recovery target timeline ID in shared memory */
+	/*
+	 * Save the selected recovery target timeline ID and restartpoint_command
+	 * in shared memory so that other processes can see them
+	 */
 	XLogCtl->RecoveryTargetTLI = recoveryTargetTLI;
+	strncpy(XLogCtl->restartPointCommand,
+			restartPointCommand ? restartPointCommand : "",
+			sizeof(XLogCtl->restartPointCommand));
 
 	if (read_backup_label(&checkPointLoc))
 	{
@@ -6129,7 +6156,9 @@ StartupXLOG(void)
 		 * And finally, execute the recovery_end_command, if any.
 		 */
 		if (recoveryEndCommand)
-			ExecuteRecoveryEndCommand();
+			ExecuteRecoveryCommand(recoveryEndCommand,
+								   "recovery_end_command",
+								   true);
 	}
 
 	/*
@@ -7318,6 +7347,15 @@ CreateRestartPoint(int flags)
 			 timestamptz_to_str(GetLatestXLogTime()))));
 
 	LWLockRelease(CheckpointLock);
+
+	/*
+	 * Finally, execute restartpoint_command, if any.
+	 */
+	if (XLogCtl->restartPointCommand[0])
+		ExecuteRecoveryCommand(XLogCtl->restartPointCommand,
+							   "restartpoint_command",
+							   false);
+
 	return true;
 }
 
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to