Index: contrib/pg_standby/pg_standby.c
===================================================================
RCS file: /projects/cvsroot/pgsql/contrib/pg_standby/pg_standby.c,v
retrieving revision 1.21
diff -c -r1.21 pg_standby.c
*** contrib/pg_standby/pg_standby.c	26 Mar 2009 22:29:13 -0000	1.21
--- contrib/pg_standby/pg_standby.c	23 Apr 2009 12:04:08 -0000
***************
*** 52,58 ****
  int			keepfiles = 0;		/* number of WAL files to keep, 0 keep all */
  int			maxretries = 3;		/* number of retries on restore command */
  bool		debug = false;		/* are we debugging? */
- bool		triggered = false;	/* have we been triggered? */
  bool		need_cleanup = false;		/* do we need to remove files from
  										 * archive? */
  
--- 52,57 ----
***************
*** 69,74 ****
--- 68,98 ----
  char		exclusiveCleanupFileName[MAXPGPATH];		/* the file we need to
  														 * get from archive */
  
+ /*
+  * Two types of failover are supported (smart and fast failover).
+  *
+  * The content of the trigger file determines the type of failover.
+  * If a trigger file containing "smart" exists, smart failover is chosen:
+  * pg_standby itself acts as a cp or ln command, so that on successful
+  * completion all the available WAL records have been applied, resulting
+  * in zero data loss. However, it might take some time to finish recovery.
+  *
+  * On the other hand, a trigger file containing "fast" causes recovery
+  * to end immediately, even if available WAL files remain unapplied,
+  * so some transactions might be lost.
+  *
+  * An empty trigger file performs smart failover.
+  *
+  * Fast failover is triggered by the signal (SIGUSR1 or SIGINT).
+  *
+  * A timeout causes fast failover.
+  */
+ #define NoFailover		0
+ #define SmartFailover	1
+ #define FastFailover	2
+ 
+ static int Failover = NoFailover;
+ 
  #define RESTORE_COMMAND_COPY 0
  #define RESTORE_COMMAND_LINK 1
  int			restoreCommandType;
***************
*** 108,114 ****
   *
   *	As an example, and probably the common case, we use either
   *	cp/ln commands on *nix, or copy/move command on Windows.
-  *
   */
  static void
  CustomizableInitialize(void)
--- 132,137 ----
***************
*** 357,363 ****
  static bool
  CheckForExternalTrigger(void)
  {
! 	int			rc;
  
  	/*
  	 * Look for a trigger file, if that option has been selected
--- 380,387 ----
  static bool
  CheckForExternalTrigger(void)
  {
! 	char	buf[32];
! 	FILE   *fd;
  
  	/*
  	 * Look for a trigger file, if that option has been selected
***************
*** 365,374 ****
  	 * We use stat() here because triggerPath is always a file rather than
  	 * potentially being in an archive
  	 */
! 	if (triggerPath && stat(triggerPath, &stat_buf) == 0)
  	{
! 		fprintf(stderr, "trigger file found\n");
  		fflush(stderr);
  
  		/*
  		 * If trigger file found, we *must* delete it. Here's why: When
--- 389,438 ----
  	 * We use stat() here because triggerPath is always a file rather than
  	 * potentially being in an archive
  	 */
! 	if (!triggerPath || stat(triggerPath, &stat_buf) != 0)
! 		return false;
! 
! 	/*
! 	 * An empty trigger file performs smart failover
! 	 */
! 	if (stat_buf.st_size == 0)
  	{
! 		Failover = SmartFailover;
! 		fprintf(stderr, "trigger file found: smart failover\n");
  		fflush(stderr);
+ 		return true;
+ 	}
+ 
+ 	if ((fd = fopen(triggerPath, "r")) == NULL)
+ 	{
+ 		fprintf(stderr, "WARNING: could not open \"%s\": %s\n",
+ 				triggerPath, strerror(errno));
+ 		fflush(stderr);
+ 		return false;
+ 	}
+ 	
+ 	if (fgets(buf, sizeof(buf), fd) == NULL)
+ 	{
+ 		fprintf(stderr, "WARNING: could not read \"%s\": %s\n",
+ 				triggerPath, strerror(errno));
+ 		fflush(stderr);
+ 		fclose(fd);
+ 		return false;
+ 	}
+ 	
+ 	fclose(fd);
+ 	
+ 	if (strncmp(buf, "smart", 5) == 0)
+ 	{
+ 		Failover = SmartFailover;
+ 		fprintf(stderr, "trigger file found: smart failover\n");
+ 		fflush(stderr);
+ 		return true;
+ 	}
+ 	
+ 	if (strncmp(buf, "fast", 4) == 0)
+ 	{
+ 		int	rc;
  
  		/*
  		 * If trigger file found, we *must* delete it. Here's why: When
***************
*** 379,391 ****
  		rc = unlink(triggerPath);
  		if (rc != 0)
  		{
! 			fprintf(stderr, "\n ERROR: could not remove \"%s\": %s", triggerPath, strerror(errno));
  			fflush(stderr);
  			exit(rc);
  		}
  		return true;
  	}
! 
  	return false;
  }
  
--- 443,463 ----
  		rc = unlink(triggerPath);
  		if (rc != 0)
  		{
! 			fprintf(stderr, "\n ERROR: could not remove \"%s\": %s",
! 					triggerPath, strerror(errno));
  			fflush(stderr);
  			exit(rc);
  		}
+ 
+ 		Failover = FastFailover;
+ 		fprintf(stderr, "trigger file found: fast failover\n");
+ 		fflush(stderr);
  		return true;
  	}
! 	
! 	fprintf(stderr, "WARNING: invalid content in \"%s\"\n",
! 			triggerPath);
! 	fflush(stderr);
  	return false;
  }
  
***************
*** 552,559 ****
  				break;
  			case 't':			/* Trigger file */
  				triggerPath = optarg;
- 				if (CheckForExternalTrigger())
- 					exit(1);	/* Normal exit, with non-zero */
  				break;
  			case 'w':			/* Max wait time */
  				maxwaittime = atoi(optarg);
--- 624,629 ----
***************
*** 659,664 ****
--- 729,757 ----
  		strcmp(nextWALFileName + strlen(nextWALFileName) - strlen(".history"),
  			   ".history") == 0)
  	{
+ 		/*
+ 		 * Make sure the trigger file is removed by the end of archive recovery
+ 		 * at the latest. Otherwise, a leftover trigger file would unexpectedly
+ 		 * cause a subsequent warm standby to end.
+ 		 *
+ 		 * This is the right place to remove the trigger file, since a timeline
+ 		 * history file is requested only at the beginning and end of archive
+ 		 * recovery.
+ 		 */
+ 		if (triggerPath && stat(triggerPath, &stat_buf) == 0)
+ 		{
+ 			int	rc;
+ 			
+ 			rc = unlink(triggerPath);
+ 			if (rc != 0)
+ 			{
+ 				fprintf(stderr, "\n ERROR: could not remove \"%s\": %s",
+ 						triggerPath, strerror(errno));
+ 				fflush(stderr);
+ 				exit(rc);
+ 			}
+ 		}
+ 
  		nextWALFileType = XLOG_HISTORY;
  		if (RestoreWALFileForRecovery())
  			exit(0);
***************
*** 676,697 ****
  	/*
  	 * Main wait loop
  	 */
! 	while (!CustomizableNextWALFileReady() && !triggered)
  	{
  		if (sleeptime <= 60)
  			pg_usleep(sleeptime * 1000000L);
  
  		if (signaled)
  		{
! 			triggered = true;
  			if (debug)
  			{
! 				fprintf(stderr, "\nsignaled to exit\n");
  				fflush(stderr);
  			}
  		}
  		else
  		{
  
  			if (debug)
  			{
--- 769,803 ----
  	/*
  	 * Main wait loop
  	 */
! 	while (!CheckForExternalTrigger() && !CustomizableNextWALFileReady())
  	{
  		if (sleeptime <= 60)
  			pg_usleep(sleeptime * 1000000L);
  
  		if (signaled)
  		{
! 			Failover = FastFailover;
  			if (debug)
  			{
! 				fprintf(stderr, "\nsignaled to exit: fast failover\n");
  				fflush(stderr);
  			}
+ 			break;
  		}
  		else
  		{
+ 			waittime += sleeptime;
+ 			if (waittime >= maxwaittime && maxwaittime > 0)
+ 			{
+ 				Failover = FastFailover;
+ 				if (debug)
+ 				{
+ 					fprintf(stderr, "\nTimed out after %d seconds: fast failover\n",
+ 							waittime);
+ 					fflush(stderr);
+ 				}
+ 				break;
+ 			}
  
  			if (debug)
  			{
***************
*** 700,722 ****
  					fprintf(stderr, " Checking for trigger file...");
  				fflush(stderr);
  			}
- 
- 			waittime += sleeptime;
- 
- 			if (!triggered && (CheckForExternalTrigger() || (waittime >= maxwaittime && maxwaittime > 0)))
- 			{
- 				triggered = true;
- 				if (debug && waittime >= maxwaittime && maxwaittime > 0)
- 					fprintf(stderr, "\nTimed out after %d seconds\n", waittime);
- 			}
  		}
  	}
  
  	/*
  	 * Action on exit
  	 */
! 	if (triggered)
! 		exit(1);				/* Normal exit, with non-zero */
  
  	/*
  	 * Once we have restored this file successfully we can remove some prior
--- 806,819 ----
  					fprintf(stderr, " Checking for trigger file...");
  				fflush(stderr);
  			}
  		}
  	}
  
  	/*
  	 * Action on exit
  	 */
! 	if (Failover == FastFailover)
! 		exit(1);
  
  	/*
  	 * Once we have restored this file successfully we can remove some prior
***************
*** 724,731 ****
  	 * of them will be requested again immediately after the failed restore,
  	 * or when we restart recovery.
  	 */
! 	if (RestoreWALFileForRecovery() && need_cleanup)
! 		CustomizableCleanupPriorWALFiles();
  
! 	return 0;
  }
--- 821,833 ----
  	 * of them will be requested again immediately after the failed restore,
  	 * or when we restart recovery.
  	 */
! 	if (RestoreWALFileForRecovery())
! 	{
! 		if (need_cleanup)
! 			CustomizableCleanupPriorWALFiles();
  
! 		exit(0);
! 	}
! 	else
! 		exit(1);
  }
Index: doc/src/sgml/pgstandby.sgml
===================================================================
RCS file: /projects/cvsroot/pgsql/doc/src/sgml/pgstandby.sgml,v
retrieving revision 2.7
diff -c -r2.7 pgstandby.sgml
*** doc/src/sgml/pgstandby.sgml	27 Feb 2009 09:30:21 -0000	2.7
--- doc/src/sgml/pgstandby.sgml	23 Apr 2009 12:04:08 -0000
***************
*** 92,97 ****
--- 92,135 ----
     is specified,
     the <replaceable>archivelocation</> directory must be writable too.
    </para>
+   <para>
+    There are two ways to fail over a <quote>warm standby</> database server.
+    You control the type of failover by creating different trigger files
+    (if <literal>-t</> has been specified).
+ 
+    <variablelist>
+     <varlistentry>
+      <term>Smart Failover</term>
+      <listitem>
+       <para>
+        If the trigger file is empty or contains <literal>smart</>,
+        <application>pg_standby</application> itself acts as a
+        <literal>cp</> or <literal>ln</> command; on successful completion
+        all the available WAL records will have been applied, resulting in
+        zero data loss. However, it might take some time to finish failover.
+       </para>
+      </listitem>
+     </varlistentry>
+     <varlistentry>
+      <term>Fast Failover</term>
+      <listitem>
+       <para>
+        The existence of a trigger file containing <literal>fast</> causes
+        recovery to end immediately, even if available WAL files remain
+        unapplied. Some transactions might be lost, but failover finishes
+        quickly.
+       </para>
+       <para>
+        Do not copy any extra files into <filename>pg_xlog</> of a
+        <quote>warm standby</> database server if you use a fast trigger.
+        Otherwise, <application>pg_standby</> might get stuck while reading
+        them from <filename>pg_xlog</>. If failover does stall in this way,
+        it can be resumed by creating a fast trigger file again.
+       </para>
+      </listitem>
+     </varlistentry>
+    </variablelist>
+   </para>
  
    <table>
     <title><application>pg_standby</> options</title>
***************
*** 177,188 ****
        <entry><literal>-t</> <replaceable>triggerfile</></entry>
        <entry>none</entry>
        <entry>
!        Specify a trigger file whose presence should cause recovery to end
!        whether or not the next WAL file is available.
         It is recommended that you use a structured filename to
         avoid confusion as to which server is being triggered
         when multiple servers exist on the same system; for example
         <filename>/tmp/pgsql.trigger.5432</>.
        </entry>
       </row>
       <row>
--- 215,227 ----
        <entry><literal>-t</> <replaceable>triggerfile</></entry>
        <entry>none</entry>
        <entry>
!        Specify a trigger file whose presence should cause failover.
         It is recommended that you use a structured filename to
         avoid confusion as to which server is being triggered
         when multiple servers exist on the same system; for example
         <filename>/tmp/pgsql.trigger.5432</>.
+        Note that a trigger file is deleted at the end of recovery
+        regardless of failover type.
        </entry>
       </row>
       <row>
***************
*** 190,196 ****
        <entry>0</entry>
        <entry>
         Set the maximum number of seconds to wait for the next WAL file,
!        after which recovery will end and the standby will come up.
         A setting of zero (the default) means wait forever.
         The default setting is not necessarily recommended;
         consult <xref linkend="warm-standby"> for discussion.
--- 229,235 ----
        <entry>0</entry>
        <entry>
         Set the maximum number of seconds to wait for the next WAL file,
!        after which a fast failover will be performed.
         A setting of zero (the default) means wait forever.
         The default setting is not necessarily recommended;
         consult <xref linkend="warm-standby"> for discussion.
***************
*** 236,242 ****
     <listitem>
      <para>
       stop waiting only when a trigger file called
!      <filename>/tmp/pgsql.trigger.5442</> appears
      </para>
     </listitem>
     <listitem>
--- 275,282 ----
     <listitem>
      <para>
       stop waiting only when a trigger file called
!      <filename>/tmp/pgsql.trigger.5442</> appears,
!      and perform failover according to its content
      </para>
     </listitem>
     <listitem>
***************
*** 277,283 ****
     <listitem>
      <para>
       stop waiting only when a trigger file called
!      <filename>C:\pgsql.trigger.5442</> appears
      </para>
     </listitem>
     <listitem>
--- 317,324 ----
     <listitem>
      <para>
       stop waiting only when a trigger file called
!      <filename>C:\pgsql.trigger.5442</> appears,
!      and perform failover according to its content
      </para>
     </listitem>
     <listitem>
