I wrote:
> It strikes me that a different approach that might be of value would
> be to re-read postmaster.pid and make sure that (a) it's still there
> and (b) it still contains the current postmaster's PID.  This would
> be morally equivalent to what Jim suggests above, and it would dodge
> the checkpointer-destroys-the-evidence problem, and it would have the
> additional advantage that we'd notice when a brain-dead DBA decides
> to manually remove postmaster.pid so he can start a new postmaster.
> (It's probably far too late to avoid data corruption at that point,
> but better late than never.)

> This is still not bulletproof against all overwrite-with-a-backup
> scenarios, but it seems like a noticeable improvement over what we
> discussed yesterday.

Here's a rewritten patch that looks at postmaster.pid instead of
pg_control.  It should be effectively the same as the prior patch in terms
of response to directory-removal cases, and it should also catch many
overwrite cases.

                        regards, tom lane

diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index baa43b2..498ebfa 100644
*** a/src/backend/postmaster/postmaster.c
--- b/src/backend/postmaster/postmaster.c
*************** ServerLoop(void)
*** 1602,1610 ****
  	fd_set		readmask;
  	int			nSockets;
  	time_t		now,
  				last_touch_time;
  
! 	last_touch_time = time(NULL);
  
  	nSockets = initMasks(&readmask);
  
--- 1602,1611 ----
  	fd_set		readmask;
  	int			nSockets;
  	time_t		now,
+ 				last_lockfile_recheck_time,
  				last_touch_time;
  
! 	last_lockfile_recheck_time = last_touch_time = time(NULL);
  
  	nSockets = initMasks(&readmask);
  
*************** ServerLoop(void)
*** 1754,1772 ****
  		if (StartWorkerNeeded || HaveCrashedWorker)
  			maybe_start_bgworker();
  
- 		/*
- 		 * Touch Unix socket and lock files every 58 minutes, to ensure that
- 		 * they are not removed by overzealous /tmp-cleaning tasks.  We assume
- 		 * no one runs cleaners with cutoff times of less than an hour ...
- 		 */
- 		now = time(NULL);
- 		if (now - last_touch_time >= 58 * SECS_PER_MINUTE)
- 		{
- 			TouchSocketFiles();
- 			TouchSocketLockFiles();
- 			last_touch_time = now;
- 		}
- 
  #ifdef HAVE_PTHREAD_IS_THREADED_NP
  
  		/*
--- 1755,1760 ----
*************** ServerLoop(void)
*** 1793,1798 ****
--- 1781,1828 ----
  			/* reset flag so we don't SIGKILL again */
  			AbortStartTime = 0;
  		}
+ 
+ 		/*
+ 		 * Lastly, check to see if it's time to do some things that we don't
+ 		 * want to do every single time through the loop, because they're a
+ 		 * bit expensive.  Note that there's up to a minute of slop in when
+ 		 * these tasks will be performed, since DetermineSleepTime() will let
+ 		 * us sleep at most that long.
+ 		 */
+ 		now = time(NULL);
+ 
+ 		/*
+ 		 * Once a minute, verify that postmaster.pid hasn't been removed or
+ 		 * overwritten.  If it has, we commit hara-kiri.  This avoids having
+ 		 * postmasters and child processes hanging around after their database
+ 		 * is gone, and maybe causing problems if a new database cluster is
+ 		 * created in the same place.  It also provides some protection
+ 		 * against a DBA foolishly removing postmaster.pid and manually
+ 		 * starting a new postmaster.  Data corruption is likely to ensue from
+ 		 * that anyway, but we can minimize the damage by aborting ASAP.
+ 		 */
+ 		if (now - last_lockfile_recheck_time >= 1 * SECS_PER_MINUTE)
+ 		{
+ 			if (!RecheckDataDirLockFile())
+ 			{
+ 				ereport(LOG,
+ 						(errmsg("performing immediate shutdown because data directory lock file is invalid")));
+ 				kill(MyProcPid, SIGQUIT);
+ 			}
+ 			last_lockfile_recheck_time = now;
+ 		}
+ 
+ 		/*
+ 		 * Touch Unix socket and lock files every 58 minutes, to ensure that
+ 		 * they are not removed by overzealous /tmp-cleaning tasks.  We assume
+ 		 * no one runs cleaners with cutoff times of less than an hour ...
+ 		 */
+ 		if (now - last_touch_time >= 58 * SECS_PER_MINUTE)
+ 		{
+ 			TouchSocketFiles();
+ 			TouchSocketLockFiles();
+ 			last_touch_time = now;
+ 		}
  	}
  }
  
diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c
index f0099d3..b6270e1 100644
*** a/src/backend/utils/init/miscinit.c
--- b/src/backend/utils/init/miscinit.c
*************** AddToDataDirLockFile(int target_line, co
*** 1202,1207 ****
--- 1202,1277 ----
  }
  
  
+ /*
+  * Recheck that the data directory lock file still exists with expected
+  * content.  Return TRUE if the lock file appears OK, FALSE if it isn't.
+  *
+  * We call this periodically in the postmaster.  The idea is that if the
+  * lock file has been removed or replaced by another postmaster, we should
+  * do a panic database shutdown.  Therefore, we should return TRUE if there
+  * is any doubt: we do not want to cause a panic shutdown unnecessarily.
+  * Transient failures like EINTR or ENFILE should not cause us to fail.
+  * (If there really is something wrong, we'll detect it on a future recheck.)
+  */
+ bool
+ RecheckDataDirLockFile(void)
+ {
+ 	int			fd;
+ 	int			len;
+ 	long		file_pid;
+ 	char		buffer[BLCKSZ];
+ 
+ 	fd = open(DIRECTORY_LOCK_FILE, O_RDWR | PG_BINARY, 0);
+ 	if (fd < 0)
+ 	{
+ 		/*
+ 		 * There are many foreseeable false-positive error conditions.  For
+ 		 * safety, fail only on enumerated clearly-something-is-wrong
+ 		 * conditions.
+ 		 */
+ 		switch (errno)
+ 		{
+ 			case ENOENT:
+ 			case ENOTDIR:
+ 				/* disaster */
+ 				ereport(LOG,
+ 						(errcode_for_file_access(),
+ 						 errmsg("could not open file \"%s\": %m",
+ 								DIRECTORY_LOCK_FILE)));
+ 				return false;
+ 			default:
+ 				/* non-fatal, at least for now */
+ 				ereport(LOG,
+ 						(errcode_for_file_access(),
+ 				  errmsg("could not open file \"%s\": %m; continuing anyway",
+ 						 DIRECTORY_LOCK_FILE)));
+ 				return true;
+ 		}
+ 	}
+ 	len = read(fd, buffer, sizeof(buffer) - 1);
+ 	if (len < 0)
+ 	{
+ 		ereport(LOG,
+ 				(errcode_for_file_access(),
+ 				 errmsg("could not read from file \"%s\": %m",
+ 						DIRECTORY_LOCK_FILE)));
+ 		close(fd);
+ 		return true;			/* treat read failure as nonfatal */
+ 	}
+ 	buffer[len] = '\0';
+ 	close(fd);
+ 	file_pid = atol(buffer);
+ 	if (file_pid == getpid())
+ 		return true;			/* all is well */
+ 
+ 	/* Trouble: someone's overwritten the lock file */
+ 	ereport(LOG,
+ 			(errmsg("lock file \"%s\" contains wrong PID: %ld instead of %ld",
+ 					DIRECTORY_LOCK_FILE, file_pid, (long) getpid())));
+ 	return false;
+ }
+ 
+ 
  /*-------------------------------------------------------------------------
   *				Version checking support
   *-------------------------------------------------------------------------
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index 80ac732..87133bd 100644
*** a/src/include/miscadmin.h
--- b/src/include/miscadmin.h
*************** extern void CreateSocketLockFile(const c
*** 450,455 ****
--- 450,456 ----
  					 const char *socketDir);
  extern void TouchSocketLockFiles(void);
  extern void AddToDataDirLockFile(int target_line, const char *str);
+ extern bool RecheckDataDirLockFile(void);
  extern void ValidatePgVersion(const char *path);
  extern void process_shared_preload_libraries(void);
  extern void process_session_preload_libraries(void);
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to