On Sat, Jan 19, 2013 at 09:56:48PM -0500, Bruce Momjian wrote:
> > (But, at least with the type of packaging I'm using in Fedora, he would
> > first have to go through a package downgrade/reinstallation process,
> > because the packaging provides no simple scripted way of manually
> > starting the old postgres executable, only the new one.  Moreover, what
> > pg_upgrade is printing provides no help in figuring out whether that's
> > the next step.)
> > 
> > I do sympathize with taking a paranoid attitude here, but I'm failing
> > to see what advantage there is in not attempting to start the old
> > postmaster.  In the *only* case that pg_upgrade is successfully
> > protecting against with this logic, namely there's-an-active-postmaster-
> > already, the postmaster is equally able to protect itself.  In other
> > cases it would be more helpful not less to let the postmaster analyze
> > the situation.
> > 
> > > The other problem is that if the server start fails, how do we know if
> > > the failure was due to a running postmaster?
> > 
> > Because we read the postmaster's log file, or at least tell the user to
> > do so.  That report would be unambiguous, unlike pg_upgrade's.
> 
> Attached is a WIP patch to give you an idea of how I am going to solve
> this problem.  This comment says it all:

Attached is a ready-to-apply version of the patch.  I continued to use
postmaster.pid to determine if I need to try the start/stop test ---
that allows me to know which servers _might_ be running, because a
server start failure might be for many reasons and I would prefer not to
suggest the server is running if I can avoid it, and the pid file gives
me that.

The patch is longer than I expected because the code needed to be
reordered.  The starting banner indicates if it a live check, but to
check for a live server we need to start/stop the servers and we needed
to know where the binaries are, and we didn't do that until after the
start banner.  I removed the 'check' line for binary checks, and moved
that before the banner printing.  postmaster_start also now needs an
option to supress an error.

-- 
  Bruce Momjian  <br...@momjian.us>        http://momjian.us
  EnterpriseDB                             http://enterprisedb.com

  + It's impossible for everything to be true. +
diff --git a/contrib/pg_upgrade/check.c b/contrib/pg_upgrade/check.c
new file mode 100644
index 1780788..8188643
*** a/contrib/pg_upgrade/check.c
--- b/contrib/pg_upgrade/check.c
*************** fix_path_separator(char *path)
*** 56,66 ****
  }
  
  void
! output_check_banner(bool *live_check)
  {
! 	if (user_opts.check && is_server_running(old_cluster.pgdata))
  	{
- 		*live_check = true;
  		pg_log(PG_REPORT, "Performing Consistency Checks on Old Live Server\n");
  		pg_log(PG_REPORT, "------------------------------------------------\n");
  	}
--- 56,65 ----
  }
  
  void
! output_check_banner(bool live_check)
  {
! 	if (user_opts.check && live_check)
  	{
  		pg_log(PG_REPORT, "Performing Consistency Checks on Old Live Server\n");
  		pg_log(PG_REPORT, "------------------------------------------------\n");
  	}
*************** check_and_dump_old_cluster(bool live_che
*** 78,84 ****
  	/* -- OLD -- */
  
  	if (!live_check)
! 		start_postmaster(&old_cluster);
  
  	set_locale_and_encoding(&old_cluster);
  
--- 77,83 ----
  	/* -- OLD -- */
  
  	if (!live_check)
! 		start_postmaster(&old_cluster, true);
  
  	set_locale_and_encoding(&old_cluster);
  
*************** issue_warnings(char *sequence_script_fil
*** 201,207 ****
  	/* old = PG 8.3 warnings? */
  	if (GET_MAJOR_VERSION(old_cluster.major_version) <= 803)
  	{
! 		start_postmaster(&new_cluster);
  
  		/* restore proper sequence values using file created from old server */
  		if (sequence_script_file_name)
--- 200,206 ----
  	/* old = PG 8.3 warnings? */
  	if (GET_MAJOR_VERSION(old_cluster.major_version) <= 803)
  	{
! 		start_postmaster(&new_cluster, true);
  
  		/* restore proper sequence values using file created from old server */
  		if (sequence_script_file_name)
*************** issue_warnings(char *sequence_script_fil
*** 224,230 ****
  	/* Create dummy large object permissions for old < PG 9.0? */
  	if (GET_MAJOR_VERSION(old_cluster.major_version) <= 804)
  	{
! 		start_postmaster(&new_cluster);
  		new_9_0_populate_pg_largeobject_metadata(&new_cluster, false);
  		stop_postmaster(false);
  	}
--- 223,229 ----
  	/* Create dummy large object permissions for old < PG 9.0? */
  	if (GET_MAJOR_VERSION(old_cluster.major_version) <= 804)
  	{
! 		start_postmaster(&new_cluster, true);
  		new_9_0_populate_pg_largeobject_metadata(&new_cluster, false);
  		stop_postmaster(false);
  	}
diff --git a/contrib/pg_upgrade/exec.c b/contrib/pg_upgrade/exec.c
new file mode 100644
index e326a10..44dafc3
*** a/contrib/pg_upgrade/exec.c
--- b/contrib/pg_upgrade/exec.c
*************** exec_prog(const char *log_file, const ch
*** 140,152 ****
  
  
  /*
!  * is_server_running()
   *
!  * checks whether postmaster on the given data directory is running or not.
!  * The check is performed by looking for the existence of postmaster.pid file.
   */
  bool
! is_server_running(const char *datadir)
  {
  	char		path[MAXPGPATH];
  	int			fd;
--- 140,151 ----
  
  
  /*
!  * pid_lock_file_exists()
   *
!  * Checks whether the postmaster.pid file exists.
   */
  bool
! pid_lock_file_exists(const char *datadir)
  {
  	char		path[MAXPGPATH];
  	int			fd;
*************** void
*** 180,187 ****
  verify_directories(void)
  {
  
- 	prep_status("Checking current, bin, and data directories");
- 
  #ifndef WIN32
  	if (access(".", R_OK | W_OK | X_OK) != 0)
  #else
--- 179,184 ----
*************** verify_directories(void)
*** 194,200 ****
  	check_data_dir(old_cluster.pgdata);
  	check_bin_dir(&new_cluster);
  	check_data_dir(new_cluster.pgdata);
- 	check_ok();
  }
  
  
--- 191,196 ----
diff --git a/contrib/pg_upgrade/pg_upgrade.c b/contrib/pg_upgrade/pg_upgrade.c
new file mode 100644
index a752fe8..55155d2
*** a/contrib/pg_upgrade/pg_upgrade.c
--- b/contrib/pg_upgrade/pg_upgrade.c
*************** static void prepare_new_databases(void);
*** 48,54 ****
  static void create_new_objects(void);
  static void copy_clog_xlog_xid(void);
  static void set_frozenxids(void);
! static void setup(char *argv0, bool live_check);
  static void cleanup(void);
  
  ClusterInfo old_cluster,
--- 48,54 ----
  static void create_new_objects(void);
  static void copy_clog_xlog_xid(void);
  static void set_frozenxids(void);
! static void setup(char *argv0, bool *live_check);
  static void cleanup(void);
  
  ClusterInfo old_cluster,
*************** main(int argc, char **argv)
*** 80,88 ****
  	adjust_data_dir(&old_cluster);
  	adjust_data_dir(&new_cluster);
  
! 	output_check_banner(&live_check);
  
! 	setup(argv[0], live_check);
  
  	check_cluster_versions();
  
--- 80,88 ----
  	adjust_data_dir(&old_cluster);
  	adjust_data_dir(&new_cluster);
  
! 	setup(argv[0], &live_check);
  
! 	output_check_banner(live_check);
  
  	check_cluster_versions();
  
*************** main(int argc, char **argv)
*** 95,101 ****
  
  
  	/* -- NEW -- */
! 	start_postmaster(&new_cluster);
  
  	check_new_cluster();
  	report_clusters_compatible();
--- 95,101 ----
  
  
  	/* -- NEW -- */
! 	start_postmaster(&new_cluster, true);
  
  	check_new_cluster();
  	report_clusters_compatible();
*************** main(int argc, char **argv)
*** 116,122 ****
  	/* New now using xids of the old system */
  
  	/* -- NEW -- */
! 	start_postmaster(&new_cluster);
  
  	prepare_new_databases();
  
--- 116,122 ----
  	/* New now using xids of the old system */
  
  	/* -- NEW -- */
! 	start_postmaster(&new_cluster, true);
  
  	prepare_new_databases();
  
*************** main(int argc, char **argv)
*** 177,183 ****
  
  
  static void
! setup(char *argv0, bool live_check)
  {
  	char		exec_path[MAXPGPATH];	/* full path to my executable */
  
--- 177,183 ----
  
  
  static void
! setup(char *argv0, bool *live_check)
  {
  	char		exec_path[MAXPGPATH];	/* full path to my executable */
  
*************** setup(char *argv0, bool live_check)
*** 189,203 ****
  
  	verify_directories();
  
! 	/* no postmasters should be running */
! 	if (!live_check && is_server_running(old_cluster.pgdata))
! 		pg_log(PG_FATAL, "There seems to be a postmaster servicing the old cluster.\n"
! 			   "Please shutdown that postmaster and try again.\n");
  
  	/* same goes for the new postmaster */
! 	if (is_server_running(new_cluster.pgdata))
! 		pg_log(PG_FATAL, "There seems to be a postmaster servicing the new cluster.\n"
  			   "Please shutdown that postmaster and try again.\n");
  
  	/* get path to pg_upgrade executable */
  	if (find_my_exec(argv0, exec_path) < 0)
--- 189,227 ----
  
  	verify_directories();
  
! 	/* no postmasters should be running, except for a live check */
! 	if (pid_lock_file_exists(old_cluster.pgdata))
! 	{
! 		/*
! 		 *	If we have a postmaster.pid file, try to start the server.  If
! 		 *	it starts, the pid file was stale, so stop the server.  If it
! 		 *	doesn't start, assume the server is running.  If the pid file
! 		 *	is left over from a server crash, this also allows any committed
! 		 *	transactions stored in the WAL to be replayed so they are not
! 		 *	lost, because WAL files are not transfered from old to new
! 		 *	servers.
! 		 */		
! 		if (start_postmaster(&old_cluster, false))
! 			stop_postmaster(false);
! 		else
! 		{
! 			if (!user_opts.check)
! 				pg_log(PG_FATAL, "There seems to be a postmaster servicing the old cluster.\n"
! 					   "Please shutdown that postmaster and try again.\n");
! 			else
! 				*live_check = true;
! 		}
! 	}
  
  	/* same goes for the new postmaster */
! 	if (pid_lock_file_exists(new_cluster.pgdata))
! 	{
! 		if (start_postmaster(&new_cluster, false))
! 			stop_postmaster(false);
! 		else
! 			pg_log(PG_FATAL, "There seems to be a postmaster servicing the new cluster.\n"
  			   "Please shutdown that postmaster and try again.\n");
+ 	}
  
  	/* get path to pg_upgrade executable */
  	if (find_my_exec(argv0, exec_path) < 0)
diff --git a/contrib/pg_upgrade/pg_upgrade.h b/contrib/pg_upgrade/pg_upgrade.h
new file mode 100644
index 70b9381..69b9c60
*** a/contrib/pg_upgrade/pg_upgrade.h
--- b/contrib/pg_upgrade/pg_upgrade.h
*************** extern OSInfo os_info;
*** 307,313 ****
  
  /* check.c */
  
! void		output_check_banner(bool *live_check);
  void		check_and_dump_old_cluster(bool live_check,
  				  char **sequence_script_file_name);
  void		check_new_cluster(void);
--- 307,313 ----
  
  /* check.c */
  
! void		output_check_banner(bool live_check);
  void		check_and_dump_old_cluster(bool live_check,
  				  char **sequence_script_file_name);
  void		check_new_cluster(void);
*************** exec_prog(const char *log_file, const ch
*** 341,347 ****
  		  bool throw_error, const char *fmt,...)
  __attribute__((format(PG_PRINTF_ATTRIBUTE, 4, 5)));
  void		verify_directories(void);
! bool		is_server_running(const char *datadir);
  
  
  /* file.c */
--- 341,347 ----
  		  bool throw_error, const char *fmt,...)
  __attribute__((format(PG_PRINTF_ATTRIBUTE, 4, 5)));
  void		verify_directories(void);
! bool		pid_lock_file_exists(const char *datadir);
  
  
  /* file.c */
*************** __attribute__((format(PG_PRINTF_ATTRIBUT
*** 429,435 ****
  
  char	   *cluster_conn_opts(ClusterInfo *cluster);
  
! void		start_postmaster(ClusterInfo *cluster);
  void		stop_postmaster(bool fast);
  uint32		get_major_server_version(ClusterInfo *cluster);
  void		check_pghost_envvar(void);
--- 429,435 ----
  
  char	   *cluster_conn_opts(ClusterInfo *cluster);
  
! bool		start_postmaster(ClusterInfo *cluster, bool throw_error);
  void		stop_postmaster(bool fast);
  uint32		get_major_server_version(ClusterInfo *cluster);
  void		check_pghost_envvar(void);
diff --git a/contrib/pg_upgrade/server.c b/contrib/pg_upgrade/server.c
new file mode 100644
index 0b48251..d1a3f76
*** a/contrib/pg_upgrade/server.c
--- b/contrib/pg_upgrade/server.c
*************** stop_postmaster_atexit(void)
*** 170,177 ****
  }
  
  
! void
! start_postmaster(ClusterInfo *cluster)
  {
  	char		cmd[MAXPGPATH * 4 + 1000];
  	PGconn	   *conn;
--- 170,177 ----
  }
  
  
! bool
! start_postmaster(ClusterInfo *cluster, bool throw_error)
  {
  	char		cmd[MAXPGPATH * 4 + 1000];
  	PGconn	   *conn;
*************** start_postmaster(ClusterInfo *cluster)
*** 236,241 ****
--- 236,244 ----
  							  false,
  							  "%s", cmd);
  
+ 	if (!pg_ctl_return && !throw_error)
+ 		return false;
+ 							  
  	/* Check to see if we can connect to the server; if not, report it. */
  	if ((conn = get_db_conn(cluster, "template1")) == NULL ||
  		PQstatus(conn) != CONNECTION_OK)
*************** start_postmaster(ClusterInfo *cluster)
*** 256,261 ****
--- 259,266 ----
  			   CLUSTER_NAME(cluster));
  
  	os_info.running_cluster = cluster;
+ 
+ 	return true;
  }
  
  
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to