On Wed, Jan 19, 2022 at 06:05:40PM -0600, Justin Pryzby wrote:
> I still don't know if it even needs to be configurable.

I want this to be configurable to ease the switch of the pg_upgrade to
TAP, moving the logs into a deterministic temporary location proper to
each run.  This makes reporting much easier on failure, with
repeatable tests, and that's why I began poking at this patch first.

> I'm not sure these restrictions are needed ?

This could lead to issues with rmtree() if we are not careful enough,
no?  We'd had our deal of argument injections with pg_upgrade commands
in the past (fcd15f1).

> +       outputpath = make_absolute_path(log_opts.basedir);                    
>                                                                       
> +       if (path_contains_parent_reference(outputpath))                       
>                                                                       
> +               pg_fatal("reference to parent directory not allowed\n");      
>                                                                       
> 
> Besides, you're passing the wrong path here.

What would you suggest?  I was just looking at that again this
morning, and splitted the logic into two parts for the absolute and
relative path cases, preventing all cases like that, which would be
weird, anyway:
../
../popo
.././
././
/direct/path/to/cwd/
/direct/path/../path/to/cwd/

>> +      <command>pg_upgrade</command>, and is be removed after a successful
> 
> remove "be"

Fixed.

>> +       if (mkdir(log_opts.basedir, S_IRWXU | S_IRWXG | S_IRWXO))
> 
> S_IRWXG | S_IRWXO are useless due to the umask, right ?
> Maybe use PG_DIR_MODE_OWNER ?

Hmm.  We could just use pg_dir_create_mode, then.  See pg_rewind, as
one example.  This opens the door for something pluggable to
SetDataDirectoryCreatePerm(), though the original use is kind of
different with data folders.

> You're printing the wrong var.  filename_path is not initialized.

Ugh.
--
Michael
From 2efd3252e189578f865eb54d076596387494585c Mon Sep 17 00:00:00 2001
From: Michael Paquier <mich...@paquier.xyz>
Date: Thu, 20 Jan 2022 11:58:59 +0900
Subject: [PATCH v4] pg_upgrade: write log files and dumps in subdir..

The subdir must not exist when pg_upgrade is started.
This avoids appending to logfiles which have pre-existing errors in them.
And allows more easily cleaning up after pg_upgrade.
---
 src/bin/pg_upgrade/.gitignore   |   2 -
 src/bin/pg_upgrade/Makefile     |   4 +-
 src/bin/pg_upgrade/check.c      |  12 ++--
 src/bin/pg_upgrade/dump.c       |   6 +-
 src/bin/pg_upgrade/exec.c       |   5 +-
 src/bin/pg_upgrade/function.c   |   3 +-
 src/bin/pg_upgrade/option.c     |  29 ++------
 src/bin/pg_upgrade/pg_upgrade.c | 120 +++++++++++++++++++++++++-------
 src/bin/pg_upgrade/pg_upgrade.h |   7 ++
 src/bin/pg_upgrade/server.c     |   6 +-
 doc/src/sgml/ref/pgupgrade.sgml |  17 ++++-
 11 files changed, 145 insertions(+), 66 deletions(-)

diff --git a/src/bin/pg_upgrade/.gitignore b/src/bin/pg_upgrade/.gitignore
index 2d3bfeaa50..939e50db6c 100644
--- a/src/bin/pg_upgrade/.gitignore
+++ b/src/bin/pg_upgrade/.gitignore
@@ -1,9 +1,7 @@
 /pg_upgrade
 # Generated by test suite
-/pg_upgrade_internal.log
 /delete_old_cluster.sh
 /delete_old_cluster.bat
 /reindex_hash.sql
-/loadable_libraries.txt
 /log/
 /tmp_check/
diff --git a/src/bin/pg_upgrade/Makefile b/src/bin/pg_upgrade/Makefile
index 44d06be5a6..e833a1c5f0 100644
--- a/src/bin/pg_upgrade/Makefile
+++ b/src/bin/pg_upgrade/Makefile
@@ -45,9 +45,7 @@ uninstall:
 clean distclean maintainer-clean:
 	rm -f pg_upgrade$(X) $(OBJS)
 	rm -rf delete_old_cluster.sh log/ tmp_check/ \
-	       loadable_libraries.txt reindex_hash.sql \
-	       pg_upgrade_dump_globals.sql \
-	       pg_upgrade_dump_*.custom pg_upgrade_*.log
+	       reindex_hash.sql pg_upgrade_output.d/
 
 # When $(MAKE) is present, make automatically infers that this is a
 # recursive make. which is not actually what we want here, as that
diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c
index 3d218c2ad2..019bcb6c7b 100644
--- a/src/bin/pg_upgrade/check.c
+++ b/src/bin/pg_upgrade/check.c
@@ -783,7 +783,8 @@ check_for_isn_and_int8_passing_mismatch(ClusterInfo *cluster)
 		return;
 	}
 
-	snprintf(output_path, sizeof(output_path),
+	snprintf(output_path, sizeof(output_path), "%s/%s",
+			 log_opts.basedir,
 			 "contrib_isn_and_int8_pass_by_value.txt");
 
 	for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++)
@@ -860,7 +861,8 @@ check_for_user_defined_postfix_ops(ClusterInfo *cluster)
 
 	prep_status("Checking for user-defined postfix operators");
 
-	snprintf(output_path, sizeof(output_path),
+	snprintf(output_path, sizeof(output_path), "%s/%s",
+			 log_opts.basedir,
 			 "postfix_ops.txt");
 
 	/* Find any user defined postfix operators */
@@ -959,7 +961,8 @@ check_for_tables_with_oids(ClusterInfo *cluster)
 
 	prep_status("Checking for tables WITH OIDS");
 
-	snprintf(output_path, sizeof(output_path),
+	snprintf(output_path, sizeof(output_path), "%s/%s",
+			 log_opts.basedir,
 			 "tables_with_oids.txt");
 
 	/* Find any tables declared WITH OIDS */
@@ -1214,7 +1217,8 @@ check_for_user_defined_encoding_conversions(ClusterInfo *cluster)
 
 	prep_status("Checking for user-defined encoding conversions");
 
-	snprintf(output_path, sizeof(output_path),
+	snprintf(output_path, sizeof(output_path), "%s/%s",
+			 log_opts.basedir,
 			 "encoding_conversions.txt");
 
 	/* Find any user defined encoding conversions */
diff --git a/src/bin/pg_upgrade/dump.c b/src/bin/pg_upgrade/dump.c
index 953873fa01..b69b4f9569 100644
--- a/src/bin/pg_upgrade/dump.c
+++ b/src/bin/pg_upgrade/dump.c
@@ -22,9 +22,10 @@ generate_old_dump(void)
 	/* run new pg_dumpall binary for globals */
 	exec_prog(UTILITY_LOG_FILE, NULL, true, true,
 			  "\"%s/pg_dumpall\" %s --globals-only --quote-all-identifiers "
-			  "--binary-upgrade %s -f %s",
+			  "--binary-upgrade %s -f \"%s/%s\"",
 			  new_cluster.bindir, cluster_conn_opts(&old_cluster),
 			  log_opts.verbose ? "--verbose" : "",
+			  log_opts.dumpdir,
 			  GLOBALS_DUMP_FILE);
 	check_ok();
 
@@ -52,9 +53,10 @@ generate_old_dump(void)
 
 		parallel_exec_prog(log_file_name, NULL,
 						   "\"%s/pg_dump\" %s --schema-only --quote-all-identifiers "
-						   "--binary-upgrade --format=custom %s --file=\"%s\" %s",
+						   "--binary-upgrade --format=custom %s --file=\"%s/%s\" %s",
 						   new_cluster.bindir, cluster_conn_opts(&old_cluster),
 						   log_opts.verbose ? "--verbose" : "",
+						   log_opts.dumpdir,
 						   sql_file_name, escaped_connstr.data);
 
 		termPQExpBuffer(&escaped_connstr);
diff --git a/src/bin/pg_upgrade/exec.c b/src/bin/pg_upgrade/exec.c
index c0cbb1bec7..fadeea12ca 100644
--- a/src/bin/pg_upgrade/exec.c
+++ b/src/bin/pg_upgrade/exec.c
@@ -78,11 +78,12 @@ get_bin_version(ClusterInfo *cluster)
  * The code requires it be called first from the primary thread on Windows.
  */
 bool
-exec_prog(const char *log_file, const char *opt_log_file,
+exec_prog(const char *log_filename, const char *opt_log_file,
 		  bool report_error, bool exit_on_error, const char *fmt,...)
 {
 	int			result = 0;
 	int			written;
+	char		log_file[MAXPGPATH];
 
 #define MAXCMDLEN (2 * MAXPGPATH)
 	char		cmd[MAXCMDLEN];
@@ -97,6 +98,8 @@ exec_prog(const char *log_file, const char *opt_log_file,
 		mainThreadId = GetCurrentThreadId();
 #endif
 
+	snprintf(log_file, MAXPGPATH, "%s/%s", log_opts.logdir, log_filename);
+
 	written = 0;
 	va_start(ap, fmt);
 	written += vsnprintf(cmd + written, MAXCMDLEN - written, fmt, ap);
diff --git a/src/bin/pg_upgrade/function.c b/src/bin/pg_upgrade/function.c
index e73a309070..acc97fafef 100644
--- a/src/bin/pg_upgrade/function.c
+++ b/src/bin/pg_upgrade/function.c
@@ -128,7 +128,8 @@ check_loadable_libraries(void)
 
 	prep_status("Checking for presence of required libraries");
 
-	snprintf(output_path, sizeof(output_path), "loadable_libraries.txt");
+	snprintf(output_path, sizeof(output_path), "%s/%s",
+			log_opts.basedir, "loadable_libraries.txt");
 
 	/*
 	 * Now we want to sort the library names into order.  This avoids multiple
diff --git a/src/bin/pg_upgrade/option.c b/src/bin/pg_upgrade/option.c
index c894fdd685..54dff9691e 100644
--- a/src/bin/pg_upgrade/option.c
+++ b/src/bin/pg_upgrade/option.c
@@ -9,7 +9,6 @@
 
 #include "postgres_fe.h"
 
-#include <time.h>
 #ifdef WIN32
 #include <io.h>
 #endif
@@ -57,16 +56,15 @@ parseCommandLine(int argc, char *argv[])
 		{"socketdir", required_argument, NULL, 's'},
 		{"verbose", no_argument, NULL, 'v'},
 		{"clone", no_argument, NULL, 1},
+		{"outputdir", required_argument, NULL, 2},
 
 		{NULL, 0, NULL, 0}
 	};
 	int			option;			/* Command line option */
 	int			optindex = 0;	/* used by getopt_long */
 	int			os_user_effective_id;
-	FILE	   *fp;
-	char	  **filename;
-	time_t		run_time = time(NULL);
 
+	log_opts.basedir = DEFAULT_OUTPUTDIR;
 	user_opts.do_sync = true;
 	user_opts.transfer_mode = TRANSFER_MODE_COPY;
 
@@ -198,6 +196,10 @@ parseCommandLine(int argc, char *argv[])
 				user_opts.transfer_mode = TRANSFER_MODE_CLONE;
 				break;
 
+			case 2:
+				log_opts.basedir = pg_strdup(optarg);
+				break;
+
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
 						os_info.progname);
@@ -208,27 +210,9 @@ parseCommandLine(int argc, char *argv[])
 	if (optind < argc)
 		pg_fatal("too many command-line arguments (first is \"%s\")\n", argv[optind]);
 
-	if ((log_opts.internal = fopen_priv(INTERNAL_LOG_FILE, "a")) == NULL)
-		pg_fatal("could not open log file \"%s\": %m\n", INTERNAL_LOG_FILE);
-
 	if (log_opts.verbose)
 		pg_log(PG_REPORT, "Running in verbose mode\n");
 
-	/* label start of upgrade in logfiles */
-	for (filename = output_files; *filename != NULL; filename++)
-	{
-		if ((fp = fopen_priv(*filename, "a")) == NULL)
-			pg_fatal("could not write to log file \"%s\": %m\n", *filename);
-
-		/* Start with newline because we might be appending to a file. */
-		fprintf(fp, "\n"
-				"-----------------------------------------------------------------\n"
-				"  pg_upgrade run on %s"
-				"-----------------------------------------------------------------\n\n",
-				ctime(&run_time));
-		fclose(fp);
-	}
-
 	/* Turn off read-only mode;  add prefix to PGOPTIONS? */
 	if (getenv("PGOPTIONS"))
 	{
@@ -303,6 +287,7 @@ usage(void)
 	printf(_("  -v, --verbose                 enable verbose internal logging\n"));
 	printf(_("  -V, --version                 display version information, then exit\n"));
 	printf(_("  --clone                       clone instead of copying files to new cluster\n"));
+	printf(_("  --outputdir=DIR               directory to store internal files (logs, etc.)\n"));
 	printf(_("  -?, --help                    show this help, then exit\n"));
 	printf(_("\n"
 			 "Before running pg_upgrade you must:\n"
diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c
index 07defacd67..84b05e6e37 100644
--- a/src/bin/pg_upgrade/pg_upgrade.c
+++ b/src/bin/pg_upgrade/pg_upgrade.c
@@ -38,6 +38,8 @@
 
 #include "postgres_fe.h"
 
+#include <time.h>
+
 #ifdef HAVE_LANGINFO_H
 #include <langinfo.h>
 #endif
@@ -54,6 +56,7 @@ static void prepare_new_globals(void);
 static void create_new_objects(void);
 static void copy_xact_xlog_xid(void);
 static void set_frozenxids(bool minmxid_only);
+static void make_outputdirs(void);
 static void setup(char *argv0, bool *live_check);
 static void cleanup(void);
 
@@ -89,6 +92,8 @@ main(int argc, char **argv)
 
 	get_restricted_token();
 
+	make_outputdirs();
+
 	adjust_data_dir(&old_cluster);
 	adjust_data_dir(&new_cluster);
 
@@ -197,6 +202,87 @@ main(int argc, char **argv)
 	return 0;
 }
 
+/*
+ * Create and assign proper permissions to the set of output directories
+ * used to store any data generated internally, filling in log_opts in
+ * the process.
+ */
+static void
+make_outputdirs(void)
+{
+	FILE	*fp;
+	char	**filename;
+	time_t	run_time = time(NULL);
+	char	filename_path[MAXPGPATH];
+	char	canonicalized_basedir[MAXPGPATH];
+
+	/*
+	 * Check that the output directory is a sane location.  It should not
+	 * include any references to the parent, and if a relative path, this had
+	 * better not be a direct parent.
+	 */
+	strlcpy(canonicalized_basedir, log_opts.basedir, MAXPGPATH);
+	canonicalize_path(canonicalized_basedir);
+	if (is_absolute_path(canonicalized_basedir))
+	{
+		char	cwd[MAXPGPATH];
+
+		if (path_contains_parent_reference(canonicalized_basedir))
+			pg_fatal("reference to parent directory not allowed for output directory\n");
+
+		if (!getcwd(cwd, MAXPGPATH))
+			pg_fatal("could not determine current directory\n");
+
+		if (path_is_prefix_of_path(canonicalized_basedir, cwd))
+			pg_fatal("output directory cannot be direct parent of working directory\n");
+	}
+	else
+	{
+		if (!path_is_relative_and_below_cwd(canonicalized_basedir))
+			pg_fatal("output directory must be in or below the current directory\n");
+
+		if (strcmp(canonicalized_basedir, ".") == 0)
+			pg_fatal("output directory cannot be the current working directory\n");
+	}
+
+	/* save a sanitized absolute path for the whole run */
+	log_opts.basedir = make_absolute_path(log_opts.basedir);
+
+	log_opts.dumpdir = (char *) pg_malloc(MAXPGPATH);
+	snprintf(log_opts.dumpdir, MAXPGPATH, "%s/dump", log_opts.basedir);
+	log_opts.logdir = (char *) pg_malloc(MAXPGPATH);
+	snprintf(log_opts.logdir, MAXPGPATH, "%s/log", log_opts.basedir);
+
+	if (mkdir(log_opts.basedir, pg_dir_create_mode))
+		pg_fatal("could not create directory \"%s\": %m\n", log_opts.basedir);
+	if (mkdir(log_opts.dumpdir, pg_dir_create_mode))
+		pg_fatal("could not create directory \"%s\": %m\n", log_opts.dumpdir);
+	if (mkdir(log_opts.logdir, pg_dir_create_mode))
+		pg_fatal("could not create directory \"%s\": %m\n", log_opts.logdir);
+
+	snprintf(filename_path, sizeof(filename_path), "%s/%s", log_opts.logdir,
+			 INTERNAL_LOG_FILE);
+	if ((log_opts.internal = fopen_priv(filename_path, "a")) == NULL)
+		pg_fatal("could not open log file \"%s\": %m\n", filename_path);
+
+	/* label start of upgrade in logfiles */
+	for (filename = output_files; *filename != NULL; filename++)
+	{
+		snprintf(filename_path, sizeof(filename_path), "%s/%s",
+				log_opts.logdir, *filename);
+		if ((fp = fopen_priv(filename_path, "a")) == NULL)
+			pg_fatal("could not write to log file \"%s\": %m\n", filename_path);
+
+		/* Start with newline because we might be appending to a file. */
+		fprintf(fp, "\n"
+				"-----------------------------------------------------------------\n"
+				"  pg_upgrade run on %s"
+				"-----------------------------------------------------------------\n\n",
+				ctime(&run_time));
+		fclose(fp);
+	}
+}
+
 
 static void
 setup(char *argv0, bool *live_check)
@@ -306,8 +392,9 @@ prepare_new_globals(void)
 	prep_status("Restoring global objects in the new cluster");
 
 	exec_prog(UTILITY_LOG_FILE, NULL, true, true,
-			  "\"%s/psql\" " EXEC_PSQL_ARGS " %s -f \"%s\"",
+			  "\"%s/psql\" " EXEC_PSQL_ARGS " %s -f \"%s/%s\"",
 			  new_cluster.bindir, cluster_conn_opts(&new_cluster),
+			  log_opts.dumpdir,
 			  GLOBALS_DUMP_FILE);
 	check_ok();
 }
@@ -352,10 +439,11 @@ create_new_objects(void)
 				  true,
 				  true,
 				  "\"%s/pg_restore\" %s %s --exit-on-error --verbose "
-				  "--dbname postgres \"%s\"",
+				  "--dbname postgres \"%s/%s\"",
 				  new_cluster.bindir,
 				  cluster_conn_opts(&new_cluster),
 				  create_opts,
+				  log_opts.dumpdir,
 				  sql_file_name);
 
 		break;					/* done once we've processed template1 */
@@ -389,10 +477,11 @@ create_new_objects(void)
 		parallel_exec_prog(log_file_name,
 						   NULL,
 						   "\"%s/pg_restore\" %s %s --exit-on-error --verbose "
-						   "--dbname template1 \"%s\"",
+						   "--dbname template1 \"%s/%s\"",
 						   new_cluster.bindir,
 						   cluster_conn_opts(&new_cluster),
 						   create_opts,
+						   log_opts.dumpdir,
 						   sql_file_name);
 	}
 
@@ -689,28 +778,5 @@ cleanup(void)
 
 	/* Remove dump and log files? */
 	if (!log_opts.retain)
-	{
-		int			dbnum;
-		char	  **filename;
-
-		for (filename = output_files; *filename != NULL; filename++)
-			unlink(*filename);
-
-		/* remove dump files */
-		unlink(GLOBALS_DUMP_FILE);
-
-		if (old_cluster.dbarr.dbs)
-			for (dbnum = 0; dbnum < old_cluster.dbarr.ndbs; dbnum++)
-			{
-				char		sql_file_name[MAXPGPATH],
-							log_file_name[MAXPGPATH];
-				DbInfo	   *old_db = &old_cluster.dbarr.dbs[dbnum];
-
-				snprintf(sql_file_name, sizeof(sql_file_name), DB_DUMP_FILE_MASK, old_db->db_oid);
-				unlink(sql_file_name);
-
-				snprintf(log_file_name, sizeof(log_file_name), DB_DUMP_LOG_FILE_MASK, old_db->db_oid);
-				unlink(log_file_name);
-			}
-	}
+		rmtree(log_opts.basedir, true);
 }
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index da6770d0f8..28ca82e4cb 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -26,6 +26,9 @@
 #define GLOBALS_DUMP_FILE	"pg_upgrade_dump_globals.sql"
 #define DB_DUMP_FILE_MASK	"pg_upgrade_dump_%u.custom"
 
+/* Default path to include all the files generated internally */
+#define DEFAULT_OUTPUTDIR	"pg_upgrade_output.d"
+
 #define DB_DUMP_LOG_FILE_MASK	"pg_upgrade_dump_%u.log"
 #define SERVER_LOG_FILE		"pg_upgrade_server.log"
 #define UTILITY_LOG_FILE	"pg_upgrade_utility.log"
@@ -263,6 +266,10 @@ typedef struct
 	FILE	   *internal;		/* internal log FILE */
 	bool		verbose;		/* true -> be verbose in messages */
 	bool		retain;			/* retain log files on success */
+	/* Set of internal directories for output files */
+	char		*basedir;		/* Base output directory */
+	char	    *dumpdir;		/* Dumps */
+	char	    *logdir;		/* Log files */
 } LogOpts;
 
 
diff --git a/src/bin/pg_upgrade/server.c b/src/bin/pg_upgrade/server.c
index f5d4656dec..7878d233de 100644
--- a/src/bin/pg_upgrade/server.c
+++ b/src/bin/pg_upgrade/server.c
@@ -238,8 +238,10 @@ start_postmaster(ClusterInfo *cluster, bool report_and_exit_on_error)
 	 * vacuumdb --freeze actually freezes the tuples.
 	 */
 	snprintf(cmd, sizeof(cmd),
-			 "\"%s/pg_ctl\" -w -l \"%s\" -D \"%s\" -o \"-p %d -b%s %s%s\" start",
-			 cluster->bindir, SERVER_LOG_FILE, cluster->pgconfig, cluster->port,
+			 "\"%s/pg_ctl\" -w -l \"%s/%s\" -D \"%s\" -o \"-p %d -b%s %s%s\" start",
+			 cluster->bindir,
+			 log_opts.logdir,
+			 SERVER_LOG_FILE, cluster->pgconfig, cluster->port,
 			 (cluster == &new_cluster) ?
 			 " -c synchronous_commit=off -c fsync=off -c full_page_writes=off -c vacuum_defer_cleanup_age=0" : "",
 			 cluster->pgopts ? cluster->pgopts : "", socket_string);
diff --git a/doc/src/sgml/ref/pgupgrade.sgml b/doc/src/sgml/ref/pgupgrade.sgml
index c5ce732ee9..109382e782 100644
--- a/doc/src/sgml/ref/pgupgrade.sgml
+++ b/doc/src/sgml/ref/pgupgrade.sgml
@@ -231,6 +231,18 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--outputdir=</option><replaceable>path</replaceable></term>
+      <listitem><para>Directory where any output file internal to
+      <command>pg_upgrade</command> is generated. This includes SQL and log
+      files. The directory is created when starting
+      <command>pg_upgrade</command>, and is removed after a successful
+      upgrade unless <option>--retain</option> is specified. The default value
+      is <literal>pg_upgrade_output.d</literal> in the current working
+      directory.
+      </para></listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-?</option></term>
       <term><option>--help</option></term>
@@ -768,8 +780,9 @@ psql --username=postgres --file=script.sql postgres
 
   <para>
    <application>pg_upgrade</application> creates various working files, such
-   as schema dumps, in the current working directory.  For security, be sure
-   that that directory is not readable or writable by any other users.
+   as schema dumps, stored within <literal>pg_upgrade_output.d</literal> in
+   the current working directory. The location can be controlled with
+   <option>--outputdir</option>
   </para>
 
   <para>
-- 
2.34.1

Attachment: signature.asc
Description: PGP signature

Reply via email to