On Mon, Jun 06, 2022 at 02:38:03AM +0200, Daniel Gustafsson wrote:
> On 5 Jun 2022, at 11:19, Michael Paquier <mich...@paquier.xyz> wrote:
>> On Sun, Jun 05, 2022 at 09:24:25AM +0900, Michael Paquier wrote:
>>> Well, another error that could happen in the early code paths is
>>> EACCES on a custom socket directory specified, and we'd still face the
>>> same problem on a follow-up restart.  Using a sub-directory structure
>>> as Daniel and Tom mention would address all that (if ignoring EEXIST
>>> for the BASE_OUTPUTDIR), removing any existing content from the base
>>> path when not using --retain.  This comes with the disadvantage of
>>> bloating the disk on repeated errors, but this last bit would not
>>> really be a huge problem, I guess, as it could be more useful to keep
>>> the error information around.
>> 
>> I have been toying with the idea of a sub-directory named with a
>> timestamp (Unix time, like log_line_prefix's %n but this could be
>> any format) under pg_upgrade_output.d/ and finished with the
>> attached.  
> 
> I was thinking more along the lines of %m to make it (more) human readable, 
> but
> I'm certainly not wedded to any format.

Neither am I.  I would not map exactly to %m as it uses whitespaces,
but something like %Y%m%d_%H%M%S.%03d (3-digit ms for last part) would
be fine?  If there are other ideas for the format, just let me know.

> As a user I would expect the logs from this current invocation to be removed
> without --retain, and any other older log entries be kept.  I think we should
> remove log_opts.logdir and only remove log_opts.rootdir if it is left empty
> after .logdir is removed.

Okay, however I think you mean log_opts.basedir rather than logdir?
That's simple enough to switch around as pg_check_dir() does this
job.

>> The logic in charge of cleaning up the logs has been moved to a single
>> routine, aka cleanup_logs().
> 
> +             cleanup_logs();
> 
> Maybe we should register cleanup_logs() as an atexit() handler once we're done
> with option processing?

It seems to me that the original intention is to keep the logs around
on failure, hence we should only clean up things on a clean exit().
That's why I didn't add an exit callback for that.

> +     snprintf(log_opts.logdir, MAXPGPATH, "%s/%s/%s", log_opts.rootdir,
> +                      timebuf, LOG_OUTPUTDIR);
> 
> While not introduced by this patch, it does make me uneasy that we create 
> paths
> without checking for buffer overflows..

I don't mind adding such checks in those code paths.  You are right
that they tend to produce longer path strings than others.
--
Michael
diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c
index 6114303b52..ace7387eda 100644
--- a/src/bin/pg_upgrade/check.c
+++ b/src/bin/pg_upgrade/check.c
@@ -210,6 +210,8 @@ report_clusters_compatible(void)
 		pg_log(PG_REPORT, "\n*Clusters are compatible*\n");
 		/* stops new cluster */
 		stop_postmaster(false);
+
+		cleanup_output_dirs();
 		exit(0);
 	}
 
diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c
index ecb3e1f647..5b8e3e73db 100644
--- a/src/bin/pg_upgrade/pg_upgrade.c
+++ b/src/bin/pg_upgrade/pg_upgrade.c
@@ -58,7 +58,6 @@ static void copy_xact_xlog_xid(void);
 static void set_frozenxids(bool minmxid_only);
 static void make_outputdirs(char *pgdata);
 static void setup(char *argv0, bool *live_check);
-static void cleanup(void);
 
 ClusterInfo old_cluster,
 			new_cluster;
@@ -204,7 +203,7 @@ main(int argc, char **argv)
 
 	pg_free(deletion_script_file_name);
 
-	cleanup();
+	cleanup_output_dirs();
 
 	return 0;
 }
@@ -221,14 +220,48 @@ make_outputdirs(char *pgdata)
 	char	  **filename;
 	time_t		run_time = time(NULL);
 	char		filename_path[MAXPGPATH];
+	char		timebuf[128];
+	struct timeval time;
+	time_t		tt;
+	int			len;
 
+	log_opts.rootdir = (char *) pg_malloc(MAXPGPATH);
+	len = snprintf(log_opts.rootdir, MAXPGPATH, "%s/%s", pgdata, BASE_OUTPUTDIR);
+	if (len >= MAXPGPATH)
+		pg_fatal("buffer for root directory too small");
+
+	/* BASE_OUTPUTDIR/$unix_timestamp/ */
+	gettimeofday(&time, NULL);
+	tt = (time_t) time.tv_sec;
+	strftime(timebuf, sizeof(timebuf), "%Y%m%d_%H%M%S", localtime(&tt));
+	snprintf(timebuf, sizeof(timebuf), "%s.%03d",
+			 timebuf, (int) (time.tv_usec / 1000));
 	log_opts.basedir = (char *) pg_malloc(MAXPGPATH);
-	snprintf(log_opts.basedir, MAXPGPATH, "%s/%s", pgdata, BASE_OUTPUTDIR);
-	log_opts.dumpdir = (char *) pg_malloc(MAXPGPATH);
-	snprintf(log_opts.dumpdir, MAXPGPATH, "%s/%s", pgdata, DUMP_OUTPUTDIR);
-	log_opts.logdir = (char *) pg_malloc(MAXPGPATH);
-	snprintf(log_opts.logdir, MAXPGPATH, "%s/%s", pgdata, LOG_OUTPUTDIR);
+	len = snprintf(log_opts.basedir, MAXPGPATH, "%s/%s", log_opts.rootdir,
+				   timebuf);
+	if (len >= MAXPGPATH)
+		pg_fatal("buffer for base directory too small");
 
+	/* BASE_OUTPUTDIR/$unix_timestamp/dump/ */
+	log_opts.dumpdir = (char *) pg_malloc(MAXPGPATH);
+	len = snprintf(log_opts.dumpdir, MAXPGPATH, "%s/%s/%s", log_opts.rootdir,
+				   timebuf, DUMP_OUTPUTDIR);
+	if (len >= MAXPGPATH)
+		pg_fatal("buffer for dump directory too small");
+
+	/* BASE_OUTPUTDIR/$unix_timestamp/log/ */
+	log_opts.logdir = (char *) pg_malloc(MAXPGPATH);
+	len = snprintf(log_opts.logdir, MAXPGPATH, "%s/%s/%s", log_opts.rootdir,
+				   timebuf, LOG_OUTPUTDIR);
+	if (len >= MAXPGPATH)
+		pg_fatal("buffer for log directory too small");
+
+	/*
+	 * Ignore the error case where the root path exists, as it is kept the
+	 * same across runs.
+	 */
+	if (mkdir(log_opts.rootdir, pg_dir_create_mode) && errno != EEXIST)
+		pg_fatal("could not create directory \"%s\": %m\n", log_opts.rootdir);
 	if (mkdir(log_opts.basedir, pg_dir_create_mode))
 		pg_fatal("could not create directory \"%s\": %m\n", log_opts.basedir);
 	if (mkdir(log_opts.dumpdir, pg_dir_create_mode))
@@ -745,14 +778,3 @@ set_frozenxids(bool minmxid_only)
 
 	check_ok();
 }
-
-
-static void
-cleanup(void)
-{
-	fclose(log_opts.internal);
-
-	/* Remove dump and log files? */
-	if (!log_opts.retain)
-		(void) rmtree(log_opts.basedir, true);
-}
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index 86d3dc46fa..895137e732 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -30,12 +30,14 @@
 #define DB_DUMP_FILE_MASK	"pg_upgrade_dump_%u.custom"
 
 /*
- * Base directories that include all the files generated internally,
- * from the root path of the new cluster.
+ * Base directories that include all the files generated internally, from the
+ * root path of the new cluster.  The paths are dynamically built as of
+ * BASE_OUTPUTDIR/$unix_timestamp/{LOG_OUTPUTDIR,DUMP_OUTPUTDIR} to ensure
+ * their uniqueness in each run.
  */
 #define BASE_OUTPUTDIR		"pg_upgrade_output.d"
-#define LOG_OUTPUTDIR		BASE_OUTPUTDIR "/log"
-#define DUMP_OUTPUTDIR		BASE_OUTPUTDIR "/dump"
+#define LOG_OUTPUTDIR		 "log"
+#define DUMP_OUTPUTDIR		 "dump"
 
 #define DB_DUMP_LOG_FILE_MASK	"pg_upgrade_dump_%u.log"
 #define SERVER_LOG_FILE		"pg_upgrade_server.log"
@@ -276,7 +278,8 @@ typedef struct
 	bool		verbose;		/* true -> be verbose in messages */
 	bool		retain;			/* retain log files on success */
 	/* Set of internal directories for output files */
-	char	   *basedir;		/* Base output directory */
+	char	   *rootdir;		/* Root directory, aka pg_upgrade_output.d */
+	char	   *basedir;		/* Base output directory, with timestamp */
 	char	   *dumpdir;		/* Dumps */
 	char	   *logdir;			/* Log files */
 	bool		isatty;			/* is stdout a tty */
@@ -432,6 +435,7 @@ void		report_status(eLogType type, const char *fmt,...) pg_attribute_printf(2, 3
 void		pg_log(eLogType type, const char *fmt,...) pg_attribute_printf(2, 3);
 void		pg_fatal(const char *fmt,...) pg_attribute_printf(1, 2) pg_attribute_noreturn();
 void		end_progress_output(void);
+void		cleanup_output_dirs(void);
 void		prep_status(const char *fmt,...) pg_attribute_printf(1, 2);
 void		prep_status_progress(const char *fmt,...) pg_attribute_printf(1, 2);
 unsigned int str2uint(const char *str);
diff --git a/src/bin/pg_upgrade/t/002_pg_upgrade.pl b/src/bin/pg_upgrade/t/002_pg_upgrade.pl
index 55c7354ba2..f4c400a93e 100644
--- a/src/bin/pg_upgrade/t/002_pg_upgrade.pl
+++ b/src/bin/pg_upgrade/t/002_pg_upgrade.pl
@@ -5,6 +5,7 @@ use warnings;
 use Cwd qw(abs_path);
 use File::Basename qw(dirname);
 use File::Compare;
+use File::Path qw(rmtree);
 
 use PostgreSQL::Test::Cluster;
 use PostgreSQL::Test::Utils;
@@ -213,6 +214,39 @@ chdir ${PostgreSQL::Test::Utils::tmp_check};
 
 # Upgrade the instance.
 $oldnode->stop;
+
+# Cause a failure at the start of pg_upgrade, this should create the logging
+# directory pg_upgrade_output.d but leave it around.  Keep --check for an
+# early exit.
+command_fails(
+	[
+		'pg_upgrade', '--no-sync',
+		'-d',         $oldnode->data_dir,
+		'-D',         $newnode->data_dir,
+		'-b',         $oldbindir . '/does/not/exist/',
+		'-B',         $newbindir,
+		'-p',         $oldnode->port,
+		'-P',         $newnode->port,
+		'--check'
+	],
+	'run of pg_upgrade --check for new instance with incorrect binary path');
+ok(-d $newnode->data_dir . "/pg_upgrade_output.d",
+	"pg_upgrade_output.d/ not removed after pg_upgrade failure");
+rmtree($newnode->data_dir . "/pg_upgrade_output.d");
+
+# --check command works here, cleans up pg_upgrade_output.d.
+command_ok(
+	[
+		'pg_upgrade', '--no-sync',        '-d', $oldnode->data_dir,
+		'-D',         $newnode->data_dir, '-b', $oldbindir,
+		'-B',         $newbindir,         '-p', $oldnode->port,
+		'-P',         $newnode->port,     '--check'
+	],
+	'run of pg_upgrade --check for new instance');
+ok( !-d $newnode->data_dir . "/pg_upgrade_output.d",
+	"pg_upgrade_output.d/ not removed after pg_upgrade --check success");
+
+# Actual run, pg_upgrade_output.d is removed at the end.
 command_ok(
 	[
 		'pg_upgrade', '--no-sync',        '-d', $oldnode->data_dir,
@@ -221,6 +255,9 @@ command_ok(
 		'-P',         $newnode->port
 	],
 	'run of pg_upgrade for new instance');
+ok( !-d $newnode->data_dir . "/pg_upgrade_output.d",
+	"pg_upgrade_output.d/ removed after pg_upgrade success");
+
 $newnode->start;
 
 # Check if there are any logs coming from pg_upgrade, that would only be
diff --git a/src/bin/pg_upgrade/util.c b/src/bin/pg_upgrade/util.c
index 1a328b4270..f25e14c321 100644
--- a/src/bin/pg_upgrade/util.c
+++ b/src/bin/pg_upgrade/util.c
@@ -55,6 +55,48 @@ end_progress_output(void)
 		pg_log(PG_REPORT, "%-*s", MESSAGE_WIDTH, "");
 }
 
+/*
+ * Remove any logs generated internally.  To be used once when exiting.
+ */
+void
+cleanup_output_dirs(void)
+{
+	fclose(log_opts.internal);
+
+	/* Remove dump and log files? */
+	if (log_opts.retain)
+		return;
+
+	(void) rmtree(log_opts.basedir, true);
+
+	/* Remove pg_upgrade_output.d only if empty */
+	switch (pg_check_dir(log_opts.rootdir))
+	{
+		case 0:					/* non-existent */
+		case 3:					/* exists and contains a mount point */
+			Assert(false);
+			break;
+
+		case 1:					/* exists and empty */
+		case 2:					/* exists and contains only dot files */
+			(void) rmtree(log_opts.rootdir, true);
+			break;
+
+		case 4:					/* exists */
+
+			/*
+			 * Keep the root directory as this includes some past log
+			 * activity.
+			 */
+			break;
+
+		default:
+			/* different failure, just report it */
+			pg_log(PG_WARNING, "could not access directory \"%s\": %m",
+				   log_opts.rootdir);
+			break;
+	}
+}
 
 /*
  * prep_status
diff --git a/doc/src/sgml/ref/pgupgrade.sgml b/doc/src/sgml/ref/pgupgrade.sgml
index 8cda8d16d1..5b6d92c2f8 100644
--- a/doc/src/sgml/ref/pgupgrade.sgml
+++ b/doc/src/sgml/ref/pgupgrade.sgml
@@ -768,7 +768,9 @@ psql --username=postgres --file=script.sql postgres
   <para>
    <application>pg_upgrade</application> creates various working files, such
    as schema dumps, stored within <literal>pg_upgrade_output.d</literal> in
-   the directory of the new cluster.
+   the directory of the new cluster. Each run creates a new subdirectory named
+   with a timestamp, as of <literal>%Y%m%d_%H%M%S.{milli_seconds}</literal>
+   where all the generated files are stored.
   </para>
 
   <para>

Attachment: signature.asc
Description: PGP signature

Reply via email to