Hi,

Am Mittwoch, den 26.12.2018, 11:14 +0900 schrieb Michael Paquier:
> On Tue, Dec 25, 2018 at 07:05:30PM +0100, Fabien COELHO wrote:
> > You use 1024² bytes. What about 1000000 bytes per MB, as the
> > unit is about stored files?

I haven't changed that as Ihave not been pointed to a clear project
guideline that 1 MB should be 1000000 and not 1024x1024.
 
> > Also, you did not answer to my other points:
> >  - use "instr_time.h" for better precision

Both pg_rewind and pg_basebackup are using time() as well, so I think
there's precedence for this.

> >  - invert sizeonly

The current way is consistent with src/backend/replication/basebackup.c
so I'd prefer to keep it that way.

> >  - reuse a test

I've removed the test. 

> It seems to me that the SIGUSR1 business is not necessary for the
> basic layer of this feature, so I would rather rip that off now.  

But is it gaining all so much if it is ripped out? The patch will be a
dozen lines shorter, but those are all isolated.

And contrary to pg_basebackup, pg_verify_checksums might be run in the
foreground much more often - right now it has to be done while the
cluster is offline, so if you have large instance and you forgot to pass
-P then you're glad you can signal it so you roughly know when you can
expect to startup the instance again.

> If necessary we could always discuss that later on.  

If you insist we can rip it out, of course.

> My take about the option is that --progress should not be the default, 
> but that reports should only be provided if the caller wants them.
>
> --quiet may have some value by itself, but that seems like a separate
> discussion to me.

Right now --progress isn't default so I think that's how you like it?
        
> +   /*
> +    * If we are reporting to a terminal, send a carriage return so that we
> +    * stay on the same line.  If not, send a newline.
> +    */
> +   if (isatty(fileno(stderr)))
> +       fprintf(stderr, "\r");
> +   else
> +       fprintf(stderr, "\n");
> This bit is not really elegant, why not just '\r'?

I've changed it as Alvaro suggested.

> +   /* The same for total size */
> +   if (current_size > total_size)
> +       total_size = current_size / 1048576;
> Let's use that in a variable and not hardcode the number.

Done so, I called it MEGABYTES.

> What's introduced here is very similar to what progress_report() does
> in pg_rewind/logging.c.  The report depends on the context so this
> cannot be a common routine logic but perhaps we could keep a
> consistent output for both tools?

Well, pg_rewind is also using kB, so should I switch it back to that? 

It looks like the progress reporting is otherwise quite similar, so what
exactly did you have in mind?

New patch attached.


Michael

-- 
Michael Banck
Projektleiter / Senior Berater
Tel.: +49 2166 9901-171
Fax:  +49 2166 9901-100
Email: michael.ba...@credativ.de

credativ GmbH, HRB Mönchengladbach 12080
USt-ID-Nummer: DE204566209
Trompeterallee 108, 41189 Mönchengladbach
Geschäftsführung: Dr. Michael Meskes, Jörg Folz, Sascha Heuer

Unser Umgang mit personenbezogenen Daten unterliegt
folgenden Bestimmungen: https://www.credativ.de/datenschutz
diff --git a/doc/src/sgml/ref/pg_verify_checksums.sgml b/doc/src/sgml/ref/pg_verify_checksums.sgml
index 905b8f1222..b470c19c08 100644
--- a/doc/src/sgml/ref/pg_verify_checksums.sgml
+++ b/doc/src/sgml/ref/pg_verify_checksums.sgml
@@ -80,6 +80,19 @@ PostgreSQL documentation
      </varlistentry>
 
      <varlistentry>
+      <term><option>-P</option></term>
+      <term><option>--progress</option></term>
+      <listitem>
+       <para>
+        Enable progress reporting. Turning this on will deliver an approximate
+        progress report during the checksum verification. Sending the
+        <systemitem>SIGUSR1</systemitem> signal will toggle progress reporting
+        on or off during the verification run (not available on Windows).
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
        <term><option>-V</option></term>
        <term><option>--version</option></term>
        <listitem>
diff --git a/src/bin/pg_verify_checksums/pg_verify_checksums.c b/src/bin/pg_verify_checksums/pg_verify_checksums.c
index 511262ab5f..9705f7dd57 100644
--- a/src/bin/pg_verify_checksums/pg_verify_checksums.c
+++ b/src/bin/pg_verify_checksums/pg_verify_checksums.c
@@ -10,6 +10,8 @@
 #include "postgres_fe.h"
 
 #include <dirent.h>
+#include <signal.h>
+#include <time.h>
 #include <sys/stat.h>
 #include <unistd.h>
 
@@ -30,9 +32,18 @@ static ControlFileData *ControlFile;
 
 static char *only_relfilenode = NULL;
 static bool verbose = false;
+static bool show_progress = false;
 
 static const char *progname;
 
+/*
+ * Progress status information.
+ */
+int64		total_size = 0;
+int64		current_size = 0;
+pg_time_t	last_progress_update;
+pg_time_t	scan_started;
+
 static void
 usage(void)
 {
@@ -43,6 +54,7 @@ usage(void)
 	printf(_(" [-D, --pgdata=]DATADIR  data directory\n"));
 	printf(_("  -v, --verbose          output verbose messages\n"));
 	printf(_("  -r RELFILENODE         check only relation with specified relfilenode\n"));
+	printf(_("  -P, --progress         show progress information\n"));
 	printf(_("  -V, --version          output version information, then exit\n"));
 	printf(_("  -?, --help             show this help, then exit\n"));
 	printf(_("\nIf no data directory (DATADIR) is specified, "
@@ -67,6 +79,65 @@ static const char *const skip[] = {
 	NULL,
 };
 
+static void
+toggle_progress_report(int signum)
+{
+
+	/* we handle SIGUSR1 only, and toggle the value of show_progress */
+	if (signum == SIGUSR1)
+		show_progress = !show_progress;
+
+}
+
+/*
+ * Report current progress status. Parts borrowed from
+ * PostgreSQLs' src/bin/pg_basebackup.c
+ */
+static void
+report_progress(bool force)
+{
+	pg_time_t	now = time(NULL);
+	int			total_percent = 0;
+
+	char		totalstr[32];
+	char		currentstr[32];
+	char		currspeedstr[32];
+
+	/* Make sure we report at most once a second */
+	if ((now == last_progress_update) && !force)
+		return;
+
+	/* Save current time */
+	last_progress_update = now;
+
+#define MEGABYTES 1048576
+
+	/*
+	 * Calculate current percent done, based on MB...
+	 */
+	total_percent = total_size ? (int64) ((current_size / MEGABYTES) * 100 / (total_size / MEGABYTES)) : 0;
+
+	/* Don't display larger than 100% */
+	if (total_percent > 100)
+		total_percent = 100;
+
+	/* The same for total size */
+	if (current_size > total_size)
+		total_size = current_size / MEGABYTES;
+
+	snprintf(totalstr, sizeof(totalstr), INT64_FORMAT,
+			 total_size / MEGABYTES);
+	snprintf(currentstr, sizeof(currentstr), INT64_FORMAT,
+			 current_size / MEGABYTES);
+	snprintf(currspeedstr, sizeof(currspeedstr), INT64_FORMAT,
+			 (current_size / MEGABYTES) / (((time(NULL) - scan_started) == 0) ? 1 : (time(NULL) - scan_started)));
+	fprintf(stderr, "%s/%s MB (%d%%, %s MB/s)",
+			currentstr, totalstr, total_percent, currspeedstr);
+
+	/* Stay on the same line if reporting to a terminal */
+	fprintf(stderr, isatty(fileno(stderr)) ? "\r" : "\n");
+}
+
 static bool
 skipfile(const char *fn)
 {
@@ -117,6 +188,7 @@ scan_file(const char *fn, BlockNumber segmentno)
 			continue;
 
 		csum = pg_checksum_page(buf.data, blockno + segmentno * RELSEG_SIZE);
+		current_size += r;
 		if (csum != header->pd_checksum)
 		{
 			if (ControlFile->data_checksum_version == PG_DATA_CHECKSUM_VERSION)
@@ -124,6 +196,9 @@ scan_file(const char *fn, BlockNumber segmentno)
 						progname, fn, blockno, csum, header->pd_checksum);
 			badblocks++;
 		}
+
+		if (show_progress)
+			report_progress(false);
 	}
 
 	if (verbose)
@@ -133,9 +208,10 @@ scan_file(const char *fn, BlockNumber segmentno)
 	close(f);
 }
 
-static void
-scan_directory(const char *basedir, const char *subdir)
+static int64
+scan_directory(const char *basedir, const char *subdir, bool sizeonly)
 {
+	int64		dirsize = 0;
 	char		path[MAXPGPATH];
 	DIR		   *dir;
 	struct dirent *de;
@@ -167,7 +243,7 @@ scan_directory(const char *basedir, const char *subdir)
 		if (strncmp(de->d_name,
 					PG_TEMP_FILES_DIR,
 					strlen(PG_TEMP_FILES_DIR)) == 0)
-			return;
+			continue;
 
 		snprintf(fn, sizeof(fn), "%s/%s", path, de->d_name);
 		if (lstat(fn, &st) < 0)
@@ -214,16 +290,22 @@ scan_directory(const char *basedir, const char *subdir)
 				/* Relfilenode not to be included */
 				continue;
 
-			scan_file(fn, segmentno);
+			dirsize += st.st_size;
+
+			if (!sizeonly)
+			{
+				scan_file(fn, segmentno);
+			}
 		}
 #ifndef WIN32
 		else if (S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode))
 #else
 		else if (S_ISDIR(st.st_mode) || pgwin32_is_junction(fn))
 #endif
-			scan_directory(path, de->d_name);
+			dirsize += scan_directory(path, de->d_name, sizeonly);
 	}
 	closedir(dir);
+	return dirsize;
 }
 
 int
@@ -231,6 +313,7 @@ main(int argc, char *argv[])
 {
 	static struct option long_options[] = {
 		{"pgdata", required_argument, NULL, 'D'},
+		{"progress", no_argument, NULL, 'P'},
 		{"verbose", no_argument, NULL, 'v'},
 		{NULL, 0, NULL, 0}
 	};
@@ -258,7 +341,7 @@ main(int argc, char *argv[])
 		}
 	}
 
-	while ((c = getopt_long(argc, argv, "D:r:v", long_options, &option_index)) != -1)
+	while ((c = getopt_long(argc, argv, "D:r:vP", long_options, &option_index)) != -1)
 	{
 		switch (c)
 		{
@@ -276,6 +359,9 @@ main(int argc, char *argv[])
 				}
 				only_relfilenode = pstrdup(optarg);
 				break;
+			case 'P':
+				show_progress = true;
+				break;
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
 				exit(1);
@@ -329,10 +415,44 @@ main(int argc, char *argv[])
 		exit(1);
 	}
 
+
+#ifndef WIN32
+	/*
+	 * Assign SIGUSR1 signal handler to toggle progress status information
+	 */
+	pqsignal(SIGUSR1, toggle_progress_report);
+#endif
+
+	/*
+	 * As progress status information may be requested, we need to scan the
+	 * directory tree(s) twice, once to get the idea how much data we need to
+	 * scan and finally to do the real legwork.
+	 */
+	total_size = scan_directory(DataDir, "global", true);
+	total_size += scan_directory(DataDir, "base", true);
+	total_size += scan_directory(DataDir, "pg_tblspc", true);
+
+	/*
+	 * Remember start time. Required to calculate the current speed in
+	 * report_progress()
+	 */
+	scan_started = time(NULL);
+
 	/* Scan all files */
-	scan_directory(DataDir, "global");
-	scan_directory(DataDir, "base");
-	scan_directory(DataDir, "pg_tblspc");
+	scan_directory(DataDir, "global", false);
+	scan_directory(DataDir, "base", false);
+	scan_directory(DataDir, "pg_tblspc", false);
+
+	/*
+	 * Done. Move to next line in case progress information was shown.
+	 * Otherwise we clutter the summary output.
+	 */
+	if (show_progress)
+	{
+		report_progress(true);
+		if (isatty(fileno(stderr)))
+			fprintf(stderr, "\n");
+	}
 
 	printf(_("Checksum scan completed\n"));
 	printf(_("Data checksum version: %d\n"), ControlFile->data_checksum_version);

Reply via email to