On Thu, Mar 28, 2019 at 03:53:59PM +0100, Fabien COELHO wrote: > I think that it is good to show the overall impact of the signal stuff, in > particular the fact that the size must always be computed if the progress > may be activated.
Getting to know the total size and the current size are the two important factors that matter when it comes to do progress reporting in my opinion. I have read the patch, and I am not really convinced by the need to show the progress report based on an interval of 250ms as we talk about an operation which could take dozens of minutes. So I have simplified the patch to only show a progress report every second. This also removes the include for the time-related APIs from portability/. A second thing is that I don't think that the speed is much useful. I would expect the speed to be steady, still there is a risk to show incorrect information if the speed of the operation is spiky or irregular leading to an incorrect estimation of the remaining time. In short, I would like to commit the first patch as attached, which is much more simple than what has been sent previously, still it provides the progress information which is useful. -- Michael
diff --git a/doc/src/sgml/ref/pg_checksums.sgml b/doc/src/sgml/ref/pg_checksums.sgml
index 70339eaec9..29507ff98e 100644
--- a/doc/src/sgml/ref/pg_checksums.sgml
+++ b/doc/src/sgml/ref/pg_checksums.sgml
@@ -135,6 +135,17 @@ PostgreSQL documentation
</listitem>
</varlistentry>
+ <varlistentry>
+ <term><option>-P</option></term>
+ <term><option>--progress</option></term>
+ <listitem>
+ <para>
+ Enable progress reporting. Turning this on will deliver a progress
+ report while checking or enabling checksums.
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry>
<term><option>-V</option></term>
<term><option>--version</option></term>
diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c
index 5c185ebed8..f9903a441c 100644
--- a/src/bin/pg_checksums/pg_checksums.c
+++ b/src/bin/pg_checksums/pg_checksums.c
@@ -15,6 +15,7 @@
#include "postgres_fe.h"
#include <dirent.h>
+#include <time.h>
#include <sys/stat.h>
#include <unistd.h>
@@ -37,6 +38,7 @@ static ControlFileData *ControlFile;
static char *only_relfilenode = NULL;
static bool do_sync = true;
static bool verbose = false;
+static bool showprogress = false;
typedef enum
{
@@ -59,6 +61,13 @@ static PgChecksumMode mode = PG_MODE_CHECK;
static const char *progname;
+/*
+ * Progress status information.
+ */
+int64 total_size = 0;
+int64 current_size = 0;
+static pg_time_t last_progress_report = 0;
+
static void
usage(void)
{
@@ -71,6 +80,7 @@ usage(void)
printf(_(" -d, --disable disable data checksums\n"));
printf(_(" -e, --enable enable data checksums\n"));
printf(_(" -N, --no-sync do not wait for changes to be written safely to disk\n"));
+ printf(_(" -P, --progress show progress information\n"));
printf(_(" -v, --verbose output verbose messages\n"));
printf(_(" -r RELFILENODE check only relation with specified relfilenode\n"));
printf(_(" -V, --version output version information, then exit\n"));
@@ -97,6 +107,52 @@ static const char *const skip[] = {
NULL,
};
+/*
+ * Report current progress status. Parts borrowed from
+ * PostgreSQLs' src/bin/pg_basebackup.c
+ */
+static void
+progress_report(bool force)
+{
+ int percent;
+ char total_size_str[32];
+ char current_size_str[32];
+ pg_time_t now;
+
+ if (!showprogress)
+ return;
+
+ now = time(NULL);
+ if (now == last_progress_report && !force)
+ return; /* Max once per second */
+
+ /* Save current time */
+ last_progress_report = now;
+
+ /* Adjust total size if current_size is larger */
+ if (current_size > total_size)
+ total_size = current_size;
+
+ /* Calculate current percentage of size done */
+ percent = total_size ? (int) ((current_size) * 100 / total_size) : 0;
+
+ snprintf(total_size_str, sizeof(total_size_str), INT64_FORMAT,
+ total_size / (1024 * 1024));
+ snprintf(current_size_str, sizeof(current_size_str), INT64_FORMAT,
+ current_size / (1024 * 1024));
+
+ /*
+ * Separate step to keep platform-dependent format code out of
+ * translatable strings. And we only test for INT64_FORMAT availability
+ * in snprintf, not fprintf.
+ */
+ fprintf(stderr, "%*s/%s MB (%d%%) done",
+ (int) strlen(current_size_str), current_size_str, total_size_str,
+ percent);
+
+ fprintf(stderr, "\r");
+}
+
static bool
skipfile(const char *fn)
{
@@ -153,6 +209,7 @@ scan_file(const char *fn, BlockNumber segmentno)
continue;
csum = pg_checksum_page(buf.data, blockno + segmentno * RELSEG_SIZE);
+ current_size += r;
if (mode == PG_MODE_CHECK)
{
if (csum != header->pd_checksum)
@@ -183,6 +240,8 @@ scan_file(const char *fn, BlockNumber segmentno)
exit(1);
}
}
+
+ progress_report(false);
}
if (verbose)
@@ -195,12 +254,23 @@ scan_file(const char *fn, BlockNumber segmentno)
_("%s: checksums enabled in file \"%s\"\n"), progname, fn);
}
+ /* Make sure progress is reported at least once per file */
+ progress_report(false);
+
close(f);
}
-static void
-scan_directory(const char *basedir, const char *subdir)
+/*
+ * Scan the given directory for items which can be checksummed and
+ * operate on each one of them. If "sizeonly" is true, the size of
+ * all the items which have checksums is computed and returned back
+ * to the caller without operating on the files. This is used to compile
+ * the total size of the data directory for progress reports.
+ */
+static int64
+scan_directory(const char *basedir, const char *subdir, bool sizeonly)
{
+ int64 dirsize = 0;
char path[MAXPGPATH];
DIR *dir;
struct dirent *de;
@@ -279,16 +349,24 @@ scan_directory(const char *basedir, const char *subdir)
/* Relfilenode not to be included */
continue;
- scan_file(fn, segmentno);
+ dirsize += st.st_size;
+
+ /*
+ * No need to work on the file when calculating only the size
+ * of the items in the data folder.
+ */
+ if (!sizeonly)
+ scan_file(fn, segmentno);
}
#ifndef WIN32
else if (S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode))
#else
else if (S_ISDIR(st.st_mode) || pgwin32_is_junction(fn))
#endif
- scan_directory(path, de->d_name);
+ dirsize += scan_directory(path, de->d_name, sizeonly);
}
closedir(dir);
+ return dirsize;
}
int
@@ -300,6 +378,7 @@ main(int argc, char *argv[])
{"disable", no_argument, NULL, 'd'},
{"enable", no_argument, NULL, 'e'},
{"no-sync", no_argument, NULL, 'N'},
+ {"progress", no_argument, NULL, 'P'},
{"verbose", no_argument, NULL, 'v'},
{NULL, 0, NULL, 0}
};
@@ -327,7 +406,7 @@ main(int argc, char *argv[])
}
}
- while ((c = getopt_long(argc, argv, "cD:deNr:v", long_options, &option_index)) != -1)
+ while ((c = getopt_long(argc, argv, "cD:deNPr:v", long_options, &option_index)) != -1)
{
switch (c)
{
@@ -357,6 +436,9 @@ main(int argc, char *argv[])
}
only_relfilenode = pstrdup(optarg);
break;
+ case 'P':
+ showprogress = true;
+ break;
default:
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
exit(1);
@@ -436,6 +518,18 @@ main(int argc, char *argv[])
exit(1);
}
+ /*
+ * If progress status information is requested, we need to scan the
+ * directory tree twice: once to know how much total data needs to
+ * be processed and once to do the real work.
+ */
+ if (showprogress)
+ {
+ total_size = scan_directory(DataDir, "global", true);
+ total_size += scan_directory(DataDir, "base", true);
+ total_size += scan_directory(DataDir, "pg_tblspc", true);
+ }
+
if (ControlFile->data_checksum_version == 0 &&
mode == PG_MODE_DISABLE)
{
@@ -453,9 +547,15 @@ main(int argc, char *argv[])
/* Operate on all files if checking or enabling checksums */
if (mode == PG_MODE_CHECK || mode == PG_MODE_ENABLE)
{
- scan_directory(DataDir, "global");
- scan_directory(DataDir, "base");
- scan_directory(DataDir, "pg_tblspc");
+ (void) scan_directory(DataDir, "global", false);
+ (void) scan_directory(DataDir, "base", false);
+ (void) scan_directory(DataDir, "pg_tblspc", false);
+
+ if (showprogress)
+ {
+ progress_report(true);
+ fprintf(stderr, "\n"); /* Need to move to next line */
+ }
printf(_("Checksum operation completed\n"));
printf(_("Files scanned: %s\n"), psprintf(INT64_FORMAT, files));
signature.asc
Description: PGP signature
