Re: [HACKERS] Re: [COMMITTERS] pgsql: Remove pgbench "progress" test pending solution of its timing is (fwd)

Fabien COELHO Mon, 25 Mar 2019 11:14:03 -0700

I pushed the refactoring now, to extract the progress reporting to a separate function. That seems like an improvement independently from the rest of the patch.


Sure.

I'll take another look at the rest, probably tomorrow.


Ok.

Attached is the remainder of the patch rebased to current head.

--
Fabien.

diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index b1afe44817..6a4c7396c9 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -6051,6 +6051,7 @@ threadRun(void *arg)
 	int			nstate = thread->nstate;
 	int			remains = nstate;	/* number of remaining clients */
 	socket_set *sockets = alloc_socket_set(nstate);
+	int			aborted = 0;
 	int			i;
 
 	/* for reporting progress: */
@@ -6280,6 +6281,10 @@ threadRun(void *arg)
 			 */
 			if (st->state == CSTATE_FINISHED || st->state == CSTATE_ABORTED)
 				remains--;
+
+			/* count aborted clients */
+			if (st->state == CSTATE_ABORTED)
+				aborted++;
 		}
 
 		/* progress report is made by thread 0 for all threads */
@@ -6296,7 +6301,10 @@ threadRun(void *arg)
 
 				/*
 				 * Ensure that the next report is in the future, in case
-				 * pgbench/postgres got stuck somewhere.
+				 * pgbench/postgres got stuck somewhere. This may skip
+				 * some progress reports if the thread does not get enough
+				 * cpu time. In such case, probably the whole bench should
+				 * be ignored.
 				 */
 				do
 				{
@@ -6307,6 +6315,52 @@ threadRun(void *arg)
 	}
 
 done:
+	/*
+	 * Under -R, comply with -T and -P even if there is nothing to do,
+	 * (unless all clients aborted) and ensure that one report is printed.
+	 * This special behavior allows tap tests to check that a run lasts
+	 * as expected and that some progress is shown, even on very slow hosts.
+	 */
+	if (duration && throttle_delay && aborted < nstate && thread->tid == 0)
+	{
+		int64		thread_end = thread_start + (int64) duration * 1000000;
+		instr_time	now_time;
+		int64		now;
+
+		INSTR_TIME_SET_CURRENT(now_time);
+		now = INSTR_TIME_GET_MICROSEC(now_time);
+
+		while (now < thread_end)
+		{
+			if (progress && next_report <= thread_end)
+			{
+				pg_usleep(next_report - now);
+				INSTR_TIME_SET_CURRENT(now_time);
+				now = INSTR_TIME_GET_MICROSEC(now_time);
+				printProgressReport(thread, thread_start, now, &last, &last_report);
+
+				/* schedule next report */
+				do
+				{
+					next_report += (int64) progress * 1000000;
+				} while (now >= next_report);
+			}
+			else
+			{
+				pg_usleep(thread_end - now);
+				INSTR_TIME_SET_CURRENT(now_time);
+				now = INSTR_TIME_GET_MICROSEC(now_time);
+			}
+		}
+
+		/*
+		 * Print a closing progress report if none were printed
+		 * and at least one was expected.
+		 */
+		if (progress && progress <= duration && last_report == thread_start)
+			printProgressReport(thread, thread_start, now, &last, &last_report);
+	}
+
 	INSTR_TIME_SET_CURRENT(start);
 	disconnect_all(state, nstate);
 	INSTR_TIME_SET_CURRENT(end);
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index ad3a7a79f8..95e227ec7b 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -4,13 +4,14 @@ use warnings;
 use PostgresNode;
 use TestLib;
 use Test::More;
+use Time::HiRes qw{time};
 
 # start a pgbench specific server
 my $node = get_new_node('main');
 $node->init;
 $node->start;
 
-# invoke pgbench, with parameters:
+# invoke pgbench, return elapsed time, with parameters:
 #   $opts: options as a string to be split on spaces
 #   $stat: expected exit status
 #   $out: reference to a regexp list that must match stdout
@@ -49,13 +50,14 @@ sub pgbench
 	}
 
 	push @cmd, @args;
+	my $t0 = time();
 
 	$node->command_checks_all(\@cmd, $stat, $out, $err, $name);
 
 	# cleanup?
 	#unlink @filenames or die "cannot unlink files (@filenames): $!";
 
-	return;
+	return time() - $t0;
 }
 
 # Test concurrent insertion into table with serial column.  This
@@ -894,7 +896,45 @@ sub check_pgbench_logs
 
 my $bdir = $node->basedir;
 
-# with sampling rate
+# The point of this test is coverage of various time-related features
+# (-T, -P, --aggregate-interval, --rate, --latency-limit...), so it is
+# somehow time sensitive.
+# The checks performed are quite loose so as to still pass even under
+# degraded (high load, slow host, valgrind run) testing conditions.
+# The main point is to provide coverage, most of the time.
+my $elapsed = pgbench(
+	"-T 2 -P 1 -l --aggregate-interval=1"
+	  . ' -S -b se@2 --rate=20 --latency-limit=1000 -j ' . $nthreads
+	  . ' -c 3 -r',
+	0,
+	[   qr{type: multiple},
+		qr{clients: 3},
+		qr{threads: $nthreads},
+		# the shown duration is really -T argument value
+		qr{duration: 2 s},
+		qr{script 1: .* select only},
+		qr{script 2: .* select only},
+		qr{statement latencies in milliseconds},
+		qr{FROM pgbench_accounts} ],
+	[	qr{vacuum},
+		qr{progress: \d\b} ],
+	'pgbench progress', undef,
+	"--log-prefix=$bdir/001_pgbench_log_1");
+
+# this depends on pgbench to avoid a shortcut under --rate
+# if there is nothing to do.
+ok($elapsed >= 2.0, "-T 2 leads to at least 2 second run");
+
+# if the test has gone AWOL, coldly skip these detailed checks.
+if (abs($elapsed - 2.0) < 0.5)
+{
+	# $nthreads threads, 2 seconds, but due to timing imprecision we might get
+	# only 1 or as many as 3 progress reports per thread.
+	check_pgbench_logs($bdir, '001_pgbench_log_1', $nthreads, 1, 3,
+		qr{^\d+ \d{1,2} \d+ \d+ \d+ \d+ \d+ \d+ \d+ \d+ \d+$});
+}
+
+# with sampling rate, not time sensitive
 pgbench(
 	"-n -S -t 50 -c 2 --log --sampling-rate=0.5",
 	0,
@@ -904,6 +944,8 @@ pgbench(
 	undef,
 	"--log-prefix=$bdir/001_pgbench_log_2");
 
+# random 50% of 2*50 transactions, accept between 8 and 92
+# probability of failure is about 2 * 2^-42 (?)
 check_pgbench_logs($bdir, '001_pgbench_log_2', 1, 8, 92,
 	qr{^0 \d{1,2} \d+ \d \d+ \d+$});

Re: [HACKERS] Re: [COMMITTERS] pgsql: Remove pgbench "progress" test pending solution of its timing is (fwd)

Reply via email to