Re: pgbench logging broken by time logic changes

Fabien COELHO Sat, 10 Jul 2021 02:36:36 -0700


Hello again,

I hoped we were done here but I realised that your check for 1-3 log
lines will not survive the harsh environment of the build farm.
Adding sleep(2) before the final doLog() confirms that.  I had two
ideas:

So I think we should do 1 for now.  Objections or better ideas?


At least, we know that it is too much.


I misread your point. You think that it should fail, but it is not
tried yet. I'm rather optimistic that it should not fail, but I'm okay
with averting the risk anyway.

What about moving the test as is into the TODO section with a comment, next to
the other one, for now?

I stand by this solution, which should allow us to get some data from the
field, as in v18, attached. If all is green then the TODO could be removed
later.


--
Fabien.

diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index 4aeccd93af..a54958930b 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -343,6 +343,12 @@ typedef struct StatsData
 	SimpleStats lag;
 } StatsData;
 
+/*
+ * For displaying Unix epoch timestamps, as some time functions may have
+ * another reference.
+ */
+pg_time_usec_t epoch_shift;
+
 /*
  * Struct to keep random state.
  */
@@ -3771,7 +3777,11 @@ executeMetaCommand(CState *st, pg_time_usec_t *now)
 /*
  * Print log entry after completing one transaction.
  *
- * We print Unix-epoch timestamps in the log, so that entries can be
+ * The function behavior changes depending on sample_rate (a fraction of
+ * transaction is reported) and agg_interval (transactions are aggregated
+ * and reported once every agg_interval seconds).
+ *
+ * We use Unix-epoch timestamps in the log, so that entries can be
  * correlated against other logs.  On some platforms this could be obtained
  * from the caller, but rather than get entangled with that, we just eat
  * the cost of an extra syscall in all cases.
@@ -3781,7 +3791,7 @@ doLog(TState *thread, CState *st,
 	  StatsData *agg, bool skipped, double latency, double lag)
 {
 	FILE	   *logfile = thread->logfile;
-	pg_time_usec_t now = pg_time_now();
+	pg_time_usec_t now = pg_time_now() + epoch_shift;
 
 	Assert(use_log);
 
@@ -3796,17 +3806,19 @@ doLog(TState *thread, CState *st,
 	/* should we aggregate the results or not? */
 	if (agg_interval > 0)
 	{
+		pg_time_usec_t next;
+
 		/*
 		 * Loop until we reach the interval of the current moment, and print
 		 * any empty intervals in between (this may happen with very low tps,
 		 * e.g. --rate=0.1).
 		 */
 
-		while (agg->start_time + agg_interval <= now)
+		while ((next = agg->start_time + agg_interval * INT64CONST(1000000)) <= now)
 		{
 			/* print aggregated report to logfile */
 			fprintf(logfile, INT64_FORMAT " " INT64_FORMAT " %.0f %.0f %.0f %.0f",
-					agg->start_time,
+					agg->start_time / 1000000,	/* seconds since Unix epoch */
 					agg->cnt,
 					agg->latency.sum,
 					agg->latency.sum2,
@@ -3825,7 +3837,7 @@ doLog(TState *thread, CState *st,
 			fputc('\n', logfile);
 
 			/* reset data and move to next interval */
-			initStats(agg, agg->start_time + agg_interval);
+			initStats(agg, next);
 		}
 
 		/* accumulate the current transaction */
@@ -5458,7 +5470,8 @@ printProgressReport(TState *threads, int64 test_start, pg_time_usec_t now,
 
 	if (progress_timestamp)
 	{
-		snprintf(tbuf, sizeof(tbuf), "%.3f s", PG_TIME_GET_DOUBLE(now));
+		snprintf(tbuf, sizeof(tbuf), "%.3f s",
+				 PG_TIME_GET_DOUBLE(now + epoch_shift));
 	}
 	else
 	{
@@ -5808,6 +5821,15 @@ main(int argc, char **argv)
 	char	   *env;
 
 	int			exit_code = 0;
+	struct timeval tv;
+
+	/*
+	 * Record difference between Unix epoch and high resolution timer's epoch.
+	 * We'll use this for logging and aggregation with Unix epoch-based
+	 * buckets.
+	 */
+	gettimeofday(&tv, NULL);
+	epoch_shift = tv.tv_sec * INT64CONST(1000000) + tv.tv_usec - pg_time_now();
 
 	pg_logging_init(argv[0]);
 	progname = get_progname(argv[0]);
@@ -6637,7 +6659,14 @@ threadRun(void *arg)
 	thread->bench_start = start;
 	thread->throttle_trigger = start;
 
-	initStats(&aggs, start);
+	/*
+	 * The log format currently has Unix epoch timestamps with whole numbers
+	 * of seconds.  Round the first aggregate's start time down to the nearest
+	 * Unix epoch second (the very first aggregate might really have started a
+	 * fraction of a second later, but later aggregates are measured from the
+	 * whole number time that is actually logged).
+	 */
+	initStats(&aggs, (start + epoch_shift) / 1000000 * 1000000);
 	last = aggs;
 
 	/* loop till all clients have terminated */
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index 3aa9d5d753..d674cc59a5 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -8,6 +8,7 @@ use PostgresNode;
 use TestLib;
 use Test::More;
 use Config;
+use Time::HiRes qw(time);
 
 # start a pgbench specific server
 my $node = get_new_node('main');
@@ -54,12 +55,14 @@ sub pgbench
 
 	push @cmd, @args;
 
+	my $start = time();
 	$node->command_checks_all(\@cmd, $stat, $out, $err, $name);
+	my $stop = time();
 
 	# cleanup?
 	#unlink @filenames or die "cannot unlink files (@filenames): $!";
 
-	return;
+	return $stop - $start;
 }
 
 # tablespace for testing, because partitioned tables cannot use pg_default
@@ -1187,7 +1190,7 @@ sub check_pgbench_logs
 
 	# $prefix is simple enough, thus does not need escaping
 	my @logs = list_files($dir, qr{^$prefix\..*$});
-	ok(@logs == $nb, "number of log files");
+	ok(@logs == $nb, "number of log files (@logs)");
 	ok(grep(/\/$prefix\.\d+(\.\d+)?$/, @logs) == $nb, "file name format");
 
 	my $log_number = 0;
@@ -1219,7 +1222,58 @@ sub check_pgbench_logs
 
 my $bdir = $node->basedir;
 
-# Run with sampling rate, 2 clients with 50 transactions each.
+TODO: {
+	#
+	# Test time-sensitive features on a light read-only transaction
+	#
+	local $TODO = "possibly unreliable on slow hosts or unlucky runs";
+
+	# Run with sampling rate, 2 clients with 50 transactions each.
+	#
+	#   -T: bench duration, 2 seconds to exercise progress & logs
+	#   -P: progress report
+	#   --aggregate-interval: periodic aggregated logs
+	#   --rate: schedule load
+	#   --latency-limit: max delay, not deeply exercised
+	#
+	# note: the --rate behavior is probabilistic in nature.
+	# note: --progress-timestamp is not tested.
+	my $delay = pgbench(
+		'-T 2 -P 1 -l --aggregate-interval=1 -S -b se@2'
+		. ' --rate=20 --latency-limit=1000 -j ' . $nthreads
+		. ' -c 3 -r',
+		0,
+		[   qr{type: multiple},
+			qr{clients: 3},
+			qr{threads: $nthreads},
+			qr{duration: 2 s},
+			qr{script 1: .* select only},
+			qr{script 2: .* select only},
+			qr{statement latencies in milliseconds},
+			qr{FROM pgbench_accounts} ],
+		[ qr{vacuum}, qr{progress: 1\b} ],
+		'pgbench progress', undef,
+		"--log-prefix=$bdir/001_pgbench_log_1");
+
+	# The rate may result in an unlucky schedule which triggers
+	# an early exit, hence the loose bound.
+
+	# also, the delay may totally fail on very slow or overloaded hosts,
+	# valgrind runs...
+
+	ok(1.5 < $delay && $delay < 2.5, "-T 2 run around 2 seconds");
+}
+
+# $nthreads threads, 2 seconds, but due to timing imprecision we might get
+# only 1 or as many as 3 progress reports per thread.
+# aggregate log format is:
+#   unix_epoch_time #tx sum sum2 min max [sum sum2 min max [skipped]]
+# first series about latency; second about lag (--rate) ;
+# skipped only if --latency-limit is set.
+check_pgbench_logs($bdir, '001_pgbench_log_1', $nthreads, 1, 3,
+	qr{^\d{10,} \d{1,2} \d+ \d+ \d+ \d+ \d+ \d+ \d+ \d+ \d+$});
+
+# with sampling rate, 2 clients with 50 tx each
 pgbench(
 	"-n -S -t 50 -c 2 --log --sampling-rate=0.5", 0,
 	[ qr{select only}, qr{processed: 100/100} ], [qr{^$}],

Re: pgbench logging broken by time logic changes

Reply via email to