Hi Tom,
On various Intel CPUs I got either steps close to single nanosecond or
sometimes a little more on older ones
One specific CPU moved in in 2 tick increments while the ration to ns
was 2,1/1 or 2100 ticks per microsecond.
On Zen4 AMD the step seems to be 10 ns, even though the tick-to-ns
ratio is 2.6 / 1 , so reading ticks directly gives 26, 54, ...
Also, reading directly in ticks on M1 gave "loop time including
overhead: 2.13 ns" (attached code works on Clang, not sure about GCC)
I'll also take a look at the docs and try to propose something
Do we also need tests for this one ?
----
Hannu
On Tue, Jul 2, 2024 at 7:20 PM Tom Lane <[email protected]> wrote:
>
> BTW, getting back to the original point of the thread: I duplicated
> Hannu's result showing that on Apple M1 the clock tick seems to be
> about 40ns. But look at what I got with the v2 patch on my main
> workstation (full output attached):
>
> $ ./pg_test_timing
> ...
> Per loop time including overhead: 16.60 ns
> ...
> Timing durations less than 128 ns:
> ns % of total running % count
> 15 3.2738 3.2738 5914914
> 16 49.0772 52.3510 88668783
> 17 36.4662 88.8172 65884173
> 18 9.5639 98.3810 17279249
> 19 1.5746 99.9556 2844873
> 20 0.0416 99.9972 75125
> 21 0.0004 99.9976 757
> ...
>
> It sure looks like this is exact-to-the-nanosecond results,
> since the modal values match the overall per-loop timing,
> and there are no zero measurements.
>
> This is a Dell tower from 2021, running RHEL8 on an Intel Xeon W-2245.
> Not exactly top-of-the-line stuff.
>
> regards, tom lane
>
/*
* pg_test_timing.c
* tests overhead of timing calls and their monotonicity: that
* they always move forward
*/
#include "postgres_fe.h"
#include <limits.h>
#include "getopt_long.h"
#include "portability/instr_time.h"
#include "port/pg_bitutils.h"
static const char *progname;
static unsigned int test_duration = 3;
static void handle_args(int argc, char *argv[]);
static uint64 test_timing(unsigned int duration);
static void output(uint64 loop_count);
/* record duration in powers of 2 nanoseconds */
long long int histogram[64];
/* record duration up to 64 ticks directly */
long long int direct_histogram[64];
/* Current CPU speed in ticks per second*/
uint64 ticks_per_sec;
int
main(int argc, char *argv[])
{
uint64 loop_count;
set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_test_timing"));
progname = get_progname(argv[0]);
handle_args(argc, argv);
loop_count = test_timing(test_duration);
output(loop_count);
return 0;
}
static void
handle_args(int argc, char *argv[])
{
static struct option long_options[] = {
{"duration", required_argument, NULL, 'd'},
{NULL, 0, NULL, 0}
};
int option; /* Command line option */
int optindex = 0; /* used by getopt_long */
unsigned long optval; /* used for option parsing */
char *endptr;
if (argc > 1)
{
if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
{
printf(_("Usage: %s [-d DURATION]\n"), progname);
exit(0);
}
if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
{
puts("pg_test_timing (PostgreSQL) " PG_VERSION);
exit(0);
}
}
while ((option = getopt_long(argc, argv, "d:",
long_options, &optindex)) != -1)
{
switch (option)
{
case 'd':
errno = 0;
optval = strtoul(optarg, &endptr, 10);
if (endptr == optarg || *endptr != '\0' ||
errno != 0 || optval != (unsigned int) optval)
{
fprintf(stderr, _("%s: invalid argument for option %s\n"),
progname, "--duration");
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
exit(1);
}
test_duration = (unsigned int) optval;
if (test_duration == 0)
{
fprintf(stderr, _("%s: %s must be in range %u..%u\n"),
progname, "--duration", 1, UINT_MAX);
exit(1);
}
break;
default:
fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
progname);
exit(1);
break;
}
}
if (argc > optind)
{
fprintf(stderr,
_("%s: too many command-line arguments (first is \"%s\")\n"),
progname, argv[optind]);
fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
progname);
exit(1);
}
printf(ngettext("Testing timing overhead for %u second.\n",
"Testing timing overhead for %u seconds.\n",
test_duration),
test_duration);
}
inline uint64_t read_tsc(void) {
return __builtin_readcyclecounter();;
}
static uint64
test_timing(unsigned int duration)
{
// uint64 total_time;
int64 time_elapsed = 0;
uint64 loop_count = 0;
uint64 same_count = 0;
uint64 min_diff = UINT64_MAX;
uint64 ticks_elapsed = 0;
uint64 start_tick,
end_tick,
total_ticks,
prev,
cur;
instr_time start_time,
end_time,
temp;
// total_time = duration > 0 ? duration * INT64CONST(1000000000) : 0;
// measure tick timing
INSTR_TIME_SET_CURRENT(start_time);
start_tick = read_tsc();
while (time_elapsed < INT64CONST(1000000000))
{
INSTR_TIME_SET_CURRENT(temp);
INSTR_TIME_SUBTRACT(temp, start_time);
time_elapsed = INSTR_TIME_GET_NANOSEC(temp);
}
end_tick = read_tsc();
INSTR_TIME_SET_CURRENT(end_time);
INSTR_TIME_SUBTRACT(end_time, start_time);
printf(_("Total %ld ticks in %ld ns, %0.6f ticks per ns\n"),
end_tick - start_tick, INSTR_TIME_GET_NANOSEC(end_time), ((double)(end_tick - start_tick)) / INSTR_TIME_GET_DOUBLE(end_time));
ticks_per_sec = (end_tick - start_tick) * 1000000000 / INSTR_TIME_GET_NANOSEC(end_time);
total_ticks = duration > 0 ? duration * ticks_per_sec : 0;
printf("This CPU is running at %ld ticks / second, will run test for %ld ticks\n", ticks_per_sec, total_ticks);
// return 0;
INSTR_TIME_SET_CURRENT(start_time);
cur = start_tick = read_tsc();
while (ticks_elapsed < total_ticks)
{
int64 diff;
int32 bits;
prev = cur;
cur = read_tsc();
diff = cur - prev;
/* Did time go backwards? */
if (unlikely(diff <= 0))
{
if (likely (diff == 0 ))
same_count ++;
else
{
fprintf(stderr, _("Detected clock going backwards in time.\n"));
fprintf(stderr, _("Time warp: %ld ms\n"), diff);
exit(1);
}
}
if (unlikely(min_diff > diff))
min_diff = diff;
if(likely(diff < 64))
direct_histogram[diff]++;
bits = pg_leftmost_one_pos64(diff+1);
histogram[bits]++;
loop_count++;
ticks_elapsed = cur - start_tick;
}
end_tick = read_tsc();
INSTR_TIME_SET_CURRENT(end_time);
INSTR_TIME_SUBTRACT(end_time, start_time);
printf("loop_count %ld", loop_count);
printf(_("Per loop time including overhead: %0.2f ns, min: %ld ticks (%0.1f ns), same: %ld\n"),
INSTR_TIME_GET_DOUBLE(end_time) * 1e9 / loop_count, min_diff, (float) min_diff * 1000000000 / ticks_per_sec , same_count);
printf(_("Total ticks in: %ld, in: %ld nr\n"),
end_tick - start_tick, INSTR_TIME_GET_NANOSEC(end_time) );
return loop_count;
}
static void
output(uint64 loop_count)
{
int64 max_bit = 63,
i;
char *header1 = _("<= ticks");
char *header1a = _("<= ns");
char *header2 = /* xgettext:no-c-format */ _("% of total");
char *header2a = /* xgettext:no-c-format */ _("running %");
char *header3 = _("count");
int len1 = strlen(header1);
int len1a = strlen(header1a);
int len2 = strlen(header2);
int len2a = strlen(header2a);
int len3 = strlen(header3);
float rprct;
/* find highest bit value */
while (max_bit > 0 && histogram[max_bit] == 0)
max_bit--;
printf(_("Log2(x+1) histogram of timing durations:\n"));
printf("%*s (%*s) %*s %*s %*s\n",
Max(8, len1), header1,
Max(8, len1a), header1a,
Max(10, len2), header2,
Max(10, len2a), header2a,
Max(8, len3), header3);
for (i = 0, rprct=0; i <= max_bit; i++) {
rprct += (double) histogram[i] * 100 / loop_count;
printf("%*ld (%*.1f) %*.4f %*.4f %*lld\n",
Max(8, len1), (1l << (i+1)) - 2,
Max(8, len1a), (float) (1l << i) * 1000000000 / ticks_per_sec,
Max(10, len2) - 1, (double) histogram[i] * 100 / loop_count,
Max(10, len2) - 1, rprct,
Max(10, len3), histogram[i]);
}
printf("First 64 ticks --\n");
for (i = 0, rprct=0; i < 64; i++) {
rprct += (double) histogram[i] * 100 / loop_count;
if (direct_histogram[i])
printf("%*ld (%*.1f) %*.4f %*.4f %*lld\n",
Max(8, len1), (long int) i,
Max(8, len1a), (float) (i) * 1000000000 / ticks_per_sec,
Max(10, len2) - 1, (double) direct_histogram[i] * 100 / loop_count,
Max(10, len2) - 1, rprct,
Max(10, len3), direct_histogram[i]);
}
}