From 8a863f7b31cf30bbb7c9771e6cc84063a259a597 Mon Sep 17 00:00:00 2001
From: Lukas Fittl <lukas@fittl.com>
Date: Fri, 25 Jul 2025 17:57:20 -0700
Subject: [PATCH v12 4/7] instrumentation: Streamline ticks to nanosecond
 conversion across platforms

The timing infrastructure (INSTR_* macros) measures time elapsed using
clock_gettime() on POSIX systems, which returns the time as nanoseconds,
and QueryPerformanceCounter() on Windows, which is a specialized timing
clock source that returns a tick counter that needs to be converted to
nanoseconds using the result of QueryPerformanceFrequency().

This conversion currently happens ad-hoc on Windows, e.g. when calling
INSTR_TIME_GET_NANOSEC, which calls QueryPerformanceFrequency() on every
invocation, despite the frequency being stable after program start,
incurring unnecessary overhead. It also causes a fractured implementation
where macros are defined differently between platforms.

To ease code readability, and prepare for a future change that intends
to use a ticks-to-nanosecond conversion on x86-64 for TSC use, introduce
a new pg_ticks_to_ns() function that gets called on all platforms.

This function relies on a separately initialized ticks_per_ns_scaled
value, that represents the conversion ratio. This value is initialized
from QueryPerformanceFrequency() on Windows, and set to zero on all
other platforms, where clock_gettime() already yields nanoseconds and
pg_ticks_to_ns() is compiled to directly return the original ticks.

To support this, pg_initialize_timing() is introduced, and is now
mandatory for both the backend and any frontend programs to call before
utilizing INSTR_* macros.

Author: Lukas Fittl <lukas@fittl.com>
Author: Andres Freund <andres@anarazel.de>
Author: David Geier <geidav.pg@gmail.com>
Reviewed-by:
Discussion: https://www.postgresql.org/message-id/flat/20200612232810.f46nbqkdhbutzqdg%40alap3.anarazel.de
---
 src/backend/postmaster/postmaster.c     |   5 +
 src/bin/pg_test_timing/pg_test_timing.c |   3 +
 src/bin/pgbench/pgbench.c               |   3 +
 src/bin/psql/startup.c                  |   4 +
 src/common/Makefile                     |   1 +
 src/common/instr_time.c                 |  91 +++++++++++++++
 src/common/meson.build                  |   1 +
 src/include/portability/instr_time.h    | 143 +++++++++++++++++++-----
 src/test/regress/pg_regress.c           |   2 +
 9 files changed, 223 insertions(+), 30 deletions(-)
 create mode 100644 src/common/instr_time.c

diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 3fac46c402b..6c5ba723e78 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -1945,6 +1945,11 @@ InitProcessGlobals(void)
 	MyStartTimestamp = GetCurrentTimestamp();
 	MyStartTime = timestamptz_to_time_t(MyStartTimestamp);
 
+	/*
+	 * Initialize timing infrastructure
+	 */
+	pg_initialize_timing();
+
 	/*
 	 * Set a different global seed in every process.  We want something
 	 * unpredictable, so if possible, use high-quality random bits for the
diff --git a/src/bin/pg_test_timing/pg_test_timing.c b/src/bin/pg_test_timing/pg_test_timing.c
index 264903ebbf6..1d9ee4fb588 100644
--- a/src/bin/pg_test_timing/pg_test_timing.c
+++ b/src/bin/pg_test_timing/pg_test_timing.c
@@ -43,6 +43,9 @@ main(int argc, char *argv[])
 
 	handle_args(argc, argv);
 
+	/* initialize timing infrastructure (required for INSTR_* calls) */
+	pg_initialize_timing();
+
 	loop_count = test_timing(test_duration);
 
 	output(loop_count);
diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c
index 1dae918cc09..c969afab3a5 100644
--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -6820,6 +6820,9 @@ main(int argc, char **argv)
 	int			exit_code = 0;
 	struct timeval tv;
 
+	/* initialize timing infrastructure (required for INSTR_* calls) */
+	pg_initialize_timing();
+
 	/*
 	 * Record difference between Unix time and instr_time time.  We'll use
 	 * this for logging and aggregation.
diff --git a/src/bin/psql/startup.c b/src/bin/psql/startup.c
index 9a397ec87b7..69d044d405d 100644
--- a/src/bin/psql/startup.c
+++ b/src/bin/psql/startup.c
@@ -24,6 +24,7 @@
 #include "help.h"
 #include "input.h"
 #include "mainloop.h"
+#include "portability/instr_time.h"
 #include "settings.h"
 
 /*
@@ -327,6 +328,9 @@ main(int argc, char *argv[])
 
 	PQsetNoticeProcessor(pset.db, NoticeProcessor, NULL);
 
+	/* initialize timing infrastructure (required for INSTR_* calls) */
+	pg_initialize_timing();
+
 	SyncVariables();
 
 	if (options.list_dbs)
diff --git a/src/common/Makefile b/src/common/Makefile
index 2c720caa509..1a2fbbe887f 100644
--- a/src/common/Makefile
+++ b/src/common/Makefile
@@ -59,6 +59,7 @@ OBJS_COMMON = \
 	file_perm.o \
 	file_utils.o \
 	hashfn.o \
+	instr_time.o \
 	ip.o \
 	jsonapi.o \
 	keywords.o \
diff --git a/src/common/instr_time.c b/src/common/instr_time.c
new file mode 100644
index 00000000000..48e8283d166
--- /dev/null
+++ b/src/common/instr_time.c
@@ -0,0 +1,91 @@
+/*-------------------------------------------------------------------------
+ *
+ * instr_time.c
+ *	   Non-inline parts of the portable high-precision interval timing
+ *	 implementation
+ *
+ * Portions Copyright (c) 2026, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  src/common/instr_time.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "portability/instr_time.h"
+
+/*
+ * Stores what the number of ticks needs to be multiplied with to end up
+ * with nanoseconds using integer math.
+ *
+ * On certain platforms (currently Windows) the ticks to nanoseconds conversion
+ * requires floating point math because:
+ *
+ * sec = ticks / frequency_hz
+ * ns  = ticks / frequency_hz * 1,000,000,000
+ * ns  = ticks * (1,000,000,000 / frequency_hz)
+ * ns  = ticks * (1,000,000 / frequency_khz) <-- now in kilohertz
+ *
+ * Here, 'ns' is usually a floating number. For example for a 2.5 GHz CPU
+ * the scaling factor becomes 1,000,000 / 2,500,000 = 0.4.
+ *
+ * To be able to use integer math we work around the lack of precision. We
+ * first scale the integer up (left shift by TICKS_TO_NS_SHIFT) and after the
+ * multiplication by the number of ticks in pg_ticks_to_ns() we shift right by
+ * the same amount. We utilize unsigned integers even though ticks are stored
+ * as a signed value to encourage compilers to generate better assembly.
+ *
+ * We remember the maximum number of ticks that can be multiplied by the scale
+ * factor without overflowing so we can check via a * b > max <=> a > max / b.
+ *
+ * On all other platforms we are using clock_gettime(), which uses nanoseconds
+ * as ticks. Hence, we set the multiplier to zero, which causes pg_ticks_to_ns
+ * to return the original value.
+ */
+uint64		ticks_per_ns_scaled = 0;
+uint64		max_ticks_no_overflow = 0;
+bool		timing_initialized = false;
+
+static void set_ticks_per_ns(void);
+
+void
+pg_initialize_timing(void)
+{
+	if (timing_initialized)
+		return;
+
+	set_ticks_per_ns();
+	timing_initialized = true;
+}
+
+#ifndef WIN32
+
+static void
+set_ticks_per_ns(void)
+{
+	ticks_per_ns_scaled = 0;
+	max_ticks_no_overflow = 0;
+}
+
+#else							/* WIN32 */
+
+/* GetTimerFrequency returns counts per second */
+static inline double
+GetTimerFrequency(void)
+{
+	LARGE_INTEGER f;
+
+	QueryPerformanceFrequency(&f);
+	return (double) f.QuadPart;
+}
+
+static void
+set_ticks_per_ns(void)
+{
+	ticks_per_ns_scaled = (NS_PER_S << TICKS_TO_NS_SHIFT) / GetTimerFrequency();
+	max_ticks_no_overflow = PG_INT64_MAX / ticks_per_ns_scaled;
+}
+
+#endif							/* WIN32 */
diff --git a/src/common/meson.build b/src/common/meson.build
index 4f9b8b8263d..9bd55cda95b 100644
--- a/src/common/meson.build
+++ b/src/common/meson.build
@@ -13,6 +13,7 @@ common_sources = files(
   'file_perm.c',
   'file_utils.c',
   'hashfn.c',
+  'instr_time.c',
   'ip.c',
   'jsonapi.c',
   'keywords.c',
diff --git a/src/include/portability/instr_time.h b/src/include/portability/instr_time.h
index 0a1fff7c487..e1584695520 100644
--- a/src/include/portability/instr_time.h
+++ b/src/include/portability/instr_time.h
@@ -80,11 +80,33 @@ typedef struct instr_time
 #define NS_PER_MS	INT64CONST(1000000)
 #define NS_PER_US	INT64CONST(1000)
 
+/* Shift amount for fixed-point ticks-to-nanoseconds conversion. */
+#define TICKS_TO_NS_SHIFT 14
 
-#ifndef WIN32
+#ifdef WIN32
+#define PG_INSTR_TICKS_TO_NS 1
+#else
+#define PG_INSTR_TICKS_TO_NS 0
+#endif
+
+/*
+ * Variables used to translate ticks to nanoseconds, initialized by
+ * pg_initialize_timing.
+ */
+extern PGDLLIMPORT uint64 ticks_per_ns_scaled;
+extern PGDLLIMPORT uint64 max_ticks_no_overflow;
+extern PGDLLIMPORT bool timing_initialized;
+
+/*
+ * Initialize timing infrastructure
+ *
+ * This must be called once before using INSTR_TIME_SET_CURRENT, _GET_* or _ADD_NANOSEC.
+ */
+extern void pg_initialize_timing(void);
 
+#ifndef WIN32
 
-/* Use clock_gettime() */
+/* On POSIX, use clock_gettime() for system clock source */
 
 #include <time.h>
 
@@ -108,67 +130,119 @@ typedef struct instr_time
 #define PG_INSTR_CLOCK	CLOCK_REALTIME
 #endif
 
-/* helper for INSTR_TIME_SET_CURRENT */
 static inline instr_time
-pg_clock_gettime_ns(void)
+pg_get_ticks(void)
 {
 	instr_time	now;
 	struct timespec tmp;
 
+	Assert(timing_initialized);
+
 	clock_gettime(PG_INSTR_CLOCK, &tmp);
 	now.ticks = tmp.tv_sec * NS_PER_S + tmp.tv_nsec;
 
 	return now;
 }
 
-#define INSTR_TIME_SET_CURRENT(t) \
-	((t) = pg_clock_gettime_ns())
-
-#define INSTR_TIME_GET_NANOSEC(t) \
-	((int64) (t).ticks)
-
-#define INSTR_TIME_ADD_NANOSEC(t, n) \
-	((t).ticks += (n))
-
-
 #else							/* WIN32 */
 
+/* On Windows, use QueryPerformanceCounter() for system clock source */
 
-/* Use QueryPerformanceCounter() */
-
-/* helper for INSTR_TIME_SET_CURRENT */
 static inline instr_time
-pg_query_performance_counter(void)
+pg_get_ticks(void)
 {
 	instr_time	now;
 	LARGE_INTEGER tmp;
 
+	Assert(timing_initialized);
+
 	QueryPerformanceCounter(&tmp);
 	now.ticks = tmp.QuadPart;
 
 	return now;
 }
 
-static inline double
-GetTimerFrequency(void)
+#endif							/* WIN32 */
+
+static inline int64
+pg_ticks_to_ns(int64 ticks)
 {
-	LARGE_INTEGER f;
+#if PG_INSTR_TICKS_TO_NS
+	int64		ns = 0;
+
+	Assert(timing_initialized);
+
+	/*
+	 * Avoid doing work if we don't use scaled ticks, e.g. system clock on
+	 * Unix
+	 */
+	if (ticks_per_ns_scaled == 0)
+		return ticks;
+
+	/*
+	 * Would multiplication overflow? If so perform computation in two parts.
+	 */
+	if (unlikely(ticks > (int64) max_ticks_no_overflow))
+	{
+		/*
+		 * To avoid overflow, first scale total ticks down by the fixed
+		 * factor, and *afterwards* multiply them by the frequency-based scale
+		 * factor.
+		 *
+		 * The remaining ticks can follow the regular formula, since they
+		 * won't overflow.
+		 */
+		int64		count = ticks >> TICKS_TO_NS_SHIFT;
+
+		ns = count * ticks_per_ns_scaled;
+		ticks -= (count << TICKS_TO_NS_SHIFT);
+	}
+
+	ns += (ticks * ticks_per_ns_scaled) >> TICKS_TO_NS_SHIFT;
+
+	return ns;
+#else
+	Assert(timing_initialized);
 
-	QueryPerformanceFrequency(&f);
-	return (double) f.QuadPart;
+	return ticks;
+#endif							/* PG_INSTR_TICKS_TO_NS */
 }
 
-#define INSTR_TIME_SET_CURRENT(t) \
-	((t) = pg_query_performance_counter())
+static inline int64
+pg_ns_to_ticks(int64 ns)
+{
+#if PG_INSTR_TICKS_TO_NS
+	int64		ticks = 0;
 
-#define INSTR_TIME_GET_NANOSEC(t) \
-	((int64) ((t).ticks * ((double) NS_PER_S / GetTimerFrequency())))
+	Assert(timing_initialized);
 
-#define INSTR_TIME_ADD_NANOSEC(t, n) \
-	((t).ticks += ((n) / ((double) NS_PER_S / GetTimerFrequency())))
+	/*
+	 * If ticks_per_ns_scaled is zero, ticks are already in nanoseconds (e.g.
+	 * system clock on Unix).
+	 */
+	if (ticks_per_ns_scaled == 0)
+		return ns;
 
-#endif							/* WIN32 */
+	/*
+	 * The reverse of pg_ticks_to_ns to avoid a similar overflow problem.
+	 */
+	if (unlikely(ns > (INT64_MAX >> TICKS_TO_NS_SHIFT)))
+	{
+		int64		count = ns / ticks_per_ns_scaled;
+
+		ticks = count << TICKS_TO_NS_SHIFT;
+		ns -= count * ticks_per_ns_scaled;
+	}
 
+	ticks += (ns << TICKS_TO_NS_SHIFT) / ticks_per_ns_scaled;
+
+	return ticks;
+#else
+	Assert(timing_initialized);
+
+	return ns;
+#endif							/* PG_INSTR_TICKS_TO_NS */
+}
 
 /*
  * Common macros
@@ -178,10 +252,16 @@ GetTimerFrequency(void)
 
 #define INSTR_TIME_SET_ZERO(t)	((t).ticks = 0)
 
+#define INSTR_TIME_SET_CURRENT(t) \
+	((t) = pg_get_ticks())
+
 
 #define INSTR_TIME_ADD(x,y) \
 	((x).ticks += (y).ticks)
 
+#define INSTR_TIME_ADD_NANOSEC(t, n) \
+	((t).ticks += pg_ns_to_ticks(n))
+
 #define INSTR_TIME_SUBTRACT(x,y) \
 	((x).ticks -= (y).ticks)
 
@@ -191,6 +271,9 @@ GetTimerFrequency(void)
 #define INSTR_TIME_GT(x,y) \
 	((x).ticks > (y).ticks)
 
+#define INSTR_TIME_GET_NANOSEC(t) \
+	(pg_ticks_to_ns((t).ticks))
+
 #define INSTR_TIME_GET_DOUBLE(t) \
 	((double) INSTR_TIME_GET_NANOSEC(t) / NS_PER_S)
 
diff --git a/src/test/regress/pg_regress.c b/src/test/regress/pg_regress.c
index b8b6a911987..5cd1c9195d4 100644
--- a/src/test/regress/pg_regress.c
+++ b/src/test/regress/pg_regress.c
@@ -2110,6 +2110,8 @@ regression_main(int argc, char *argv[],
 	progname = get_progname(argv[0]);
 	set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_regress"));
 
+	pg_initialize_timing();
+
 	get_restricted_token();
 
 	atexit(stop_postmaster);
-- 
2.47.1

