On Wed, Feb 25, 2026 at 2:59 AM Tom Lane <[email protected]> wrote:
> It appears that if you want to build pg_cpu_x86.o unconditionally,
> you need to make it more proof against the cases it wasn't getting
> built in before.

Thanks, I must have stopped watching the buildfarm too early. I've
pushed a fix which will get undone as part of v6-0002.

On Wed, Feb 25, 2026 at 2:57 AM Zsolt Parragi <[email protected]> wrote:
>
> 2 and 3 looks good too, I only found two more typos:
>
>
> + return pg_comp_crc32c(crc, data, len);
> +};
>
> That semicolon is not needed
>
>
> And in the commit message:
>
> "it has been intialized and if"
>
> That should be initialized

Also fixed, thanks.

-- 
John Naylor
Amazon Web Services
From 7de238c56593850e05351618ea730c4668773cc0 Mon Sep 17 00:00:00 2001
From: John Naylor <[email protected]>
Date: Wed, 25 Feb 2026 08:03:45 +0700
Subject: [PATCH v6 2/3] Centralize detection of x86 CPU features

We now maintain an array of booleans that indicate which features were
detected at runtime. When code wants to check for a given feature, we
first check whether the array has been initialized and, if not, call a
single function that detects all features at once.

Move all x86 feature detection to pg_cpu_x86.c, and move the CRC
function choosing logic to the file where the hardware-specific
functions are defined, consistent with more recent hardware-specific
files in src/port.

Reviewed-by: Zsolt Parragi <[email protected]>
Discussion: https://postgr.es/m/CANWCAZbgEUFw7LuYSVeJ=tj98r5hoob1ffeqk3alvbw5ru5...@mail.gmail.com
---
 src/include/port/pg_cpu.h        | 50 ++++++++++++++++++
 src/port/pg_cpu_x86.c            | 65 +++++++++--------------
 src/port/pg_crc32c_sse42.c       | 32 +++++++++++
 src/port/pg_popcount_x86.c       | 91 ++------------------------------
 src/tools/pgindent/typedefs.list |  1 +
 5 files changed, 112 insertions(+), 127 deletions(-)
 create mode 100644 src/include/port/pg_cpu.h

diff --git a/src/include/port/pg_cpu.h b/src/include/port/pg_cpu.h
new file mode 100644
index 00000000000..b93b828d3ac
--- /dev/null
+++ b/src/include/port/pg_cpu.h
@@ -0,0 +1,50 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_cpu.h
+ *	  Runtime CPU feature detection
+ *
+ * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/port/pg_cpu.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_CPU_H
+#define PG_CPU_H
+
+#if defined(USE_SSE2) || defined(__i386__)
+
+typedef enum X86FeatureId
+{
+	/* Have we run feature detection? */
+	INIT_PG_X86,
+
+	/* scalar registers and 128-bit XMM registers */
+	PG_SSE4_2,
+	PG_POPCNT,
+
+	/* 512-bit ZMM registers */
+	PG_AVX512_BW,
+	PG_AVX512_VL,
+	PG_AVX512_VPCLMULQDQ,
+	PG_AVX512_VPOPCNTDQ,
+} X86FeatureId;
+#define X86FeaturesSize (PG_AVX512_VPOPCNTDQ + 1)
+
+extern PGDLLIMPORT bool X86Features[];
+
+extern void set_x86_features(void);
+
+static inline bool
+x86_feature_available(X86FeatureId feature)
+{
+	if (X86Features[INIT_PG_X86] == false)
+		set_x86_features();
+
+	return X86Features[feature];
+}
+
+#endif							/* defined(USE_SSE2) || defined(__i386__) */
+
+#endif							/* PG_CPU_H */
diff --git a/src/port/pg_cpu_x86.c b/src/port/pg_cpu_x86.c
index 0c292c0223a..88863f9762c 100644
--- a/src/port/pg_cpu_x86.c
+++ b/src/port/pg_cpu_x86.c
@@ -1,12 +1,7 @@
 /*-------------------------------------------------------------------------
  *
  * pg_cpu_x86.c
- *	  Choose between Intel SSE 4.2 and software CRC-32C implementation.
- *
- * On first call, checks if the CPU we're running on supports Intel SSE
- * 4.2. If it does, use the special SSE instructions for CRC-32C
- * computation. Otherwise, fall back to the pure software implementation
- * (slicing-by-8).
+ *	  Runtime CPU feature detection for x86
  *
  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
@@ -34,9 +29,11 @@
 #include <immintrin.h>
 #endif
 
-#include "port/pg_crc32c.h"
+#include "port/pg_cpu.h"
+
 
-#ifndef USE_SLICING_BY_8_CRC32C
+/* array indexed by enum X86FeatureId */
+bool		X86Features[X86FeaturesSize] = {0};
 
 /*
  * Does XGETBV say the ZMM registers are enabled?
@@ -58,22 +55,13 @@ zmm_regs_available(void)
 }
 
 /*
- * This gets called on the first call. It replaces the function pointer
- * so that subsequent calls are routed directly to the chosen implementation.
+ * Parse the CPU ID info for runtime checks.
  */
-static pg_crc32c
-pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len)
+void
+set_x86_features(void)
 {
 	unsigned int exx[4] = {0, 0, 0, 0};
 
-	/*
-	 * Set fallback. We must guard since slicing-by-8 is not visible
-	 * everywhere.
-	 */
-#ifdef USE_SSE42_CRC32C_WITH_RUNTIME_CHECK
-	pg_comp_crc32c = pg_comp_crc32c_sb8;
-#endif
-
 #if defined(HAVE__GET_CPUID)
 	__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
 #elif defined(HAVE__CPUID)
@@ -82,36 +70,33 @@ pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len)
 #error cpuid instruction not available
 #endif
 
-	if ((exx[2] & (1 << 20)) != 0)	/* SSE 4.2 */
-	{
-		pg_comp_crc32c = pg_comp_crc32c_sse42;
+	X86Features[PG_SSE4_2] = exx[2] >> 20 & 1;
+	X86Features[PG_POPCNT] = exx[2] >> 23 & 1;
 
-		if (exx[2] & (1 << 27) &&	/* OSXSAVE */
-			zmm_regs_available())
-		{
-			/* second cpuid call on leaf 7 to check extended AVX-512 support */
+	/* All these features depend on OSXSAVE */
+	if (exx[2] & (1 << 27))
+	{
+		/* second cpuid call on leaf 7 to check extended AVX-512 support */
 
-			memset(exx, 0, 4 * sizeof(exx[0]));
+		memset(exx, 0, 4 * sizeof(exx[0]));
 
 #if defined(HAVE__GET_CPUID_COUNT)
-			__get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
+		__get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
 #elif defined(HAVE__CPUIDEX)
-			__cpuidex(exx, 7, 0);
+		__cpuidex(exx, 7, 0);
 #endif
 
-#ifdef USE_AVX512_CRC32C_WITH_RUNTIME_CHECK
-			if (exx[2] & (1 << 10) &&	/* VPCLMULQDQ */
-				exx[1] & (1 << 31)) /* AVX512-VL */
-				pg_comp_crc32c = pg_comp_crc32c_avx512;
-#endif
+		if (zmm_regs_available())
+		{
+			X86Features[PG_AVX512_BW] = exx[1] >> 30 & 1;
+			X86Features[PG_AVX512_VL] = exx[1] >> 31 & 1;
+
+			X86Features[PG_AVX512_VPCLMULQDQ] = exx[2] >> 10 & 1;
+			X86Features[PG_AVX512_VPOPCNTDQ] = exx[2] >> 14 & 1;
 		}
 	}
 
-	return pg_comp_crc32c(crc, data, len);
+	X86Features[INIT_PG_X86] = true;
 }
 
-pg_crc32c	(*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len) = pg_comp_crc32c_choose;
-
-#endif
-
 #endif							/* defined(USE_SSE2) || defined(__i386__) */
diff --git a/src/port/pg_crc32c_sse42.c b/src/port/pg_crc32c_sse42.c
index c1279d31fbd..b8e77faf4d9 100644
--- a/src/port/pg_crc32c_sse42.c
+++ b/src/port/pg_crc32c_sse42.c
@@ -19,8 +19,11 @@
 #include <immintrin.h>
 #endif
 
+#include "port/pg_cpu.h"
 #include "port/pg_crc32c.h"
 
+static pg_crc32c pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len);
+
 pg_attribute_no_sanitize_alignment()
 pg_attribute_target("sse4.2")
 pg_crc32c
@@ -158,4 +161,33 @@ pg_comp_crc32c_avx512(pg_crc32c crc, const void *data, size_t len)
 	return pg_comp_crc32c_sse42(crc0, buf, len);
 }
 
+#endif							/* USE_AVX512_CRC32C_WITH_RUNTIME_CHECK */
+
+/*
+ * This gets called on the first call. It replaces the function pointer
+ * so that subsequent calls are routed directly to the chosen implementation.
+ */
+static pg_crc32c
+pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len)
+{
+	/*
+	 * Set fallback. We must guard since slicing-by-8 is not visible
+	 * everywhere.
+	 */
+#ifdef USE_SSE42_CRC32C_WITH_RUNTIME_CHECK
+	pg_comp_crc32c = pg_comp_crc32c_sb8;
+#endif
+
+	if (x86_feature_available(PG_SSE4_2))
+		pg_comp_crc32c = pg_comp_crc32c_sse42;
+
+#ifdef USE_AVX512_CRC32C_WITH_RUNTIME_CHECK
+	if (x86_feature_available(PG_AVX512_VL) &&
+		x86_feature_available(PG_AVX512_VPCLMULQDQ))
+		pg_comp_crc32c = pg_comp_crc32c_avx512;
 #endif
+
+	return pg_comp_crc32c(crc, data, len);
+}
+
+pg_crc32c	(*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len) = pg_comp_crc32c_choose;
diff --git a/src/port/pg_popcount_x86.c b/src/port/pg_popcount_x86.c
index 6bce089432f..a99613f1818 100644
--- a/src/port/pg_popcount_x86.c
+++ b/src/port/pg_popcount_x86.c
@@ -14,19 +14,12 @@
 
 #ifdef HAVE_X86_64_POPCNTQ
 
-#if defined(HAVE__GET_CPUID) || defined(HAVE__GET_CPUID_COUNT)
-#include <cpuid.h>
-#endif
-
 #ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
 #include <immintrin.h>
 #endif
 
-#if defined(HAVE__CPUID) || defined(HAVE__CPUIDEX)
-#include <intrin.h>
-#endif
-
 #include "port/pg_bitutils.h"
+#include "port/pg_cpu.h"
 
 /*
  * The SSE4.2 versions are built regardless of whether we are building the
@@ -58,84 +51,9 @@ static uint64 pg_popcount_masked_choose(const char *buf, int bytes, bits8 mask);
 uint64		(*pg_popcount_optimized) (const char *buf, int bytes) = pg_popcount_choose;
 uint64		(*pg_popcount_masked_optimized) (const char *buf, int bytes, bits8 mask) = pg_popcount_masked_choose;
 
-/*
- * Return true if CPUID indicates that the POPCNT instruction is available.
- */
-static bool
-pg_popcount_sse42_available(void)
-{
-	unsigned int exx[4] = {0, 0, 0, 0};
-
-#if defined(HAVE__GET_CPUID)
-	__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
-#elif defined(HAVE__CPUID)
-	__cpuid(exx, 1);
-#else
-#error cpuid instruction not available
-#endif
-
-	return (exx[2] & (1 << 23)) != 0;	/* POPCNT */
-}
 
 #ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
 
-/*
- * Does CPUID say there's support for XSAVE instructions?
- */
-static inline bool
-xsave_available(void)
-{
-	unsigned int exx[4] = {0, 0, 0, 0};
-
-#if defined(HAVE__GET_CPUID)
-	__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
-#elif defined(HAVE__CPUID)
-	__cpuid(exx, 1);
-#else
-#error cpuid instruction not available
-#endif
-	return (exx[2] & (1 << 27)) != 0;	/* osxsave */
-}
-
-/*
- * Does XGETBV say the ZMM registers are enabled?
- *
- * NB: Caller is responsible for verifying that xsave_available() returns true
- * before calling this.
- */
-#ifdef HAVE_XSAVE_INTRINSICS
-pg_attribute_target("xsave")
-#endif
-static inline bool
-zmm_regs_available(void)
-{
-#ifdef HAVE_XSAVE_INTRINSICS
-	return (_xgetbv(0) & 0xe6) == 0xe6;
-#else
-	return false;
-#endif
-}
-
-/*
- * Does CPUID say there's support for AVX-512 popcount and byte-and-word
- * instructions?
- */
-static inline bool
-avx512_popcnt_available(void)
-{
-	unsigned int exx[4] = {0, 0, 0, 0};
-
-#if defined(HAVE__GET_CPUID_COUNT)
-	__get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
-#elif defined(HAVE__CPUIDEX)
-	__cpuidex(exx, 7, 0);
-#else
-#error cpuid instruction not available
-#endif
-	return (exx[2] & (1 << 14)) != 0 && /* avx512-vpopcntdq */
-		(exx[1] & (1 << 30)) != 0;	/* avx512-bw */
-}
-
 /*
  * Returns true if the CPU supports the instructions required for the AVX-512
  * pg_popcount() implementation.
@@ -143,9 +61,8 @@ avx512_popcnt_available(void)
 static bool
 pg_popcount_avx512_available(void)
 {
-	return xsave_available() &&
-		zmm_regs_available() &&
-		avx512_popcnt_available();
+	return x86_feature_available(PG_AVX512_BW) &&
+		x86_feature_available(PG_AVX512_VPOPCNTDQ);
 }
 
 #endif							/* USE_AVX512_POPCNT_WITH_RUNTIME_CHECK */
@@ -159,7 +76,7 @@ pg_popcount_avx512_available(void)
 static inline void
 choose_popcount_functions(void)
 {
-	if (pg_popcount_sse42_available())
+	if (x86_feature_available(PG_POPCNT))
 	{
 		pg_popcount_optimized = pg_popcount_sse42;
 		pg_popcount_masked_optimized = pg_popcount_masked_sse42;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 241945734ec..041b99976c6 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3394,6 +3394,7 @@ X509_NAME
 X509_NAME_ENTRY
 X509_STORE
 X509_STORE_CTX
+X86FeatureId
 XLTW_Oper
 XLogCtlData
 XLogCtlInsert
-- 
2.53.0

From 2860144f2baed43c32221fec328d27fbe1a01e25 Mon Sep 17 00:00:00 2001
From: John Naylor <[email protected]>
Date: Mon, 23 Feb 2026 21:17:49 +0700
Subject: [PATCH v6 3/3] Refactor detection of x86 ZMM registers

- Call _xgetbv within set_x86_features rather than in a
  separate function
- Use symbols for XCR mask bits rather than a magic constant

A future commit will build on this to detect YMM registers without
code duplication.

Reviewed-by: Zsolt Parragi <[email protected]>
Discussion: https://postgr.es/m/CANWCAZbgEUFw7LuYSVeJ=tj98r5hoob1ffeqk3alvbw5ru5...@mail.gmail.com
---
 src/port/pg_cpu_x86.c | 40 ++++++++++++++++++++++------------------
 1 file changed, 22 insertions(+), 18 deletions(-)

diff --git a/src/port/pg_cpu_x86.c b/src/port/pg_cpu_x86.c
index 88863f9762c..b0e0048f561 100644
--- a/src/port/pg_cpu_x86.c
+++ b/src/port/pg_cpu_x86.c
@@ -31,32 +31,29 @@
 
 #include "port/pg_cpu.h"
 
+/* XSAVE state component bits that we need */
+#define XMM			(1<<1)
+#define YMM			(1<<2)
+#define OPMASK		(1<<5)
+#define ZMM0_15		(1<<6)
+#define ZMM16_31	(1<<7)
+
 
 /* array indexed by enum X86FeatureId */
 bool		X86Features[X86FeaturesSize] = {0};
 
-/*
- * Does XGETBV say the ZMM registers are enabled?
- *
- * NB: Caller is responsible for verifying that osxsave is available
- * before calling this.
- */
-#ifdef HAVE_XSAVE_INTRINSICS
-pg_attribute_target("xsave")
-#endif
 static bool
-zmm_regs_available(void)
+mask_available(uint32 value, uint32 mask)
 {
-#ifdef HAVE_XSAVE_INTRINSICS
-	return (_xgetbv(0) & 0xe6) == 0xe6;
-#else
-	return false;
-#endif
+	return (value & mask) == mask;
 }
 
 /*
  * Parse the CPU ID info for runtime checks.
  */
+#ifdef HAVE_XSAVE_INTRINSICS
+pg_attribute_target("xsave")
+#endif
 void
 set_x86_features(void)
 {
@@ -76,17 +73,24 @@ set_x86_features(void)
 	/* All these features depend on OSXSAVE */
 	if (exx[2] & (1 << 27))
 	{
-		/* second cpuid call on leaf 7 to check extended AVX-512 support */
+		uint32		xcr0_val = 0;
 
+		/* second cpuid call on leaf 7 to check extended AVX-512 support */
 		memset(exx, 0, 4 * sizeof(exx[0]));
-
 #if defined(HAVE__GET_CPUID_COUNT)
 		__get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
 #elif defined(HAVE__CPUIDEX)
 		__cpuidex(exx, 7, 0);
 #endif
 
-		if (zmm_regs_available())
+#ifdef HAVE_XSAVE_INTRINSICS
+		/* get value of Extended Control Register */
+		xcr0_val = _xgetbv(0);
+#endif
+
+		/* Are ZMM registers enabled? */
+		if (mask_available(xcr0_val, XMM | YMM |
+						   OPMASK | ZMM0_15 | ZMM16_31))
 		{
 			X86Features[PG_AVX512_BW] = exx[1] >> 30 & 1;
 			X86Features[PG_AVX512_VL] = exx[1] >> 31 & 1;
-- 
2.53.0

Reply via email to