From 55a1c85ff3747036a1dd3d84b01c9d73fbae8765 Mon Sep 17 00:00:00 2001
From: Paul Amonson <paul.d.amonson@intel.com>
Date: Tue, 23 Jul 2024 11:23:23 -0700
Subject: [PATCH v8 2/3] Refactor: consolidate x86 ISA and OS runtime checks

Move all x86 ISA and OS runtime checks into a single file for improved
modularity and easier future maintenance.

Signed-off-by: Paul Amonson <paul.d.amonson@intel.com>
Signed-off-by: Raghuveer Devulapalli <raghuveer.devulapalli@intel.com>
---
 src/include/port/pg_bitutils.h      |   1 -
 src/include/port/pg_hw_feat_check.h |  33 ++++++
 src/port/Makefile                   |   1 +
 src/port/meson.build                |   3 +
 src/port/pg_bitutils.c              |  22 +---
 src/port/pg_crc32c_sse42_choose.c   |  21 +---
 src/port/pg_hw_feat_check.c         | 163 ++++++++++++++++++++++++++++
 src/port/pg_popcount_avx512.c       |  78 -------------
 8 files changed, 205 insertions(+), 117 deletions(-)
 create mode 100644 src/include/port/pg_hw_feat_check.h
 create mode 100644 src/port/pg_hw_feat_check.c

diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h
index 4d88478c9c..263f27930d 100644
--- a/src/include/port/pg_bitutils.h
+++ b/src/include/port/pg_bitutils.h
@@ -312,7 +312,6 @@ extern PGDLLIMPORT uint64 (*pg_popcount_masked_optimized) (const char *buf, int
  * files.
  */
 #ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
-extern bool pg_popcount_avx512_available(void);
 extern uint64 pg_popcount_avx512(const char *buf, int bytes);
 extern uint64 pg_popcount_masked_avx512(const char *buf, int bytes, bits8 mask);
 #endif
diff --git a/src/include/port/pg_hw_feat_check.h b/src/include/port/pg_hw_feat_check.h
new file mode 100644
index 0000000000..58be900b54
--- /dev/null
+++ b/src/include/port/pg_hw_feat_check.h
@@ -0,0 +1,33 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_hw_feat_check.h
+ *	  Miscellaneous functions for cheing for hardware features at runtime.
+ *
+ *
+ * Copyright (c) 2024, PostgreSQL Global Development Group
+ *
+ * src/include/port/pg_hw_feat_check.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_HW_FEAT_CHECK_H
+#define PG_HW_FEAT_CHECK_H
+
+/*
+ * Test to see if all hardware features required by SSE 4.2 crc32c (64 bit)
+ * are available.
+ */
+extern PGDLLIMPORT bool pg_crc32c_sse42_available(void);
+
+/*
+ * Test to see if all hardware features required by SSE 4.1 POPCNT (64 bit)
+ * are available.
+ */
+extern PGDLLIMPORT bool pg_popcount_available(void);
+
+/*
+ * Test to see if all hardware features required by AVX-512 POPCNT are
+ * available.
+ */
+extern PGDLLIMPORT bool pg_popcount_avx512_available(void);
+#endif							/* PG_HW_FEAT_CHECK_H */
diff --git a/src/port/Makefile b/src/port/Makefile
index 4c22431951..6088b56b71 100644
--- a/src/port/Makefile
+++ b/src/port/Makefile
@@ -45,6 +45,7 @@ OBJS = \
 	path.o \
 	pg_bitutils.o \
 	pg_popcount_avx512.o \
+	pg_hw_feat_check.o \
 	pg_strong_random.o \
 	pgcheckdir.o \
 	pgmkdirp.o \
diff --git a/src/port/meson.build b/src/port/meson.build
index c5bceed9cd..ec28590473 100644
--- a/src/port/meson.build
+++ b/src/port/meson.build
@@ -8,6 +8,9 @@ pgport_sources = [
   'path.c',
   'pg_bitutils.c',
   'pg_popcount_avx512.c',
+  'pg_crc32c_sse42_choose.c',
+  'pg_crc32c_sse42.c',
+  'pg_hw_feat_check.c',
   'pg_strong_random.c',
   'pgcheckdir.c',
   'pgmkdirp.c',
diff --git a/src/port/pg_bitutils.c b/src/port/pg_bitutils.c
index 87f56e82b8..b2823d5732 100644
--- a/src/port/pg_bitutils.c
+++ b/src/port/pg_bitutils.c
@@ -20,7 +20,7 @@
 #endif
 
 #include "port/pg_bitutils.h"
-
+#include "port/pg_hw_feat_check.h"
 
 /*
  * Array giving the position of the left-most set bit for each possible
@@ -109,7 +109,6 @@ static uint64 pg_popcount_slow(const char *buf, int bytes);
 static uint64 pg_popcount_masked_slow(const char *buf, int bytes, bits8 mask);
 
 #ifdef TRY_POPCNT_FAST
-static bool pg_popcount_available(void);
 static int	pg_popcount32_choose(uint32 word);
 static int	pg_popcount64_choose(uint64 word);
 static uint64 pg_popcount_choose(const char *buf, int bytes);
@@ -127,25 +126,6 @@ uint64		(*pg_popcount_masked_optimized) (const char *buf, int bytes, bits8 mask)
 
 #ifdef TRY_POPCNT_FAST
 
-/*
- * Return true if CPUID indicates that the POPCNT instruction is available.
- */
-static bool
-pg_popcount_available(void)
-{
-	unsigned int exx[4] = {0, 0, 0, 0};
-
-#if defined(HAVE__GET_CPUID)
-	__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
-#elif defined(HAVE__CPUID)
-	__cpuid(exx, 1);
-#else
-#error cpuid instruction not available
-#endif
-
-	return (exx[2] & (1 << 23)) != 0;	/* POPCNT */
-}
-
 /*
  * These functions get called on the first call to pg_popcount32 etc.
  * They detect whether we can use the asm implementations, and replace
diff --git a/src/port/pg_crc32c_sse42_choose.c b/src/port/pg_crc32c_sse42_choose.c
index 56d600f3a9..c659917af0 100644
--- a/src/port/pg_crc32c_sse42_choose.c
+++ b/src/port/pg_crc32c_sse42_choose.c
@@ -20,6 +20,7 @@
 
 #include "c.h"
 
+#if defined(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK)
 #ifdef HAVE__GET_CPUID
 #include <cpuid.h>
 #endif
@@ -29,22 +30,7 @@
 #endif
 
 #include "port/pg_crc32c.h"
-
-static bool
-pg_crc32c_sse42_available(void)
-{
-	unsigned int exx[4] = {0, 0, 0, 0};
-
-#if defined(HAVE__GET_CPUID)
-	__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
-#elif defined(HAVE__CPUID)
-	__cpuid(exx, 1);
-#else
-#error cpuid instruction not available
-#endif
-
-	return (exx[2] & (1 << 20)) != 0;	/* SSE 4.2 */
-}
+#include "port/pg_hw_feat_check.h"
 
 /*
  * This gets called on the first call. It replaces the function pointer
@@ -61,4 +47,5 @@ pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len)
 	return pg_comp_crc32c(crc, data, len);
 }
 
-pg_crc32c	(*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len) = pg_comp_crc32c_choose;
+pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len) = pg_comp_crc32c_choose;
+#endif
diff --git a/src/port/pg_hw_feat_check.c b/src/port/pg_hw_feat_check.c
new file mode 100644
index 0000000000..260aa60502
--- /dev/null
+++ b/src/port/pg_hw_feat_check.c
@@ -0,0 +1,163 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_hw_feat_check.c
+ *		Test for hardware features at runtime on x86_64 platforms.
+ *
+ * Copyright (c) 2024, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *		src/port/pg_hw_feat_check.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "c.h"
+
+#if defined(HAVE__GET_CPUID) || defined(HAVE__GET_CPUID_COUNT)
+#include <cpuid.h>
+#endif
+
+#include <immintrin.h>
+
+#if defined(HAVE__CPUID) || defined(HAVE__CPUIDEX)
+#include <intrin.h>
+#endif
+
+#include "port/pg_hw_feat_check.h"
+
+/* Define names for EXX registers to avoid hard to see bugs in code below. */
+typedef unsigned int exx_t;
+typedef enum
+{
+	EAX = 0,
+	EBX = 1,
+	ECX = 2,
+	EDX = 3
+} reg_name;
+
+/*
+ * Helper function.
+ * Test for a bit being set in a exx_t register.
+ */
+inline static bool is_bit_set_in_exx(exx_t* regs, reg_name ex, int bit)
+{
+	return ((regs[ex] & (1 << bit)) != 0);
+}
+
+/*
+ * x86_64 Platform CPUID check for Linux and Visual Studio platforms.
+ */
+inline static void
+pg_getcpuid(unsigned int leaf, exx_t *exx)
+{
+#if defined(HAVE__GET_CPUID)
+	__get_cpuid(leaf, &exx[0], &exx[1], &exx[2], &exx[3]);
+#elif defined(HAVE__CPUID)
+	__cpuid(exx, 1);
+#else
+#error cpuid instruction not available
+#endif
+}
+
+/*
+ * x86_64 Platform CPUIDEX check for Linux and Visual Studio platforms.
+ */
+inline static void
+pg_getcpuidex(unsigned int leaf, unsigned int subleaf, exx_t *exx)
+{
+#if defined(HAVE__GET_CPUID_COUNT)
+	__get_cpuid_count(leaf, subleaf, &exx[0], &exx[1], &exx[2], &exx[3]);
+#elif defined(HAVE__CPUIDEX)
+	__cpuidex(exx, 7, 0);
+#else
+#error cpuid instruction not available
+#endif
+}
+
+/*
+ * Check for CPU support for CPUID: osxsave
+ */
+inline static bool
+osxsave_available(void)
+{
+#if defined(HAVE_XSAVE_INTRINSICS)
+	exx_t exx[4] = {0, 0, 0, 0};
+
+	pg_getcpuid(1, exx);
+
+	return is_bit_set_in_exx(exx, ECX, 27); /* osxsave */
+#else
+	return false;
+#endif
+}
+
+/*
+ * Does XGETBV say the ZMM registers are enabled?
+ *
+ * NB: Caller is responsible for verifying that osxsave_available() returns true
+ * before calling this.
+ */
+#ifdef HAVE_XSAVE_INTRINSICS
+pg_attribute_target("xsave")
+#endif
+inline static bool
+zmm_regs_available(void)
+{
+#if defined(HAVE_XSAVE_INTRINSICS)
+	return (_xgetbv(0) & 0xe6) == 0xe6;
+#else
+	return false;
+#endif
+}
+
+/*
+ * Does CPUID say there's support for AVX-512 popcount and byte-and-word
+ * instructions?
+ */
+inline static bool
+avx512_popcnt_available(void)
+{
+	exx_t exx[4] = {0, 0, 0, 0};
+
+	pg_getcpuidex(7, 0, exx);
+
+	return is_bit_set_in_exx(exx, ECX, 14) && is_bit_set_in_exx(exx, EBX, 30);
+}
+
+/*
+ * Return true if CPUID indicates that the POPCNT instruction is available.
+ */
+bool PGDLLIMPORT pg_popcount_available(void)
+{
+	exx_t exx[4] = {0, 0, 0, 0};
+
+	pg_getcpuid(1, exx);
+
+	return is_bit_set_in_exx(exx, ECX, 23);
+ }
+
+ /*
+  * Returns true if the CPU supports the instructions required for the AVX-512
+  * pg_popcount() implementation.
+  *
+  * PA: The call to 'osxsave_available' MUST preceed the call to
+  *     'zmm_regs_available' function per NB above.
+  */
+bool PGDLLIMPORT pg_popcount_avx512_available(void)
+{
+	 return osxsave_available() &&
+			zmm_regs_available() &&
+			avx512_popcnt_available();
+}
+
+/*
+ * Does CPUID say there's support for SSE 4.2?
+ */
+bool PGDLLIMPORT pg_crc32c_sse42_available(void)
+{
+	exx_t exx[4] = {0, 0, 0, 0};
+
+	pg_getcpuid(1, exx);
+
+	return is_bit_set_in_exx(exx, ECX, 20);
+}
+
diff --git a/src/port/pg_popcount_avx512.c b/src/port/pg_popcount_avx512.c
index c8a4f2b19f..1123a1a634 100644
--- a/src/port/pg_popcount_avx512.c
+++ b/src/port/pg_popcount_avx512.c
@@ -14,16 +14,7 @@
 
 #ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
 
-#if defined(HAVE__GET_CPUID) || defined(HAVE__GET_CPUID_COUNT)
-#include <cpuid.h>
-#endif
-
 #include <immintrin.h>
-
-#if defined(HAVE__CPUID) || defined(HAVE__CPUIDEX)
-#include <intrin.h>
-#endif
-
 #include "port/pg_bitutils.h"
 
 /*
@@ -33,75 +24,6 @@
  */
 #ifdef TRY_POPCNT_FAST
 
-/*
- * Does CPUID say there's support for XSAVE instructions?
- */
-static inline bool
-xsave_available(void)
-{
-	unsigned int exx[4] = {0, 0, 0, 0};
-
-#if defined(HAVE__GET_CPUID)
-	__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
-#elif defined(HAVE__CPUID)
-	__cpuid(exx, 1);
-#else
-#error cpuid instruction not available
-#endif
-	return (exx[2] & (1 << 27)) != 0;	/* osxsave */
-}
-
-/*
- * Does XGETBV say the ZMM registers are enabled?
- *
- * NB: Caller is responsible for verifying that xsave_available() returns true
- * before calling this.
- */
-#ifdef HAVE_XSAVE_INTRINSICS
-pg_attribute_target("xsave")
-#endif
-static inline bool
-zmm_regs_available(void)
-{
-#ifdef HAVE_XSAVE_INTRINSICS
-	return (_xgetbv(0) & 0xe6) == 0xe6;
-#else
-	return false;
-#endif
-}
-
-/*
- * Does CPUID say there's support for AVX-512 popcount and byte-and-word
- * instructions?
- */
-static inline bool
-avx512_popcnt_available(void)
-{
-	unsigned int exx[4] = {0, 0, 0, 0};
-
-#if defined(HAVE__GET_CPUID_COUNT)
-	__get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
-#elif defined(HAVE__CPUIDEX)
-	__cpuidex(exx, 7, 0);
-#else
-#error cpuid instruction not available
-#endif
-	return (exx[2] & (1 << 14)) != 0 && /* avx512-vpopcntdq */
-		(exx[1] & (1 << 30)) != 0;	/* avx512-bw */
-}
-
-/*
- * Returns true if the CPU supports the instructions required for the AVX-512
- * pg_popcount() implementation.
- */
-bool
-pg_popcount_avx512_available(void)
-{
-	return xsave_available() &&
-		zmm_regs_available() &&
-		avx512_popcnt_available();
-}
-
 /*
  * pg_popcount_avx512
  *		Returns the number of 1-bits in buf
-- 
2.43.0

