On Thu, Apr 18, 2024 at 08:24:03PM +0000, Devulapalli, Raghuveer wrote:
>> This seems to contradict the note about doing step 3 at any point, and
>> given step 1 is the OSXSAVE check, I'm not following what this means,
>> anyway.
> 
> It is recommended that you run the xgetbv code before you check for cpu
> features avx512-popcnt and avx512-bw. The way it is written now is the
> opposite order. I would also recommend splitting the cpuid feature check
> for avx512popcnt/avx512bw and xgetbv section into separate functions to
> make them modular. Something like:
> 
> static inline
> int check_os_avx512_support(void)
> {
>     // (1) run cpuid leaf 1 to check for xgetbv instruction support:
>         unsigned int exx[4] = {0, 0, 0, 0};
>         __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
>         if ((exx[2] & (1 << 27)) == 0)  /* xsave */
>                 return false;
> 
>         /* Does XGETBV say the ZMM/YMM/XMM registers are enabled? */
>         return (_xgetbv(0) & 0xe0) == 0xe0;
> }
> 
>> I'm also wondering if we need to check that (_xgetbv(0) & 0xe6) == 0xe6
>> instead of just (_xgetbv(0) & 0xe0) != 0, as the status of the lower
>> half of some of the ZMM registers is stored in the SSE and AVX state
>> [0].  I don't know how likely it is that 0xe0 would succeed but 0xe6
>> wouldn't, but we might as well make it correct.
> 
> This is correct. It needs to check all the 3 bits (XMM/YMM and ZMM). The
> way it is written is now is in-correct. 

Thanks for the feedback.  I've attached an updated patch.

-- 
Nathan Bossart
Amazon Web Services: https://aws.amazon.com
>From d20b19804a17d9f6eab1d40de7e9fb10488ac6b0 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nat...@postgresql.org>
Date: Thu, 18 Apr 2024 15:57:56 -0500
Subject: [PATCH v2 1/1] osxsave

---
 src/port/pg_popcount_avx512_choose.c | 89 +++++++++++++++++++---------
 1 file changed, 60 insertions(+), 29 deletions(-)

diff --git a/src/port/pg_popcount_avx512_choose.c b/src/port/pg_popcount_avx512_choose.c
index ae3fa3d306..009f94909a 100644
--- a/src/port/pg_popcount_avx512_choose.c
+++ b/src/port/pg_popcount_avx512_choose.c
@@ -34,27 +34,47 @@
 #ifdef TRY_POPCNT_FAST
 
 /*
- * Returns true if the CPU supports the instructions required for the AVX-512
- * pg_popcount() implementation.
+ * Does CPUID say there's support for XSAVE instructions?
  */
-bool
-pg_popcount_avx512_available(void)
+static inline bool
+xsave_available(void)
 {
 	unsigned int exx[4] = {0, 0, 0, 0};
 
-	/* Does CPUID say there's support for AVX-512 popcount instructions? */
-#if defined(HAVE__GET_CPUID_COUNT)
-	__get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
-#elif defined(HAVE__CPUIDEX)
-	__cpuidex(exx, 7, 0);
+#if defined(HAVE__GET_CPUID)
+	__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
+#elif defined(HAVE__CPUID)
+	__cpuid(exx, 1);
 #else
 #error cpuid instruction not available
 #endif
-	if ((exx[2] & (1 << 14)) == 0)	/* avx512-vpopcntdq */
-		return false;
+	return (exx[2] & (1 << 27)) != 0;	/* osxsave */
+}
+
+/*
+ * Does XGETBV say the ZMM registers are enabled?
+ *
+ * NB: Caller is responsible for verifying that xsave_available() returns true
+ * before calling this.
+ */
+static inline bool
+zmm_regs_available(void)
+{
+#ifdef HAVE_XSAVE_INTRINSICS
+	return (_xgetbv(0) & 0xe6) != 0xe6;
+#else
+	return false;
+#endif
+}
+
+/*
+ * Does CPUID say there's support for AVX-512 popcount instructions?
+ */
+static inline bool
+avx512_popcnt_available(void)
+{
+	unsigned int exx[4] = {0, 0, 0, 0};
 
-	/* Does CPUID say there's support for AVX-512 byte and word instructions? */
-	memset(exx, 0, sizeof(exx));
 #if defined(HAVE__GET_CPUID_COUNT)
 	__get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
 #elif defined(HAVE__CPUIDEX)
@@ -62,27 +82,38 @@ pg_popcount_avx512_available(void)
 #else
 #error cpuid instruction not available
 #endif
-	if ((exx[1] & (1 << 30)) == 0)	/* avx512-bw */
-		return false;
+	return (exx[2] & (1 << 14)) != 0;	/* avx512-vpopcntdq */
+}
 
-	/* Does CPUID say there's support for XSAVE instructions? */
-	memset(exx, 0, sizeof(exx));
-#if defined(HAVE__GET_CPUID)
-	__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
-#elif defined(HAVE__CPUID)
-	__cpuid(exx, 1);
+/*
+ * Does CPUID say there's support for AVX-512 byte and word instructions?
+ */
+static inline bool
+avx512_bw_available(void)
+{
+	unsigned int exx[4] = {0, 0, 0, 0};
+
+#if defined(HAVE__GET_CPUID_COUNT)
+	__get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
+#elif defined(HAVE__CPUIDEX)
+	__cpuidex(exx, 7, 0);
 #else
 #error cpuid instruction not available
 #endif
-	if ((exx[2] & (1 << 26)) == 0)	/* xsave */
-		return false;
+	return (exx[1] & (1 << 30)) != 0;	/* avx512-bw */
+}
 
-	/* Does XGETBV say the ZMM registers are enabled? */
-#ifdef HAVE_XSAVE_INTRINSICS
-	return (_xgetbv(0) & 0xe0) != 0;
-#else
-	return false;
-#endif
+/*
+ * Returns true if the CPU supports the instructions required for the AVX-512
+ * pg_popcount() implementation.
+ */
+bool
+pg_popcount_avx512_available(void)
+{
+	return xsave_available() &&
+		zmm_regs_available() &&
+		avx512_popcnt_available() &&
+		avx512_bw_available();
 }
 
 #endif							/* TRY_POPCNT_FAST */
-- 
2.25.1

Reply via email to